Atlas - SDL_audiotypecvt.c

Home / ext / SDL / src / audio Lines: 1 | Size: 37260 bytes [Download] [Show on GitHub] [Search similar files] [Raw] [Raw (proxy)]
[FILE BEGIN]
1/* 2 Simple DirectMedia Layer 3 Copyright (C) 1997-2025 Sam Lantinga <[email protected]> 4 5 This software is provided 'as-is', without any express or implied 6 warranty. In no event will the authors be held liable for any damages 7 arising from the use of this software. 8 9 Permission is granted to anyone to use this software for any purpose, 10 including commercial applications, and to alter it and redistribute it 11 freely, subject to the following restrictions: 12 13 1. The origin of this software must not be misrepresented; you must not 14 claim that you wrote the original software. If you use this software 15 in a product, an acknowledgment in the product documentation would be 16 appreciated but is not required. 17 2. Altered source versions must be plainly marked as such, and must not be 18 misrepresented as being the original software. 19 3. This notice may not be removed or altered from any source distribution. 20*/ 21#include "SDL_internal.h" 22 23#include "SDL_sysaudio.h" 24 25#ifdef SDL_NEON_INTRINSICS 26#include <fenv.h> 27#endif 28 29#define DIVBY2147483648 0.0000000004656612873077392578125f // 0x1p-31f 30 31// start fallback scalar converters 32 33// This code requires that floats are in the IEEE-754 binary32 format 34SDL_COMPILE_TIME_ASSERT(float_bits, sizeof(float) == sizeof(Uint32)); 35 36union float_bits { 37 Uint32 u32; 38 float f32; 39}; 40 41static void SDL_Convert_S8_to_F32_Scalar(float *dst, const Sint8 *src, int num_samples) 42{ 43 int i; 44 45 LOG_DEBUG_AUDIO_CONVERT("S8", "F32"); 46 47 for (i = num_samples - 1; i >= 0; --i) { 48 /* 1) Construct a float in the range [65536.0, 65538.0) 49 * 2) Shift the float range to [-1.0, 1.0) */ 50 union float_bits x; 51 x.u32 = (Uint8)src[i] ^ 0x47800080u; 52 dst[i] = x.f32 - 65537.0f; 53 } 54} 55 56static void SDL_Convert_U8_to_F32_Scalar(float *dst, const Uint8 *src, int num_samples) 57{ 58 int i; 59 60 LOG_DEBUG_AUDIO_CONVERT("U8", "F32"); 61 62 for (i = num_samples - 1; i >= 0; --i) { 63 /* 1) 
Construct a float in the range [65536.0, 65538.0) 64 * 2) Shift the float range to [-1.0, 1.0) */ 65 union float_bits x; 66 x.u32 = src[i] ^ 0x47800000u; 67 dst[i] = x.f32 - 65537.0f; 68 } 69} 70 71static void SDL_Convert_S16_to_F32_Scalar(float *dst, const Sint16 *src, int num_samples) 72{ 73 int i; 74 75 LOG_DEBUG_AUDIO_CONVERT("S16", "F32"); 76 77 for (i = num_samples - 1; i >= 0; --i) { 78 /* 1) Construct a float in the range [256.0, 258.0) 79 * 2) Shift the float range to [-1.0, 1.0) */ 80 union float_bits x; 81 x.u32 = (Uint16)src[i] ^ 0x43808000u; 82 dst[i] = x.f32 - 257.0f; 83 } 84} 85 86static void SDL_Convert_S32_to_F32_Scalar(float *dst, const Sint32 *src, int num_samples) 87{ 88 int i; 89 90 LOG_DEBUG_AUDIO_CONVERT("S32", "F32"); 91 92 for (i = num_samples - 1; i >= 0; --i) { 93 dst[i] = (float)src[i] * DIVBY2147483648; 94 } 95} 96 97// Create a bit-mask based on the sign-bit. Should optimize to a single arithmetic-shift-right 98#define SIGNMASK(x) (Uint32)(0u - ((Uint32)(x) >> 31)) 99 100static void SDL_Convert_F32_to_S8_Scalar(Sint8 *dst, const float *src, int num_samples) 101{ 102 int i; 103 104 LOG_DEBUG_AUDIO_CONVERT("F32", "S8"); 105 106 for (i = 0; i < num_samples; ++i) { 107 /* 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0] 108 * 2) Shift the integer range from [0x47BFFF80, 0x47C00080] to [-128, 128] 109 * 3) Clamp the value to [-128, 127] */ 110 union float_bits x; 111 x.f32 = src[i] + 98304.0f; 112 113 Uint32 y = x.u32 - 0x47C00000u; 114 Uint32 z = 0x7Fu - (y ^ SIGNMASK(y)); 115 y = y ^ (z & SIGNMASK(z)); 116 117 dst[i] = (Sint8)(y & 0xFF); 118 } 119} 120 121static void SDL_Convert_F32_to_U8_Scalar(Uint8 *dst, const float *src, int num_samples) 122{ 123 int i; 124 125 LOG_DEBUG_AUDIO_CONVERT("F32", "U8"); 126 127 for (i = 0; i < num_samples; ++i) { 128 /* 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0] 129 * 2) Shift the integer range from [0x47BFFF80, 0x47C00080] to [-128, 128] 130 * 3) Clamp the value to 
[-128, 127] 131 * 4) Shift the integer range from [-128, 127] to [0, 255] */ 132 union float_bits x; 133 x.f32 = src[i] + 98304.0f; 134 135 Uint32 y = x.u32 - 0x47C00000u; 136 Uint32 z = 0x7Fu - (y ^ SIGNMASK(y)); 137 y = (y ^ 0x80u) ^ (z & SIGNMASK(z)); 138 139 dst[i] = (Uint8)(y & 0xFF); 140 } 141} 142 143static void SDL_Convert_F32_to_S16_Scalar(Sint16 *dst, const float *src, int num_samples) 144{ 145 int i; 146 147 LOG_DEBUG_AUDIO_CONVERT("F32", "S16"); 148 149 for (i = 0; i < num_samples; ++i) { 150 /* 1) Shift the float range from [-1.0, 1.0] to [383.0, 385.0] 151 * 2) Shift the integer range from [0x43BF8000, 0x43C08000] to [-32768, 32768] 152 * 3) Clamp values outside the [-32768, 32767] range */ 153 union float_bits x; 154 x.f32 = src[i] + 384.0f; 155 156 Uint32 y = x.u32 - 0x43C00000u; 157 Uint32 z = 0x7FFFu - (y ^ SIGNMASK(y)); 158 y = y ^ (z & SIGNMASK(z)); 159 160 dst[i] = (Sint16)(y & 0xFFFF); 161 } 162} 163 164static void SDL_Convert_F32_to_S32_Scalar(Sint32 *dst, const float *src, int num_samples) 165{ 166 int i; 167 168 LOG_DEBUG_AUDIO_CONVERT("F32", "S32"); 169 170 for (i = 0; i < num_samples; ++i) { 171 /* 1) Shift the float range from [-1.0, 1.0] to [-2147483648.0, 2147483648.0] 172 * 2) Set values outside the [-2147483648.0, 2147483647.0] range to -2147483648.0 173 * 3) Convert the float to an integer, and fixup values outside the valid range */ 174 union float_bits x; 175 x.f32 = src[i]; 176 177 Uint32 y = x.u32 + 0x0F800000u; 178 Uint32 z = y - 0xCF000000u; 179 z &= SIGNMASK(y ^ z); 180 x.u32 = y - z; 181 182 dst[i] = (Sint32)x.f32 ^ (Sint32)SIGNMASK(z); 183 } 184} 185 186#undef SIGNMASK 187 188static void SDL_Convert_Swap16_Scalar(Uint16 *dst, const Uint16 *src, int num_samples) 189{ 190 int i; 191 192 for (i = 0; i < num_samples; ++i) { 193 dst[i] = SDL_Swap16(src[i]); 194 } 195} 196 197static void SDL_Convert_Swap32_Scalar(Uint32 *dst, const Uint32 *src, int num_samples) 198{ 199 int i; 200 201 for (i = 0; i < num_samples; ++i) { 202 
dst[i] = SDL_Swap32(src[i]); 203 } 204} 205 206// end fallback scalar converters 207 208// Convert forwards, when sizeof(*src) >= sizeof(*dst) 209#define CONVERT_16_FWD(CVT1, CVT16) \ 210 int i = 0; \ 211 if (num_samples >= 16) { \ 212 while ((uintptr_t)(&dst[i]) & 15) { CVT1 ++i; } \ 213 while ((i + 16) <= num_samples) { CVT16 i += 16; } \ 214 } \ 215 while (i < num_samples) { CVT1 ++i; } 216 217// Convert backwards, when sizeof(*src) <= sizeof(*dst) 218#define CONVERT_16_REV(CVT1, CVT16) \ 219 int i = num_samples; \ 220 if (i >= 16) { \ 221 while ((uintptr_t)(&dst[i]) & 15) { --i; CVT1 } \ 222 while (i >= 16) { i -= 16; CVT16 } \ 223 } \ 224 while (i > 0) { --i; CVT1 } 225 226#ifdef SDL_SSE2_INTRINSICS 227static void SDL_TARGETING("sse2") SDL_Convert_S8_to_F32_SSE2(float *dst, const Sint8 *src, int num_samples) 228{ 229 /* 1) Flip the sign bit to convert from S8 to U8 format 230 * 2) Construct a float in the range [65536.0, 65538.0) 231 * 3) Shift the float range to [-1.0, 1.0) 232 * dst[i] = i2f((src[i] ^ 0x80) | 0x47800000) - 65537.0 */ 233 const __m128i zero = _mm_setzero_si128(); 234 const __m128i flipper = _mm_set1_epi8(-0x80); 235 const __m128i caster = _mm_set1_epi16(0x4780 /* 0x47800000 = f2i(65536.0) */); 236 const __m128 offset = _mm_set1_ps(-65537.0); 237 238 LOG_DEBUG_AUDIO_CONVERT("S8", "F32 (using SSE2)"); 239 240 CONVERT_16_REV({ 241 _mm_store_ss(&dst[i], _mm_add_ss(_mm_castsi128_ps(_mm_cvtsi32_si128((Uint8)src[i] ^ 0x47800080u)), offset)); 242 }, { 243 const __m128i bytes = _mm_xor_si128(_mm_loadu_si128((const __m128i *)&src[i]), flipper); 244 245 const __m128i shorts0 = _mm_unpacklo_epi8(bytes, zero); 246 const __m128i shorts1 = _mm_unpackhi_epi8(bytes, zero); 247 248 const __m128 floats0 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts0, caster)), offset); 249 const __m128 floats1 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts0, caster)), offset); 250 const __m128 floats2 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts1, 
caster)), offset); 251 const __m128 floats3 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts1, caster)), offset); 252 253 _mm_store_ps(&dst[i], floats0); 254 _mm_store_ps(&dst[i + 4], floats1); 255 _mm_store_ps(&dst[i + 8], floats2); 256 _mm_store_ps(&dst[i + 12], floats3); 257 }) 258} 259 260static void SDL_TARGETING("sse2") SDL_Convert_U8_to_F32_SSE2(float *dst, const Uint8 *src, int num_samples) 261{ 262 /* 1) Construct a float in the range [65536.0, 65538.0) 263 * 2) Shift the float range to [-1.0, 1.0) 264 * dst[i] = i2f(src[i] | 0x47800000) - 65537.0 */ 265 const __m128i zero = _mm_setzero_si128(); 266 const __m128i caster = _mm_set1_epi16(0x4780 /* 0x47800000 = f2i(65536.0) */); 267 const __m128 offset = _mm_set1_ps(-65537.0); 268 269 LOG_DEBUG_AUDIO_CONVERT("U8", "F32 (using SSE2)"); 270 271 CONVERT_16_REV({ 272 _mm_store_ss(&dst[i], _mm_add_ss(_mm_castsi128_ps(_mm_cvtsi32_si128((Uint8)src[i] ^ 0x47800000u)), offset)); 273 }, { 274 const __m128i bytes = _mm_loadu_si128((const __m128i *)&src[i]); 275 276 const __m128i shorts0 = _mm_unpacklo_epi8(bytes, zero); 277 const __m128i shorts1 = _mm_unpackhi_epi8(bytes, zero); 278 279 const __m128 floats0 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts0, caster)), offset); 280 const __m128 floats1 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts0, caster)), offset); 281 const __m128 floats2 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts1, caster)), offset); 282 const __m128 floats3 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts1, caster)), offset); 283 284 _mm_store_ps(&dst[i], floats0); 285 _mm_store_ps(&dst[i + 4], floats1); 286 _mm_store_ps(&dst[i + 8], floats2); 287 _mm_store_ps(&dst[i + 12], floats3); 288 }) 289} 290 291static void SDL_TARGETING("sse2") SDL_Convert_S16_to_F32_SSE2(float *dst, const Sint16 *src, int num_samples) 292{ 293 /* 1) Flip the sign bit to convert from S16 to U16 format 294 * 2) Construct a float in the range [256.0, 258.0) 295 * 3) Shift the float 
range to [-1.0, 1.0) 296 * dst[i] = i2f((src[i] ^ 0x8000) | 0x43800000) - 257.0 */ 297 const __m128i flipper = _mm_set1_epi16(-0x8000); 298 const __m128i caster = _mm_set1_epi16(0x4380 /* 0x43800000 = f2i(256.0) */); 299 const __m128 offset = _mm_set1_ps(-257.0f); 300 301 LOG_DEBUG_AUDIO_CONVERT("S16", "F32 (using SSE2)"); 302 303 CONVERT_16_REV({ 304 _mm_store_ss(&dst[i], _mm_add_ss(_mm_castsi128_ps(_mm_cvtsi32_si128((Uint16)src[i] ^ 0x43808000u)), offset)); 305 }, { 306 const __m128i shorts0 = _mm_xor_si128(_mm_loadu_si128((const __m128i *)&src[i]), flipper); 307 const __m128i shorts1 = _mm_xor_si128(_mm_loadu_si128((const __m128i *)&src[i + 8]), flipper); 308 309 const __m128 floats0 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts0, caster)), offset); 310 const __m128 floats1 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts0, caster)), offset); 311 const __m128 floats2 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts1, caster)), offset); 312 const __m128 floats3 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts1, caster)), offset); 313 314 _mm_store_ps(&dst[i], floats0); 315 _mm_store_ps(&dst[i + 4], floats1); 316 _mm_store_ps(&dst[i + 8], floats2); 317 _mm_store_ps(&dst[i + 12], floats3); 318 }) 319} 320 321static void SDL_TARGETING("sse2") SDL_Convert_S32_to_F32_SSE2(float *dst, const Sint32 *src, int num_samples) 322{ 323 // dst[i] = f32(src[i]) / f32(0x80000000) 324 const __m128 scaler = _mm_set1_ps(DIVBY2147483648); 325 326 LOG_DEBUG_AUDIO_CONVERT("S32", "F32 (using SSE2)"); 327 328 CONVERT_16_FWD({ 329 _mm_store_ss(&dst[i], _mm_mul_ss(_mm_cvt_si2ss(_mm_setzero_ps(), src[i]), scaler)); 330 }, { 331 const __m128i ints0 = _mm_loadu_si128((const __m128i *)&src[i]); 332 const __m128i ints1 = _mm_loadu_si128((const __m128i *)&src[i + 4]); 333 const __m128i ints2 = _mm_loadu_si128((const __m128i *)&src[i + 8]); 334 const __m128i ints3 = _mm_loadu_si128((const __m128i *)&src[i + 12]); 335 336 const __m128 floats0 = 
_mm_mul_ps(_mm_cvtepi32_ps(ints0), scaler); 337 const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(ints1), scaler); 338 const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(ints2), scaler); 339 const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(ints3), scaler); 340 341 _mm_store_ps(&dst[i], floats0); 342 _mm_store_ps(&dst[i + 4], floats1); 343 _mm_store_ps(&dst[i + 8], floats2); 344 _mm_store_ps(&dst[i + 12], floats3); 345 }) 346} 347 348static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S8_SSE2(Sint8 *dst, const float *src, int num_samples) 349{ 350 /* 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0] 351 * 2) Extract the lowest 16 bits and clamp to [-128, 127] 352 * Overflow is correctly handled for inputs between roughly [-255.0, 255.0] 353 * dst[i] = clamp(i16(f2i(src[i] + 98304.0) & 0xFFFF), -128, 127) */ 354 const __m128 offset = _mm_set1_ps(98304.0f); 355 const __m128i mask = _mm_set1_epi16(0xFF); 356 357 LOG_DEBUG_AUDIO_CONVERT("F32", "S8 (using SSE2)"); 358 359 CONVERT_16_FWD({ 360 const __m128i ints = _mm_castps_si128(_mm_add_ss(_mm_load_ss(&src[i]), offset)); 361 dst[i] = (Sint8)(_mm_cvtsi128_si32(_mm_packs_epi16(ints, ints)) & 0xFF); 362 }, { 363 const __m128 floats0 = _mm_loadu_ps(&src[i]); 364 const __m128 floats1 = _mm_loadu_ps(&src[i + 4]); 365 const __m128 floats2 = _mm_loadu_ps(&src[i + 8]); 366 const __m128 floats3 = _mm_loadu_ps(&src[i + 12]); 367 368 const __m128i ints0 = _mm_castps_si128(_mm_add_ps(floats0, offset)); 369 const __m128i ints1 = _mm_castps_si128(_mm_add_ps(floats1, offset)); 370 const __m128i ints2 = _mm_castps_si128(_mm_add_ps(floats2, offset)); 371 const __m128i ints3 = _mm_castps_si128(_mm_add_ps(floats3, offset)); 372 373 const __m128i shorts0 = _mm_and_si128(_mm_packs_epi16(ints0, ints1), mask); 374 const __m128i shorts1 = _mm_and_si128(_mm_packs_epi16(ints2, ints3), mask); 375 376 const __m128i bytes = _mm_packus_epi16(shorts0, shorts1); 377 378 _mm_store_si128((__m128i *)&dst[i], bytes); 379 }) 380} 381 
382static void SDL_TARGETING("sse2") SDL_Convert_F32_to_U8_SSE2(Uint8 *dst, const float *src, int num_samples) 383{ 384 /* 1) Shift the float range from [-1.0, 1.0] to [98304.0, 98306.0] 385 * 2) Extract the lowest 16 bits and clamp to [0, 255] 386 * Overflow is correctly handled for inputs between roughly [-254.0, 254.0] 387 * dst[i] = clamp(i16(f2i(src[i] + 98305.0) & 0xFFFF), 0, 255) */ 388 const __m128 offset = _mm_set1_ps(98305.0f); 389 const __m128i mask = _mm_set1_epi16(0xFF); 390 391 LOG_DEBUG_AUDIO_CONVERT("F32", "U8 (using SSE2)"); 392 393 CONVERT_16_FWD({ 394 const __m128i ints = _mm_castps_si128(_mm_add_ss(_mm_load_ss(&src[i]), offset)); 395 dst[i] = (Uint8)(_mm_cvtsi128_si32(_mm_packus_epi16(ints, ints)) & 0xFF); 396 }, { 397 const __m128 floats0 = _mm_loadu_ps(&src[i]); 398 const __m128 floats1 = _mm_loadu_ps(&src[i + 4]); 399 const __m128 floats2 = _mm_loadu_ps(&src[i + 8]); 400 const __m128 floats3 = _mm_loadu_ps(&src[i + 12]); 401 402 const __m128i ints0 = _mm_castps_si128(_mm_add_ps(floats0, offset)); 403 const __m128i ints1 = _mm_castps_si128(_mm_add_ps(floats1, offset)); 404 const __m128i ints2 = _mm_castps_si128(_mm_add_ps(floats2, offset)); 405 const __m128i ints3 = _mm_castps_si128(_mm_add_ps(floats3, offset)); 406 407 const __m128i shorts0 = _mm_and_si128(_mm_packus_epi16(ints0, ints1), mask); 408 const __m128i shorts1 = _mm_and_si128(_mm_packus_epi16(ints2, ints3), mask); 409 410 const __m128i bytes = _mm_packus_epi16(shorts0, shorts1); 411 412 _mm_store_si128((__m128i *)&dst[i], bytes); 413 }) 414} 415 416static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S16_SSE2(Sint16 *dst, const float *src, int num_samples) 417{ 418 /* 1) Shift the float range from [-1.0, 1.0] to [256.0, 258.0] 419 * 2) Shift the int range from [0x43800000, 0x43810000] to [-32768,32768] 420 * 3) Clamp to range [-32768,32767] 421 * Overflow is correctly handled for inputs between roughly [-257.0, +inf) 422 * dst[i] = clamp(f2i(src[i] + 257.0) - 0x43808000, -32768, 
32767) */ 423 const __m128 offset = _mm_set1_ps(257.0f); 424 425 LOG_DEBUG_AUDIO_CONVERT("F32", "S16 (using SSE2)"); 426 427 CONVERT_16_FWD({ 428 const __m128i ints = _mm_sub_epi32(_mm_castps_si128(_mm_add_ss(_mm_load_ss(&src[i]), offset)), _mm_castps_si128(offset)); 429 dst[i] = (Sint16)(_mm_cvtsi128_si32(_mm_packs_epi32(ints, ints)) & 0xFFFF); 430 }, { 431 const __m128 floats0 = _mm_loadu_ps(&src[i]); 432 const __m128 floats1 = _mm_loadu_ps(&src[i + 4]); 433 const __m128 floats2 = _mm_loadu_ps(&src[i + 8]); 434 const __m128 floats3 = _mm_loadu_ps(&src[i + 12]); 435 436 const __m128i ints0 = _mm_sub_epi32(_mm_castps_si128(_mm_add_ps(floats0, offset)), _mm_castps_si128(offset)); 437 const __m128i ints1 = _mm_sub_epi32(_mm_castps_si128(_mm_add_ps(floats1, offset)), _mm_castps_si128(offset)); 438 const __m128i ints2 = _mm_sub_epi32(_mm_castps_si128(_mm_add_ps(floats2, offset)), _mm_castps_si128(offset)); 439 const __m128i ints3 = _mm_sub_epi32(_mm_castps_si128(_mm_add_ps(floats3, offset)), _mm_castps_si128(offset)); 440 441 const __m128i shorts0 = _mm_packs_epi32(ints0, ints1); 442 const __m128i shorts1 = _mm_packs_epi32(ints2, ints3); 443 444 _mm_store_si128((__m128i *)&dst[i], shorts0); 445 _mm_store_si128((__m128i *)&dst[i + 8], shorts1); 446 }) 447} 448 449static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S32_SSE2(Sint32 *dst, const float *src, int num_samples) 450{ 451 /* 1) Scale the float range from [-1.0, 1.0] to [-2147483648.0, 2147483648.0] 452 * 2) Convert to integer (values too small/large become 0x80000000 = -2147483648) 453 * 3) Fixup values which were too large (0x80000000 ^ 0xFFFFFFFF = 2147483647) 454 * dst[i] = i32(src[i] * 2147483648.0) ^ ((src[i] >= 2147483648.0) ? 
0xFFFFFFFF : 0x00000000) */ 455 const __m128 limit = _mm_set1_ps(2147483648.0f); 456 457 LOG_DEBUG_AUDIO_CONVERT("F32", "S32 (using SSE2)"); 458 459 CONVERT_16_FWD({ 460 const __m128 floats = _mm_load_ss(&src[i]); 461 const __m128 values = _mm_mul_ss(floats, limit); 462 const __m128i ints = _mm_xor_si128(_mm_cvttps_epi32(values), _mm_castps_si128(_mm_cmpge_ss(values, limit))); 463 dst[i] = (Sint32)_mm_cvtsi128_si32(ints); 464 }, { 465 const __m128 floats0 = _mm_loadu_ps(&src[i]); 466 const __m128 floats1 = _mm_loadu_ps(&src[i + 4]); 467 const __m128 floats2 = _mm_loadu_ps(&src[i + 8]); 468 const __m128 floats3 = _mm_loadu_ps(&src[i + 12]); 469 470 const __m128 values1 = _mm_mul_ps(floats0, limit); 471 const __m128 values2 = _mm_mul_ps(floats1, limit); 472 const __m128 values3 = _mm_mul_ps(floats2, limit); 473 const __m128 values4 = _mm_mul_ps(floats3, limit); 474 475 const __m128i ints0 = _mm_xor_si128(_mm_cvttps_epi32(values1), _mm_castps_si128(_mm_cmpge_ps(values1, limit))); 476 const __m128i ints1 = _mm_xor_si128(_mm_cvttps_epi32(values2), _mm_castps_si128(_mm_cmpge_ps(values2, limit))); 477 const __m128i ints2 = _mm_xor_si128(_mm_cvttps_epi32(values3), _mm_castps_si128(_mm_cmpge_ps(values3, limit))); 478 const __m128i ints3 = _mm_xor_si128(_mm_cvttps_epi32(values4), _mm_castps_si128(_mm_cmpge_ps(values4, limit))); 479 480 _mm_store_si128((__m128i *)&dst[i], ints0); 481 _mm_store_si128((__m128i *)&dst[i + 4], ints1); 482 _mm_store_si128((__m128i *)&dst[i + 8], ints2); 483 _mm_store_si128((__m128i *)&dst[i + 12], ints3); 484 }) 485} 486#endif 487 488// FIXME: SDL doesn't have SSSE3 detection, so use the next one up 489#ifdef SDL_SSE4_1_INTRINSICS 490static void SDL_TARGETING("ssse3") SDL_Convert_Swap16_SSSE3(Uint16 *dst, const Uint16 *src, int num_samples) 491{ 492 const __m128i shuffle = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); 493 494 CONVERT_16_FWD({ 495 dst[i] = SDL_Swap16(src[i]); 496 }, { 497 __m128i ints0 = _mm_loadu_si128((const 
__m128i *)&src[i]); 498 __m128i ints1 = _mm_loadu_si128((const __m128i *)&src[i + 8]); 499 500 ints0 = _mm_shuffle_epi8(ints0, shuffle); 501 ints1 = _mm_shuffle_epi8(ints1, shuffle); 502 503 _mm_store_si128((__m128i *)&dst[i], ints0); 504 _mm_store_si128((__m128i *)&dst[i + 8], ints1); 505 }) 506} 507 508static void SDL_TARGETING("ssse3") SDL_Convert_Swap32_SSSE3(Uint32 *dst, const Uint32 *src, int num_samples) 509{ 510 const __m128i shuffle = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); 511 512 CONVERT_16_FWD({ 513 dst[i] = SDL_Swap32(src[i]); 514 }, { 515 __m128i ints0 = _mm_loadu_si128((const __m128i *)&src[i]); 516 __m128i ints1 = _mm_loadu_si128((const __m128i *)&src[i + 4]); 517 __m128i ints2 = _mm_loadu_si128((const __m128i *)&src[i + 8]); 518 __m128i ints3 = _mm_loadu_si128((const __m128i *)&src[i + 12]); 519 520 ints0 = _mm_shuffle_epi8(ints0, shuffle); 521 ints1 = _mm_shuffle_epi8(ints1, shuffle); 522 ints2 = _mm_shuffle_epi8(ints2, shuffle); 523 ints3 = _mm_shuffle_epi8(ints3, shuffle); 524 525 _mm_store_si128((__m128i *)&dst[i], ints0); 526 _mm_store_si128((__m128i *)&dst[i + 4], ints1); 527 _mm_store_si128((__m128i *)&dst[i + 8], ints2); 528 _mm_store_si128((__m128i *)&dst[i + 12], ints3); 529 }) 530} 531#endif 532 533#ifdef SDL_NEON_INTRINSICS 534 535// C99 requires that all code modifying floating point environment should 536// be guarded by the STDC FENV_ACCESS pragma; otherwise, it's undefined 537// behavior. However, the compiler support for this pragma is bad. 
538#if defined(__clang__) 539#if __clang_major__ >= 12 540#if defined(__aarch64__) 541#pragma STDC FENV_ACCESS ON 542#endif 543#endif 544#elif defined(_MSC_VER) 545#pragma fenv_access (on) 546#elif defined(__GNUC__) 547// GCC does not support the pragma at all 548#else 549#pragma STDC FENV_ACCESS ON 550#endif 551 552static void SDL_Convert_S8_to_F32_NEON(float *dst, const Sint8 *src, int num_samples) 553{ 554 LOG_DEBUG_AUDIO_CONVERT("S8", "F32 (using NEON)"); 555 fenv_t fenv; 556 feholdexcept(&fenv); 557 558 CONVERT_16_REV({ 559 vst1_lane_f32(&dst[i], vcvt_n_f32_s32(vdup_n_s32(src[i]), 7), 0); 560 }, { 561 int8x16_t bytes = vld1q_s8(&src[i]); 562 563 int16x8_t shorts0 = vmovl_s8(vget_low_s8(bytes)); 564 int16x8_t shorts1 = vmovl_s8(vget_high_s8(bytes)); 565 566 float32x4_t floats0 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts0)), 7); 567 float32x4_t floats1 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts0)), 7); 568 float32x4_t floats2 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts1)), 7); 569 float32x4_t floats3 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts1)), 7); 570 571 vst1q_f32(&dst[i], floats0); 572 vst1q_f32(&dst[i + 4], floats1); 573 vst1q_f32(&dst[i + 8], floats2); 574 vst1q_f32(&dst[i + 12], floats3); 575 }) 576 fesetenv(&fenv); 577} 578 579static void SDL_Convert_U8_to_F32_NEON(float *dst, const Uint8 *src, int num_samples) 580{ 581 LOG_DEBUG_AUDIO_CONVERT("U8", "F32 (using NEON)"); 582 fenv_t fenv; 583 feholdexcept(&fenv); 584 585 uint8x16_t flipper = vdupq_n_u8(0x80); 586 587 CONVERT_16_REV({ 588 vst1_lane_f32(&dst[i], vcvt_n_f32_s32(vdup_n_s32((Sint8)(src[i] ^ 0x80)), 7), 0); 589 }, { 590 int8x16_t bytes = vreinterpretq_s8_u8(veorq_u8(vld1q_u8(&src[i]), flipper)); 591 592 int16x8_t shorts0 = vmovl_s8(vget_low_s8(bytes)); 593 int16x8_t shorts1 = vmovl_s8(vget_high_s8(bytes)); 594 595 float32x4_t floats0 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts0)), 7); 596 float32x4_t floats1 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts0)), 7); 597 
float32x4_t floats2 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts1)), 7); 598 float32x4_t floats3 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts1)), 7); 599 600 vst1q_f32(&dst[i], floats0); 601 vst1q_f32(&dst[i + 4], floats1); 602 vst1q_f32(&dst[i + 8], floats2); 603 vst1q_f32(&dst[i + 12], floats3); 604 }) 605 fesetenv(&fenv); 606} 607 608static void SDL_Convert_S16_to_F32_NEON(float *dst, const Sint16 *src, int num_samples) 609{ 610 LOG_DEBUG_AUDIO_CONVERT("S16", "F32 (using NEON)"); 611 fenv_t fenv; 612 feholdexcept(&fenv); 613 614 CONVERT_16_REV({ 615 vst1_lane_f32(&dst[i], vcvt_n_f32_s32(vdup_n_s32(src[i]), 15), 0); 616 }, { 617 int16x8_t shorts0 = vld1q_s16(&src[i]); 618 int16x8_t shorts1 = vld1q_s16(&src[i + 8]); 619 620 float32x4_t floats0 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts0)), 15); 621 float32x4_t floats1 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts0)), 15); 622 float32x4_t floats2 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts1)), 15); 623 float32x4_t floats3 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts1)), 15); 624 625 vst1q_f32(&dst[i], floats0); 626 vst1q_f32(&dst[i + 4], floats1); 627 vst1q_f32(&dst[i + 8], floats2); 628 vst1q_f32(&dst[i + 12], floats3); 629 }) 630 fesetenv(&fenv); 631} 632 633static void SDL_Convert_S32_to_F32_NEON(float *dst, const Sint32 *src, int num_samples) 634{ 635 LOG_DEBUG_AUDIO_CONVERT("S32", "F32 (using NEON)"); 636 fenv_t fenv; 637 feholdexcept(&fenv); 638 639 CONVERT_16_FWD({ 640 vst1_lane_f32(&dst[i], vcvt_n_f32_s32(vld1_dup_s32(&src[i]), 31), 0); 641 }, { 642 int32x4_t ints0 = vld1q_s32(&src[i]); 643 int32x4_t ints1 = vld1q_s32(&src[i + 4]); 644 int32x4_t ints2 = vld1q_s32(&src[i + 8]); 645 int32x4_t ints3 = vld1q_s32(&src[i + 12]); 646 647 float32x4_t floats0 = vcvtq_n_f32_s32(ints0, 31); 648 float32x4_t floats1 = vcvtq_n_f32_s32(ints1, 31); 649 float32x4_t floats2 = vcvtq_n_f32_s32(ints2, 31); 650 float32x4_t floats3 = vcvtq_n_f32_s32(ints3, 31); 651 652 vst1q_f32(&dst[i], floats0); 653 
vst1q_f32(&dst[i + 4], floats1); 654 vst1q_f32(&dst[i + 8], floats2); 655 vst1q_f32(&dst[i + 12], floats3); 656 }) 657 fesetenv(&fenv); 658} 659 660static void SDL_Convert_F32_to_S8_NEON(Sint8 *dst, const float *src, int num_samples) 661{ 662 LOG_DEBUG_AUDIO_CONVERT("F32", "S8 (using NEON)"); 663 fenv_t fenv; 664 feholdexcept(&fenv); 665 666 CONVERT_16_FWD({ 667 vst1_lane_s8(&dst[i], vreinterpret_s8_s32(vcvt_n_s32_f32(vld1_dup_f32(&src[i]), 31)), 3); 668 }, { 669 float32x4_t floats0 = vld1q_f32(&src[i]); 670 float32x4_t floats1 = vld1q_f32(&src[i + 4]); 671 float32x4_t floats2 = vld1q_f32(&src[i + 8]); 672 float32x4_t floats3 = vld1q_f32(&src[i + 12]); 673 674 int32x4_t ints0 = vcvtq_n_s32_f32(floats0, 31); 675 int32x4_t ints1 = vcvtq_n_s32_f32(floats1, 31); 676 int32x4_t ints2 = vcvtq_n_s32_f32(floats2, 31); 677 int32x4_t ints3 = vcvtq_n_s32_f32(floats3, 31); 678 679 int16x8_t shorts0 = vcombine_s16(vshrn_n_s32(ints0, 16), vshrn_n_s32(ints1, 16)); 680 int16x8_t shorts1 = vcombine_s16(vshrn_n_s32(ints2, 16), vshrn_n_s32(ints3, 16)); 681 682 int8x16_t bytes = vcombine_s8(vshrn_n_s16(shorts0, 8), vshrn_n_s16(shorts1, 8)); 683 684 vst1q_s8(&dst[i], bytes); 685 }) 686 fesetenv(&fenv); 687} 688 689static void SDL_Convert_F32_to_U8_NEON(Uint8 *dst, const float *src, int num_samples) 690{ 691 LOG_DEBUG_AUDIO_CONVERT("F32", "U8 (using NEON)"); 692 fenv_t fenv; 693 feholdexcept(&fenv); 694 695 uint8x16_t flipper = vdupq_n_u8(0x80); 696 697 CONVERT_16_FWD({ 698 vst1_lane_u8(&dst[i], 699 veor_u8(vreinterpret_u8_s32(vcvt_n_s32_f32(vld1_dup_f32(&src[i]), 31)), 700 vget_low_u8(flipper)), 3); 701 }, { 702 float32x4_t floats0 = vld1q_f32(&src[i]); 703 float32x4_t floats1 = vld1q_f32(&src[i + 4]); 704 float32x4_t floats2 = vld1q_f32(&src[i + 8]); 705 float32x4_t floats3 = vld1q_f32(&src[i + 12]); 706 707 int32x4_t ints0 = vcvtq_n_s32_f32(floats0, 31); 708 int32x4_t ints1 = vcvtq_n_s32_f32(floats1, 31); 709 int32x4_t ints2 = vcvtq_n_s32_f32(floats2, 31); 710 int32x4_t ints3 = 
vcvtq_n_s32_f32(floats3, 31); 711 712 int16x8_t shorts0 = vcombine_s16(vshrn_n_s32(ints0, 16), vshrn_n_s32(ints1, 16)); 713 int16x8_t shorts1 = vcombine_s16(vshrn_n_s32(ints2, 16), vshrn_n_s32(ints3, 16)); 714 715 uint8x16_t bytes = veorq_u8(vreinterpretq_u8_s8( 716 vcombine_s8(vshrn_n_s16(shorts0, 8), vshrn_n_s16(shorts1, 8))), 717 flipper); 718 719 vst1q_u8(&dst[i], bytes); 720 }) 721 fesetenv(&fenv); 722} 723 724static void SDL_Convert_F32_to_S16_NEON(Sint16 *dst, const float *src, int num_samples) 725{ 726 LOG_DEBUG_AUDIO_CONVERT("F32", "S16 (using NEON)"); 727 fenv_t fenv; 728 feholdexcept(&fenv); 729 730 CONVERT_16_FWD({ 731 vst1_lane_s16(&dst[i], vreinterpret_s16_s32(vcvt_n_s32_f32(vld1_dup_f32(&src[i]), 31)), 1); 732 }, { 733 float32x4_t floats0 = vld1q_f32(&src[i]); 734 float32x4_t floats1 = vld1q_f32(&src[i + 4]); 735 float32x4_t floats2 = vld1q_f32(&src[i + 8]); 736 float32x4_t floats3 = vld1q_f32(&src[i + 12]); 737 738 int32x4_t ints0 = vcvtq_n_s32_f32(floats0, 31); 739 int32x4_t ints1 = vcvtq_n_s32_f32(floats1, 31); 740 int32x4_t ints2 = vcvtq_n_s32_f32(floats2, 31); 741 int32x4_t ints3 = vcvtq_n_s32_f32(floats3, 31); 742 743 int16x8_t shorts0 = vcombine_s16(vshrn_n_s32(ints0, 16), vshrn_n_s32(ints1, 16)); 744 int16x8_t shorts1 = vcombine_s16(vshrn_n_s32(ints2, 16), vshrn_n_s32(ints3, 16)); 745 746 vst1q_s16(&dst[i], shorts0); 747 vst1q_s16(&dst[i + 8], shorts1); 748 }) 749 fesetenv(&fenv); 750} 751 752static void SDL_Convert_F32_to_S32_NEON(Sint32 *dst, const float *src, int num_samples) 753{ 754 LOG_DEBUG_AUDIO_CONVERT("F32", "S32 (using NEON)"); 755 fenv_t fenv; 756 feholdexcept(&fenv); 757 758 CONVERT_16_FWD({ 759 vst1_lane_s32(&dst[i], vcvt_n_s32_f32(vld1_dup_f32(&src[i]), 31), 0); 760 }, { 761 float32x4_t floats0 = vld1q_f32(&src[i]); 762 float32x4_t floats1 = vld1q_f32(&src[i + 4]); 763 float32x4_t floats2 = vld1q_f32(&src[i + 8]); 764 float32x4_t floats3 = vld1q_f32(&src[i + 12]); 765 766 int32x4_t ints0 = vcvtq_n_s32_f32(floats0, 31); 767 
int32x4_t ints1 = vcvtq_n_s32_f32(floats1, 31); 768 int32x4_t ints2 = vcvtq_n_s32_f32(floats2, 31); 769 int32x4_t ints3 = vcvtq_n_s32_f32(floats3, 31); 770 771 vst1q_s32(&dst[i], ints0); 772 vst1q_s32(&dst[i + 4], ints1); 773 vst1q_s32(&dst[i + 8], ints2); 774 vst1q_s32(&dst[i + 12], ints3); 775 }) 776 fesetenv(&fenv); 777} 778 779static void SDL_Convert_Swap16_NEON(Uint16 *dst, const Uint16 *src, int num_samples) 780{ 781 CONVERT_16_FWD({ 782 dst[i] = SDL_Swap16(src[i]); 783 }, { 784 uint8x16_t ints0 = vld1q_u8((const Uint8 *)&src[i]); 785 uint8x16_t ints1 = vld1q_u8((const Uint8 *)&src[i + 8]); 786 787 ints0 = vrev16q_u8(ints0); 788 ints1 = vrev16q_u8(ints1); 789 790 vst1q_u8((Uint8 *)&dst[i], ints0); 791 vst1q_u8((Uint8 *)&dst[i + 8], ints1); 792 }) 793} 794 795static void SDL_Convert_Swap32_NEON(Uint32 *dst, const Uint32 *src, int num_samples) 796{ 797 CONVERT_16_FWD({ 798 dst[i] = SDL_Swap32(src[i]); 799 }, { 800 uint8x16_t ints0 = vld1q_u8((const Uint8 *)&src[i]); 801 uint8x16_t ints1 = vld1q_u8((const Uint8 *)&src[i + 4]); 802 uint8x16_t ints2 = vld1q_u8((const Uint8 *)&src[i + 8]); 803 uint8x16_t ints3 = vld1q_u8((const Uint8 *)&src[i + 12]); 804 805 ints0 = vrev32q_u8(ints0); 806 ints1 = vrev32q_u8(ints1); 807 ints2 = vrev32q_u8(ints2); 808 ints3 = vrev32q_u8(ints3); 809 810 vst1q_u8((Uint8 *)&dst[i], ints0); 811 vst1q_u8((Uint8 *)&dst[i + 4], ints1); 812 vst1q_u8((Uint8 *)&dst[i + 8], ints2); 813 vst1q_u8((Uint8 *)&dst[i + 12], ints3); 814 }) 815} 816 817#if defined(__clang__) 818#if __clang_major__ >= 12 819#if defined(__aarch64__) 820#pragma STDC FENV_ACCESS DEFAULT 821#endif 822#endif 823#elif defined(_MSC_VER) 824#pragma fenv_access (off) 825#elif defined(__GNUC__) 826// 827#else 828#pragma STDC FENV_ACCESS DEFAULT 829#endif 830 831#endif 832 833#undef CONVERT_16_FWD 834#undef CONVERT_16_REV 835 836// Function pointers set to a CPU-specific implementation. 
837static void (*SDL_Convert_S8_to_F32)(float *dst, const Sint8 *src, int num_samples) = NULL; 838static void (*SDL_Convert_U8_to_F32)(float *dst, const Uint8 *src, int num_samples) = NULL; 839static void (*SDL_Convert_S16_to_F32)(float *dst, const Sint16 *src, int num_samples) = NULL; 840static void (*SDL_Convert_S32_to_F32)(float *dst, const Sint32 *src, int num_samples) = NULL; 841static void (*SDL_Convert_F32_to_S8)(Sint8 *dst, const float *src, int num_samples) = NULL; 842static void (*SDL_Convert_F32_to_U8)(Uint8 *dst, const float *src, int num_samples) = NULL; 843static void (*SDL_Convert_F32_to_S16)(Sint16 *dst, const float *src, int num_samples) = NULL; 844static void (*SDL_Convert_F32_to_S32)(Sint32 *dst, const float *src, int num_samples) = NULL; 845 846static void (*SDL_Convert_Swap16)(Uint16 *dst, const Uint16 *src, int num_samples) = NULL; 847static void (*SDL_Convert_Swap32)(Uint32 *dst, const Uint32 *src, int num_samples) = NULL; 848 849void ConvertAudioToFloat(float *dst, const void *src, int num_samples, SDL_AudioFormat src_fmt) 850{ 851 switch (src_fmt) { 852 case SDL_AUDIO_S8: 853 SDL_Convert_S8_to_F32(dst, (const Sint8 *) src, num_samples); 854 break; 855 856 case SDL_AUDIO_U8: 857 SDL_Convert_U8_to_F32(dst, (const Uint8 *) src, num_samples); 858 break; 859 860 case SDL_AUDIO_S16: 861 SDL_Convert_S16_to_F32(dst, (const Sint16 *) src, num_samples); 862 break; 863 864 case SDL_AUDIO_S16 ^ SDL_AUDIO_MASK_BIG_ENDIAN: 865 SDL_Convert_Swap16((Uint16 *)dst, (const Uint16 *)src, num_samples); 866 SDL_Convert_S16_to_F32(dst, (const Sint16 *) dst, num_samples); 867 break; 868 869 case SDL_AUDIO_S32: 870 SDL_Convert_S32_to_F32(dst, (const Sint32 *) src, num_samples); 871 break; 872 873 case SDL_AUDIO_S32 ^ SDL_AUDIO_MASK_BIG_ENDIAN: 874 SDL_Convert_Swap32((Uint32 *)dst, (const Uint32 *)src, num_samples); 875 SDL_Convert_S32_to_F32(dst, (const Sint32 *) dst, num_samples); 876 break; 877 878 case SDL_AUDIO_F32 ^ SDL_AUDIO_MASK_BIG_ENDIAN: 879 
SDL_Convert_Swap32((Uint32 *)dst, (const Uint32 *)src, num_samples); 880 break; 881 882 default: SDL_assert(!"Unexpected audio format!"); break; 883 } 884} 885 886void ConvertAudioFromFloat(void *dst, const float *src, int num_samples, SDL_AudioFormat dst_fmt) 887{ 888 switch (dst_fmt) { 889 case SDL_AUDIO_S8: 890 SDL_Convert_F32_to_S8((Sint8 *) dst, src, num_samples); 891 break; 892 893 case SDL_AUDIO_U8: 894 SDL_Convert_F32_to_U8((Uint8 *) dst, src, num_samples); 895 break; 896 897 case SDL_AUDIO_S16: 898 SDL_Convert_F32_to_S16((Sint16 *) dst, src, num_samples); 899 break; 900 901 case SDL_AUDIO_S16 ^ SDL_AUDIO_MASK_BIG_ENDIAN: 902 SDL_Convert_F32_to_S16((Sint16 *) dst, src, num_samples); 903 SDL_Convert_Swap16((Uint16 *)dst, (const Uint16 *)dst, num_samples); 904 break; 905 906 case SDL_AUDIO_S32: 907 SDL_Convert_F32_to_S32((Sint32 *) dst, src, num_samples); 908 break; 909 910 case SDL_AUDIO_S32 ^ SDL_AUDIO_MASK_BIG_ENDIAN: 911 SDL_Convert_F32_to_S32((Sint32 *) dst, src, num_samples); 912 SDL_Convert_Swap32((Uint32 *)dst, (const Uint32 *)dst, num_samples); 913 break; 914 915 case SDL_AUDIO_F32 ^ SDL_AUDIO_MASK_BIG_ENDIAN: 916 SDL_Convert_Swap32((Uint32 *)dst, (const Uint32 *)src, num_samples); 917 break; 918 919 default: SDL_assert(!"Unexpected audio format!"); break; 920 } 921} 922 923void ConvertAudioSwapEndian(void *dst, const void *src, int num_samples, int bitsize) 924{ 925 switch (bitsize) { 926 case 16: SDL_Convert_Swap16((Uint16 *)dst, (const Uint16 *)src, num_samples); break; 927 case 32: SDL_Convert_Swap32((Uint32 *)dst, (const Uint32 *)src, num_samples); break; 928 default: SDL_assert(!"Unexpected audio format!"); break; 929 } 930} 931 932void SDL_ChooseAudioConverters(void) 933{ 934 static bool converters_chosen = false; 935 if (converters_chosen) { 936 return; 937 } 938 939#define SET_CONVERTER_FUNCS(fntype) \ 940 SDL_Convert_Swap16 = SDL_Convert_Swap16_##fntype; \ 941 SDL_Convert_Swap32 = SDL_Convert_Swap32_##fntype; 942 943#ifdef 
SDL_SSE4_1_INTRINSICS 944 if (SDL_HasSSE41()) { 945 SET_CONVERTER_FUNCS(SSSE3); 946 } else 947#endif 948#ifdef SDL_NEON_INTRINSICS 949 if (SDL_HasNEON()) { 950 SET_CONVERTER_FUNCS(NEON); 951 } else 952#endif 953 { 954 SET_CONVERTER_FUNCS(Scalar); 955 } 956 957#undef SET_CONVERTER_FUNCS 958 959#define SET_CONVERTER_FUNCS(fntype) \ 960 SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \ 961 SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \ 962 SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \ 963 SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \ 964 SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \ 965 SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \ 966 SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \ 967 SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \ 968 969#ifdef SDL_SSE2_INTRINSICS 970 if (SDL_HasSSE2()) { 971 SET_CONVERTER_FUNCS(SSE2); 972 } else 973#endif 974#ifdef SDL_NEON_INTRINSICS 975 if (SDL_HasNEON()) { 976 SET_CONVERTER_FUNCS(NEON); 977 } else 978#endif 979 { 980 SET_CONVERTER_FUNCS(Scalar); 981 } 982 983#undef SET_CONVERTER_FUNCS 984 985 converters_chosen = true; 986} 987
[FILE END]
(C) 2025 0x4248 (C) 2025 4248 Media and 4248 Systems, all part of 0x4248. See LICENCE files for more information. Not all files are by 0x4248; always check the licensing.