Atlas - SDL_audiotypecvt.c

Atlas - SDL_audiotypecvt.c
Home / ext / SDL / src / audio
Lines: 1 | Size: 37260 bytes
[Download] [Show on GitHub] [Search similar files] [Raw] [Raw (proxy)] 
[FILE BEGIN]
1/*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2025 Sam Lantinga <[email protected]>
4
5  This software is provided 'as-is', without any express or implied
6  warranty.  In no event will the authors be held liable for any damages
7  arising from the use of this software.
8
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12
13  1. The origin of this software must not be misrepresented; you must not
14     claim that you wrote the original software. If you use this software
15     in a product, an acknowledgment in the product documentation would be
16     appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18     misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20*/
21#include "SDL_internal.h"
22
23#include "SDL_sysaudio.h"
24
25#ifdef SDL_NEON_INTRINSICS
26#include <fenv.h>
27#endif
28
// Exactly 2^-31 (0x1p-31f): multiplying by this constant is the same as
// dividing by 2147483648.
#define DIVBY2147483648 0.0000000004656612873077392578125f
30
31// start fallback scalar converters
32
33// This code requires that floats are in the IEEE-754 binary32 format
34SDL_COMPILE_TIME_ASSERT(float_bits, sizeof(float) == sizeof(Uint32));
35
36union float_bits {
37    Uint32 u32;
38    float f32;
39};
40
41static void SDL_Convert_S8_to_F32_Scalar(float *dst, const Sint8 *src, int num_samples)
42{
43    int i;
44
45    LOG_DEBUG_AUDIO_CONVERT("S8", "F32");
46
47    for (i = num_samples - 1; i >= 0; --i) {
48        /* 1) Construct a float in the range [65536.0, 65538.0)
49         * 2) Shift the float range to [-1.0, 1.0) */
50        union float_bits x;
51        x.u32 = (Uint8)src[i] ^ 0x47800080u;
52        dst[i] = x.f32 - 65537.0f;
53    }
54}
55
56static void SDL_Convert_U8_to_F32_Scalar(float *dst, const Uint8 *src, int num_samples)
57{
58    int i;
59
60    LOG_DEBUG_AUDIO_CONVERT("U8", "F32");
61
62    for (i = num_samples - 1; i >= 0; --i) {
63        /* 1) Construct a float in the range [65536.0, 65538.0)
64         * 2) Shift the float range to [-1.0, 1.0) */
65        union float_bits x;
66        x.u32 = src[i] ^ 0x47800000u;
67        dst[i] = x.f32 - 65537.0f;
68    }
69}
70
71static void SDL_Convert_S16_to_F32_Scalar(float *dst, const Sint16 *src, int num_samples)
72{
73    int i;
74
75    LOG_DEBUG_AUDIO_CONVERT("S16", "F32");
76
77    for (i = num_samples - 1; i >= 0; --i) {
78        /* 1) Construct a float in the range [256.0, 258.0)
79         * 2) Shift the float range to [-1.0, 1.0) */
80        union float_bits x;
81        x.u32 = (Uint16)src[i] ^ 0x43808000u;
82        dst[i] = x.f32 - 257.0f;
83    }
84}
85
86static void SDL_Convert_S32_to_F32_Scalar(float *dst, const Sint32 *src, int num_samples)
87{
88    int i;
89
90    LOG_DEBUG_AUDIO_CONVERT("S32", "F32");
91
92    for (i = num_samples - 1; i >= 0; --i) {
93        dst[i] = (float)src[i] * DIVBY2147483648;
94    }
95}
96
// Spreads the sign bit (bit 31) of x across a whole Uint32: the result is
// 0xFFFFFFFF when the bit is set and 0 otherwise.
// Should optimize to a single arithmetic-shift-right.
#define SIGNMASK(x) ((Uint32)(0u - (((Uint32)(x)) >> 31)))
99
100static void SDL_Convert_F32_to_S8_Scalar(Sint8 *dst, const float *src, int num_samples)
101{
102    int i;
103
104    LOG_DEBUG_AUDIO_CONVERT("F32", "S8");
105
106    for (i = 0; i < num_samples; ++i) {
107        /* 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0]
108         * 2) Shift the integer range from [0x47BFFF80, 0x47C00080] to [-128, 128]
109         * 3) Clamp the value to [-128, 127] */
110        union float_bits x;
111        x.f32 = src[i] + 98304.0f;
112
113        Uint32 y = x.u32 - 0x47C00000u;
114        Uint32 z = 0x7Fu - (y ^ SIGNMASK(y));
115        y = y ^ (z & SIGNMASK(z));
116
117        dst[i] = (Sint8)(y & 0xFF);
118    }
119}
120
121static void SDL_Convert_F32_to_U8_Scalar(Uint8 *dst, const float *src, int num_samples)
122{
123    int i;
124
125    LOG_DEBUG_AUDIO_CONVERT("F32", "U8");
126
127    for (i = 0; i < num_samples; ++i) {
128        /* 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0]
129         * 2) Shift the integer range from [0x47BFFF80, 0x47C00080] to [-128, 128]
130         * 3) Clamp the value to [-128, 127]
131         * 4) Shift the integer range from [-128, 127] to [0, 255] */
132        union float_bits x;
133        x.f32 = src[i] + 98304.0f;
134
135        Uint32 y = x.u32 - 0x47C00000u;
136        Uint32 z = 0x7Fu - (y ^ SIGNMASK(y));
137        y = (y ^ 0x80u) ^ (z & SIGNMASK(z));
138
139        dst[i] = (Uint8)(y & 0xFF);
140    }
141}
142
143static void SDL_Convert_F32_to_S16_Scalar(Sint16 *dst, const float *src, int num_samples)
144{
145    int i;
146
147    LOG_DEBUG_AUDIO_CONVERT("F32", "S16");
148
149    for (i = 0; i < num_samples; ++i) {
150        /* 1) Shift the float range from [-1.0, 1.0] to [383.0, 385.0]
151         * 2) Shift the integer range from [0x43BF8000, 0x43C08000] to [-32768, 32768]
152         * 3) Clamp values outside the [-32768, 32767] range */
153        union float_bits x;
154        x.f32 = src[i] + 384.0f;
155
156        Uint32 y = x.u32 - 0x43C00000u;
157        Uint32 z = 0x7FFFu - (y ^ SIGNMASK(y));
158        y = y ^ (z & SIGNMASK(z));
159
160        dst[i] = (Sint16)(y & 0xFFFF);
161    }
162}
163
164static void SDL_Convert_F32_to_S32_Scalar(Sint32 *dst, const float *src, int num_samples)
165{
166    int i;
167
168    LOG_DEBUG_AUDIO_CONVERT("F32", "S32");
169
170    for (i = 0; i < num_samples; ++i) {
171        /* 1) Shift the float range from [-1.0, 1.0] to [-2147483648.0, 2147483648.0]
172         * 2) Set values outside the [-2147483648.0, 2147483647.0] range to -2147483648.0
173         * 3) Convert the float to an integer, and fixup values outside the valid range */
174        union float_bits x;
175        x.f32 = src[i];
176
177        Uint32 y = x.u32 + 0x0F800000u;
178        Uint32 z = y - 0xCF000000u;
179        z &= SIGNMASK(y ^ z);
180        x.u32 = y - z;
181
182        dst[i] = (Sint32)x.f32 ^ (Sint32)SIGNMASK(z);
183    }
184}
185
186#undef SIGNMASK
187
188static void SDL_Convert_Swap16_Scalar(Uint16 *dst, const Uint16 *src, int num_samples)
189{
190    int i;
191
192    for (i = 0; i < num_samples; ++i) {
193        dst[i] = SDL_Swap16(src[i]);
194    }
195}
196
197static void SDL_Convert_Swap32_Scalar(Uint32 *dst, const Uint32 *src, int num_samples)
198{
199    int i;
200
201    for (i = 0; i < num_samples; ++i) {
202        dst[i] = SDL_Swap32(src[i]);
203    }
204}
205
206// end fallback scalar converters
207
// Convert forwards, when sizeof(*src) >= sizeof(*dst).
// Expects `dst` and `num_samples` in scope and declares the index `int i`.
// CVT1 converts the single sample at index i; CVT16 converts the 16 samples
// starting at index i. With at least 16 samples, CVT1 runs until &dst[i] is
// 16-byte aligned, CVT16 handles every full group of 16, and CVT1 finishes
// whatever is left.
#define CONVERT_16_FWD(CVT1, CVT16)                    \
    int i = 0;                                         \
    if (num_samples >= 16) {                           \
        while (((uintptr_t)(&dst[i]) & 15) != 0) {     \
            CVT1                                       \
            ++i;                                       \
        }                                              \
        while ((i + 16) <= num_samples) {              \
            CVT16                                      \
            i += 16;                                   \
        }                                              \
    }                                                  \
    while (i < num_samples) {                          \
        CVT1                                           \
        ++i;                                           \
    }
216
// Convert backwards, when sizeof(*src) <= sizeof(*dst).
// Expects `dst` and `num_samples` in scope and declares the index `int i`,
// which starts at num_samples and counts down to 0.
// CVT1 converts the single sample at index i; CVT16 converts the 16 samples
// starting at index i. With at least 16 samples, CVT1 runs until &dst[i] is
// 16-byte aligned, CVT16 handles every full group of 16, and CVT1 finishes
// whatever is left at the front.
#define CONVERT_16_REV(CVT1, CVT16)                    \
    int i = num_samples;                               \
    if (i >= 16) {                                     \
        while (((uintptr_t)(&dst[i]) & 15) != 0) {     \
            --i;                                       \
            CVT1                                       \
        }                                              \
        while (i >= 16) {                              \
            i -= 16;                                   \
            CVT16                                      \
        }                                              \
    }                                                  \
    while (i > 0) {                                    \
        --i;                                           \
        CVT1                                           \
    }
225
226#ifdef SDL_SSE2_INTRINSICS
227static void SDL_TARGETING("sse2") SDL_Convert_S8_to_F32_SSE2(float *dst, const Sint8 *src, int num_samples)
228{
229    /* 1) Flip the sign bit to convert from S8 to U8 format
230     * 2) Construct a float in the range [65536.0, 65538.0)
231     * 3) Shift the float range to [-1.0, 1.0)
232     * dst[i] = i2f((src[i] ^ 0x80) | 0x47800000) - 65537.0 */
233    const __m128i zero = _mm_setzero_si128();
234    const __m128i flipper = _mm_set1_epi8(-0x80);
235    const __m128i caster = _mm_set1_epi16(0x4780 /* 0x47800000 = f2i(65536.0) */);
236    const __m128 offset = _mm_set1_ps(-65537.0);
237
238    LOG_DEBUG_AUDIO_CONVERT("S8", "F32 (using SSE2)");
239
240    CONVERT_16_REV({
241        _mm_store_ss(&dst[i], _mm_add_ss(_mm_castsi128_ps(_mm_cvtsi32_si128((Uint8)src[i] ^ 0x47800080u)), offset));
242    }, {
243        const __m128i bytes = _mm_xor_si128(_mm_loadu_si128((const __m128i *)&src[i]), flipper);
244
245        const __m128i shorts0 = _mm_unpacklo_epi8(bytes, zero);
246        const __m128i shorts1 = _mm_unpackhi_epi8(bytes, zero);
247
248        const __m128 floats0 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts0, caster)), offset);
249        const __m128 floats1 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts0, caster)), offset);
250        const __m128 floats2 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts1, caster)), offset);
251        const __m128 floats3 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts1, caster)), offset);
252
253        _mm_store_ps(&dst[i], floats0);
254        _mm_store_ps(&dst[i + 4], floats1);
255        _mm_store_ps(&dst[i + 8], floats2);
256        _mm_store_ps(&dst[i + 12], floats3);
257    })
258}
259
260static void SDL_TARGETING("sse2") SDL_Convert_U8_to_F32_SSE2(float *dst, const Uint8 *src, int num_samples)
261{
262    /* 1) Construct a float in the range [65536.0, 65538.0)
263     * 2) Shift the float range to [-1.0, 1.0)
264     * dst[i] = i2f(src[i] | 0x47800000) - 65537.0 */
265    const __m128i zero = _mm_setzero_si128();
266    const __m128i caster = _mm_set1_epi16(0x4780 /* 0x47800000 = f2i(65536.0) */);
267    const __m128 offset = _mm_set1_ps(-65537.0);
268
269    LOG_DEBUG_AUDIO_CONVERT("U8", "F32 (using SSE2)");
270
271    CONVERT_16_REV({
272        _mm_store_ss(&dst[i], _mm_add_ss(_mm_castsi128_ps(_mm_cvtsi32_si128((Uint8)src[i] ^ 0x47800000u)), offset));
273    }, {
274        const __m128i bytes = _mm_loadu_si128((const __m128i *)&src[i]);
275
276        const __m128i shorts0 = _mm_unpacklo_epi8(bytes, zero);
277        const __m128i shorts1 = _mm_unpackhi_epi8(bytes, zero);
278
279        const __m128 floats0 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts0, caster)), offset);
280        const __m128 floats1 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts0, caster)), offset);
281        const __m128 floats2 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts1, caster)), offset);
282        const __m128 floats3 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts1, caster)), offset);
283
284        _mm_store_ps(&dst[i], floats0);
285        _mm_store_ps(&dst[i + 4], floats1);
286        _mm_store_ps(&dst[i + 8], floats2);
287        _mm_store_ps(&dst[i + 12], floats3);
288    })
289}
290
291static void SDL_TARGETING("sse2") SDL_Convert_S16_to_F32_SSE2(float *dst, const Sint16 *src, int num_samples)
292{
293    /* 1) Flip the sign bit to convert from S16 to U16 format
294     * 2) Construct a float in the range [256.0, 258.0)
295     * 3) Shift the float range to [-1.0, 1.0)
296     * dst[i] = i2f((src[i] ^ 0x8000) | 0x43800000) - 257.0 */
297    const __m128i flipper = _mm_set1_epi16(-0x8000);
298    const __m128i caster = _mm_set1_epi16(0x4380 /* 0x43800000 = f2i(256.0) */);
299    const __m128 offset = _mm_set1_ps(-257.0f);
300
301    LOG_DEBUG_AUDIO_CONVERT("S16", "F32 (using SSE2)");
302
303    CONVERT_16_REV({
304        _mm_store_ss(&dst[i], _mm_add_ss(_mm_castsi128_ps(_mm_cvtsi32_si128((Uint16)src[i] ^ 0x43808000u)), offset));
305    }, {
306        const __m128i shorts0 = _mm_xor_si128(_mm_loadu_si128((const __m128i *)&src[i]), flipper);
307        const __m128i shorts1 = _mm_xor_si128(_mm_loadu_si128((const __m128i *)&src[i + 8]), flipper);
308
309        const __m128 floats0 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts0, caster)), offset);
310        const __m128 floats1 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts0, caster)), offset);
311        const __m128 floats2 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts1, caster)), offset);
312        const __m128 floats3 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts1, caster)), offset);
313
314        _mm_store_ps(&dst[i], floats0);
315        _mm_store_ps(&dst[i + 4], floats1);
316        _mm_store_ps(&dst[i + 8], floats2);
317        _mm_store_ps(&dst[i + 12], floats3);
318    })
319}
320
321static void SDL_TARGETING("sse2") SDL_Convert_S32_to_F32_SSE2(float *dst, const Sint32 *src, int num_samples)
322{
323    // dst[i] = f32(src[i]) / f32(0x80000000)
324    const __m128 scaler = _mm_set1_ps(DIVBY2147483648);
325
326    LOG_DEBUG_AUDIO_CONVERT("S32", "F32 (using SSE2)");
327
328    CONVERT_16_FWD({
329        _mm_store_ss(&dst[i], _mm_mul_ss(_mm_cvt_si2ss(_mm_setzero_ps(), src[i]), scaler));
330    }, {
331        const __m128i ints0 = _mm_loadu_si128((const __m128i *)&src[i]);
332        const __m128i ints1 = _mm_loadu_si128((const __m128i *)&src[i + 4]);
333        const __m128i ints2 = _mm_loadu_si128((const __m128i *)&src[i + 8]);
334        const __m128i ints3 = _mm_loadu_si128((const __m128i *)&src[i + 12]);
335
336        const __m128 floats0 = _mm_mul_ps(_mm_cvtepi32_ps(ints0), scaler);
337        const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(ints1), scaler);
338        const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(ints2), scaler);
339        const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(ints3), scaler);
340
341        _mm_store_ps(&dst[i], floats0);
342        _mm_store_ps(&dst[i + 4], floats1);
343        _mm_store_ps(&dst[i + 8], floats2);
344        _mm_store_ps(&dst[i + 12], floats3);
345    })
346}
347
348static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S8_SSE2(Sint8 *dst, const float *src, int num_samples)
349{
350    /* 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0]
351     * 2) Extract the lowest 16 bits and clamp to [-128, 127]
352     * Overflow is correctly handled for inputs between roughly [-255.0, 255.0]
353     * dst[i] = clamp(i16(f2i(src[i] + 98304.0) & 0xFFFF), -128, 127) */
354    const __m128 offset = _mm_set1_ps(98304.0f);
355    const __m128i mask = _mm_set1_epi16(0xFF);
356
357    LOG_DEBUG_AUDIO_CONVERT("F32", "S8 (using SSE2)");
358
359    CONVERT_16_FWD({
360        const __m128i ints = _mm_castps_si128(_mm_add_ss(_mm_load_ss(&src[i]), offset));
361        dst[i] = (Sint8)(_mm_cvtsi128_si32(_mm_packs_epi16(ints, ints)) & 0xFF);
362    }, {
363        const __m128 floats0 = _mm_loadu_ps(&src[i]);
364        const __m128 floats1 = _mm_loadu_ps(&src[i + 4]);
365        const __m128 floats2 = _mm_loadu_ps(&src[i + 8]);
366        const __m128 floats3 = _mm_loadu_ps(&src[i + 12]);
367
368        const __m128i ints0 = _mm_castps_si128(_mm_add_ps(floats0, offset));
369        const __m128i ints1 = _mm_castps_si128(_mm_add_ps(floats1, offset));
370        const __m128i ints2 = _mm_castps_si128(_mm_add_ps(floats2, offset));
371        const __m128i ints3 = _mm_castps_si128(_mm_add_ps(floats3, offset));
372
373        const __m128i shorts0 = _mm_and_si128(_mm_packs_epi16(ints0, ints1), mask);
374        const __m128i shorts1 = _mm_and_si128(_mm_packs_epi16(ints2, ints3), mask);
375
376        const __m128i bytes = _mm_packus_epi16(shorts0, shorts1);
377
378        _mm_store_si128((__m128i *)&dst[i], bytes);
379    })
380}
381
382static void SDL_TARGETING("sse2") SDL_Convert_F32_to_U8_SSE2(Uint8 *dst, const float *src, int num_samples)
383{
384    /* 1) Shift the float range from [-1.0, 1.0] to [98304.0, 98306.0]
385     * 2) Extract the lowest 16 bits and clamp to [0, 255]
386     * Overflow is correctly handled for inputs between roughly [-254.0, 254.0]
387     * dst[i] = clamp(i16(f2i(src[i] + 98305.0) & 0xFFFF), 0, 255) */
388    const __m128 offset = _mm_set1_ps(98305.0f);
389    const __m128i mask = _mm_set1_epi16(0xFF);
390
391    LOG_DEBUG_AUDIO_CONVERT("F32", "U8 (using SSE2)");
392
393    CONVERT_16_FWD({
394        const __m128i ints = _mm_castps_si128(_mm_add_ss(_mm_load_ss(&src[i]), offset));
395        dst[i] = (Uint8)(_mm_cvtsi128_si32(_mm_packus_epi16(ints, ints)) & 0xFF);
396    }, {
397        const __m128 floats0 = _mm_loadu_ps(&src[i]);
398        const __m128 floats1 = _mm_loadu_ps(&src[i + 4]);
399        const __m128 floats2 = _mm_loadu_ps(&src[i + 8]);
400        const __m128 floats3 = _mm_loadu_ps(&src[i + 12]);
401
402        const __m128i ints0 = _mm_castps_si128(_mm_add_ps(floats0, offset));
403        const __m128i ints1 = _mm_castps_si128(_mm_add_ps(floats1, offset));
404        const __m128i ints2 = _mm_castps_si128(_mm_add_ps(floats2, offset));
405        const __m128i ints3 = _mm_castps_si128(_mm_add_ps(floats3, offset));
406
407        const __m128i shorts0 = _mm_and_si128(_mm_packus_epi16(ints0, ints1), mask);
408        const __m128i shorts1 = _mm_and_si128(_mm_packus_epi16(ints2, ints3), mask);
409
410        const __m128i bytes = _mm_packus_epi16(shorts0, shorts1);
411
412        _mm_store_si128((__m128i *)&dst[i], bytes);
413    })
414}
415
416static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S16_SSE2(Sint16 *dst, const float *src, int num_samples)
417{
418    /* 1) Shift the float range from [-1.0, 1.0] to [256.0, 258.0]
419     * 2) Shift the int range from [0x43800000, 0x43810000] to [-32768,32768]
420     * 3) Clamp to range [-32768,32767]
421     * Overflow is correctly handled for inputs between roughly [-257.0, +inf)
422     * dst[i] = clamp(f2i(src[i] + 257.0) - 0x43808000, -32768, 32767) */
423    const __m128 offset = _mm_set1_ps(257.0f);
424
425    LOG_DEBUG_AUDIO_CONVERT("F32", "S16 (using SSE2)");
426
427    CONVERT_16_FWD({
428        const __m128i ints = _mm_sub_epi32(_mm_castps_si128(_mm_add_ss(_mm_load_ss(&src[i]), offset)), _mm_castps_si128(offset));
429        dst[i] = (Sint16)(_mm_cvtsi128_si32(_mm_packs_epi32(ints, ints)) & 0xFFFF);
430    }, {
431        const __m128 floats0 = _mm_loadu_ps(&src[i]);
432        const __m128 floats1 = _mm_loadu_ps(&src[i + 4]);
433        const __m128 floats2 = _mm_loadu_ps(&src[i + 8]);
434        const __m128 floats3 = _mm_loadu_ps(&src[i + 12]);
435
436        const __m128i ints0 = _mm_sub_epi32(_mm_castps_si128(_mm_add_ps(floats0, offset)), _mm_castps_si128(offset));
437        const __m128i ints1 = _mm_sub_epi32(_mm_castps_si128(_mm_add_ps(floats1, offset)), _mm_castps_si128(offset));
438        const __m128i ints2 = _mm_sub_epi32(_mm_castps_si128(_mm_add_ps(floats2, offset)), _mm_castps_si128(offset));
439        const __m128i ints3 = _mm_sub_epi32(_mm_castps_si128(_mm_add_ps(floats3, offset)), _mm_castps_si128(offset));
440
441        const __m128i shorts0 = _mm_packs_epi32(ints0, ints1);
442        const __m128i shorts1 = _mm_packs_epi32(ints2, ints3);
443
444        _mm_store_si128((__m128i *)&dst[i], shorts0);
445        _mm_store_si128((__m128i *)&dst[i + 8], shorts1);
446    })
447}
448
449static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S32_SSE2(Sint32 *dst, const float *src, int num_samples)
450{
451    /* 1) Scale the float range from [-1.0, 1.0] to [-2147483648.0, 2147483648.0]
452     * 2) Convert to integer (values too small/large become 0x80000000 = -2147483648)
453     * 3) Fixup values which were too large (0x80000000 ^ 0xFFFFFFFF = 2147483647)
454     * dst[i] = i32(src[i] * 2147483648.0) ^ ((src[i] >= 2147483648.0) ? 0xFFFFFFFF : 0x00000000) */
455    const __m128 limit = _mm_set1_ps(2147483648.0f);
456
457    LOG_DEBUG_AUDIO_CONVERT("F32", "S32 (using SSE2)");
458
459    CONVERT_16_FWD({
460        const __m128 floats = _mm_load_ss(&src[i]);
461        const __m128 values = _mm_mul_ss(floats, limit);
462        const __m128i ints = _mm_xor_si128(_mm_cvttps_epi32(values), _mm_castps_si128(_mm_cmpge_ss(values, limit)));
463        dst[i] = (Sint32)_mm_cvtsi128_si32(ints);
464    }, {
465        const __m128 floats0 = _mm_loadu_ps(&src[i]);
466        const __m128 floats1 = _mm_loadu_ps(&src[i + 4]);
467        const __m128 floats2 = _mm_loadu_ps(&src[i + 8]);
468        const __m128 floats3 = _mm_loadu_ps(&src[i + 12]);
469
470        const __m128 values1 = _mm_mul_ps(floats0, limit);
471        const __m128 values2 = _mm_mul_ps(floats1, limit);
472        const __m128 values3 = _mm_mul_ps(floats2, limit);
473        const __m128 values4 = _mm_mul_ps(floats3, limit);
474
475        const __m128i ints0 = _mm_xor_si128(_mm_cvttps_epi32(values1), _mm_castps_si128(_mm_cmpge_ps(values1, limit)));
476        const __m128i ints1 = _mm_xor_si128(_mm_cvttps_epi32(values2), _mm_castps_si128(_mm_cmpge_ps(values2, limit)));
477        const __m128i ints2 = _mm_xor_si128(_mm_cvttps_epi32(values3), _mm_castps_si128(_mm_cmpge_ps(values3, limit)));
478        const __m128i ints3 = _mm_xor_si128(_mm_cvttps_epi32(values4), _mm_castps_si128(_mm_cmpge_ps(values4, limit)));
479
480        _mm_store_si128((__m128i *)&dst[i], ints0);
481        _mm_store_si128((__m128i *)&dst[i + 4], ints1);
482        _mm_store_si128((__m128i *)&dst[i + 8], ints2);
483        _mm_store_si128((__m128i *)&dst[i + 12], ints3);
484    })
485}
486#endif
487
488// FIXME: SDL doesn't have SSSE3 detection, so use the next one up
489#ifdef SDL_SSE4_1_INTRINSICS
490static void SDL_TARGETING("ssse3") SDL_Convert_Swap16_SSSE3(Uint16 *dst, const Uint16 *src, int num_samples)
491{
492    const __m128i shuffle = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
493
494    CONVERT_16_FWD({
495        dst[i] = SDL_Swap16(src[i]);
496    }, {
497        __m128i ints0 = _mm_loadu_si128((const __m128i *)&src[i]);
498        __m128i ints1 = _mm_loadu_si128((const __m128i *)&src[i + 8]);
499
500        ints0 = _mm_shuffle_epi8(ints0, shuffle);
501        ints1 = _mm_shuffle_epi8(ints1, shuffle);
502
503        _mm_store_si128((__m128i *)&dst[i], ints0);
504        _mm_store_si128((__m128i *)&dst[i + 8], ints1);
505    })
506}
507
508static void SDL_TARGETING("ssse3") SDL_Convert_Swap32_SSSE3(Uint32 *dst, const Uint32 *src, int num_samples)
509{
510    const __m128i shuffle = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
511
512    CONVERT_16_FWD({
513        dst[i] = SDL_Swap32(src[i]);
514    }, {
515        __m128i ints0 = _mm_loadu_si128((const __m128i *)&src[i]);
516        __m128i ints1 = _mm_loadu_si128((const __m128i *)&src[i + 4]);
517        __m128i ints2 = _mm_loadu_si128((const __m128i *)&src[i + 8]);
518        __m128i ints3 = _mm_loadu_si128((const __m128i *)&src[i + 12]);
519
520        ints0 = _mm_shuffle_epi8(ints0, shuffle);
521        ints1 = _mm_shuffle_epi8(ints1, shuffle);
522        ints2 = _mm_shuffle_epi8(ints2, shuffle);
523        ints3 = _mm_shuffle_epi8(ints3, shuffle);
524
525        _mm_store_si128((__m128i *)&dst[i], ints0);
526        _mm_store_si128((__m128i *)&dst[i + 4], ints1);
527        _mm_store_si128((__m128i *)&dst[i + 8], ints2);
528        _mm_store_si128((__m128i *)&dst[i + 12], ints3);
529    })
530}
531#endif
532
533#ifdef SDL_NEON_INTRINSICS
534
// C99 requires that any code modifying the floating-point environment be
// guarded by the STDC FENV_ACCESS pragma; without it the behavior is
// undefined. Compiler support for the pragma is poor, so it is requested
// per compiler below.
#if defined(__clang__)
// clang: only requested for version 12 or newer when targeting AArch64
#if (__clang_major__ >= 12) && defined(__aarch64__)
#pragma STDC FENV_ACCESS ON
#endif
#elif defined(_MSC_VER)
#pragma fenv_access (on)
#elif defined(__GNUC__)
// GCC does not support the pragma at all
#else
#pragma STDC FENV_ACCESS ON
#endif
551
552static void SDL_Convert_S8_to_F32_NEON(float *dst, const Sint8 *src, int num_samples)
553{
554    LOG_DEBUG_AUDIO_CONVERT("S8", "F32 (using NEON)");
555    fenv_t fenv;
556    feholdexcept(&fenv);
557
558    CONVERT_16_REV({
559        vst1_lane_f32(&dst[i], vcvt_n_f32_s32(vdup_n_s32(src[i]), 7), 0);
560    }, {
561        int8x16_t bytes = vld1q_s8(&src[i]);
562
563        int16x8_t shorts0 = vmovl_s8(vget_low_s8(bytes));
564        int16x8_t shorts1 = vmovl_s8(vget_high_s8(bytes));
565
566        float32x4_t floats0 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts0)), 7);
567        float32x4_t floats1 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts0)), 7);
568        float32x4_t floats2 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts1)), 7);
569        float32x4_t floats3 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts1)), 7);
570
571        vst1q_f32(&dst[i], floats0);
572        vst1q_f32(&dst[i + 4], floats1);
573        vst1q_f32(&dst[i + 8], floats2);
574        vst1q_f32(&dst[i + 12], floats3);
575    })
576    fesetenv(&fenv);
577}
578
579static void SDL_Convert_U8_to_F32_NEON(float *dst, const Uint8 *src, int num_samples)
580{
581    LOG_DEBUG_AUDIO_CONVERT("U8", "F32 (using NEON)");
582    fenv_t fenv;
583    feholdexcept(&fenv);
584
585    uint8x16_t flipper = vdupq_n_u8(0x80);
586
587    CONVERT_16_REV({
588        vst1_lane_f32(&dst[i], vcvt_n_f32_s32(vdup_n_s32((Sint8)(src[i] ^ 0x80)), 7), 0);
589    }, {
590        int8x16_t bytes = vreinterpretq_s8_u8(veorq_u8(vld1q_u8(&src[i]), flipper));
591
592        int16x8_t shorts0 = vmovl_s8(vget_low_s8(bytes));
593        int16x8_t shorts1 = vmovl_s8(vget_high_s8(bytes));
594
595        float32x4_t floats0 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts0)), 7);
596        float32x4_t floats1 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts0)), 7);
597        float32x4_t floats2 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts1)), 7);
598        float32x4_t floats3 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts1)), 7);
599
600        vst1q_f32(&dst[i], floats0);
601        vst1q_f32(&dst[i + 4], floats1);
602        vst1q_f32(&dst[i + 8], floats2);
603        vst1q_f32(&dst[i + 12], floats3);
604    })
605    fesetenv(&fenv);
606}
607
608static void SDL_Convert_S16_to_F32_NEON(float *dst, const Sint16 *src, int num_samples)
609{
610    LOG_DEBUG_AUDIO_CONVERT("S16", "F32 (using NEON)");
611    fenv_t fenv;
612    feholdexcept(&fenv);
613
614    CONVERT_16_REV({
615        vst1_lane_f32(&dst[i], vcvt_n_f32_s32(vdup_n_s32(src[i]), 15), 0);
616    }, {
617        int16x8_t shorts0 = vld1q_s16(&src[i]);
618        int16x8_t shorts1 = vld1q_s16(&src[i + 8]);
619
620        float32x4_t floats0 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts0)), 15);
621        float32x4_t floats1 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts0)), 15);
622        float32x4_t floats2 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts1)), 15);
623        float32x4_t floats3 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts1)), 15);
624
625        vst1q_f32(&dst[i], floats0);
626        vst1q_f32(&dst[i + 4], floats1);
627        vst1q_f32(&dst[i + 8], floats2);
628        vst1q_f32(&dst[i + 12], floats3);
629    })
630    fesetenv(&fenv);
631}
632
633static void SDL_Convert_S32_to_F32_NEON(float *dst, const Sint32 *src, int num_samples)
634{
635    LOG_DEBUG_AUDIO_CONVERT("S32", "F32 (using NEON)");
636    fenv_t fenv;
637    feholdexcept(&fenv);
638
639    CONVERT_16_FWD({
640        vst1_lane_f32(&dst[i], vcvt_n_f32_s32(vld1_dup_s32(&src[i]), 31), 0);
641    }, {
642        int32x4_t ints0 = vld1q_s32(&src[i]);
643        int32x4_t ints1 = vld1q_s32(&src[i + 4]);
644        int32x4_t ints2 = vld1q_s32(&src[i + 8]);
645        int32x4_t ints3 = vld1q_s32(&src[i + 12]);
646
647        float32x4_t floats0 = vcvtq_n_f32_s32(ints0, 31);
648        float32x4_t floats1 = vcvtq_n_f32_s32(ints1, 31);
649        float32x4_t floats2 = vcvtq_n_f32_s32(ints2, 31);
650        float32x4_t floats3 = vcvtq_n_f32_s32(ints3, 31);
651
652        vst1q_f32(&dst[i], floats0);
653        vst1q_f32(&dst[i + 4], floats1);
654        vst1q_f32(&dst[i + 8], floats2);
655        vst1q_f32(&dst[i + 12], floats3);
656    })
657    fesetenv(&fenv);
658}
659
660static void SDL_Convert_F32_to_S8_NEON(Sint8 *dst, const float *src, int num_samples)
661{
662    LOG_DEBUG_AUDIO_CONVERT("F32", "S8 (using NEON)");
663    fenv_t fenv;
664    feholdexcept(&fenv);
665
666    CONVERT_16_FWD({
667        vst1_lane_s8(&dst[i], vreinterpret_s8_s32(vcvt_n_s32_f32(vld1_dup_f32(&src[i]), 31)), 3);
668    }, {
669        float32x4_t floats0 = vld1q_f32(&src[i]);
670        float32x4_t floats1 = vld1q_f32(&src[i + 4]);
671        float32x4_t floats2 = vld1q_f32(&src[i + 8]);
672        float32x4_t floats3 = vld1q_f32(&src[i + 12]);
673
674        int32x4_t ints0 = vcvtq_n_s32_f32(floats0, 31);
675        int32x4_t ints1 = vcvtq_n_s32_f32(floats1, 31);
676        int32x4_t ints2 = vcvtq_n_s32_f32(floats2, 31);
677        int32x4_t ints3 = vcvtq_n_s32_f32(floats3, 31);
678
679        int16x8_t shorts0 = vcombine_s16(vshrn_n_s32(ints0, 16), vshrn_n_s32(ints1, 16));
680        int16x8_t shorts1 = vcombine_s16(vshrn_n_s32(ints2, 16), vshrn_n_s32(ints3, 16));
681
682        int8x16_t bytes = vcombine_s8(vshrn_n_s16(shorts0, 8), vshrn_n_s16(shorts1, 8));
683
684        vst1q_s8(&dst[i], bytes);
685    })
686    fesetenv(&fenv);
687}
688
689static void SDL_Convert_F32_to_U8_NEON(Uint8 *dst, const float *src, int num_samples)
690{
691    LOG_DEBUG_AUDIO_CONVERT("F32", "U8 (using NEON)");
692    fenv_t fenv;
693    feholdexcept(&fenv);
694
695    uint8x16_t flipper = vdupq_n_u8(0x80);
696
697    CONVERT_16_FWD({
698        vst1_lane_u8(&dst[i],
699            veor_u8(vreinterpret_u8_s32(vcvt_n_s32_f32(vld1_dup_f32(&src[i]), 31)),
700                vget_low_u8(flipper)), 3);
701    }, {
702        float32x4_t floats0 = vld1q_f32(&src[i]);
703        float32x4_t floats1 = vld1q_f32(&src[i + 4]);
704        float32x4_t floats2 = vld1q_f32(&src[i + 8]);
705        float32x4_t floats3 = vld1q_f32(&src[i + 12]);
706
707        int32x4_t ints0 = vcvtq_n_s32_f32(floats0, 31);
708        int32x4_t ints1 = vcvtq_n_s32_f32(floats1, 31);
709        int32x4_t ints2 = vcvtq_n_s32_f32(floats2, 31);
710        int32x4_t ints3 = vcvtq_n_s32_f32(floats3, 31);
711
712        int16x8_t shorts0 = vcombine_s16(vshrn_n_s32(ints0, 16), vshrn_n_s32(ints1, 16));
713        int16x8_t shorts1 = vcombine_s16(vshrn_n_s32(ints2, 16), vshrn_n_s32(ints3, 16));
714
715        uint8x16_t bytes = veorq_u8(vreinterpretq_u8_s8(
716            vcombine_s8(vshrn_n_s16(shorts0, 8), vshrn_n_s16(shorts1, 8))),
717            flipper);
718
719        vst1q_u8(&dst[i], bytes);
720    })
721    fesetenv(&fenv);
722}
723
724static void SDL_Convert_F32_to_S16_NEON(Sint16 *dst, const float *src, int num_samples)
725{
726    LOG_DEBUG_AUDIO_CONVERT("F32", "S16 (using NEON)");
727    fenv_t fenv;
728    feholdexcept(&fenv);
729
730    CONVERT_16_FWD({
731        vst1_lane_s16(&dst[i], vreinterpret_s16_s32(vcvt_n_s32_f32(vld1_dup_f32(&src[i]), 31)), 1);
732    }, {
733        float32x4_t floats0 = vld1q_f32(&src[i]);
734        float32x4_t floats1 = vld1q_f32(&src[i + 4]);
735        float32x4_t floats2 = vld1q_f32(&src[i + 8]);
736        float32x4_t floats3 = vld1q_f32(&src[i + 12]);
737
738        int32x4_t ints0 = vcvtq_n_s32_f32(floats0, 31);
739        int32x4_t ints1 = vcvtq_n_s32_f32(floats1, 31);
740        int32x4_t ints2 = vcvtq_n_s32_f32(floats2, 31);
741        int32x4_t ints3 = vcvtq_n_s32_f32(floats3, 31);
742
743        int16x8_t shorts0 = vcombine_s16(vshrn_n_s32(ints0, 16), vshrn_n_s32(ints1, 16));
744        int16x8_t shorts1 = vcombine_s16(vshrn_n_s32(ints2, 16), vshrn_n_s32(ints3, 16));
745
746        vst1q_s16(&dst[i], shorts0);
747        vst1q_s16(&dst[i + 8], shorts1);
748    })
749    fesetenv(&fenv);
750}
751
752static void SDL_Convert_F32_to_S32_NEON(Sint32 *dst, const float *src, int num_samples)
753{
754    LOG_DEBUG_AUDIO_CONVERT("F32", "S32 (using NEON)");
755    fenv_t fenv;
756    feholdexcept(&fenv);
757
758    CONVERT_16_FWD({
759        vst1_lane_s32(&dst[i], vcvt_n_s32_f32(vld1_dup_f32(&src[i]), 31), 0);
760    }, {
761        float32x4_t floats0 = vld1q_f32(&src[i]);
762        float32x4_t floats1 = vld1q_f32(&src[i + 4]);
763        float32x4_t floats2 = vld1q_f32(&src[i + 8]);
764        float32x4_t floats3 = vld1q_f32(&src[i + 12]);
765
766        int32x4_t ints0 = vcvtq_n_s32_f32(floats0, 31);
767        int32x4_t ints1 = vcvtq_n_s32_f32(floats1, 31);
768        int32x4_t ints2 = vcvtq_n_s32_f32(floats2, 31);
769        int32x4_t ints3 = vcvtq_n_s32_f32(floats3, 31);
770
771        vst1q_s32(&dst[i], ints0);
772        vst1q_s32(&dst[i + 4], ints1);
773        vst1q_s32(&dst[i + 8], ints2);
774        vst1q_s32(&dst[i + 12], ints3);
775    })
776    fesetenv(&fenv);
777}
778
779static void SDL_Convert_Swap16_NEON(Uint16 *dst, const Uint16 *src, int num_samples)
780{
781    CONVERT_16_FWD({
782        dst[i] = SDL_Swap16(src[i]);
783    }, {
784        uint8x16_t ints0 = vld1q_u8((const Uint8 *)&src[i]);
785        uint8x16_t ints1 = vld1q_u8((const Uint8 *)&src[i + 8]);
786
787        ints0 = vrev16q_u8(ints0);
788        ints1 = vrev16q_u8(ints1);
789
790        vst1q_u8((Uint8 *)&dst[i], ints0);
791        vst1q_u8((Uint8 *)&dst[i + 8], ints1);
792    })
793}
794
795static void SDL_Convert_Swap32_NEON(Uint32 *dst, const Uint32 *src, int num_samples)
796{
797    CONVERT_16_FWD({
798        dst[i] = SDL_Swap32(src[i]);
799    }, {
800        uint8x16_t ints0 = vld1q_u8((const Uint8 *)&src[i]);
801        uint8x16_t ints1 = vld1q_u8((const Uint8 *)&src[i + 4]);
802        uint8x16_t ints2 = vld1q_u8((const Uint8 *)&src[i + 8]);
803        uint8x16_t ints3 = vld1q_u8((const Uint8 *)&src[i + 12]);
804
805        ints0 = vrev32q_u8(ints0);
806        ints1 = vrev32q_u8(ints1);
807        ints2 = vrev32q_u8(ints2);
808        ints3 = vrev32q_u8(ints3);
809
810        vst1q_u8((Uint8 *)&dst[i], ints0);
811        vst1q_u8((Uint8 *)&dst[i + 4], ints1);
812        vst1q_u8((Uint8 *)&dst[i + 8], ints2);
813        vst1q_u8((Uint8 *)&dst[i + 12], ints3);
814    })
815}
816
817#if defined(__clang__)
818#if __clang_major__ >= 12
819#if defined(__aarch64__)
820#pragma STDC FENV_ACCESS DEFAULT
821#endif
822#endif
823#elif defined(_MSC_VER)
824#pragma fenv_access (off)
825#elif defined(__GNUC__)
826//
827#else
828#pragma STDC FENV_ACCESS DEFAULT
829#endif
830
831#endif
832
833#undef CONVERT_16_FWD
834#undef CONVERT_16_REV
835
836// Function pointers set to a CPU-specific implementation.
837static void (*SDL_Convert_S8_to_F32)(float *dst, const Sint8 *src, int num_samples) = NULL;
838static void (*SDL_Convert_U8_to_F32)(float *dst, const Uint8 *src, int num_samples) = NULL;
839static void (*SDL_Convert_S16_to_F32)(float *dst, const Sint16 *src, int num_samples) = NULL;
840static void (*SDL_Convert_S32_to_F32)(float *dst, const Sint32 *src, int num_samples) = NULL;
841static void (*SDL_Convert_F32_to_S8)(Sint8 *dst, const float *src, int num_samples) = NULL;
842static void (*SDL_Convert_F32_to_U8)(Uint8 *dst, const float *src, int num_samples) = NULL;
843static void (*SDL_Convert_F32_to_S16)(Sint16 *dst, const float *src, int num_samples) = NULL;
844static void (*SDL_Convert_F32_to_S32)(Sint32 *dst, const float *src, int num_samples) = NULL;
845
846static void (*SDL_Convert_Swap16)(Uint16 *dst, const Uint16 *src, int num_samples) = NULL;
847static void (*SDL_Convert_Swap32)(Uint32 *dst, const Uint32 *src, int num_samples) = NULL;
848
849void ConvertAudioToFloat(float *dst, const void *src, int num_samples, SDL_AudioFormat src_fmt)
850{
851    switch (src_fmt) {
852        case SDL_AUDIO_S8:
853            SDL_Convert_S8_to_F32(dst, (const Sint8 *) src, num_samples);
854            break;
855
856        case SDL_AUDIO_U8:
857            SDL_Convert_U8_to_F32(dst, (const Uint8 *) src, num_samples);
858            break;
859
860        case SDL_AUDIO_S16:
861            SDL_Convert_S16_to_F32(dst, (const Sint16 *) src, num_samples);
862            break;
863
864        case SDL_AUDIO_S16 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
865            SDL_Convert_Swap16((Uint16 *)dst, (const Uint16 *)src, num_samples);
866            SDL_Convert_S16_to_F32(dst, (const Sint16 *) dst, num_samples);
867            break;
868
869        case SDL_AUDIO_S32:
870            SDL_Convert_S32_to_F32(dst, (const Sint32 *) src, num_samples);
871            break;
872
873        case SDL_AUDIO_S32 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
874            SDL_Convert_Swap32((Uint32 *)dst, (const Uint32 *)src, num_samples);
875            SDL_Convert_S32_to_F32(dst, (const Sint32 *) dst, num_samples);
876            break;
877
878        case SDL_AUDIO_F32 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
879            SDL_Convert_Swap32((Uint32 *)dst, (const Uint32 *)src, num_samples);
880            break;
881
882        default: SDL_assert(!"Unexpected audio format!"); break;
883    }
884}
885
886void ConvertAudioFromFloat(void *dst, const float *src, int num_samples, SDL_AudioFormat dst_fmt)
887{
888    switch (dst_fmt) {
889        case SDL_AUDIO_S8:
890            SDL_Convert_F32_to_S8((Sint8 *) dst, src, num_samples);
891            break;
892
893        case SDL_AUDIO_U8:
894            SDL_Convert_F32_to_U8((Uint8 *) dst, src, num_samples);
895            break;
896
897        case SDL_AUDIO_S16:
898            SDL_Convert_F32_to_S16((Sint16 *) dst, src, num_samples);
899            break;
900
901        case SDL_AUDIO_S16 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
902            SDL_Convert_F32_to_S16((Sint16 *) dst, src, num_samples);
903            SDL_Convert_Swap16((Uint16 *)dst, (const Uint16 *)dst, num_samples);
904            break;
905
906        case SDL_AUDIO_S32:
907            SDL_Convert_F32_to_S32((Sint32 *) dst, src, num_samples);
908            break;
909
910        case SDL_AUDIO_S32 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
911            SDL_Convert_F32_to_S32((Sint32 *) dst, src, num_samples);
912            SDL_Convert_Swap32((Uint32 *)dst, (const Uint32 *)dst, num_samples);
913            break;
914
915        case SDL_AUDIO_F32 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
916            SDL_Convert_Swap32((Uint32 *)dst, (const Uint32 *)src, num_samples);
917            break;
918
919        default: SDL_assert(!"Unexpected audio format!"); break;
920    }
921}
922
923void ConvertAudioSwapEndian(void *dst, const void *src, int num_samples, int bitsize)
924{
925    switch (bitsize) {
926        case 16: SDL_Convert_Swap16((Uint16 *)dst, (const Uint16 *)src, num_samples); break;
927        case 32: SDL_Convert_Swap32((Uint32 *)dst, (const Uint32 *)src, num_samples); break;
928        default: SDL_assert(!"Unexpected audio format!"); break;
929    }
930}
931
932void SDL_ChooseAudioConverters(void)
933{
934    static bool converters_chosen = false;
935    if (converters_chosen) {
936        return;
937    }
938
939#define SET_CONVERTER_FUNCS(fntype) \
940    SDL_Convert_Swap16 = SDL_Convert_Swap16_##fntype; \
941    SDL_Convert_Swap32 = SDL_Convert_Swap32_##fntype;
942
943#ifdef SDL_SSE4_1_INTRINSICS
944    if (SDL_HasSSE41()) {
945        SET_CONVERTER_FUNCS(SSSE3);
946    } else
947#endif
948#ifdef SDL_NEON_INTRINSICS
949    if (SDL_HasNEON()) {
950        SET_CONVERTER_FUNCS(NEON);
951    } else
952#endif
953    {
954        SET_CONVERTER_FUNCS(Scalar);
955    }
956
957#undef SET_CONVERTER_FUNCS
958
959#define SET_CONVERTER_FUNCS(fntype) \
960    SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
961    SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
962    SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \
963    SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \
964    SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \
965    SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
966    SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
967    SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
968
969#ifdef SDL_SSE2_INTRINSICS
970    if (SDL_HasSSE2()) {
971        SET_CONVERTER_FUNCS(SSE2);
972    } else
973#endif
974#ifdef SDL_NEON_INTRINSICS
975    if (SDL_HasNEON()) {
976        SET_CONVERTER_FUNCS(NEON);
977    } else
978#endif
979    {
980        SET_CONVERTER_FUNCS(Scalar);
981    }
982
983#undef SET_CONVERTER_FUNCS
984
985    converters_chosen = true;
986}
987
[FILE END]
(C) 2025 0x4248
(C) 2025 4248 Media and 4248 Systems, All part of 0x4248
See LICENCE files for more information. Not all files are by 0x4248; always check the licensing.