Atlas - SDL_audioresample.c

Atlas - SDL_audioresample.c
Home / ext / SDL / src / audio
Lines: 1 | Size: 22872 bytes
[Download] [Show on GitHub] [Search similar files] [Raw] [Raw (proxy)] 
[FILE BEGIN]
1/*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2025 Sam Lantinga <[email protected]>
4
5  This software is provided 'as-is', without any express or implied
6  warranty.  In no event will the authors be held liable for any damages
7  arising from the use of this software.
8
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12
13  1. The origin of this software must not be misrepresented; you must not
14     claim that you wrote the original software. If you use this software
15     in a product, an acknowledgment in the product documentation would be
16     appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18     misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20*/
21#include "SDL_internal.h"
22
23#include "SDL_sysaudio.h"
24
25#include "SDL_audioresample.h"
26
27// SDL's resampler uses a "bandlimited interpolation" algorithm:
28//     https://ccrma.stanford.edu/~jos/resample/
29
30// TODO: Support changing this at runtime?
// Number of sinc zero crossings covered by each wing of the filter.
#if defined(SDL_SSE_INTRINSICS) || defined(SDL_NEON_INTRINSICS)
// In <current year>, SSE is basically mandatory anyway
// We want RESAMPLER_SAMPLES_PER_FRAME to be a multiple of 4, to make SIMD easier
#define RESAMPLER_ZERO_CROSSINGS 6
#else
#define RESAMPLER_ZERO_CROSSINGS 5
#endif

// Number of input frames (filter taps) combined to produce one output frame: both wings of the filter.
#define RESAMPLER_SAMPLES_PER_FRAME (RESAMPLER_ZERO_CROSSINGS * 2)

// For a given srcpos, `srcpos + frame` are sampled, where `-RESAMPLER_ZERO_CROSSINGS < frame <= RESAMPLER_ZERO_CROSSINGS`.
// Note, when upsampling, it is also possible to start sampling from `srcpos = -1`.
#define RESAMPLER_MAX_PADDING_FRAMES (RESAMPLER_ZERO_CROSSINGS + 1)

// More bits gives more precision, at the cost of a larger table.
// The top RESAMPLER_BITS_PER_ZERO_CROSSING bits of the 32-bit source fraction select a row of the filter table;
// the remaining RESAMPLER_FILTER_INTERP_BITS bits become the `frac` at which that row's cubics are evaluated.
#define RESAMPLER_BITS_PER_ZERO_CROSSING    3
#define RESAMPLER_SAMPLES_PER_ZERO_CROSSING (1 << RESAMPLER_BITS_PER_ZERO_CROSSING)
#define RESAMPLER_FILTER_INTERP_BITS        (32 - RESAMPLER_BITS_PER_ZERO_CROSSING)
#define RESAMPLER_FILTER_INTERP_RANGE       (1 << RESAMPLER_FILTER_INTERP_BITS)
50
51// ResampleFrame is just a vector/matrix/matrix multiplication.
52// It performs cubic interpolation of the filter, then multiplies that with the input.
53// dst = [1, frac, frac^2, frac^3] * filter * src
54
// Cubic Polynomial
// As generated, v[0..3] are the coefficients of 1, frac, frac^2 and frac^3 for a single filter tap.
// On the SIMD paths the table is transposed in groups of four (see Transpose4x4), after which each
// Cubic instead holds one coefficient order for four consecutive taps.
typedef union Cubic
{
    float v[4];

#ifdef SDL_SSE_INTRINSICS
    // Aligned loads can be used directly as memory operands for mul/add
    __m128 v128;
#endif

#ifdef SDL_NEON_INTRINSICS
    float32x4_t v128;
#endif

} Cubic;
70
71static void ResampleFrame_Generic(const float *src, float *dst, const Cubic *filter, float frac, int chans)
72{
73    const float frac2 = frac * frac;
74    const float frac3 = frac * frac2;
75
76    int i, chan;
77    float scales[RESAMPLER_SAMPLES_PER_FRAME];
78
79    for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) {
80        scales[i] = filter->v[0] + (filter->v[1] * frac) + (filter->v[2] * frac2) + (filter->v[3] * frac3);
81    }
82
83    for (chan = 0; chan < chans; ++chan) {
84        float out = 0.0f;
85
86        for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i) {
87            out += src[i * chans + chan] * scales[i];
88        }
89
90        dst[chan] = out;
91    }
92}
93
94static void ResampleFrame_Mono(const float *src, float *dst, const Cubic *filter, float frac, int chans)
95{
96    const float frac2 = frac * frac;
97    const float frac3 = frac * frac2;
98
99    int i;
100    float out = 0.0f;
101
102    for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) {
103        // Interpolate between the nearest two filters
104        const float scale = filter->v[0] + (filter->v[1] * frac) + (filter->v[2] * frac2) + (filter->v[3] * frac3);
105
106        out += src[i] * scale;
107    }
108
109    dst[0] = out;
110}
111
112static void ResampleFrame_Stereo(const float *src, float *dst, const Cubic *filter, float frac, int chans)
113{
114    const float frac2 = frac * frac;
115    const float frac3 = frac * frac2;
116
117    int i;
118    float out0 = 0.0f;
119    float out1 = 0.0f;
120
121    for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) {
122        // Interpolate between the nearest two filters
123        const float scale = filter->v[0] + (filter->v[1] * frac) + (filter->v[2] * frac2) + (filter->v[3] * frac3);
124
125        out0 += src[i * 2 + 0] * scale;
126        out1 += src[i * 2 + 1] * scale;
127    }
128
129    dst[0] = out0;
130    dst[1] = out1;
131}
132
#ifdef SDL_SSE_INTRINSICS
#define sdl_madd_ps(a, b, c) _mm_add_ps(a, _mm_mul_ps(b, c)) // Not-so-fused multiply-add

// SSE resampling of one output frame, for any channel count.
//   src:    12 interleaved input frames of `chans` floats each (read with unaligned loads)
//   dst:    receives `chans` output samples
//   filter: 12 Cubic entries, already transposed in groups of four, so that each group stores the
//           constant, linear, quadratic and cubic coefficients of four consecutive taps as four vectors
//           (read with aligned loads)
//   frac:   position at which the per-tap cubics are evaluated
// Stereo and mono have dedicated paths; other channel counts are handled four channels at a time,
// followed by a one-channel-at-a-time loop for whatever is left over.
static void SDL_TARGETING("sse") ResampleFrame_Generic_SSE(const float *src, float *dst, const Cubic *filter, float frac, int chans)
{
#if RESAMPLER_SAMPLES_PER_FRAME != 12
#error Invalid samples per frame
#endif

    // Evaluated tap weights: f0 = taps 0-3, f1 = taps 4-7, f2 = taps 8-11
    __m128 f0, f1, f2;

    {
        const __m128 frac1 = _mm_set1_ps(frac);
        const __m128 frac2 = _mm_mul_ps(frac1, frac1);
        const __m128 frac3 = _mm_mul_ps(frac1, frac2);

// Transposed in SetupAudioResampler
// Explicitly use _mm_load_ps to workaround ICE in GCC 4.9.4 accessing Cubic.v128
// X evaluates four cubics at once (c0 + c1*frac + c2*frac^2 + c3*frac^3) and advances `filter` by four entries
#define X(out)                                               \
    out = _mm_load_ps(filter[0].v);                          \
    out = sdl_madd_ps(out, frac1, _mm_load_ps(filter[1].v)); \
    out = sdl_madd_ps(out, frac2, _mm_load_ps(filter[2].v)); \
    out = sdl_madd_ps(out, frac3, _mm_load_ps(filter[3].v)); \
    filter += 4

        X(f0);
        X(f1);
        X(f2);

#undef X
    }

    if (chans == 2) {
        // Each load covers two interleaved stereo frames (L, R, L, R), so unpacklo/unpackhi
        // turn four weights (w0, w1, w2, w3) into (w0, w0, w1, w1) and (w2, w2, w3, w3).
        // Duplicate each of the filter elements and multiply by the input
        // Use two accumulators to improve throughput
        __m128 out0 = _mm_mul_ps(_mm_loadu_ps(src + 0), _mm_unpacklo_ps(f0, f0));
        __m128 out1 = _mm_mul_ps(_mm_loadu_ps(src + 4), _mm_unpackhi_ps(f0, f0));
        out0 = sdl_madd_ps(out0, _mm_loadu_ps(src + 8), _mm_unpacklo_ps(f1, f1));
        out1 = sdl_madd_ps(out1, _mm_loadu_ps(src + 12), _mm_unpackhi_ps(f1, f1));
        out0 = sdl_madd_ps(out0, _mm_loadu_ps(src + 16), _mm_unpacklo_ps(f2, f2));
        out1 = sdl_madd_ps(out1, _mm_loadu_ps(src + 20), _mm_unpackhi_ps(f2, f2));

        // Add the accumulators together
        __m128 out = _mm_add_ps(out0, out1);

        // Add the lower and upper pairs together
        out = _mm_add_ps(out, _mm_movehl_ps(out, out));

        // Store the result
        // (only the low two lanes, i.e. one left and one right sample, are written)
        _mm_storel_pi((__m64 *)dst, out);
        return;
    }

    if (chans == 1) {
        // Multiply the filter by the input
        __m128 out = _mm_mul_ps(f0, _mm_loadu_ps(src + 0));
        out = sdl_madd_ps(out, f1, _mm_loadu_ps(src + 4));
        out = sdl_madd_ps(out, f2, _mm_loadu_ps(src + 8));

        // Horizontal sum
        // Swap neighbouring lanes and add to get pair sums, then add the upper pair sum into lane 0
        __m128 shuf = _mm_shuffle_ps(out, out, _MM_SHUFFLE(2, 3, 0, 1));
        out = _mm_add_ps(out, shuf);
        out = _mm_add_ss(out, _mm_movehl_ps(shuf, out));

        _mm_store_ss(dst, out);
        return;
    }

    int chan = 0;

    // Process 4 channels at once
    for (; chan + 4 <= chans; chan += 4) {
        const float *in = &src[chan];
        __m128 out0 = _mm_setzero_ps();
        __m128 out1 = _mm_setzero_ps();

// X loads four adjacent channels of one input frame, scales them by lane `b` of weight vector `a`
// (broadcast to all lanes), accumulates into `out`, and steps `in` to the next frame.
#define X(a, b, out)                                                                         \
    out = sdl_madd_ps(out, _mm_loadu_ps(in), _mm_shuffle_ps(a, a, _MM_SHUFFLE(b, b, b, b))); \
    in += chans

// Y applies the four taps of one weight vector, alternating between the two accumulators.
#define Y(a)       \
    X(a, 0, out0); \
    X(a, 1, out1); \
    X(a, 2, out0); \
    X(a, 3, out1)

        Y(f0);
        Y(f1);
        Y(f2);

#undef X
#undef Y

        // Add the accumulators together
        __m128 out = _mm_add_ps(out0, out1);

        _mm_storeu_ps(&dst[chan], out);
    }

    // Process the remaining channels one at a time.
    // Channel counts 1,2,4,8 are already handled above, leaving 3,5,6,7 to deal with (looping 3,1,2,3 times).
    // Without vgatherdps (AVX2), this gets quite messy.
    for (; chan < chans; ++chan) {
        const float *in = &src[chan];
        __m128 v0, v1, v2;

// X gathers this channel's sample from four consecutive input frames into one vector
// (two scalar loads per half, joined with movelh) and steps `in` past those four frames.
#define X(x)                                                                         \
    x = _mm_unpacklo_ps(_mm_load_ss(in), _mm_load_ss(in + chans));                   \
    in += chans + chans;                                                             \
    x = _mm_movelh_ps(x, _mm_unpacklo_ps(_mm_load_ss(in), _mm_load_ss(in + chans))); \
    in += chans + chans

        X(v0);
        X(v1);
        X(v2);

#undef X

        __m128 out = _mm_mul_ps(f0, v0);
        out = sdl_madd_ps(out, f1, v1);
        out = sdl_madd_ps(out, f2, v2);

        // Horizontal sum
        __m128 shuf = _mm_shuffle_ps(out, out, _MM_SHUFFLE(2, 3, 0, 1));
        out = _mm_add_ps(out, shuf);
        out = _mm_add_ss(out, _mm_movehl_ps(shuf, out));

        _mm_store_ss(&dst[chan], out);
    }
}

#undef sdl_madd_ps
#endif
266
#ifdef SDL_NEON_INTRINSICS
// NEON resampling of one output frame, for any channel count.
//   src:    12 interleaved input frames of `chans` floats each
//   dst:    receives `chans` output samples
//   filter: 12 Cubic entries, already transposed in groups of four, so that each group stores the
//           constant, linear, quadratic and cubic coefficients of four consecutive taps as four vectors
//   frac:   position at which the per-tap cubics are evaluated
// Stereo and mono have dedicated paths; other channel counts are handled four channels at a time,
// followed by a one-channel-at-a-time loop for whatever is left over.
static void ResampleFrame_Generic_NEON(const float *src, float *dst, const Cubic *filter, float frac, int chans)
{
#if RESAMPLER_SAMPLES_PER_FRAME != 12
#error Invalid samples per frame
#endif

    // Evaluated tap weights: f0 = taps 0-3, f1 = taps 4-7, f2 = taps 8-11
    float32x4_t f0, f1, f2;

    {
        const float32x4_t frac1 = vdupq_n_f32(frac);
        const float32x4_t frac2 = vmulq_f32(frac1, frac1);
        const float32x4_t frac3 = vmulq_f32(frac1, frac2);

// Transposed in SetupAudioResampler
// X evaluates four cubics at once (c0 + c1*frac + c2*frac^2 + c3*frac^3) and advances `filter` by four entries
#define X(out)                                                                                                                  \
    out = vmlaq_f32(vmlaq_f32(vmlaq_f32(filter[0].v128, filter[1].v128, frac1), filter[2].v128, frac2), filter[3].v128, frac3); \
    filter += 4

        X(f0);
        X(f1);
        X(f2);

#undef X
    }

    if (chans == 2) {
        // Zipping a weight vector with itself yields (w0, w0, w1, w1) and (w2, w2, w3, w3),
        // which lines up with interleaved (L, R, L, R) input.
        float32x4x2_t g0 = vzipq_f32(f0, f0);
        float32x4x2_t g1 = vzipq_f32(f1, f1);
        float32x4x2_t g2 = vzipq_f32(f2, f2);

        // Duplicate each of the filter elements and multiply by the input
        // Use two accumulators to improve throughput
        float32x4_t out0 = vmulq_f32(vld1q_f32(src + 0), g0.val[0]);
        float32x4_t out1 = vmulq_f32(vld1q_f32(src + 4), g0.val[1]);
        out0 = vmlaq_f32(out0, vld1q_f32(src + 8), g1.val[0]);
        out1 = vmlaq_f32(out1, vld1q_f32(src + 12), g1.val[1]);
        out0 = vmlaq_f32(out0, vld1q_f32(src + 16), g2.val[0]);
        out1 = vmlaq_f32(out1, vld1q_f32(src + 20), g2.val[1]);

        // Add the accumulators together
        out0 = vaddq_f32(out0, out1);

        // Add the lower and upper pairs together
        float32x2_t out = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));

        // Store the result
        vst1_f32(dst, out);
        return;
    }

    if (chans == 1) {
        // Multiply the filter by the input
        float32x4_t out = vmulq_f32(f0, vld1q_f32(src + 0));
        out = vmlaq_f32(out, f1, vld1q_f32(src + 4));
        out = vmlaq_f32(out, f2, vld1q_f32(src + 8));

        // Horizontal sum
        float32x2_t sum = vadd_f32(vget_low_f32(out), vget_high_f32(out));
        sum = vpadd_f32(sum, sum);

        vst1_lane_f32(dst, sum, 0);
        return;
    }

    int chan = 0;

    // Process 4 channels at once
    for (; chan + 4 <= chans; chan += 4) {
        const float *in = &src[chan];
        float32x4_t out0 = vdupq_n_f32(0);
        float32x4_t out1 = vdupq_n_f32(0);

// X loads four adjacent channels of one input frame, scales them by lane `b` of the weight pair `a`
// (broadcast to all lanes), accumulates into `out`, and steps `in` to the next frame.
#define X(a, b, out)                                           \
    out = vmlaq_f32(out, vld1q_f32(in), vdupq_lane_f32(a, b)); \
    in += chans

// Y applies the four taps of one weight vector, alternating between the two accumulators.
#define Y(a)                      \
    X(vget_low_f32(a), 0, out0);  \
    X(vget_low_f32(a), 1, out1);  \
    X(vget_high_f32(a), 0, out0); \
    X(vget_high_f32(a), 1, out1)

        Y(f0);
        Y(f1);
        Y(f2);

#undef X
#undef Y

        // Add the accumulators together
        float32x4_t out = vaddq_f32(out0, out1);

        vst1q_f32(&dst[chan], out);
    }

    // Process the remaining channels one at a time.
    // Channel counts 1,2,4,8 are already handled above, leaving 3,5,6,7 to deal with (looping 3,1,2,3 times).
    for (; chan < chans; ++chan) {
        const float *in = &src[chan];
        float32x4_t v0, v1, v2;

// X gathers this channel's sample from four consecutive input frames into one vector:
// the first load fills every lane, then lanes 1-3 are overwritten with the following frames.
#define X(x)                      \
    x = vld1q_dup_f32(in);        \
    in += chans;                  \
    x = vld1q_lane_f32(in, x, 1); \
    in += chans;                  \
    x = vld1q_lane_f32(in, x, 2); \
    in += chans;                  \
    x = vld1q_lane_f32(in, x, 3); \
    in += chans

        X(v0);
        X(v1);
        X(v2);

#undef X

        float32x4_t out = vmulq_f32(f0, v0);
        out = vmlaq_f32(out, f1, v1);
        out = vmlaq_f32(out, f2, v2);

        // Horizontal sum
        float32x2_t sum = vadd_f32(vget_low_f32(out), vget_high_f32(out));
        sum = vpadd_f32(sum, sum);

        vst1_lane_f32(&dst[chan], sum, 0);
    }
}
#endif
397
398// Calculate the cubic equation which passes through all four points.
399// https://en.wikipedia.org/wiki/Ordinary_least_squares
400// https://en.wikipedia.org/wiki/Polynomial_regression
401static void CubicLeastSquares(Cubic *coeffs, float y0, float y1, float y2, float y3)
402{
403    // Least squares matrix for xs = [0, 1/3, 2/3, 1]
404    // [  1.0   0.0   0.0  0.0 ]
405    // [ -5.5   9.0  -4.5  1.0 ]
406    // [  9.0 -22.5  18.0 -4.5 ]
407    // [ -4.5  13.5 -13.5  4.5 ]
408
409    coeffs->v[0] = y0;
410    coeffs->v[1] = -5.5f * y0 + 9.0f * y1 - 4.5f * y2 + y3;
411    coeffs->v[2] = 9.0f * y0 - 22.5f * y1 + 18.0f * y2 - 4.5f * y3;
412    coeffs->v[3] = -4.5f * y0 + 13.5f * y1 - 13.5f * y2 + 4.5f * y3;
413}
414
415// Zeroth-order modified Bessel function of the first kind
416// https://mathworld.wolfram.com/ModifiedBesselFunctionoftheFirstKind.html
417static float BesselI0(float x)
418{
419    float sum = 0.0f;
420    float i = 1.0f;
421    float t = 1.0f;
422    x *= x * 0.25f;
423
424    while (t >= sum * SDL_FLT_EPSILON) {
425        sum += t;
426        t *= x / (i * i);
427        ++i;
428    }
429
430    return sum;
431}
432
433// Pre-calculate 180 degrees of sin(pi * x) / pi
434// The speedup from this isn't huge, but it also avoids precision issues.
435// If sinf isn't available, SDL_sinf just calls SDL_sin.
436// Know what SDL_sin(SDL_PI_F) equals? Not quite zero.
437static void SincTable(float *table, int len)
438{
439    int i;
440
441    for (i = 0; i < len; ++i) {
442        table[i] = SDL_sinf(i * (SDL_PI_F / len)) / SDL_PI_F;
443    }
444}
445
// Calculate Sinc(x/y) = sin(pi * x/y) / (pi * x/y), using a lookup table
//   table: `y` samples of sin(pi * t) / pi covering half a period (as produced by SincTable)
//   x:     numerator of the position; expected to be >= 0
//   y:     number of table entries per half period; must be > 0
// Returns 1 for x == 0, which is the limit of sinc at the origin.
static float Sinc(const float *table, int x, int y)
{
    // The table formula would compute 0 / 0 here and return NaN.
    if (x == 0) {
        return 1.0f;
    }

    // The table holds one half period; each following half period repeats it with the sign flipped.
    float s = table[x % y];
    s = ((x / y) & 1) ? -s : s;

    // The table stores sin(pi * x/y) / pi, so scale by y/x to finish the division by (pi * x/y).
    return (s * y) / x;
}
453
454static Cubic ResamplerFilter[RESAMPLER_SAMPLES_PER_ZERO_CROSSING][RESAMPLER_SAMPLES_PER_FRAME];
455
456static void GenerateResamplerFilter(void)
457{
458    enum
459    {
460        // Generate samples at 3x the target resolution, so that we have samples at [0, 1/3, 2/3, 1] of each position
461        TABLE_SAMPLES_PER_ZERO_CROSSING = RESAMPLER_SAMPLES_PER_ZERO_CROSSING * 3,
462        TABLE_SIZE = RESAMPLER_ZERO_CROSSINGS * TABLE_SAMPLES_PER_ZERO_CROSSING,
463    };
464
465    // if dB > 50, beta=(0.1102 * (dB - 8.7)), according to Matlab.
466    const float dB = 80.0f;
467    const float beta = 0.1102f * (dB - 8.7f);
468    const float bessel_beta = BesselI0(beta);
469    const float lensqr = TABLE_SIZE * TABLE_SIZE;
470
471    int i, j;
472
473    float sinc[TABLE_SAMPLES_PER_ZERO_CROSSING];
474    SincTable(sinc, TABLE_SAMPLES_PER_ZERO_CROSSING);
475
476    // Generate one wing of the filter
477    // https://en.wikipedia.org/wiki/Kaiser_window
478    // https://en.wikipedia.org/wiki/Whittaker%E2%80%93Shannon_interpolation_formula
479    float filter[TABLE_SIZE + 1];
480    filter[0] = 1.0f;
481
482    for (i = 1; i <= TABLE_SIZE; ++i) {
483        float b = BesselI0(beta * SDL_sqrtf((lensqr - (i * i)) / lensqr)) / bessel_beta;
484        float s = Sinc(sinc, i, TABLE_SAMPLES_PER_ZERO_CROSSING);
485        filter[i] = b * s;
486    }
487
488    // Generate the coefficients for each point
489    // When interpolating, the fraction represents how far we are between input samples,
490    // so we need to align the filter by "moving" it to the right.
491    //
492    // For the left wing, this means interpolating "forwards" (away from the center)
493    // For the right wing, this means interpolating "backwards" (towards the center)
494    //
495    // The center of the filter is at the end of the left wing (RESAMPLER_ZERO_CROSSINGS - 1)
496    // The left wing is the filter, but reversed
497    // The right wing is the filter, but offset by 1
498    //
499    // Since the right wing is offset by 1, this just means we interpolate backwards
500    // between the same points, instead of forwards
501    // interp(p[n], p[n+1], t) = interp(p[n+1], p[n+1-1], 1 - t) = interp(p[n+1], p[n], 1 - t)
502    for (i = 0; i < RESAMPLER_SAMPLES_PER_ZERO_CROSSING; ++i) {
503        for (j = 0; j < RESAMPLER_ZERO_CROSSINGS; ++j) {
504            const float *ys = &filter[((j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING) + i) * 3];
505
506            Cubic *fwd = &ResamplerFilter[i][RESAMPLER_ZERO_CROSSINGS - j - 1];
507            Cubic *rev = &ResamplerFilter[RESAMPLER_SAMPLES_PER_ZERO_CROSSING - i - 1][RESAMPLER_ZERO_CROSSINGS + j];
508
509            // Calculate the cubic equation of the 4 points
510            CubicLeastSquares(fwd, ys[0], ys[1], ys[2], ys[3]);
511            CubicLeastSquares(rev, ys[3], ys[2], ys[1], ys[0]);
512        }
513    }
514}
515
// Signature shared by every per-frame resampling kernel:
// reads a window of interleaved input frames from `src` and writes `chans` samples to `dst`.
typedef void (*ResampleFrameFunc)(const float *src, float *dst, const Cubic *filter, float frac, int chans);
// Kernel dispatch table, indexed by (channel count - 1); filled in by SetupAudioResampler.
static ResampleFrameFunc ResampleFrame[8];
518
519// Transpose 4x4 floats
520static void Transpose4x4(Cubic *data)
521{
522    int i, j;
523
524    Cubic temp[4] = { data[0], data[1], data[2], data[3] };
525
526    for (i = 0; i < 4; ++i) {
527        for (j = 0; j < 4; ++j) {
528            data[i].v[j] = temp[j].v[i];
529        }
530    }
531}
532
533static void SetupAudioResampler(void)
534{
535    int i, j;
536    bool transpose = false;
537
538    GenerateResamplerFilter();
539
540#ifdef SDL_SSE_INTRINSICS
541    if (SDL_HasSSE()) {
542        for (i = 0; i < 8; ++i) {
543            ResampleFrame[i] = ResampleFrame_Generic_SSE;
544        }
545        transpose = true;
546    } else
547#endif
548#ifdef SDL_NEON_INTRINSICS
549    if (SDL_HasNEON()) {
550        for (i = 0; i < 8; ++i) {
551            ResampleFrame[i] = ResampleFrame_Generic_NEON;
552        }
553        transpose = true;
554    } else
555#endif
556    {
557        for (i = 0; i < 8; ++i) {
558            ResampleFrame[i] = ResampleFrame_Generic;
559        }
560
561        ResampleFrame[0] = ResampleFrame_Mono;
562        ResampleFrame[1] = ResampleFrame_Stereo;
563    }
564
565    if (transpose) {
566        // Transpose each set of 4 coefficients, to reduce work when resampling
567        for (i = 0; i < RESAMPLER_SAMPLES_PER_ZERO_CROSSING; ++i) {
568            for (j = 0; j + 4 <= RESAMPLER_SAMPLES_PER_FRAME; j += 4) {
569                Transpose4x4(&ResamplerFilter[i][j]);
570            }
571        }
572    }
573}
574
575void SDL_SetupAudioResampler(void)
576{
577    static SDL_InitState init;
578
579    if (SDL_ShouldInit(&init)) {
580        SetupAudioResampler();
581        SDL_SetInitialized(&init, true);
582    }
583}
584
585Sint64 SDL_GetResampleRate(int src_rate, int dst_rate)
586{
587    SDL_assert(src_rate > 0);
588    SDL_assert(dst_rate > 0);
589
590    Sint64 numerator = (Sint64)src_rate << 32;
591    Sint64 denominator = (Sint64)dst_rate;
592
593    // Generally it's expected that `dst_frames = (src_frames * dst_rate) / src_rate`
594    // To match this as closely as possible without infinite precision, always round up the resample rate.
595    // For example, without rounding up, a sample ratio of 2:3 would have `sample_rate = 0xAAAAAAAA`
596    // After 3 frames, the position would be 0x1.FFFFFFFE, meaning we haven't fully consumed the second input frame.
597    // By rounding up to 0xAAAAAAAB, we would instead reach 0x2.00000001, fulling consuming the second frame.
598    // Technically you could say this is kicking the can 0x100000000 steps down the road, but I'm fine with that :)
599    // sample_rate = div_ceil(numerator, denominator)
600    Sint64 sample_rate = ((numerator - 1) / denominator) + 1;
601
602    SDL_assert(sample_rate > 0);
603
604    return sample_rate;
605}
606
607int SDL_GetResamplerHistoryFrames(void)
608{
609    // Even if we aren't currently resampling, make sure to keep enough history in case we need to later.
610
611    return RESAMPLER_MAX_PADDING_FRAMES;
612}
613
614int SDL_GetResamplerPaddingFrames(Sint64 resample_rate)
615{
616    // This must always be <= SDL_GetResamplerHistoryFrames()
617
618    return resample_rate ? RESAMPLER_MAX_PADDING_FRAMES : 0;
619}
620
621// These are not general purpose. They do not check for all possible underflow/overflow
622SDL_FORCE_INLINE bool ResamplerAdd(Sint64 a, Sint64 b, Sint64 *ret)
623{
624    if ((b > 0) && (a > SDL_MAX_SINT64 - b)) {
625        return false;
626    }
627
628    *ret = a + b;
629    return true;
630}
631
632SDL_FORCE_INLINE bool ResamplerMul(Sint64 a, Sint64 b, Sint64 *ret)
633{
634    if ((b > 0) && (a > SDL_MAX_SINT64 / b)) {
635        return false;
636    }
637
638    *ret = a * b;
639    return true;
640}
641
642Sint64 SDL_GetResamplerInputFrames(Sint64 output_frames, Sint64 resample_rate, Sint64 resample_offset)
643{
644    // Calculate the index of the last input frame, then add 1.
645    // ((((output_frames - 1) * resample_rate) + resample_offset) >> 32) + 1
646
647    Sint64 output_offset;
648    if (!ResamplerMul(output_frames, resample_rate, &output_offset) ||
649        !ResamplerAdd(output_offset, -resample_rate + resample_offset + 0x100000000, &output_offset)) {
650        output_offset = SDL_MAX_SINT64;
651    }
652
653    Sint64 input_frames = (Sint64)(Sint32)(output_offset >> 32);
654    input_frames = SDL_max(input_frames, 0);
655
656    return input_frames;
657}
658
659Sint64 SDL_GetResamplerOutputFrames(Sint64 input_frames, Sint64 resample_rate, Sint64 *inout_resample_offset)
660{
661    Sint64 resample_offset = *inout_resample_offset;
662
663    // input_offset = (input_frames << 32) - resample_offset;
664    Sint64 input_offset;
665    if (!ResamplerMul(input_frames, 0x100000000, &input_offset) ||
666        !ResamplerAdd(input_offset, -resample_offset, &input_offset)) {
667        input_offset = SDL_MAX_SINT64;
668    }
669
670    // output_frames = div_ceil(input_offset, resample_rate)
671    Sint64 output_frames = (input_offset > 0) ? ((input_offset - 1) / resample_rate) + 1 : 0;
672
673    *inout_resample_offset = (output_frames * resample_rate) - input_offset;
674
675    return output_frames;
676}
677
678void SDL_ResampleAudio(int chans, const float *src, int inframes, float *dst, int outframes,
679                       Sint64 resample_rate, Sint64 *inout_resample_offset)
680{
681    int i;
682    Sint64 srcpos = *inout_resample_offset;
683    ResampleFrameFunc resample_frame = ResampleFrame[chans - 1];
684
685    SDL_assert(resample_rate > 0);
686
687    src -= (RESAMPLER_ZERO_CROSSINGS - 1) * chans;
688
689    for (i = 0; i < outframes; ++i) {
690        int srcindex = (int)(Sint32)(srcpos >> 32);
691        Uint32 srcfraction = (Uint32)(srcpos & 0xFFFFFFFF);
692        srcpos += resample_rate;
693
694        SDL_assert(srcindex >= -1 && srcindex < inframes);
695
696        const Cubic *filter = ResamplerFilter[srcfraction >> RESAMPLER_FILTER_INTERP_BITS];
697        const float frac = (float)(srcfraction & (RESAMPLER_FILTER_INTERP_RANGE - 1)) * (1.0f / RESAMPLER_FILTER_INTERP_RANGE);
698
699        const float *frame = &src[srcindex * chans];
700        resample_frame(frame, dst, filter, frac, chans);
701
702        dst += chans;
703    }
704
705    *inout_resample_offset = srcpos - ((Sint64)inframes << 32);
706}
707
[FILE END]
(C) 2025 0x4248
(C) 2025 4248 Media and 4248 Systems, All part of 0x4248
See LICENCE files for more information. Not all files are by 0x4248 always check Licencing.