Atlas - SDL_audioresample.c

Home / ext / SDL / src / audio Lines: 1 | Size: 22872 bytes [Download] [Show on GitHub] [Search similar files] [Raw] [Raw (proxy)]
[FILE BEGIN]
1/* 2 Simple DirectMedia Layer 3 Copyright (C) 1997-2025 Sam Lantinga <[email protected]> 4 5 This software is provided 'as-is', without any express or implied 6 warranty. In no event will the authors be held liable for any damages 7 arising from the use of this software. 8 9 Permission is granted to anyone to use this software for any purpose, 10 including commercial applications, and to alter it and redistribute it 11 freely, subject to the following restrictions: 12 13 1. The origin of this software must not be misrepresented; you must not 14 claim that you wrote the original software. If you use this software 15 in a product, an acknowledgment in the product documentation would be 16 appreciated but is not required. 17 2. Altered source versions must be plainly marked as such, and must not be 18 misrepresented as being the original software. 19 3. This notice may not be removed or altered from any source distribution. 20*/ 21#include "SDL_internal.h" 22 23#include "SDL_sysaudio.h" 24 25#include "SDL_audioresample.h" 26 27// SDL's resampler uses a "bandlimited interpolation" algorithm: 28// https://ccrma.stanford.edu/~jos/resample/ 29 30// TODO: Support changing this at runtime? 31#if defined(SDL_SSE_INTRINSICS) || defined(SDL_NEON_INTRINSICS) 32// In <current year>, SSE is basically mandatory anyway 33// We want RESAMPLER_SAMPLES_PER_FRAME to be a multiple of 4, to make SIMD easier 34#define RESAMPLER_ZERO_CROSSINGS 6 35#else 36#define RESAMPLER_ZERO_CROSSINGS 5 37#endif 38 39#define RESAMPLER_SAMPLES_PER_FRAME (RESAMPLER_ZERO_CROSSINGS * 2) 40 41// For a given srcpos, `srcpos + frame` are sampled, where `-RESAMPLER_ZERO_CROSSINGS < frame <= RESAMPLER_ZERO_CROSSINGS`. 42// Note, when upsampling, it is also possible to start sampling from `srcpos = -1`. 43#define RESAMPLER_MAX_PADDING_FRAMES (RESAMPLER_ZERO_CROSSINGS + 1) 44 45// More bits gives more precision, at the cost of a larger table. 
46#define RESAMPLER_BITS_PER_ZERO_CROSSING 3 47#define RESAMPLER_SAMPLES_PER_ZERO_CROSSING (1 << RESAMPLER_BITS_PER_ZERO_CROSSING) 48#define RESAMPLER_FILTER_INTERP_BITS (32 - RESAMPLER_BITS_PER_ZERO_CROSSING) 49#define RESAMPLER_FILTER_INTERP_RANGE (1 << RESAMPLER_FILTER_INTERP_BITS) 50 51// ResampleFrame is just a vector/matrix/matrix multiplication. 52// It performs cubic interpolation of the filter, then multiplies that with the input. 53// dst = [1, frac, frac^2, frac^3] * filter * src 54 55// Cubic Polynomial 56typedef union Cubic 57{ 58 float v[4]; 59 60#ifdef SDL_SSE_INTRINSICS 61 // Aligned loads can be used directly as memory operands for mul/add 62 __m128 v128; 63#endif 64 65#ifdef SDL_NEON_INTRINSICS 66 float32x4_t v128; 67#endif 68 69} Cubic; 70 71static void ResampleFrame_Generic(const float *src, float *dst, const Cubic *filter, float frac, int chans) 72{ 73 const float frac2 = frac * frac; 74 const float frac3 = frac * frac2; 75 76 int i, chan; 77 float scales[RESAMPLER_SAMPLES_PER_FRAME]; 78 79 for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) { 80 scales[i] = filter->v[0] + (filter->v[1] * frac) + (filter->v[2] * frac2) + (filter->v[3] * frac3); 81 } 82 83 for (chan = 0; chan < chans; ++chan) { 84 float out = 0.0f; 85 86 for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i) { 87 out += src[i * chans + chan] * scales[i]; 88 } 89 90 dst[chan] = out; 91 } 92} 93 94static void ResampleFrame_Mono(const float *src, float *dst, const Cubic *filter, float frac, int chans) 95{ 96 const float frac2 = frac * frac; 97 const float frac3 = frac * frac2; 98 99 int i; 100 float out = 0.0f; 101 102 for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) { 103 // Interpolate between the nearest two filters 104 const float scale = filter->v[0] + (filter->v[1] * frac) + (filter->v[2] * frac2) + (filter->v[3] * frac3); 105 106 out += src[i] * scale; 107 } 108 109 dst[0] = out; 110} 111 112static void ResampleFrame_Stereo(const float *src, float *dst, const 
Cubic *filter, float frac, int chans) 113{ 114 const float frac2 = frac * frac; 115 const float frac3 = frac * frac2; 116 117 int i; 118 float out0 = 0.0f; 119 float out1 = 0.0f; 120 121 for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) { 122 // Interpolate between the nearest two filters 123 const float scale = filter->v[0] + (filter->v[1] * frac) + (filter->v[2] * frac2) + (filter->v[3] * frac3); 124 125 out0 += src[i * 2 + 0] * scale; 126 out1 += src[i * 2 + 1] * scale; 127 } 128 129 dst[0] = out0; 130 dst[1] = out1; 131} 132 133#ifdef SDL_SSE_INTRINSICS 134#define sdl_madd_ps(a, b, c) _mm_add_ps(a, _mm_mul_ps(b, c)) // Not-so-fused multiply-add 135 136static void SDL_TARGETING("sse") ResampleFrame_Generic_SSE(const float *src, float *dst, const Cubic *filter, float frac, int chans) 137{ 138#if RESAMPLER_SAMPLES_PER_FRAME != 12 139#error Invalid samples per frame 140#endif 141 142 __m128 f0, f1, f2; 143 144 { 145 const __m128 frac1 = _mm_set1_ps(frac); 146 const __m128 frac2 = _mm_mul_ps(frac1, frac1); 147 const __m128 frac3 = _mm_mul_ps(frac1, frac2); 148 149// Transposed in SetupAudioResampler 150// Explicitly use _mm_load_ps to workaround ICE in GCC 4.9.4 accessing Cubic.v128 151#define X(out) \ 152 out = _mm_load_ps(filter[0].v); \ 153 out = sdl_madd_ps(out, frac1, _mm_load_ps(filter[1].v)); \ 154 out = sdl_madd_ps(out, frac2, _mm_load_ps(filter[2].v)); \ 155 out = sdl_madd_ps(out, frac3, _mm_load_ps(filter[3].v)); \ 156 filter += 4 157 158 X(f0); 159 X(f1); 160 X(f2); 161 162#undef X 163 } 164 165 if (chans == 2) { 166 // Duplicate each of the filter elements and multiply by the input 167 // Use two accumulators to improve throughput 168 __m128 out0 = _mm_mul_ps(_mm_loadu_ps(src + 0), _mm_unpacklo_ps(f0, f0)); 169 __m128 out1 = _mm_mul_ps(_mm_loadu_ps(src + 4), _mm_unpackhi_ps(f0, f0)); 170 out0 = sdl_madd_ps(out0, _mm_loadu_ps(src + 8), _mm_unpacklo_ps(f1, f1)); 171 out1 = sdl_madd_ps(out1, _mm_loadu_ps(src + 12), _mm_unpackhi_ps(f1, f1)); 172 
out0 = sdl_madd_ps(out0, _mm_loadu_ps(src + 16), _mm_unpacklo_ps(f2, f2)); 173 out1 = sdl_madd_ps(out1, _mm_loadu_ps(src + 20), _mm_unpackhi_ps(f2, f2)); 174 175 // Add the accumulators together 176 __m128 out = _mm_add_ps(out0, out1); 177 178 // Add the lower and upper pairs together 179 out = _mm_add_ps(out, _mm_movehl_ps(out, out)); 180 181 // Store the result 182 _mm_storel_pi((__m64 *)dst, out); 183 return; 184 } 185 186 if (chans == 1) { 187 // Multiply the filter by the input 188 __m128 out = _mm_mul_ps(f0, _mm_loadu_ps(src + 0)); 189 out = sdl_madd_ps(out, f1, _mm_loadu_ps(src + 4)); 190 out = sdl_madd_ps(out, f2, _mm_loadu_ps(src + 8)); 191 192 // Horizontal sum 193 __m128 shuf = _mm_shuffle_ps(out, out, _MM_SHUFFLE(2, 3, 0, 1)); 194 out = _mm_add_ps(out, shuf); 195 out = _mm_add_ss(out, _mm_movehl_ps(shuf, out)); 196 197 _mm_store_ss(dst, out); 198 return; 199 } 200 201 int chan = 0; 202 203 // Process 4 channels at once 204 for (; chan + 4 <= chans; chan += 4) { 205 const float *in = &src[chan]; 206 __m128 out0 = _mm_setzero_ps(); 207 __m128 out1 = _mm_setzero_ps(); 208 209#define X(a, b, out) \ 210 out = sdl_madd_ps(out, _mm_loadu_ps(in), _mm_shuffle_ps(a, a, _MM_SHUFFLE(b, b, b, b))); \ 211 in += chans 212 213#define Y(a) \ 214 X(a, 0, out0); \ 215 X(a, 1, out1); \ 216 X(a, 2, out0); \ 217 X(a, 3, out1) 218 219 Y(f0); 220 Y(f1); 221 Y(f2); 222 223#undef X 224#undef Y 225 226 // Add the accumulators together 227 __m128 out = _mm_add_ps(out0, out1); 228 229 _mm_storeu_ps(&dst[chan], out); 230 } 231 232 // Process the remaining channels one at a time. 233 // Channel counts 1,2,4,8 are already handled above, leaving 3,5,6,7 to deal with (looping 3,1,2,3 times). 234 // Without vgatherdps (AVX2), this gets quite messy. 
235 for (; chan < chans; ++chan) { 236 const float *in = &src[chan]; 237 __m128 v0, v1, v2; 238 239#define X(x) \ 240 x = _mm_unpacklo_ps(_mm_load_ss(in), _mm_load_ss(in + chans)); \ 241 in += chans + chans; \ 242 x = _mm_movelh_ps(x, _mm_unpacklo_ps(_mm_load_ss(in), _mm_load_ss(in + chans))); \ 243 in += chans + chans 244 245 X(v0); 246 X(v1); 247 X(v2); 248 249#undef X 250 251 __m128 out = _mm_mul_ps(f0, v0); 252 out = sdl_madd_ps(out, f1, v1); 253 out = sdl_madd_ps(out, f2, v2); 254 255 // Horizontal sum 256 __m128 shuf = _mm_shuffle_ps(out, out, _MM_SHUFFLE(2, 3, 0, 1)); 257 out = _mm_add_ps(out, shuf); 258 out = _mm_add_ss(out, _mm_movehl_ps(shuf, out)); 259 260 _mm_store_ss(&dst[chan], out); 261 } 262} 263 264#undef sdl_madd_ps 265#endif 266 267#ifdef SDL_NEON_INTRINSICS 268static void ResampleFrame_Generic_NEON(const float *src, float *dst, const Cubic *filter, float frac, int chans) 269{ 270#if RESAMPLER_SAMPLES_PER_FRAME != 12 271#error Invalid samples per frame 272#endif 273 274 float32x4_t f0, f1, f2; 275 276 { 277 const float32x4_t frac1 = vdupq_n_f32(frac); 278 const float32x4_t frac2 = vmulq_f32(frac1, frac1); 279 const float32x4_t frac3 = vmulq_f32(frac1, frac2); 280 281// Transposed in SetupAudioResampler 282#define X(out) \ 283 out = vmlaq_f32(vmlaq_f32(vmlaq_f32(filter[0].v128, filter[1].v128, frac1), filter[2].v128, frac2), filter[3].v128, frac3); \ 284 filter += 4 285 286 X(f0); 287 X(f1); 288 X(f2); 289 290#undef X 291 } 292 293 if (chans == 2) { 294 float32x4x2_t g0 = vzipq_f32(f0, f0); 295 float32x4x2_t g1 = vzipq_f32(f1, f1); 296 float32x4x2_t g2 = vzipq_f32(f2, f2); 297 298 // Duplicate each of the filter elements and multiply by the input 299 // Use two accumulators to improve throughput 300 float32x4_t out0 = vmulq_f32(vld1q_f32(src + 0), g0.val[0]); 301 float32x4_t out1 = vmulq_f32(vld1q_f32(src + 4), g0.val[1]); 302 out0 = vmlaq_f32(out0, vld1q_f32(src + 8), g1.val[0]); 303 out1 = vmlaq_f32(out1, vld1q_f32(src + 12), g1.val[1]); 304 
out0 = vmlaq_f32(out0, vld1q_f32(src + 16), g2.val[0]); 305 out1 = vmlaq_f32(out1, vld1q_f32(src + 20), g2.val[1]); 306 307 // Add the accumulators together 308 out0 = vaddq_f32(out0, out1); 309 310 // Add the lower and upper pairs together 311 float32x2_t out = vadd_f32(vget_low_f32(out0), vget_high_f32(out0)); 312 313 // Store the result 314 vst1_f32(dst, out); 315 return; 316 } 317 318 if (chans == 1) { 319 // Multiply the filter by the input 320 float32x4_t out = vmulq_f32(f0, vld1q_f32(src + 0)); 321 out = vmlaq_f32(out, f1, vld1q_f32(src + 4)); 322 out = vmlaq_f32(out, f2, vld1q_f32(src + 8)); 323 324 // Horizontal sum 325 float32x2_t sum = vadd_f32(vget_low_f32(out), vget_high_f32(out)); 326 sum = vpadd_f32(sum, sum); 327 328 vst1_lane_f32(dst, sum, 0); 329 return; 330 } 331 332 int chan = 0; 333 334 // Process 4 channels at once 335 for (; chan + 4 <= chans; chan += 4) { 336 const float *in = &src[chan]; 337 float32x4_t out0 = vdupq_n_f32(0); 338 float32x4_t out1 = vdupq_n_f32(0); 339 340#define X(a, b, out) \ 341 out = vmlaq_f32(out, vld1q_f32(in), vdupq_lane_f32(a, b)); \ 342 in += chans 343 344#define Y(a) \ 345 X(vget_low_f32(a), 0, out0); \ 346 X(vget_low_f32(a), 1, out1); \ 347 X(vget_high_f32(a), 0, out0); \ 348 X(vget_high_f32(a), 1, out1) 349 350 Y(f0); 351 Y(f1); 352 Y(f2); 353 354#undef X 355#undef Y 356 357 // Add the accumulators together 358 float32x4_t out = vaddq_f32(out0, out1); 359 360 vst1q_f32(&dst[chan], out); 361 } 362 363 // Process the remaining channels one at a time. 364 // Channel counts 1,2,4,8 are already handled above, leaving 3,5,6,7 to deal with (looping 3,1,2,3 times). 
365 for (; chan < chans; ++chan) { 366 const float *in = &src[chan]; 367 float32x4_t v0, v1, v2; 368 369#define X(x) \ 370 x = vld1q_dup_f32(in); \ 371 in += chans; \ 372 x = vld1q_lane_f32(in, x, 1); \ 373 in += chans; \ 374 x = vld1q_lane_f32(in, x, 2); \ 375 in += chans; \ 376 x = vld1q_lane_f32(in, x, 3); \ 377 in += chans 378 379 X(v0); 380 X(v1); 381 X(v2); 382 383#undef X 384 385 float32x4_t out = vmulq_f32(f0, v0); 386 out = vmlaq_f32(out, f1, v1); 387 out = vmlaq_f32(out, f2, v2); 388 389 // Horizontal sum 390 float32x2_t sum = vadd_f32(vget_low_f32(out), vget_high_f32(out)); 391 sum = vpadd_f32(sum, sum); 392 393 vst1_lane_f32(&dst[chan], sum, 0); 394 } 395} 396#endif 397 398// Calculate the cubic equation which passes through all four points. 399// https://en.wikipedia.org/wiki/Ordinary_least_squares 400// https://en.wikipedia.org/wiki/Polynomial_regression 401static void CubicLeastSquares(Cubic *coeffs, float y0, float y1, float y2, float y3) 402{ 403 // Least squares matrix for xs = [0, 1/3, 2/3, 1] 404 // [ 1.0 0.0 0.0 0.0 ] 405 // [ -5.5 9.0 -4.5 1.0 ] 406 // [ 9.0 -22.5 18.0 -4.5 ] 407 // [ -4.5 13.5 -13.5 4.5 ] 408 409 coeffs->v[0] = y0; 410 coeffs->v[1] = -5.5f * y0 + 9.0f * y1 - 4.5f * y2 + y3; 411 coeffs->v[2] = 9.0f * y0 - 22.5f * y1 + 18.0f * y2 - 4.5f * y3; 412 coeffs->v[3] = -4.5f * y0 + 13.5f * y1 - 13.5f * y2 + 4.5f * y3; 413} 414 415// Zeroth-order modified Bessel function of the first kind 416// https://mathworld.wolfram.com/ModifiedBesselFunctionoftheFirstKind.html 417static float BesselI0(float x) 418{ 419 float sum = 0.0f; 420 float i = 1.0f; 421 float t = 1.0f; 422 x *= x * 0.25f; 423 424 while (t >= sum * SDL_FLT_EPSILON) { 425 sum += t; 426 t *= x / (i * i); 427 ++i; 428 } 429 430 return sum; 431} 432 433// Pre-calculate 180 degrees of sin(pi * x) / pi 434// The speedup from this isn't huge, but it also avoids precision issues. 435// If sinf isn't available, SDL_sinf just calls SDL_sin. 436// Know what SDL_sin(SDL_PI_F) equals? 
Not quite zero. 437static void SincTable(float *table, int len) 438{ 439 int i; 440 441 for (i = 0; i < len; ++i) { 442 table[i] = SDL_sinf(i * (SDL_PI_F / len)) / SDL_PI_F; 443 } 444} 445 446// Calculate Sinc(x/y), using a lookup table 447static float Sinc(const float *table, int x, int y) 448{ 449 float s = table[x % y]; 450 s = ((x / y) & 1) ? -s : s; 451 return (s * y) / x; 452} 453 454static Cubic ResamplerFilter[RESAMPLER_SAMPLES_PER_ZERO_CROSSING][RESAMPLER_SAMPLES_PER_FRAME]; 455 456static void GenerateResamplerFilter(void) 457{ 458 enum 459 { 460 // Generate samples at 3x the target resolution, so that we have samples at [0, 1/3, 2/3, 1] of each position 461 TABLE_SAMPLES_PER_ZERO_CROSSING = RESAMPLER_SAMPLES_PER_ZERO_CROSSING * 3, 462 TABLE_SIZE = RESAMPLER_ZERO_CROSSINGS * TABLE_SAMPLES_PER_ZERO_CROSSING, 463 }; 464 465 // if dB > 50, beta=(0.1102 * (dB - 8.7)), according to Matlab. 466 const float dB = 80.0f; 467 const float beta = 0.1102f * (dB - 8.7f); 468 const float bessel_beta = BesselI0(beta); 469 const float lensqr = TABLE_SIZE * TABLE_SIZE; 470 471 int i, j; 472 473 float sinc[TABLE_SAMPLES_PER_ZERO_CROSSING]; 474 SincTable(sinc, TABLE_SAMPLES_PER_ZERO_CROSSING); 475 476 // Generate one wing of the filter 477 // https://en.wikipedia.org/wiki/Kaiser_window 478 // https://en.wikipedia.org/wiki/Whittaker%E2%80%93Shannon_interpolation_formula 479 float filter[TABLE_SIZE + 1]; 480 filter[0] = 1.0f; 481 482 for (i = 1; i <= TABLE_SIZE; ++i) { 483 float b = BesselI0(beta * SDL_sqrtf((lensqr - (i * i)) / lensqr)) / bessel_beta; 484 float s = Sinc(sinc, i, TABLE_SAMPLES_PER_ZERO_CROSSING); 485 filter[i] = b * s; 486 } 487 488 // Generate the coefficients for each point 489 // When interpolating, the fraction represents how far we are between input samples, 490 // so we need to align the filter by "moving" it to the right. 
491 // 492 // For the left wing, this means interpolating "forwards" (away from the center) 493 // For the right wing, this means interpolating "backwards" (towards the center) 494 // 495 // The center of the filter is at the end of the left wing (RESAMPLER_ZERO_CROSSINGS - 1) 496 // The left wing is the filter, but reversed 497 // The right wing is the filter, but offset by 1 498 // 499 // Since the right wing is offset by 1, this just means we interpolate backwards 500 // between the same points, instead of forwards 501 // interp(p[n], p[n+1], t) = interp(p[n+1], p[n+1-1], 1 - t) = interp(p[n+1], p[n], 1 - t) 502 for (i = 0; i < RESAMPLER_SAMPLES_PER_ZERO_CROSSING; ++i) { 503 for (j = 0; j < RESAMPLER_ZERO_CROSSINGS; ++j) { 504 const float *ys = &filter[((j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING) + i) * 3]; 505 506 Cubic *fwd = &ResamplerFilter[i][RESAMPLER_ZERO_CROSSINGS - j - 1]; 507 Cubic *rev = &ResamplerFilter[RESAMPLER_SAMPLES_PER_ZERO_CROSSING - i - 1][RESAMPLER_ZERO_CROSSINGS + j]; 508 509 // Calculate the cubic equation of the 4 points 510 CubicLeastSquares(fwd, ys[0], ys[1], ys[2], ys[3]); 511 CubicLeastSquares(rev, ys[3], ys[2], ys[1], ys[0]); 512 } 513 } 514} 515 516typedef void (*ResampleFrameFunc)(const float *src, float *dst, const Cubic *filter, float frac, int chans); 517static ResampleFrameFunc ResampleFrame[8]; 518 519// Transpose 4x4 floats 520static void Transpose4x4(Cubic *data) 521{ 522 int i, j; 523 524 Cubic temp[4] = { data[0], data[1], data[2], data[3] }; 525 526 for (i = 0; i < 4; ++i) { 527 for (j = 0; j < 4; ++j) { 528 data[i].v[j] = temp[j].v[i]; 529 } 530 } 531} 532 533static void SetupAudioResampler(void) 534{ 535 int i, j; 536 bool transpose = false; 537 538 GenerateResamplerFilter(); 539 540#ifdef SDL_SSE_INTRINSICS 541 if (SDL_HasSSE()) { 542 for (i = 0; i < 8; ++i) { 543 ResampleFrame[i] = ResampleFrame_Generic_SSE; 544 } 545 transpose = true; 546 } else 547#endif 548#ifdef SDL_NEON_INTRINSICS 549 if (SDL_HasNEON()) { 550 for 
(i = 0; i < 8; ++i) { 551 ResampleFrame[i] = ResampleFrame_Generic_NEON; 552 } 553 transpose = true; 554 } else 555#endif 556 { 557 for (i = 0; i < 8; ++i) { 558 ResampleFrame[i] = ResampleFrame_Generic; 559 } 560 561 ResampleFrame[0] = ResampleFrame_Mono; 562 ResampleFrame[1] = ResampleFrame_Stereo; 563 } 564 565 if (transpose) { 566 // Transpose each set of 4 coefficients, to reduce work when resampling 567 for (i = 0; i < RESAMPLER_SAMPLES_PER_ZERO_CROSSING; ++i) { 568 for (j = 0; j + 4 <= RESAMPLER_SAMPLES_PER_FRAME; j += 4) { 569 Transpose4x4(&ResamplerFilter[i][j]); 570 } 571 } 572 } 573} 574 575void SDL_SetupAudioResampler(void) 576{ 577 static SDL_InitState init; 578 579 if (SDL_ShouldInit(&init)) { 580 SetupAudioResampler(); 581 SDL_SetInitialized(&init, true); 582 } 583} 584 585Sint64 SDL_GetResampleRate(int src_rate, int dst_rate) 586{ 587 SDL_assert(src_rate > 0); 588 SDL_assert(dst_rate > 0); 589 590 Sint64 numerator = (Sint64)src_rate << 32; 591 Sint64 denominator = (Sint64)dst_rate; 592 593 // Generally it's expected that `dst_frames = (src_frames * dst_rate) / src_rate` 594 // To match this as closely as possible without infinite precision, always round up the resample rate. 595 // For example, without rounding up, a sample ratio of 2:3 would have `sample_rate = 0xAAAAAAAA` 596 // After 3 frames, the position would be 0x1.FFFFFFFE, meaning we haven't fully consumed the second input frame. 597 // By rounding up to 0xAAAAAAAB, we would instead reach 0x2.00000001, fulling consuming the second frame. 598 // Technically you could say this is kicking the can 0x100000000 steps down the road, but I'm fine with that :) 599 // sample_rate = div_ceil(numerator, denominator) 600 Sint64 sample_rate = ((numerator - 1) / denominator) + 1; 601 602 SDL_assert(sample_rate > 0); 603 604 return sample_rate; 605} 606 607int SDL_GetResamplerHistoryFrames(void) 608{ 609 // Even if we aren't currently resampling, make sure to keep enough history in case we need to later. 
610 611 return RESAMPLER_MAX_PADDING_FRAMES; 612} 613 614int SDL_GetResamplerPaddingFrames(Sint64 resample_rate) 615{ 616 // This must always be <= SDL_GetResamplerHistoryFrames() 617 618 return resample_rate ? RESAMPLER_MAX_PADDING_FRAMES : 0; 619} 620 621// These are not general purpose. They do not check for all possible underflow/overflow 622SDL_FORCE_INLINE bool ResamplerAdd(Sint64 a, Sint64 b, Sint64 *ret) 623{ 624 if ((b > 0) && (a > SDL_MAX_SINT64 - b)) { 625 return false; 626 } 627 628 *ret = a + b; 629 return true; 630} 631 632SDL_FORCE_INLINE bool ResamplerMul(Sint64 a, Sint64 b, Sint64 *ret) 633{ 634 if ((b > 0) && (a > SDL_MAX_SINT64 / b)) { 635 return false; 636 } 637 638 *ret = a * b; 639 return true; 640} 641 642Sint64 SDL_GetResamplerInputFrames(Sint64 output_frames, Sint64 resample_rate, Sint64 resample_offset) 643{ 644 // Calculate the index of the last input frame, then add 1. 645 // ((((output_frames - 1) * resample_rate) + resample_offset) >> 32) + 1 646 647 Sint64 output_offset; 648 if (!ResamplerMul(output_frames, resample_rate, &output_offset) || 649 !ResamplerAdd(output_offset, -resample_rate + resample_offset + 0x100000000, &output_offset)) { 650 output_offset = SDL_MAX_SINT64; 651 } 652 653 Sint64 input_frames = (Sint64)(Sint32)(output_offset >> 32); 654 input_frames = SDL_max(input_frames, 0); 655 656 return input_frames; 657} 658 659Sint64 SDL_GetResamplerOutputFrames(Sint64 input_frames, Sint64 resample_rate, Sint64 *inout_resample_offset) 660{ 661 Sint64 resample_offset = *inout_resample_offset; 662 663 // input_offset = (input_frames << 32) - resample_offset; 664 Sint64 input_offset; 665 if (!ResamplerMul(input_frames, 0x100000000, &input_offset) || 666 !ResamplerAdd(input_offset, -resample_offset, &input_offset)) { 667 input_offset = SDL_MAX_SINT64; 668 } 669 670 // output_frames = div_ceil(input_offset, resample_rate) 671 Sint64 output_frames = (input_offset > 0) ? 
((input_offset - 1) / resample_rate) + 1 : 0; 672 673 *inout_resample_offset = (output_frames * resample_rate) - input_offset; 674 675 return output_frames; 676} 677 678void SDL_ResampleAudio(int chans, const float *src, int inframes, float *dst, int outframes, 679 Sint64 resample_rate, Sint64 *inout_resample_offset) 680{ 681 int i; 682 Sint64 srcpos = *inout_resample_offset; 683 ResampleFrameFunc resample_frame = ResampleFrame[chans - 1]; 684 685 SDL_assert(resample_rate > 0); 686 687 src -= (RESAMPLER_ZERO_CROSSINGS - 1) * chans; 688 689 for (i = 0; i < outframes; ++i) { 690 int srcindex = (int)(Sint32)(srcpos >> 32); 691 Uint32 srcfraction = (Uint32)(srcpos & 0xFFFFFFFF); 692 srcpos += resample_rate; 693 694 SDL_assert(srcindex >= -1 && srcindex < inframes); 695 696 const Cubic *filter = ResamplerFilter[srcfraction >> RESAMPLER_FILTER_INTERP_BITS]; 697 const float frac = (float)(srcfraction & (RESAMPLER_FILTER_INTERP_RANGE - 1)) * (1.0f / RESAMPLER_FILTER_INTERP_RANGE); 698 699 const float *frame = &src[srcindex * chans]; 700 resample_frame(frame, dst, filter, frac, chans); 701 702 dst += chans; 703 } 704 705 *inout_resample_offset = srcpos - ((Sint64)inframes << 32); 706} 707
[FILE END]
(C) 2025 0x4248. (C) 2025 4248 Media and 4248 Systems, all part of 0x4248. See LICENCE files for more information. Not all files are by 0x4248; always check the licensing.