Atlas - SDL_blit_N.c

Home / ext / SDL / src / video Lines: 7 | Size: 124649 bytes [Download] [Show on GitHub] [Search similar files] [Raw] [Raw (proxy)]
[FILE BEGIN]
/*
  Simple DirectMedia Layer
  Copyright (C) 1997-2025 Sam Lantinga <[email protected]>

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
#include "SDL_internal.h"

#ifdef SDL_HAVE_BLIT_N

#include "SDL_pixels_c.h"
#include "SDL_surface_c.h"
#include "SDL_blit_copy.h"

// General optimized routines that write char by char
#define HAVE_FAST_WRITE_INT8 1

// On some CPU, it's slower than combining and write a word
#ifdef __MIPS__
#undef HAVE_FAST_WRITE_INT8
#define HAVE_FAST_WRITE_INT8 0
#endif

// Functions to blit from N-bit surfaces to other surfaces

// Feature flags returned by GetBlitFeatures(), used to pick a blitter.
#define BLIT_FEATURE_NONE 0x00
#define BLIT_FEATURE_HAS_SSE41 0x01
#define BLIT_FEATURE_HAS_ALTIVEC 0x02
#define BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH 0x04

#ifdef SDL_ALTIVEC_BLITTERS
#ifdef SDL_PLATFORM_MACOS
#include <sys/sysctl.h>
// Query the L3 cache size via sysctl; returns 0 on failure.
// Used as a heuristic to distinguish G4 (prefetch helps) from G5.
static size_t GetL3CacheSize(void)
{
    const char key[] = "hw.l3cachesize";
    u_int64_t result = 0;
    size_t typeSize = sizeof(result);

    int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
    if (0 != err) {
        return 0;
    }

    return result;
}
#else
static size_t GetL3CacheSize(void)
{
    // XXX: Just guess G4
    return 2097152;
}
#endif // SDL_PLATFORM_MACOS

#if (defined(SDL_PLATFORM_MACOS) && (__GNUC__ < 4))
#define VECUINT8_LITERAL(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
    (vector unsigned char)(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p)
#define VECUINT16_LITERAL(a, b, c, d, e, f, g, h) \
    (vector unsigned short)(a, b, c, d, e, f, g, h)
#else
#define VECUINT8_LITERAL(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
    (vector unsigned char)                                               \
    {                                                                    \
        a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p                   \
    }
#define VECUINT16_LITERAL(a, b, c, d, e, f, g, h) \
    (vector unsigned short)                       \
    {                                             \
        a, b, c, d, e, f, g, h                    \
    }
#endif

// Nonzero when the pointer is not 16-byte aligned (AltiVec load/store granule).
#define UNALIGNED_PTR(x) (((size_t)x) & 0x0000000F)
#define VSWIZZLE32(a, b, c, d) (vector unsigned char)(0x00 + a, 0x00 + b, 0x00 + c, 0x00 + d, \
                                                      0x04 + a, 0x04 + b, 0x04 + c, 0x04 + d, \
                                                      0x08 + a, 0x08 + b, 0x08 + c, 0x08 + d, \
                                                      0x0C + a, 0x0C + b, 0x0C + c, 0x0C + d)

// Assemble a 32-bit pixel from 8-bit r/g/b/a channels using dstfmt's shifts/masks.
#define MAKE8888(dstfmt, r, g, b, a)         \
    (((r << dstfmt->Rshift) & dstfmt->Rmask) | \
     ((g << dstfmt->Gshift) & dstfmt->Gmask) | \
     ((b << dstfmt->Bshift) & dstfmt->Bmask) | \
     ((a << dstfmt->Ashift) & dstfmt->Amask))

/*
 * Data Stream Touch...Altivec cache prefetching.
 *
 * Don't use this on a G5...however, the speed boost is very significant
 * on a G4.
 */
#define DST_CHAN_SRC 1
#define DST_CHAN_DEST 2

// macro to set DST control word value...
#define DST_CTRL(size, count, stride) \
    (((size) << 24) | ((count) << 16) | (stride))

// Build the vec_perm alignment vector for loads from a possibly-unaligned src.
#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src))      \
                              ? vec_lvsl(0, src)    \
                              : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))

// Calculate the permute vector used for 32->32 swizzling.
// A NULL srcfmt or dstfmt means "assume ARGB8888" (see default_pixel_format).
static vector unsigned char calc_swizzle32(const SDL_PixelFormatDetails *srcfmt, const SDL_PixelFormatDetails *dstfmt)
{
    /*
     * We have to assume that the bits that aren't used by other
     * colors is alpha, and it's one complete byte, since some formats
     * leave alpha with a zero mask, but we should still swizzle the bits.
     */
    // ARGB
    static const SDL_PixelFormatDetails default_pixel_format = {
        SDL_PIXELFORMAT_ARGB8888, 0, 0, { 0, 0 }, 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, 8, 8, 8, 8, 16, 8, 0, 24
    };
    // Byte offsets of the start of each 32-bit lane, replicated per byte.
    const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
                                                       0x04, 0x04, 0x04, 0x04,
                                                       0x08, 0x08, 0x08, 0x08,
                                                       0x0C, 0x0C, 0x0C,
                                                       0x0C);
    vector unsigned char vswiz;
    vector unsigned int srcvec;
    Uint32 rmask, gmask, bmask, amask;

    if (!srcfmt) {
        srcfmt = &default_pixel_format;
    }
    if (!dstfmt) {
        dstfmt = &default_pixel_format;
    }

// Convert a channel's bit shift (0/8/16/24) into its byte index within a pixel.
#define RESHIFT(X) (3 - ((X) >> 3))
    rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
    gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
    bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);

    // Use zero for alpha if either surface doesn't have alpha
    if (dstfmt->Amask) {
        // 0x10 selects a byte from the second vec_perm operand (the alpha vector).
        amask =
            ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
    } else {
        amask =
            0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
                          0xFFFFFFFF);
    }
#undef RESHIFT

    ((unsigned int *)(char *)&srcvec)[0] = (rmask | gmask | bmask | amask);
    vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
    return (vswiz);
}

#if SDL_BYTEORDER == SDL_LIL_ENDIAN
// reorder bytes for PowerPC little endian
static vector unsigned char reorder_ppc64le_vec(vector unsigned char vpermute)
{
    /* The result vector of calc_swizzle32 reorder bytes using vec_perm.
       The LE transformation for vec_perm has an implicit assumption
       that the permutation is being used to reorder vector elements,
       not to reorder bytes within those elements.
       Unfortunately the result order is not the expected one for powerpc
       little endian when the two first vector parameters of vec_perm are
       not of type 'vector char'. This is because the numbering from the
       left for BE, and numbering from the right for LE, produces a
       different interpretation of what the odd and even lanes are.
       Refer to fedora bug 1392465
    */

    // Swap the two bytes of every 16-bit lane in the permute vector.
    const vector unsigned char ppc64le_reorder = VECUINT8_LITERAL(
        0x01, 0x00, 0x03, 0x02,
        0x05, 0x04, 0x07, 0x06,
        0x09, 0x08, 0x0B, 0x0A,
        0x0D, 0x0C, 0x0F, 0x0E);

    vector unsigned char vswiz_ppc64le;
    vswiz_ppc64le = vec_perm(vpermute, vpermute, ppc64le_reorder);
    return (vswiz_ppc64le);
}
#endif

static void Blit_XRGB8888_RGB565(SDL_BlitInfo *info);
// AltiVec blit: XRGB8888 source -> RGB565 destination, 8 pixels per vector pass.
static void Blit_XRGB8888_RGB565Altivec(SDL_BlitInfo *info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *)info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *)info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    vector unsigned char valpha = vec_splat_u8(0);
    // Permute src pixels into ARGB order before packing (NULL dst = ARGB8888).
    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
    // Gathers the green bytes of 8 pixels from the two source vectors.
    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
                                                    0x00, 0x0a, 0x00, 0x0e,
                                                    0x00, 0x12, 0x00, 0x16,
                                                    0x00, 0x1a, 0x00, 0x1e);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    vector unsigned short v3f =
        VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
                          0x003f, 0x003f, 0x003f, 0x003f);
    vector unsigned short vfc =
        VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
                          0x00fc, 0x00fc, 0x00fc, 0x00fc);
    // 0xF800 per lane: splat of -7 (0xF9 bytes) shifted left by 8.
    vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->dst_w;
        int extrawidth;

        // do scalar until we can align...
#define ONE_PIXEL_BLEND(condition, widthvar)           \
    while (condition) {                                \
        Uint32 Pixel;                                  \
        unsigned sR, sG, sB, sA;                       \
        DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel,  \
                      sR, sG, sB, sA);                 \
        *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
                            ((sG << 3) & 0x000007E0) | \
                            ((sB >> 3) & 0x0000001F)); \
        dst += 2;                                      \
        src += 4;                                      \
        widthvar--;                                    \
    }

        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        // After all that work, here's the vector part!
        extrawidth = (width % 8); // trailing unaligned stores
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
            vector unsigned int vsrc1, vsrc2;
            vector unsigned char vdst;

            // Load two 16-byte groups (8 pixels) and swizzle each to ARGB.
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
            src += 16;
            vsrc = voverflow;
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
            // 1555
            vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
            // Rebuild the 6-bit green from the original 8-bit green bytes.
            vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
            vgpixel = vec_and(vgpixel, vfc);
            vgpixel = vec_sl(vgpixel, v3);
            // Shift red up one bit so 1555 becomes 565 layout.
            vrpixel = vec_sl(vpixel, v1);
            vrpixel = vec_and(vrpixel, vf800);
            vbpixel = vec_and(vpixel, v3f);
            vdst =
                vec_or((vector unsigned char)vrpixel,
                       (vector unsigned char)vgpixel);
            // 565
            vdst = vec_or(vdst, (vector unsigned char)vbpixel);
            vec_st(vdst, 0, dst);

            width -= 8;
            src += 16;
            dst += 16;
            vsrc = voverflow;
        }

        SDL_assert(width == 0);

        // do scalar until we can align...
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip; // move to next row, accounting for pitch.
        dst += dstskip;
    }
}

#ifdef BROKEN_ALTIVEC_BLITTERS // This doesn't properly expand to the lower destination bits
// AltiVec blit: RGB565 source -> 32-bit destination (disabled; see guard above).
static void Blit_RGB565_32Altivec(SDL_BlitInfo *info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *)info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *)info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v2 = vec_splat_u16(2);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
        0x10 - 0x1f is the alpha
        0x00 - 0x0e evens are the red
        0x01 - 0x0f odds are zero
    */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
                                                       0x10, 0x02, 0x01, 0x01,
                                                       0x10, 0x04, 0x01, 0x01,
                                                       0x10, 0x06, 0x01,
                                                       0x01);
    vector unsigned char vredalpha2 =
        (vector unsigned char)(vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16)));
    /*
        0x00 - 0x0f is ARxx ARxx ARxx ARxx
        0x11 - 0x0f odds are blue
    */
    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
                                                   0x04, 0x05, 0x06, 0x13,
                                                   0x08, 0x09, 0x0a, 0x15,
                                                   0x0c, 0x0d, 0x0e, 0x17);
    vector unsigned char vblue2 =
        (vector unsigned char)(vec_add((vector unsigned int)vblue1, v8));
    /*
        0x00 - 0x0f is ARxB ARxB ARxB ARxB
        0x10 - 0x0e evens are green
    */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
                                                    0x04, 0x05, 0x12, 0x07,
                                                    0x08, 0x09, 0x14, 0x0b,
                                                    0x0c, 0x0d, 0x16, 0x0f);
    vector unsigned char vgreen2 =
        (vector unsigned char)(vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8)));

    SDL_assert(srcfmt->bytes_per_pixel == 2);
    SDL_assert(dstfmt->bytes_per_pixel == 4);

    vf800 = (vector unsigned short)vec_splat_u8(-7);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

    if (dstfmt->Amask && info->a) {
        ((unsigned char *)&valpha)[0] = alpha = info->a;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }

    vpermute = calc_swizzle32(NULL, dstfmt);
    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->dst_w;
        int extrawidth;

        // do scalar until we can align...
#define ONE_PIXEL_BLEND(condition, widthvar)              \
    while (condition) {                                   \
        unsigned sR, sG, sB;                              \
        unsigned short Pixel = *((unsigned short *)src);  \
        RGB_FROM_RGB565(Pixel, sR, sG, sB);               \
        ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
        src += 2;                                         \
        dst += 4;                                         \
        widthvar--;                                       \
    }
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        // After all that work, here's the vector part!
        extrawidth = (width % 8); // trailing unaligned stores
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;

            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);

            vR = vec_and((vector unsigned short)vsrc, vf800);
            vB = vec_sl((vector unsigned short)vsrc, v3);
            vG = vec_sl(vB, v2);

            vdst1 =
                (vector unsigned char)vec_perm((vector unsigned char)vR,
                                               valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);

            vdst2 =
                (vector unsigned char)vec_perm((vector unsigned char)vR,
                                               valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);

            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;
        }

        SDL_assert(width == 0);

        // do scalar until we can align...
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip; // move to next row, accounting for pitch.
        dst += dstskip;
    }
}

// AltiVec blit: RGB555 source -> 32-bit destination (disabled; see guard above).
static void Blit_RGB555_32Altivec(SDL_BlitInfo *info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *)info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *)info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
        0x10 - 0x1f is the alpha
        0x00 - 0x0e evens are the red
        0x01 - 0x0f odds are zero
    */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
                                                       0x10, 0x02, 0x01, 0x01,
                                                       0x10, 0x04, 0x01, 0x01,
                                                       0x10, 0x06, 0x01,
                                                       0x01);
    vector unsigned char vredalpha2 =
        (vector unsigned char)(vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16)));
    /*
        0x00 - 0x0f is ARxx ARxx ARxx ARxx
        0x11 - 0x0f odds are blue
    */
    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
                                                   0x04, 0x05, 0x06, 0x13,
                                                   0x08, 0x09, 0x0a, 0x15,
                                                   0x0c, 0x0d, 0x0e, 0x17);
    vector unsigned char vblue2 =
        (vector unsigned char)(vec_add((vector unsigned int)vblue1, v8));
    /*
        0x00 - 0x0f is ARxB ARxB ARxB ARxB
        0x10 - 0x0e evens are green
    */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
                                                    0x04, 0x05, 0x12, 0x07,
                                                    0x08, 0x09, 0x14, 0x0b,
                                                    0x0c, 0x0d, 0x16, 0x0f);
    vector unsigned char vgreen2 =
        (vector unsigned char)(vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8)));

    SDL_assert(srcfmt->bytes_per_pixel == 2);
    SDL_assert(dstfmt->bytes_per_pixel == 4);

    vf800 = (vector unsigned short)vec_splat_u8(-7);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

    if (dstfmt->Amask && info->a) {
        ((unsigned char *)&valpha)[0] = alpha = info->a;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }

    vpermute = calc_swizzle32(NULL, dstfmt);
    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->dst_w;
        int extrawidth;

        // do scalar until we can align...
#define ONE_PIXEL_BLEND(condition, widthvar)              \
    while (condition) {                                   \
        unsigned sR, sG, sB;                              \
        unsigned short Pixel = *((unsigned short *)src);  \
        RGB_FROM_RGB555(Pixel, sR, sG, sB);               \
        ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
        src += 2;                                         \
        dst += 4;                                         \
        widthvar--;                                       \
    }
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        // After all that work, here's the vector part!
        extrawidth = (width % 8); // trailing unaligned stores
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;

            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);

            // RGB555: red needs one extra left shift compared to RGB565.
            vR = vec_and(vec_sl((vector unsigned short)vsrc, v1), vf800);
            vB = vec_sl((vector unsigned short)vsrc, v3);
            vG = vec_sl(vB, v3);

            vdst1 =
                (vector unsigned char)vec_perm((vector unsigned char)vR,
                                               valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);

            vdst2 =
                (vector unsigned char)vec_perm((vector unsigned char)vR,
                                               valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);

            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;
        }

        SDL_assert(width == 0);

        // do scalar until we can align...
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip; // move to next row, accounting for pitch.
        dst += dstskip;
    }
}
#endif // BROKEN_ALTIVEC_BLITTERS

static void BlitNtoNKey(SDL_BlitInfo *info);
static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
// AltiVec blit: 32-bit -> 32-bit with colorkey transparency.
// Falls back to the scalar blitters for rows narrower than 16 pixels.
static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
{
    int height = info->dst_h;
    Uint32 *srcp = (Uint32 *)info->src;
    int srcskip = info->src_skip / 4;
    Uint32 *dstp = (Uint32 *)info->dst;
    int dstskip = info->dst_skip / 4;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->bytes_per_pixel;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->bytes_per_pixel;
    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
    unsigned alpha = dstfmt->Amask ? info->a : 0;
    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
    Uint32 ckey = info->colorkey;
    vector unsigned int valpha;
    vector unsigned char vpermute;
    vector unsigned char vzero;
    vector unsigned int vckey;
    vector unsigned int vrgbmask;
    vpermute = calc_swizzle32(srcfmt, dstfmt);
    if (info->dst_w < 16) {
        // Too narrow for the vector loop; use the scalar key blitters.
        if (copy_alpha) {
            BlitNtoNKeyCopyAlpha(info);
        } else {
            BlitNtoNKey(info);
        }
        return;
    }
    vzero = vec_splat_u8(0);
    if (alpha) {
        ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
        valpha =
            (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
    } else {
        valpha = (vector unsigned int)vzero;
    }
    ckey &= rgbmask;
    ((unsigned int *)(char *)&vckey)[0] = ckey;
    vckey = vec_splat(vckey, 0);
    ((unsigned int *)(char *)&vrgbmask)[0] = rgbmask;
    vrgbmask = vec_splat(vrgbmask, 0);

#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    // reorder bytes for PowerPC little endian
    vpermute = reorder_ppc64le_vec(vpermute);
#endif

    while (height--) {
#define ONE_PIXEL_BLEND(condition, widthvar)                  \
    if (copy_alpha) {                                         \
        while (condition) {                                   \
            Uint32 Pixel;                                     \
            unsigned sR, sG, sB, sA;                          \
            DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt,      \
                          Pixel,                              \
                          sR, sG, sB, sA);                    \
            if ((Pixel & rgbmask) != ckey) {                  \
                ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt,  \
                              sR, sG, sB, sA);                \
            }                                                 \
            dstp = (Uint32 *)(((Uint8 *)dstp) + dstbpp);      \
            srcp = (Uint32 *)(((Uint8 *)srcp) + srcbpp);      \
            widthvar--;                                       \
        }                                                     \
    } else {                                                  \
        while (condition) {                                   \
            Uint32 Pixel;                                     \
            unsigned sR, sG, sB;                              \
            RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
            if (Pixel != ckey) {                              \
                RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);    \
                ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt,  \
                              sR, sG, sB, alpha);             \
            }                                                 \
            dstp = (Uint32 *)(((Uint8 *)dstp) + dstbpp);      \
            srcp = (Uint32 *)(((Uint8 *)srcp) + srcbpp);      \
            widthvar--;                                       \
        }                                                     \
    }
        int width = info->dst_w;
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
        SDL_assert(width > 0);
        if (width > 0) {
            int extrawidth = (width % 4);
            vector unsigned char valigner = VEC_ALIGNER(srcp);
            vector unsigned int vs = vec_ld(0, srcp);
            width -= extrawidth;
            SDL_assert(width >= 4);
            while (width) {
                vector unsigned char vsel;
                vector unsigned int vd;
                vector unsigned int voverflow = vec_ld(15, srcp);
                // load the source vec
                vs = vec_perm(vs, voverflow, valigner);
                // vsel is set for items that match the key
                // NOTE(review): the vec_and result is immediately overwritten;
                // the compare uses the unmasked pixel, unlike the scalar path
                // above which masks with rgbmask first — verify upstream intent.
                vsel = (vector unsigned char)vec_and(vs, vrgbmask);
                vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
                // permute the src vec to the dest format
                vs = vec_perm(vs, valpha, vpermute);
                // load the destination vec
                vd = vec_ld(0, dstp);
                // select the source and dest into vs
                vd = (vector unsigned int)vec_sel((vector unsigned char)vs,
                                                  (vector unsigned char)vd,
                                                  vsel);

                vec_st(vd, 0, dstp);
                srcp += 4;
                width -= 4;
                dstp += 4;
                vs = voverflow;
            }
            ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND
            srcp += srcskip;
            dstp += dstskip;
        }
    }
}

// Altivec code to swizzle one 32-bit surface to a different 32-bit format.
// Use this on a G5
static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
{
    int height = info->dst_h;
    Uint32 *src = (Uint32 *)info->src;
    int srcskip = info->src_skip / 4;
    Uint32 *dst = (Uint32 *)info->dst;
    int dstskip = info->dst_skip / 4;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    vector unsigned int vzero = vec_splat_u32(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    if (dstfmt->Amask && !srcfmt->Amask) {
        if (info->a) {
            // Source has no alpha: splat the requested alpha into the
            // "zero" vector so the permute fills the alpha byte with it.
            vector unsigned char valpha;
            ((unsigned char *)&valpha)[0] = info->a;
            vzero = (vector unsigned int)vec_splat(valpha, 0);
        }
    }

    SDL_assert(srcfmt->bytes_per_pixel == 4);
    SDL_assert(dstfmt->bytes_per_pixel == 4);

#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    // reorder bytes for PowerPC little endian
    vpermute = reorder_ppc64le_vec(vpermute);
#endif

    while (height--) {
        vector unsigned char valigner;
        vector unsigned int vbits;
        vector unsigned int voverflow;
        Uint32 bits;
        Uint8 r, g, b, a;

        int width = info->dst_w;
        int extrawidth;

        // do scalar until we can align...
        while ((UNALIGNED_PTR(dst)) && (width)) {
            bits = *(src++);
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            if (!srcfmt->Amask)
                a = info->a;
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            width--;
        }

        // After all that work, here's the vector part!
        extrawidth = (width % 4);
        width -= extrawidth;
        valigner = VEC_ALIGNER(src);
        vbits = vec_ld(0, src);

        while (width) {
            voverflow = vec_ld(15, src);
            src += 4;
            width -= 4;
            vbits = vec_perm(vbits, voverflow, valigner); // src is ready.
            vbits = vec_perm(vbits, vzero, vpermute);     // swizzle it.
            vec_st(vbits, 0, dst);                        // store it back out.
            dst += 4;
            vbits = voverflow;
        }

        SDL_assert(width == 0);

        // cover pixels at the end of the row that didn't fit in 16 bytes.
        while (extrawidth) {
            bits = *(src++); // max 7 pixels, don't bother with prefetch.
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            if (!srcfmt->Amask)
                a = info->a;
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            extrawidth--;
        }

        src += srcskip;
        dst += dstskip;
    }
}

// Altivec code to swizzle one 32-bit surface to a different 32-bit format.
// Use this on a G4
static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
{
    // How far ahead of the current pointer to issue data-stream prefetches.
    const int scalar_dst_lead = sizeof(Uint32) * 4;
    const int vector_dst_lead = sizeof(Uint32) * 16;

    int height = info->dst_h;
    Uint32 *src = (Uint32 *)info->src;
    int srcskip = info->src_skip / 4;
    Uint32 *dst = (Uint32 *)info->dst;
    int dstskip = info->dst_skip / 4;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    vector unsigned int vzero = vec_splat_u32(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    if (dstfmt->Amask && !srcfmt->Amask) {
        if (info->a) {
            vector unsigned char valpha;
            ((unsigned char *)&valpha)[0] = info->a;
            vzero = (vector unsigned int)vec_splat(valpha, 0);
        }
    }

    SDL_assert(srcfmt->bytes_per_pixel == 4);
    SDL_assert(dstfmt->bytes_per_pixel == 4);

#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    // reorder bytes for PowerPC little endian
    vpermute = reorder_ppc64le_vec(vpermute);
#endif

    while (height--) {
        vector unsigned char valigner;
        vector unsigned int vbits;
        vector unsigned int voverflow;
        Uint32 bits;
        Uint8 r, g, b, a;

        int width = info->dst_w;
        int extrawidth;

        // do scalar until we can align...
        while ((UNALIGNED_PTR(dst)) && (width)) {
            vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
                     DST_CHAN_SRC);
            vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
                      DST_CHAN_DEST);
            bits = *(src++);
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            if (!srcfmt->Amask)
                a = info->a;
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            width--;
        }

        // After all that work, here's the vector part!
        extrawidth = (width % 4);
        width -= extrawidth;
        valigner = VEC_ALIGNER(src);
        vbits = vec_ld(0, src);

        while (width) {
            vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
                     DST_CHAN_SRC);
            vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
                      DST_CHAN_DEST);
            voverflow = vec_ld(15, src);
            src += 4;
            width -= 4;
            vbits = vec_perm(vbits, voverflow, valigner); // src is ready.
            vbits = vec_perm(vbits, vzero, vpermute);     // swizzle it.
            vec_st(vbits, 0, dst);                        // store it back out.
            dst += 4;
            vbits = voverflow;
        }

        SDL_assert(width == 0);

        // cover pixels at the end of the row that didn't fit in 16 bytes.
        while (extrawidth) {
            bits = *(src++); // max 7 pixels, don't bother with prefetch.
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            if (!srcfmt->Amask)
                a = info->a;
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            extrawidth--;
        }

        src += srcskip;
        dst += dstskip;
    }

    // Stop the data streams started by vec_dstt/vec_dstst above.
    vec_dss(DST_CHAN_SRC);
    vec_dss(DST_CHAN_DEST);
}

// !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4.
#define GetBlitFeatures()                                    \
    ((SDL_HasAltiVec() ? BLIT_FEATURE_HAS_ALTIVEC : 0) |     \
     ((GetL3CacheSize() == 0) ? BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH : 0))

#ifdef __MWERKS__
#pragma altivec_model off
#endif
#else
#define GetBlitFeatures() \
    (SDL_HasSSE41() ?     \
         BLIT_FEATURE_HAS_SSE41 : 0)
#endif

// This is now endian dependent
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
#define HI 1
#define LO 0
#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
#define HI 0
#define LO 1
#endif

// Special optimized blit for RGB 8-8-8 --> RGB 5-5-5
#define RGB888_RGB555(dst, src)                                    \
    {                                                              \
        *(Uint16 *)(dst) = (Uint16)((((*src) & 0x00F80000) >> 9) | \
                                    (((*src) & 0x0000F800) >> 6) | \
                                    (((*src) & 0x000000F8) >> 3)); \
    }
#ifndef USE_DUFFS_LOOP
// Pack two 8888 pixels into one aligned 32-bit store of two 555 pixels.
#define RGB888_RGB555_TWO(dst, src)                            \
    {                                                          \
        *(Uint32 *)(dst) = (((((src[HI]) & 0x00F80000) >> 9) | \
                             (((src[HI]) & 0x0000F800) >> 6) | \
                             (((src[HI]) & 0x000000F8) >> 3))  \
                            << 16) |                           \
                           (((src[LO]) & 0x00F80000) >> 9) |   \
                           (((src[LO]) & 0x0000F800) >> 6) |   \
                           (((src[LO]) & 0x000000F8) >> 3);    \
    }
#endif
// Scalar blit: XRGB8888 source -> RGB555 destination.
static void Blit_XRGB8888_RGB555(SDL_BlitInfo *info)
{
#ifndef USE_DUFFS_LOOP
    int c;
#endif
    int width, height;
    Uint32 *src;
    Uint16 *dst;
    int srcskip, dstskip;

    // Set up some basic variables
    width = info->dst_w;
    height = info->dst_h;
    src = (Uint32 *)info->src;
    srcskip = info->src_skip / 4;
    dst = (Uint16 *)info->dst;
    dstskip = info->dst_skip / 2;

#ifdef USE_DUFFS_LOOP
    while (height--) {
        /* *INDENT-OFF* */ // clang-format off
        DUFFS_LOOP(
            RGB888_RGB555(dst, src);
            ++src;
            ++dst;
        , width);
        /* *INDENT-ON* */ // clang-format on
        src += srcskip;
        dst += dstskip;
    }
#else
    // Memory align at 4-byte boundary, if necessary
    if ((long)dst & 0x03) {
        // Don't do anything if width is 0
        if (width == 0) {
            return;
        }
        --width;

        while (height--) {
            // Perform copy alignment
            RGB888_RGB555(dst, src);
            ++src;
            ++dst;

            // Copy in 4 pixel chunks
            for (c = width / 4; c; --c) {
                RGB888_RGB555_TWO(dst, src);
                src += 2;
                dst += 2;
                RGB888_RGB555_TWO(dst, src);
                src += 2;
                dst += 2;
            }
            // Get any leftovers
            switch (width & 3) {
            case 3:
                RGB888_RGB555(dst, src);
                ++src;
                ++dst;
                SDL_FALLTHROUGH;
            case 2:
                RGB888_RGB555_TWO(dst, src);
                src += 2;
                dst += 2;
                break;
            case 1:
                RGB888_RGB555(dst, src);
                ++src;
                ++dst;
                break;
            }
            src += srcskip;
            dst += dstskip;
        }
    } else {
        while (height--) {
            // Copy in 4 pixel chunks
            for (c = width / 4; c; --c) {
                RGB888_RGB555_TWO(dst, src);
                src += 2;
                dst += 2;
                RGB888_RGB555_TWO(dst, src);
                src += 2;
                dst += 2;
            }
            // Get any leftovers
            switch (width & 3) {
            case 3:
                RGB888_RGB555(dst, src);
                ++src;
                ++dst;
                SDL_FALLTHROUGH;
            case 2:
                RGB888_RGB555_TWO(dst, src);
                src += 2;
                dst += 2;
                break;
            case 1:
                RGB888_RGB555(dst, src);
                ++src;
                ++dst;
                break;
            }
            src += srcskip;
            dst += dstskip;
        }
    }
#endif // USE_DUFFS_LOOP
}

// Special optimized blit for RGB 8-8-8 --> RGB 5-6-5
#define RGB888_RGB565(dst, src)                                    \
    {                                                              \
        *(Uint16 *)(dst) = (Uint16)((((*src) & 0x00F80000) >> 8) | \
                                    (((*src) & 0x0000FC00) >> 5) | \
                                    (((*src) & 0x000000F8) >> 3)); \
    }
#ifndef USE_DUFFS_LOOP
// Pack two 8888 pixels into one aligned 32-bit store of two 565 pixels.
#define RGB888_RGB565_TWO(dst, src)                            \
    {                                                          \
        *(Uint32 *)(dst) = (((((src[HI]) & 0x00F80000) >> 8) | \
                             (((src[HI]) & 0x0000FC00) >> 5) | \
                             (((src[HI]) & 0x000000F8) >> 3))  \
                            << 16) |                           \
                           (((src[LO]) & 0x00F80000) >> 8) |   \
                           (((src[LO]) & 0x0000FC00) >> 5) |   \
                           (((src[LO]) & 0x000000F8) >> 3);    \
    }
#endif
// Scalar blit: XRGB8888 source -> RGB565 destination.
static void Blit_XRGB8888_RGB565(SDL_BlitInfo *info)
{
#ifndef USE_DUFFS_LOOP
    int c;
#endif
    int width, height;
    Uint32 *src;
    Uint16 *dst;
    int srcskip, dstskip;

    // Set up some basic variables
    width = info->dst_w;
    height = info->dst_h;
    src = (Uint32 *)info->src;
    srcskip = info->src_skip / 4;
    dst = (Uint16 *)info->dst;
    dstskip = info->dst_skip / 2;

#ifdef USE_DUFFS_LOOP
    while (height--) {
        /* *INDENT-OFF* */ // clang-format off
        DUFFS_LOOP(
            RGB888_RGB565(dst, src);
            ++src;
            ++dst;
        , width);
        /* *INDENT-ON* */ // clang-format on
        src += srcskip;
        dst += dstskip;
    }
#else
    // Memory align at 4-byte boundary, if necessary
    if ((long)dst & 0x03) {
        // Don't do anything if width is 0
        if (width == 0) {
            return;
        }
        --width;

        while (height--) {
            // Perform copy alignment
            RGB888_RGB565(dst, src);
            ++src;
            ++dst;

            // Copy in 4 pixel chunks
            for (c = width / 4; c; --c) {
                RGB888_RGB565_TWO(dst, src);
                src += 2;
                dst += 2;
                RGB888_RGB565_TWO(dst, src);
                src += 2;
                dst += 2;
            }
            // Get any leftovers
            switch (width & 3) {
            case 3:
                RGB888_RGB565(dst, src);
                ++src;
                ++dst;
                SDL_FALLTHROUGH;
            case 2:
                RGB888_RGB565_TWO(dst, src);
                src += 2;
                dst += 2;
                break;
            case 1:
                RGB888_RGB565(dst, src);
                ++src;
                ++dst;
                break;
            }
            src += srcskip;
            dst += dstskip;
        }
    } else {
        while (height--) {
            // Copy in 4 pixel chunks
            for (c = width / 4; c; --c) {
                RGB888_RGB565_TWO(dst, src);
                src += 2;
                dst += 2;
                RGB888_RGB565_TWO(dst, src);
                src += 2;
                dst += 2;
            }
            // Get any leftovers
            switch (width & 3) {
            case 3:
                RGB888_RGB565(dst, src);
                ++src;
                ++dst;
                SDL_FALLTHROUGH;
            case 2:
                RGB888_RGB565_TWO(dst, src);
                src += 2;
                dst += 2;
                break;
            case 1:
                RGB888_RGB565(dst, src);
                ++src;
                ++dst;
                break;
            }
            src += srcskip;
            dst += dstskip;
        }
    }
#endif // USE_DUFFS_LOOP
}

#ifdef SDL_SSE4_1_INTRINSICS

// SSE4.1 blit: RGB565 source -> 32-bit destination, 8 pixels per iteration.
static void SDL_TARGETING("sse4.1") Blit_RGB565_32_SSE41(SDL_BlitInfo *info)
{
    int c;
int width, height; 1159 const Uint16 *src; 1160 Uint32 *dst; 1161 int srcskip, dstskip; 1162 Uint8 r, g, b; 1163 1164 // Set up some basic variables 1165 width = info->dst_w; 1166 height = info->dst_h; 1167 src = (const Uint16 *)info->src; 1168 srcskip = info->src_skip / 2; 1169 dst = (Uint32 *)info->dst; 1170 dstskip = info->dst_skip / 4; 1171 1172 // Red and blue channel multiplier to repeat 5 bits 1173 __m128i rb_mult = _mm_shuffle_epi32(_mm_cvtsi32_si128(0x01080108), 0); 1174 1175 // Green channel multiplier to shift by 5 and then repeat 6 bits 1176 __m128i g_mult = _mm_shuffle_epi32(_mm_cvtsi32_si128(0x20802080), 0); 1177 1178 // Red channel mask 1179 __m128i r_mask = _mm_shuffle_epi32(_mm_cvtsi32_si128(0xf800f800), 0); 1180 1181 // Green channel mask 1182 __m128i g_mask = _mm_shuffle_epi32(_mm_cvtsi32_si128(0x07e007e0), 0); 1183 1184 // Alpha channel mask 1185 __m128i a_mask = _mm_shuffle_epi32(_mm_cvtsi32_si128(0xff00ff00), 0); 1186 1187 // Get the masks for converting from ARGB 1188 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt; 1189 const Uint32 Rshift = dstfmt->Rshift; 1190 const Uint32 Gshift = dstfmt->Gshift; 1191 const Uint32 Bshift = dstfmt->Bshift; 1192 Uint32 Amask, Ashift; 1193 1194 SDL_Get8888AlphaMaskAndShift(dstfmt, &Amask, &Ashift); 1195 1196 // The byte offsets for the start of each pixel 1197 const __m128i mask_offsets = _mm_set_epi8(12, 12, 12, 12, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0); 1198 const __m128i convert_mask = _mm_add_epi32( 1199 _mm_set1_epi32( 1200 ((16 >> 3) << Rshift) | 1201 (( 8 >> 3) << Gshift) | 1202 (( 0 >> 3) << Bshift) | 1203 ((24 >> 3) << Ashift)), 1204 mask_offsets); 1205 1206 while (height--) { 1207 // Copy in 8 pixel chunks 1208 for (c = width / 8; c; --c) { 1209 __m128i pixel = _mm_loadu_si128((__m128i *)src); 1210 __m128i red = pixel; 1211 __m128i green = pixel; 1212 __m128i blue = pixel; 1213 1214 // Get red in the upper 5 bits and then multiply 1215 red = _mm_and_si128(red, r_mask); 1216 red = 
_mm_mulhi_epu16(red, rb_mult); 1217 1218 // Get blue in the upper 5 bits and then multiply 1219 blue = _mm_slli_epi16(blue, 11); 1220 blue = _mm_mulhi_epu16(blue, rb_mult); 1221 1222 // Combine the red and blue channels 1223 __m128i red_blue = _mm_or_si128(_mm_slli_epi16(red, 8), blue); 1224 1225 // Get the green channel and then multiply into place 1226 green = _mm_and_si128(green, g_mask); 1227 green = _mm_mulhi_epu16(green, g_mult); 1228 1229 // Combine the green and alpha channels 1230 __m128i green_alpha = _mm_or_si128(green, a_mask); 1231 1232 // Unpack them into output ARGB pixels 1233 __m128i out1 = _mm_unpacklo_epi8(red_blue, green_alpha); 1234 __m128i out2 = _mm_unpackhi_epi8(red_blue, green_alpha); 1235 1236 // Convert to dst format and save! 1237 // This is an SSSE3 instruction 1238 out1 = _mm_shuffle_epi8(out1, convert_mask); 1239 out2 = _mm_shuffle_epi8(out2, convert_mask); 1240 1241 _mm_storeu_si128((__m128i*)dst, out1); 1242 _mm_storeu_si128((__m128i*)(dst + 4), out2); 1243 1244 src += 8; 1245 dst += 8; 1246 } 1247 1248 // Get any leftovers 1249 switch (width & 7) { 1250 case 7: 1251 RGB_FROM_RGB565(*src, r, g, b); 1252 *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask; 1253 ++src; 1254 SDL_FALLTHROUGH; 1255 case 6: 1256 RGB_FROM_RGB565(*src, r, g, b); 1257 *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask; 1258 ++src; 1259 SDL_FALLTHROUGH; 1260 case 5: 1261 RGB_FROM_RGB565(*src, r, g, b); 1262 *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask; 1263 ++src; 1264 SDL_FALLTHROUGH; 1265 case 4: 1266 RGB_FROM_RGB565(*src, r, g, b); 1267 *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask; 1268 ++src; 1269 SDL_FALLTHROUGH; 1270 case 3: 1271 RGB_FROM_RGB565(*src, r, g, b); 1272 *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask; 1273 ++src; 1274 SDL_FALLTHROUGH; 1275 case 2: 1276 RGB_FROM_RGB565(*src, r, g, b); 1277 *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask; 1278 ++src; 1279 
SDL_FALLTHROUGH; 1280 case 1: 1281 RGB_FROM_RGB565(*src, r, g, b); 1282 *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask; 1283 ++src; 1284 break; 1285 } 1286 src += srcskip; 1287 dst += dstskip; 1288 } 1289} 1290 1291#endif // SDL_SSE4_1_INTRINSICS 1292 1293#ifdef SDL_HAVE_BLIT_N_RGB565 1294 1295// Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces 1296#define RGB565_32(dst, src, map) (map[src[LO] * 2] | map[src[HI] * 2 + 1]) 1297static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map) 1298{ 1299#ifndef USE_DUFFS_LOOP 1300 int c; 1301#endif 1302 int width, height; 1303 Uint8 *src; 1304 Uint32 *dst; 1305 int srcskip, dstskip; 1306 1307 // Set up some basic variables 1308 width = info->dst_w; 1309 height = info->dst_h; 1310 src = info->src; 1311 srcskip = info->src_skip; 1312 dst = (Uint32 *)info->dst; 1313 dstskip = info->dst_skip / 4; 1314 1315#ifdef USE_DUFFS_LOOP 1316 while (height--) { 1317 /* *INDENT-OFF* */ // clang-format off 1318 DUFFS_LOOP( 1319 { 1320 *dst++ = RGB565_32(dst, src, map); 1321 src += 2; 1322 }, 1323 width); 1324 /* *INDENT-ON* */ // clang-format on 1325 src += srcskip; 1326 dst += dstskip; 1327 } 1328#else 1329 while (height--) { 1330 // Copy in 4 pixel chunks 1331 for (c = width / 4; c; --c) { 1332 *dst++ = RGB565_32(dst, src, map); 1333 src += 2; 1334 *dst++ = RGB565_32(dst, src, map); 1335 src += 2; 1336 *dst++ = RGB565_32(dst, src, map); 1337 src += 2; 1338 *dst++ = RGB565_32(dst, src, map); 1339 src += 2; 1340 } 1341 // Get any leftovers 1342 switch (width & 3) { 1343 case 3: 1344 *dst++ = RGB565_32(dst, src, map); 1345 src += 2; 1346 SDL_FALLTHROUGH; 1347 case 2: 1348 *dst++ = RGB565_32(dst, src, map); 1349 src += 2; 1350 SDL_FALLTHROUGH; 1351 case 1: 1352 *dst++ = RGB565_32(dst, src, map); 1353 src += 2; 1354 break; 1355 } 1356 src += srcskip; 1357 dst += dstskip; 1358 } 1359#endif // USE_DUFFS_LOOP 1360} 1361 1362// This is the code used to generate the lookup tables below: 1363#if 0 1364#include 
<SDL3/SDL.h> 1365#include <stdio.h> 1366 1367#define GENERATE_SHIFTS 1368 1369static Uint32 Calculate(int v, int bits, int vmax, int shift) 1370{ 1371#if defined(GENERATE_FLOOR) 1372 return (Uint32)SDL_floor(v * 255.0f / vmax) << shift; 1373#elif defined(GENERATE_ROUND) 1374 return (Uint32)SDL_roundf(v * 255.0f / vmax) << shift; 1375#elif defined(GENERATE_SHIFTS) 1376 switch (bits) { 1377 case 1: 1378 v = (v << 7) | (v << 6) | (v << 5) | (v << 4) | (v << 3) | (v << 2) | (v << 1) | v; 1379 break; 1380 case 2: 1381 v = (v << 6) | (v << 4) | (v << 2) | v; 1382 break; 1383 case 3: 1384 v = (v << 5) | (v << 2) | (v >> 1); 1385 break; 1386 case 4: 1387 v = (v << 4) | v; 1388 break; 1389 case 5: 1390 v = (v << 3) | (v >> 2); 1391 break; 1392 case 6: 1393 v = (v << 2) | (v >> 4); 1394 break; 1395 case 7: 1396 v = (v << 1) | (v >> 6); 1397 break; 1398 case 8: 1399 break; 1400 } 1401 return (Uint32)v << shift; 1402#endif 1403} 1404 1405static Uint32 CalculateARGB(int v, const SDL_PixelFormatDetails *sfmt, const SDL_PixelFormatDetails *dfmt) 1406{ 1407 Uint8 r = (v & sfmt->Rmask) >> sfmt->Rshift; 1408 Uint8 g = (v & sfmt->Gmask) >> sfmt->Gshift; 1409 Uint8 b = (v & sfmt->Bmask) >> sfmt->Bshift; 1410 return dfmt->Amask | 1411 Calculate(r, sfmt->Rbits, (1 << sfmt->Rbits) - 1, dfmt->Rshift) | 1412 Calculate(g, sfmt->Gbits, (1 << sfmt->Gbits) - 1, dfmt->Gshift) | 1413 Calculate(b, sfmt->Bbits, (1 << sfmt->Bbits) - 1, dfmt->Bshift); 1414} 1415 1416static void GenerateLUT(SDL_PixelFormat src, SDL_PixelFormat dst) 1417{ 1418 static Uint32 lut[512]; 1419 const char *src_name = SDL_GetPixelFormatName(src) + 16; 1420 const char *dst_name = SDL_GetPixelFormatName(dst) + 16; 1421 const SDL_PixelFormatDetails *sfmt = SDL_GetPixelFormatDetails(src); 1422 const SDL_PixelFormatDetails *dfmt = SDL_GetPixelFormatDetails(dst); 1423 int i; 1424 1425 for (i = 0; i < 256; ++i) { 1426 lut[i * 2] = CalculateARGB(i, sfmt, dfmt); 1427 lut[i * 2 + 1] = CalculateARGB(i << 8, sfmt, dfmt); 1428 } 1429 
1430 printf("// Special optimized blit for %s -> %s\n\n", src_name, dst_name); 1431 printf("static const Uint32 %s_%s_LUT[%d] = {", src_name, dst_name, (int)SDL_arraysize(lut)); 1432 for (i = 0; i < SDL_arraysize(lut); ++i) { 1433 if ((i % 8) == 0) { 1434 printf("\n "); 1435 } 1436 printf("0x%.8x", lut[i]); 1437 if (i < (SDL_arraysize(lut) - 1)) { 1438 printf(","); 1439 if (((i + 1) % 8) != 0) { 1440 printf(" "); 1441 } 1442 } 1443 } 1444 printf("\n};\n\n"); 1445} 1446 1447int main(int argc, char *argv[]) 1448{ 1449 GenerateLUT(SDL_PIXELFORMAT_RGB565, SDL_PIXELFORMAT_ARGB8888); 1450 GenerateLUT(SDL_PIXELFORMAT_RGB565, SDL_PIXELFORMAT_ABGR8888); 1451 GenerateLUT(SDL_PIXELFORMAT_RGB565, SDL_PIXELFORMAT_RGBA8888); 1452 GenerateLUT(SDL_PIXELFORMAT_RGB565, SDL_PIXELFORMAT_BGRA8888); 1453} 1454#endif // 0 1455 1456/* *INDENT-OFF* */ // clang-format off 1457 1458// Special optimized blit for RGB565 -> ARGB8888 1459 1460static const Uint32 RGB565_ARGB8888_LUT[512] = { 1461 0xff000000, 0xff000000, 0xff000008, 0xff002000, 0xff000010, 0xff004100, 0xff000018, 0xff006100, 1462 0xff000021, 0xff008200, 0xff000029, 0xff00a200, 0xff000031, 0xff00c300, 0xff000039, 0xff00e300, 1463 0xff000042, 0xff080000, 0xff00004a, 0xff082000, 0xff000052, 0xff084100, 0xff00005a, 0xff086100, 1464 0xff000063, 0xff088200, 0xff00006b, 0xff08a200, 0xff000073, 0xff08c300, 0xff00007b, 0xff08e300, 1465 0xff000084, 0xff100000, 0xff00008c, 0xff102000, 0xff000094, 0xff104100, 0xff00009c, 0xff106100, 1466 0xff0000a5, 0xff108200, 0xff0000ad, 0xff10a200, 0xff0000b5, 0xff10c300, 0xff0000bd, 0xff10e300, 1467 0xff0000c6, 0xff180000, 0xff0000ce, 0xff182000, 0xff0000d6, 0xff184100, 0xff0000de, 0xff186100, 1468 0xff0000e7, 0xff188200, 0xff0000ef, 0xff18a200, 0xff0000f7, 0xff18c300, 0xff0000ff, 0xff18e300, 1469 0xff000400, 0xff210000, 0xff000408, 0xff212000, 0xff000410, 0xff214100, 0xff000418, 0xff216100, 1470 0xff000421, 0xff218200, 0xff000429, 0xff21a200, 0xff000431, 0xff21c300, 0xff000439, 0xff21e300, 1471 
0xff000442, 0xff290000, 0xff00044a, 0xff292000, 0xff000452, 0xff294100, 0xff00045a, 0xff296100, 1472 0xff000463, 0xff298200, 0xff00046b, 0xff29a200, 0xff000473, 0xff29c300, 0xff00047b, 0xff29e300, 1473 0xff000484, 0xff310000, 0xff00048c, 0xff312000, 0xff000494, 0xff314100, 0xff00049c, 0xff316100, 1474 0xff0004a5, 0xff318200, 0xff0004ad, 0xff31a200, 0xff0004b5, 0xff31c300, 0xff0004bd, 0xff31e300, 1475 0xff0004c6, 0xff390000, 0xff0004ce, 0xff392000, 0xff0004d6, 0xff394100, 0xff0004de, 0xff396100, 1476 0xff0004e7, 0xff398200, 0xff0004ef, 0xff39a200, 0xff0004f7, 0xff39c300, 0xff0004ff, 0xff39e300, 1477 0xff000800, 0xff420000, 0xff000808, 0xff422000, 0xff000810, 0xff424100, 0xff000818, 0xff426100, 1478 0xff000821, 0xff428200, 0xff000829, 0xff42a200, 0xff000831, 0xff42c300, 0xff000839, 0xff42e300, 1479 0xff000842, 0xff4a0000, 0xff00084a, 0xff4a2000, 0xff000852, 0xff4a4100, 0xff00085a, 0xff4a6100, 1480 0xff000863, 0xff4a8200, 0xff00086b, 0xff4aa200, 0xff000873, 0xff4ac300, 0xff00087b, 0xff4ae300, 1481 0xff000884, 0xff520000, 0xff00088c, 0xff522000, 0xff000894, 0xff524100, 0xff00089c, 0xff526100, 1482 0xff0008a5, 0xff528200, 0xff0008ad, 0xff52a200, 0xff0008b5, 0xff52c300, 0xff0008bd, 0xff52e300, 1483 0xff0008c6, 0xff5a0000, 0xff0008ce, 0xff5a2000, 0xff0008d6, 0xff5a4100, 0xff0008de, 0xff5a6100, 1484 0xff0008e7, 0xff5a8200, 0xff0008ef, 0xff5aa200, 0xff0008f7, 0xff5ac300, 0xff0008ff, 0xff5ae300, 1485 0xff000c00, 0xff630000, 0xff000c08, 0xff632000, 0xff000c10, 0xff634100, 0xff000c18, 0xff636100, 1486 0xff000c21, 0xff638200, 0xff000c29, 0xff63a200, 0xff000c31, 0xff63c300, 0xff000c39, 0xff63e300, 1487 0xff000c42, 0xff6b0000, 0xff000c4a, 0xff6b2000, 0xff000c52, 0xff6b4100, 0xff000c5a, 0xff6b6100, 1488 0xff000c63, 0xff6b8200, 0xff000c6b, 0xff6ba200, 0xff000c73, 0xff6bc300, 0xff000c7b, 0xff6be300, 1489 0xff000c84, 0xff730000, 0xff000c8c, 0xff732000, 0xff000c94, 0xff734100, 0xff000c9c, 0xff736100, 1490 0xff000ca5, 0xff738200, 0xff000cad, 0xff73a200, 0xff000cb5, 0xff73c300, 
0xff000cbd, 0xff73e300, 1491 0xff000cc6, 0xff7b0000, 0xff000cce, 0xff7b2000, 0xff000cd6, 0xff7b4100, 0xff000cde, 0xff7b6100, 1492 0xff000ce7, 0xff7b8200, 0xff000cef, 0xff7ba200, 0xff000cf7, 0xff7bc300, 0xff000cff, 0xff7be300, 1493 0xff001000, 0xff840000, 0xff001008, 0xff842000, 0xff001010, 0xff844100, 0xff001018, 0xff846100, 1494 0xff001021, 0xff848200, 0xff001029, 0xff84a200, 0xff001031, 0xff84c300, 0xff001039, 0xff84e300, 1495 0xff001042, 0xff8c0000, 0xff00104a, 0xff8c2000, 0xff001052, 0xff8c4100, 0xff00105a, 0xff8c6100, 1496 0xff001063, 0xff8c8200, 0xff00106b, 0xff8ca200, 0xff001073, 0xff8cc300, 0xff00107b, 0xff8ce300, 1497 0xff001084, 0xff940000, 0xff00108c, 0xff942000, 0xff001094, 0xff944100, 0xff00109c, 0xff946100, 1498 0xff0010a5, 0xff948200, 0xff0010ad, 0xff94a200, 0xff0010b5, 0xff94c300, 0xff0010bd, 0xff94e300, 1499 0xff0010c6, 0xff9c0000, 0xff0010ce, 0xff9c2000, 0xff0010d6, 0xff9c4100, 0xff0010de, 0xff9c6100, 1500 0xff0010e7, 0xff9c8200, 0xff0010ef, 0xff9ca200, 0xff0010f7, 0xff9cc300, 0xff0010ff, 0xff9ce300, 1501 0xff001400, 0xffa50000, 0xff001408, 0xffa52000, 0xff001410, 0xffa54100, 0xff001418, 0xffa56100, 1502 0xff001421, 0xffa58200, 0xff001429, 0xffa5a200, 0xff001431, 0xffa5c300, 0xff001439, 0xffa5e300, 1503 0xff001442, 0xffad0000, 0xff00144a, 0xffad2000, 0xff001452, 0xffad4100, 0xff00145a, 0xffad6100, 1504 0xff001463, 0xffad8200, 0xff00146b, 0xffada200, 0xff001473, 0xffadc300, 0xff00147b, 0xffade300, 1505 0xff001484, 0xffb50000, 0xff00148c, 0xffb52000, 0xff001494, 0xffb54100, 0xff00149c, 0xffb56100, 1506 0xff0014a5, 0xffb58200, 0xff0014ad, 0xffb5a200, 0xff0014b5, 0xffb5c300, 0xff0014bd, 0xffb5e300, 1507 0xff0014c6, 0xffbd0000, 0xff0014ce, 0xffbd2000, 0xff0014d6, 0xffbd4100, 0xff0014de, 0xffbd6100, 1508 0xff0014e7, 0xffbd8200, 0xff0014ef, 0xffbda200, 0xff0014f7, 0xffbdc300, 0xff0014ff, 0xffbde300, 1509 0xff001800, 0xffc60000, 0xff001808, 0xffc62000, 0xff001810, 0xffc64100, 0xff001818, 0xffc66100, 1510 0xff001821, 0xffc68200, 0xff001829, 0xffc6a200, 
0xff001831, 0xffc6c300, 0xff001839, 0xffc6e300, 1511 0xff001842, 0xffce0000, 0xff00184a, 0xffce2000, 0xff001852, 0xffce4100, 0xff00185a, 0xffce6100, 1512 0xff001863, 0xffce8200, 0xff00186b, 0xffcea200, 0xff001873, 0xffcec300, 0xff00187b, 0xffcee300, 1513 0xff001884, 0xffd60000, 0xff00188c, 0xffd62000, 0xff001894, 0xffd64100, 0xff00189c, 0xffd66100, 1514 0xff0018a5, 0xffd68200, 0xff0018ad, 0xffd6a200, 0xff0018b5, 0xffd6c300, 0xff0018bd, 0xffd6e300, 1515 0xff0018c6, 0xffde0000, 0xff0018ce, 0xffde2000, 0xff0018d6, 0xffde4100, 0xff0018de, 0xffde6100, 1516 0xff0018e7, 0xffde8200, 0xff0018ef, 0xffdea200, 0xff0018f7, 0xffdec300, 0xff0018ff, 0xffdee300, 1517 0xff001c00, 0xffe70000, 0xff001c08, 0xffe72000, 0xff001c10, 0xffe74100, 0xff001c18, 0xffe76100, 1518 0xff001c21, 0xffe78200, 0xff001c29, 0xffe7a200, 0xff001c31, 0xffe7c300, 0xff001c39, 0xffe7e300, 1519 0xff001c42, 0xffef0000, 0xff001c4a, 0xffef2000, 0xff001c52, 0xffef4100, 0xff001c5a, 0xffef6100, 1520 0xff001c63, 0xffef8200, 0xff001c6b, 0xffefa200, 0xff001c73, 0xffefc300, 0xff001c7b, 0xffefe300, 1521 0xff001c84, 0xfff70000, 0xff001c8c, 0xfff72000, 0xff001c94, 0xfff74100, 0xff001c9c, 0xfff76100, 1522 0xff001ca5, 0xfff78200, 0xff001cad, 0xfff7a200, 0xff001cb5, 0xfff7c300, 0xff001cbd, 0xfff7e300, 1523 0xff001cc6, 0xffff0000, 0xff001cce, 0xffff2000, 0xff001cd6, 0xffff4100, 0xff001cde, 0xffff6100, 1524 0xff001ce7, 0xffff8200, 0xff001cef, 0xffffa200, 0xff001cf7, 0xffffc300, 0xff001cff, 0xffffe300 1525}; 1526 1527static void Blit_RGB565_ARGB8888(SDL_BlitInfo * info) 1528{ 1529 Blit_RGB565_32(info, RGB565_ARGB8888_LUT); 1530} 1531 1532// Special optimized blit for RGB565 -> ABGR8888 1533 1534static const Uint32 RGB565_ABGR8888_LUT[512] = { 1535 0xff000000, 0xff000000, 0xff080000, 0xff002000, 0xff100000, 0xff004100, 0xff180000, 0xff006100, 1536 0xff210000, 0xff008200, 0xff290000, 0xff00a200, 0xff310000, 0xff00c300, 0xff390000, 0xff00e300, 1537 0xff420000, 0xff000008, 0xff4a0000, 0xff002008, 0xff520000, 0xff004108, 0xff5a0000, 
0xff006108, 1538 0xff630000, 0xff008208, 0xff6b0000, 0xff00a208, 0xff730000, 0xff00c308, 0xff7b0000, 0xff00e308, 1539 0xff840000, 0xff000010, 0xff8c0000, 0xff002010, 0xff940000, 0xff004110, 0xff9c0000, 0xff006110, 1540 0xffa50000, 0xff008210, 0xffad0000, 0xff00a210, 0xffb50000, 0xff00c310, 0xffbd0000, 0xff00e310, 1541 0xffc60000, 0xff000018, 0xffce0000, 0xff002018, 0xffd60000, 0xff004118, 0xffde0000, 0xff006118, 1542 0xffe70000, 0xff008218, 0xffef0000, 0xff00a218, 0xfff70000, 0xff00c318, 0xffff0000, 0xff00e318, 1543 0xff000400, 0xff000021, 0xff080400, 0xff002021, 0xff100400, 0xff004121, 0xff180400, 0xff006121, 1544 0xff210400, 0xff008221, 0xff290400, 0xff00a221, 0xff310400, 0xff00c321, 0xff390400, 0xff00e321, 1545 0xff420400, 0xff000029, 0xff4a0400, 0xff002029, 0xff520400, 0xff004129, 0xff5a0400, 0xff006129, 1546 0xff630400, 0xff008229, 0xff6b0400, 0xff00a229, 0xff730400, 0xff00c329, 0xff7b0400, 0xff00e329, 1547 0xff840400, 0xff000031, 0xff8c0400, 0xff002031, 0xff940400, 0xff004131, 0xff9c0400, 0xff006131, 1548 0xffa50400, 0xff008231, 0xffad0400, 0xff00a231, 0xffb50400, 0xff00c331, 0xffbd0400, 0xff00e331, 1549 0xffc60400, 0xff000039, 0xffce0400, 0xff002039, 0xffd60400, 0xff004139, 0xffde0400, 0xff006139, 1550 0xffe70400, 0xff008239, 0xffef0400, 0xff00a239, 0xfff70400, 0xff00c339, 0xffff0400, 0xff00e339, 1551 0xff000800, 0xff000042, 0xff080800, 0xff002042, 0xff100800, 0xff004142, 0xff180800, 0xff006142, 1552 0xff210800, 0xff008242, 0xff290800, 0xff00a242, 0xff310800, 0xff00c342, 0xff390800, 0xff00e342, 1553 0xff420800, 0xff00004a, 0xff4a0800, 0xff00204a, 0xff520800, 0xff00414a, 0xff5a0800, 0xff00614a, 1554 0xff630800, 0xff00824a, 0xff6b0800, 0xff00a24a, 0xff730800, 0xff00c34a, 0xff7b0800, 0xff00e34a, 1555 0xff840800, 0xff000052, 0xff8c0800, 0xff002052, 0xff940800, 0xff004152, 0xff9c0800, 0xff006152, 1556 0xffa50800, 0xff008252, 0xffad0800, 0xff00a252, 0xffb50800, 0xff00c352, 0xffbd0800, 0xff00e352, 1557 0xffc60800, 0xff00005a, 0xffce0800, 0xff00205a, 0xffd60800, 
0xff00415a, 0xffde0800, 0xff00615a, 1558 0xffe70800, 0xff00825a, 0xffef0800, 0xff00a25a, 0xfff70800, 0xff00c35a, 0xffff0800, 0xff00e35a, 1559 0xff000c00, 0xff000063, 0xff080c00, 0xff002063, 0xff100c00, 0xff004163, 0xff180c00, 0xff006163, 1560 0xff210c00, 0xff008263, 0xff290c00, 0xff00a263, 0xff310c00, 0xff00c363, 0xff390c00, 0xff00e363, 1561 0xff420c00, 0xff00006b, 0xff4a0c00, 0xff00206b, 0xff520c00, 0xff00416b, 0xff5a0c00, 0xff00616b, 1562 0xff630c00, 0xff00826b, 0xff6b0c00, 0xff00a26b, 0xff730c00, 0xff00c36b, 0xff7b0c00, 0xff00e36b, 1563 0xff840c00, 0xff000073, 0xff8c0c00, 0xff002073, 0xff940c00, 0xff004173, 0xff9c0c00, 0xff006173, 1564 0xffa50c00, 0xff008273, 0xffad0c00, 0xff00a273, 0xffb50c00, 0xff00c373, 0xffbd0c00, 0xff00e373, 1565 0xffc60c00, 0xff00007b, 0xffce0c00, 0xff00207b, 0xffd60c00, 0xff00417b, 0xffde0c00, 0xff00617b, 1566 0xffe70c00, 0xff00827b, 0xffef0c00, 0xff00a27b, 0xfff70c00, 0xff00c37b, 0xffff0c00, 0xff00e37b, 1567 0xff001000, 0xff000084, 0xff081000, 0xff002084, 0xff101000, 0xff004184, 0xff181000, 0xff006184, 1568 0xff211000, 0xff008284, 0xff291000, 0xff00a284, 0xff311000, 0xff00c384, 0xff391000, 0xff00e384, 1569 0xff421000, 0xff00008c, 0xff4a1000, 0xff00208c, 0xff521000, 0xff00418c, 0xff5a1000, 0xff00618c, 1570 0xff631000, 0xff00828c, 0xff6b1000, 0xff00a28c, 0xff731000, 0xff00c38c, 0xff7b1000, 0xff00e38c, 1571 0xff841000, 0xff000094, 0xff8c1000, 0xff002094, 0xff941000, 0xff004194, 0xff9c1000, 0xff006194, 1572 0xffa51000, 0xff008294, 0xffad1000, 0xff00a294, 0xffb51000, 0xff00c394, 0xffbd1000, 0xff00e394, 1573 0xffc61000, 0xff00009c, 0xffce1000, 0xff00209c, 0xffd61000, 0xff00419c, 0xffde1000, 0xff00619c, 1574 0xffe71000, 0xff00829c, 0xffef1000, 0xff00a29c, 0xfff71000, 0xff00c39c, 0xffff1000, 0xff00e39c, 1575 0xff001400, 0xff0000a5, 0xff081400, 0xff0020a5, 0xff101400, 0xff0041a5, 0xff181400, 0xff0061a5, 1576 0xff211400, 0xff0082a5, 0xff291400, 0xff00a2a5, 0xff311400, 0xff00c3a5, 0xff391400, 0xff00e3a5, 1577 0xff421400, 0xff0000ad, 0xff4a1400, 
0xff0020ad, 0xff521400, 0xff0041ad, 0xff5a1400, 0xff0061ad, 1578 0xff631400, 0xff0082ad, 0xff6b1400, 0xff00a2ad, 0xff731400, 0xff00c3ad, 0xff7b1400, 0xff00e3ad, 1579 0xff841400, 0xff0000b5, 0xff8c1400, 0xff0020b5, 0xff941400, 0xff0041b5, 0xff9c1400, 0xff0061b5, 1580 0xffa51400, 0xff0082b5, 0xffad1400, 0xff00a2b5, 0xffb51400, 0xff00c3b5, 0xffbd1400, 0xff00e3b5, 1581 0xffc61400, 0xff0000bd, 0xffce1400, 0xff0020bd, 0xffd61400, 0xff0041bd, 0xffde1400, 0xff0061bd, 1582 0xffe71400, 0xff0082bd, 0xffef1400, 0xff00a2bd, 0xfff71400, 0xff00c3bd, 0xffff1400, 0xff00e3bd, 1583 0xff001800, 0xff0000c6, 0xff081800, 0xff0020c6, 0xff101800, 0xff0041c6, 0xff181800, 0xff0061c6, 1584 0xff211800, 0xff0082c6, 0xff291800, 0xff00a2c6, 0xff311800, 0xff00c3c6, 0xff391800, 0xff00e3c6, 1585 0xff421800, 0xff0000ce, 0xff4a1800, 0xff0020ce, 0xff521800, 0xff0041ce, 0xff5a1800, 0xff0061ce, 1586 0xff631800, 0xff0082ce, 0xff6b1800, 0xff00a2ce, 0xff731800, 0xff00c3ce, 0xff7b1800, 0xff00e3ce, 1587 0xff841800, 0xff0000d6, 0xff8c1800, 0xff0020d6, 0xff941800, 0xff0041d6, 0xff9c1800, 0xff0061d6, 1588 0xffa51800, 0xff0082d6, 0xffad1800, 0xff00a2d6, 0xffb51800, 0xff00c3d6, 0xffbd1800, 0xff00e3d6, 1589 0xffc61800, 0xff0000de, 0xffce1800, 0xff0020de, 0xffd61800, 0xff0041de, 0xffde1800, 0xff0061de, 1590 0xffe71800, 0xff0082de, 0xffef1800, 0xff00a2de, 0xfff71800, 0xff00c3de, 0xffff1800, 0xff00e3de, 1591 0xff001c00, 0xff0000e7, 0xff081c00, 0xff0020e7, 0xff101c00, 0xff0041e7, 0xff181c00, 0xff0061e7, 1592 0xff211c00, 0xff0082e7, 0xff291c00, 0xff00a2e7, 0xff311c00, 0xff00c3e7, 0xff391c00, 0xff00e3e7, 1593 0xff421c00, 0xff0000ef, 0xff4a1c00, 0xff0020ef, 0xff521c00, 0xff0041ef, 0xff5a1c00, 0xff0061ef, 1594 0xff631c00, 0xff0082ef, 0xff6b1c00, 0xff00a2ef, 0xff731c00, 0xff00c3ef, 0xff7b1c00, 0xff00e3ef, 1595 0xff841c00, 0xff0000f7, 0xff8c1c00, 0xff0020f7, 0xff941c00, 0xff0041f7, 0xff9c1c00, 0xff0061f7, 1596 0xffa51c00, 0xff0082f7, 0xffad1c00, 0xff00a2f7, 0xffb51c00, 0xff00c3f7, 0xffbd1c00, 0xff00e3f7, 1597 0xffc61c00, 
0xff0000ff, 0xffce1c00, 0xff0020ff, 0xffd61c00, 0xff0041ff, 0xffde1c00, 0xff0061ff, 1598 0xffe71c00, 0xff0082ff, 0xffef1c00, 0xff00a2ff, 0xfff71c00, 0xff00c3ff, 0xffff1c00, 0xff00e3ff 1599}; 1600 1601static void Blit_RGB565_ABGR8888(SDL_BlitInfo * info) 1602{ 1603 Blit_RGB565_32(info, RGB565_ABGR8888_LUT); 1604} 1605 1606// Special optimized blit for RGB565 -> RGBA8888 1607 1608static const Uint32 RGB565_RGBA8888_LUT[512] = { 1609 0x000000ff, 0x000000ff, 0x000008ff, 0x002000ff, 0x000010ff, 0x004100ff, 0x000018ff, 0x006100ff, 1610 0x000021ff, 0x008200ff, 0x000029ff, 0x00a200ff, 0x000031ff, 0x00c300ff, 0x000039ff, 0x00e300ff, 1611 0x000042ff, 0x080000ff, 0x00004aff, 0x082000ff, 0x000052ff, 0x084100ff, 0x00005aff, 0x086100ff, 1612 0x000063ff, 0x088200ff, 0x00006bff, 0x08a200ff, 0x000073ff, 0x08c300ff, 0x00007bff, 0x08e300ff, 1613 0x000084ff, 0x100000ff, 0x00008cff, 0x102000ff, 0x000094ff, 0x104100ff, 0x00009cff, 0x106100ff, 1614 0x0000a5ff, 0x108200ff, 0x0000adff, 0x10a200ff, 0x0000b5ff, 0x10c300ff, 0x0000bdff, 0x10e300ff, 1615 0x0000c6ff, 0x180000ff, 0x0000ceff, 0x182000ff, 0x0000d6ff, 0x184100ff, 0x0000deff, 0x186100ff, 1616 0x0000e7ff, 0x188200ff, 0x0000efff, 0x18a200ff, 0x0000f7ff, 0x18c300ff, 0x0000ffff, 0x18e300ff, 1617 0x000400ff, 0x210000ff, 0x000408ff, 0x212000ff, 0x000410ff, 0x214100ff, 0x000418ff, 0x216100ff, 1618 0x000421ff, 0x218200ff, 0x000429ff, 0x21a200ff, 0x000431ff, 0x21c300ff, 0x000439ff, 0x21e300ff, 1619 0x000442ff, 0x290000ff, 0x00044aff, 0x292000ff, 0x000452ff, 0x294100ff, 0x00045aff, 0x296100ff, 1620 0x000463ff, 0x298200ff, 0x00046bff, 0x29a200ff, 0x000473ff, 0x29c300ff, 0x00047bff, 0x29e300ff, 1621 0x000484ff, 0x310000ff, 0x00048cff, 0x312000ff, 0x000494ff, 0x314100ff, 0x00049cff, 0x316100ff, 1622 0x0004a5ff, 0x318200ff, 0x0004adff, 0x31a200ff, 0x0004b5ff, 0x31c300ff, 0x0004bdff, 0x31e300ff, 1623 0x0004c6ff, 0x390000ff, 0x0004ceff, 0x392000ff, 0x0004d6ff, 0x394100ff, 0x0004deff, 0x396100ff, 1624 0x0004e7ff, 0x398200ff, 0x0004efff, 0x39a200ff, 
0x0004f7ff, 0x39c300ff, 0x0004ffff, 0x39e300ff, 1625 0x000800ff, 0x420000ff, 0x000808ff, 0x422000ff, 0x000810ff, 0x424100ff, 0x000818ff, 0x426100ff, 1626 0x000821ff, 0x428200ff, 0x000829ff, 0x42a200ff, 0x000831ff, 0x42c300ff, 0x000839ff, 0x42e300ff, 1627 0x000842ff, 0x4a0000ff, 0x00084aff, 0x4a2000ff, 0x000852ff, 0x4a4100ff, 0x00085aff, 0x4a6100ff, 1628 0x000863ff, 0x4a8200ff, 0x00086bff, 0x4aa200ff, 0x000873ff, 0x4ac300ff, 0x00087bff, 0x4ae300ff, 1629 0x000884ff, 0x520000ff, 0x00088cff, 0x522000ff, 0x000894ff, 0x524100ff, 0x00089cff, 0x526100ff, 1630 0x0008a5ff, 0x528200ff, 0x0008adff, 0x52a200ff, 0x0008b5ff, 0x52c300ff, 0x0008bdff, 0x52e300ff, 1631 0x0008c6ff, 0x5a0000ff, 0x0008ceff, 0x5a2000ff, 0x0008d6ff, 0x5a4100ff, 0x0008deff, 0x5a6100ff, 1632 0x0008e7ff, 0x5a8200ff, 0x0008efff, 0x5aa200ff, 0x0008f7ff, 0x5ac300ff, 0x0008ffff, 0x5ae300ff, 1633 0x000c00ff, 0x630000ff, 0x000c08ff, 0x632000ff, 0x000c10ff, 0x634100ff, 0x000c18ff, 0x636100ff, 1634 0x000c21ff, 0x638200ff, 0x000c29ff, 0x63a200ff, 0x000c31ff, 0x63c300ff, 0x000c39ff, 0x63e300ff, 1635 0x000c42ff, 0x6b0000ff, 0x000c4aff, 0x6b2000ff, 0x000c52ff, 0x6b4100ff, 0x000c5aff, 0x6b6100ff, 1636 0x000c63ff, 0x6b8200ff, 0x000c6bff, 0x6ba200ff, 0x000c73ff, 0x6bc300ff, 0x000c7bff, 0x6be300ff, 1637 0x000c84ff, 0x730000ff, 0x000c8cff, 0x732000ff, 0x000c94ff, 0x734100ff, 0x000c9cff, 0x736100ff, 1638 0x000ca5ff, 0x738200ff, 0x000cadff, 0x73a200ff, 0x000cb5ff, 0x73c300ff, 0x000cbdff, 0x73e300ff, 1639 0x000cc6ff, 0x7b0000ff, 0x000cceff, 0x7b2000ff, 0x000cd6ff, 0x7b4100ff, 0x000cdeff, 0x7b6100ff, 1640 0x000ce7ff, 0x7b8200ff, 0x000cefff, 0x7ba200ff, 0x000cf7ff, 0x7bc300ff, 0x000cffff, 0x7be300ff, 1641 0x001000ff, 0x840000ff, 0x001008ff, 0x842000ff, 0x001010ff, 0x844100ff, 0x001018ff, 0x846100ff, 1642 0x001021ff, 0x848200ff, 0x001029ff, 0x84a200ff, 0x001031ff, 0x84c300ff, 0x001039ff, 0x84e300ff, 1643 0x001042ff, 0x8c0000ff, 0x00104aff, 0x8c2000ff, 0x001052ff, 0x8c4100ff, 0x00105aff, 0x8c6100ff, 1644 0x001063ff, 0x8c8200ff, 
0x00106bff, 0x8ca200ff, 0x001073ff, 0x8cc300ff, 0x00107bff, 0x8ce300ff, 1645 0x001084ff, 0x940000ff, 0x00108cff, 0x942000ff, 0x001094ff, 0x944100ff, 0x00109cff, 0x946100ff, 1646 0x0010a5ff, 0x948200ff, 0x0010adff, 0x94a200ff, 0x0010b5ff, 0x94c300ff, 0x0010bdff, 0x94e300ff, 1647 0x0010c6ff, 0x9c0000ff, 0x0010ceff, 0x9c2000ff, 0x0010d6ff, 0x9c4100ff, 0x0010deff, 0x9c6100ff, 1648 0x0010e7ff, 0x9c8200ff, 0x0010efff, 0x9ca200ff, 0x0010f7ff, 0x9cc300ff, 0x0010ffff, 0x9ce300ff, 1649 0x001400ff, 0xa50000ff, 0x001408ff, 0xa52000ff, 0x001410ff, 0xa54100ff, 0x001418ff, 0xa56100ff, 1650 0x001421ff, 0xa58200ff, 0x001429ff, 0xa5a200ff, 0x001431ff, 0xa5c300ff, 0x001439ff, 0xa5e300ff, 1651 0x001442ff, 0xad0000ff, 0x00144aff, 0xad2000ff, 0x001452ff, 0xad4100ff, 0x00145aff, 0xad6100ff, 1652 0x001463ff, 0xad8200ff, 0x00146bff, 0xada200ff, 0x001473ff, 0xadc300ff, 0x00147bff, 0xade300ff, 1653 0x001484ff, 0xb50000ff, 0x00148cff, 0xb52000ff, 0x001494ff, 0xb54100ff, 0x00149cff, 0xb56100ff, 1654 0x0014a5ff, 0xb58200ff, 0x0014adff, 0xb5a200ff, 0x0014b5ff, 0xb5c300ff, 0x0014bdff, 0xb5e300ff, 1655 0x0014c6ff, 0xbd0000ff, 0x0014ceff, 0xbd2000ff, 0x0014d6ff, 0xbd4100ff, 0x0014deff, 0xbd6100ff, 1656 0x0014e7ff, 0xbd8200ff, 0x0014efff, 0xbda200ff, 0x0014f7ff, 0xbdc300ff, 0x0014ffff, 0xbde300ff, 1657 0x001800ff, 0xc60000ff, 0x001808ff, 0xc62000ff, 0x001810ff, 0xc64100ff, 0x001818ff, 0xc66100ff, 1658 0x001821ff, 0xc68200ff, 0x001829ff, 0xc6a200ff, 0x001831ff, 0xc6c300ff, 0x001839ff, 0xc6e300ff, 1659 0x001842ff, 0xce0000ff, 0x00184aff, 0xce2000ff, 0x001852ff, 0xce4100ff, 0x00185aff, 0xce6100ff, 1660 0x001863ff, 0xce8200ff, 0x00186bff, 0xcea200ff, 0x001873ff, 0xcec300ff, 0x00187bff, 0xcee300ff, 1661 0x001884ff, 0xd60000ff, 0x00188cff, 0xd62000ff, 0x001894ff, 0xd64100ff, 0x00189cff, 0xd66100ff, 1662 0x0018a5ff, 0xd68200ff, 0x0018adff, 0xd6a200ff, 0x0018b5ff, 0xd6c300ff, 0x0018bdff, 0xd6e300ff, 1663 0x0018c6ff, 0xde0000ff, 0x0018ceff, 0xde2000ff, 0x0018d6ff, 0xde4100ff, 0x0018deff, 0xde6100ff, 1664 
0x0018e7ff, 0xde8200ff, 0x0018efff, 0xdea200ff, 0x0018f7ff, 0xdec300ff, 0x0018ffff, 0xdee300ff, 1665 0x001c00ff, 0xe70000ff, 0x001c08ff, 0xe72000ff, 0x001c10ff, 0xe74100ff, 0x001c18ff, 0xe76100ff, 1666 0x001c21ff, 0xe78200ff, 0x001c29ff, 0xe7a200ff, 0x001c31ff, 0xe7c300ff, 0x001c39ff, 0xe7e300ff, 1667 0x001c42ff, 0xef0000ff, 0x001c4aff, 0xef2000ff, 0x001c52ff, 0xef4100ff, 0x001c5aff, 0xef6100ff, 1668 0x001c63ff, 0xef8200ff, 0x001c6bff, 0xefa200ff, 0x001c73ff, 0xefc300ff, 0x001c7bff, 0xefe300ff, 1669 0x001c84ff, 0xf70000ff, 0x001c8cff, 0xf72000ff, 0x001c94ff, 0xf74100ff, 0x001c9cff, 0xf76100ff, 1670 0x001ca5ff, 0xf78200ff, 0x001cadff, 0xf7a200ff, 0x001cb5ff, 0xf7c300ff, 0x001cbdff, 0xf7e300ff, 1671 0x001cc6ff, 0xff0000ff, 0x001cceff, 0xff2000ff, 0x001cd6ff, 0xff4100ff, 0x001cdeff, 0xff6100ff, 1672 0x001ce7ff, 0xff8200ff, 0x001cefff, 0xffa200ff, 0x001cf7ff, 0xffc300ff, 0x001cffff, 0xffe300ff 1673}; 1674 1675static void Blit_RGB565_RGBA8888(SDL_BlitInfo * info) 1676{ 1677 Blit_RGB565_32(info, RGB565_RGBA8888_LUT); 1678} 1679 1680// Special optimized blit for RGB565 -> BGRA8888 1681 1682static const Uint32 RGB565_BGRA8888_LUT[512] = { 1683 0x000000ff, 0x000000ff, 0x080000ff, 0x002000ff, 0x100000ff, 0x004100ff, 0x180000ff, 0x006100ff, 1684 0x210000ff, 0x008200ff, 0x290000ff, 0x00a200ff, 0x310000ff, 0x00c300ff, 0x390000ff, 0x00e300ff, 1685 0x420000ff, 0x000008ff, 0x4a0000ff, 0x002008ff, 0x520000ff, 0x004108ff, 0x5a0000ff, 0x006108ff, 1686 0x630000ff, 0x008208ff, 0x6b0000ff, 0x00a208ff, 0x730000ff, 0x00c308ff, 0x7b0000ff, 0x00e308ff, 1687 0x840000ff, 0x000010ff, 0x8c0000ff, 0x002010ff, 0x940000ff, 0x004110ff, 0x9c0000ff, 0x006110ff, 1688 0xa50000ff, 0x008210ff, 0xad0000ff, 0x00a210ff, 0xb50000ff, 0x00c310ff, 0xbd0000ff, 0x00e310ff, 1689 0xc60000ff, 0x000018ff, 0xce0000ff, 0x002018ff, 0xd60000ff, 0x004118ff, 0xde0000ff, 0x006118ff, 1690 0xe70000ff, 0x008218ff, 0xef0000ff, 0x00a218ff, 0xf70000ff, 0x00c318ff, 0xff0000ff, 0x00e318ff, 1691 0x000400ff, 0x000021ff, 0x080400ff, 
0x002021ff, 0x100400ff, 0x004121ff, 0x180400ff, 0x006121ff, 1692 0x210400ff, 0x008221ff, 0x290400ff, 0x00a221ff, 0x310400ff, 0x00c321ff, 0x390400ff, 0x00e321ff, 1693 0x420400ff, 0x000029ff, 0x4a0400ff, 0x002029ff, 0x520400ff, 0x004129ff, 0x5a0400ff, 0x006129ff, 1694 0x630400ff, 0x008229ff, 0x6b0400ff, 0x00a229ff, 0x730400ff, 0x00c329ff, 0x7b0400ff, 0x00e329ff, 1695 0x840400ff, 0x000031ff, 0x8c0400ff, 0x002031ff, 0x940400ff, 0x004131ff, 0x9c0400ff, 0x006131ff, 1696 0xa50400ff, 0x008231ff, 0xad0400ff, 0x00a231ff, 0xb50400ff, 0x00c331ff, 0xbd0400ff, 0x00e331ff, 1697 0xc60400ff, 0x000039ff, 0xce0400ff, 0x002039ff, 0xd60400ff, 0x004139ff, 0xde0400ff, 0x006139ff, 1698 0xe70400ff, 0x008239ff, 0xef0400ff, 0x00a239ff, 0xf70400ff, 0x00c339ff, 0xff0400ff, 0x00e339ff, 1699 0x000800ff, 0x000042ff, 0x080800ff, 0x002042ff, 0x100800ff, 0x004142ff, 0x180800ff, 0x006142ff, 1700 0x210800ff, 0x008242ff, 0x290800ff, 0x00a242ff, 0x310800ff, 0x00c342ff, 0x390800ff, 0x00e342ff, 1701 0x420800ff, 0x00004aff, 0x4a0800ff, 0x00204aff, 0x520800ff, 0x00414aff, 0x5a0800ff, 0x00614aff, 1702 0x630800ff, 0x00824aff, 0x6b0800ff, 0x00a24aff, 0x730800ff, 0x00c34aff, 0x7b0800ff, 0x00e34aff, 1703 0x840800ff, 0x000052ff, 0x8c0800ff, 0x002052ff, 0x940800ff, 0x004152ff, 0x9c0800ff, 0x006152ff, 1704 0xa50800ff, 0x008252ff, 0xad0800ff, 0x00a252ff, 0xb50800ff, 0x00c352ff, 0xbd0800ff, 0x00e352ff, 1705 0xc60800ff, 0x00005aff, 0xce0800ff, 0x00205aff, 0xd60800ff, 0x00415aff, 0xde0800ff, 0x00615aff, 1706 0xe70800ff, 0x00825aff, 0xef0800ff, 0x00a25aff, 0xf70800ff, 0x00c35aff, 0xff0800ff, 0x00e35aff, 1707 0x000c00ff, 0x000063ff, 0x080c00ff, 0x002063ff, 0x100c00ff, 0x004163ff, 0x180c00ff, 0x006163ff, 1708 0x210c00ff, 0x008263ff, 0x290c00ff, 0x00a263ff, 0x310c00ff, 0x00c363ff, 0x390c00ff, 0x00e363ff, 1709 0x420c00ff, 0x00006bff, 0x4a0c00ff, 0x00206bff, 0x520c00ff, 0x00416bff, 0x5a0c00ff, 0x00616bff, 1710 0x630c00ff, 0x00826bff, 0x6b0c00ff, 0x00a26bff, 0x730c00ff, 0x00c36bff, 0x7b0c00ff, 0x00e36bff, 1711 0x840c00ff, 
0x000073ff, 0x8c0c00ff, 0x002073ff, 0x940c00ff, 0x004173ff, 0x9c0c00ff, 0x006173ff, 1712 0xa50c00ff, 0x008273ff, 0xad0c00ff, 0x00a273ff, 0xb50c00ff, 0x00c373ff, 0xbd0c00ff, 0x00e373ff, 1713 0xc60c00ff, 0x00007bff, 0xce0c00ff, 0x00207bff, 0xd60c00ff, 0x00417bff, 0xde0c00ff, 0x00617bff, 1714 0xe70c00ff, 0x00827bff, 0xef0c00ff, 0x00a27bff, 0xf70c00ff, 0x00c37bff, 0xff0c00ff, 0x00e37bff, 1715 0x001000ff, 0x000084ff, 0x081000ff, 0x002084ff, 0x101000ff, 0x004184ff, 0x181000ff, 0x006184ff, 1716 0x211000ff, 0x008284ff, 0x291000ff, 0x00a284ff, 0x311000ff, 0x00c384ff, 0x391000ff, 0x00e384ff, 1717 0x421000ff, 0x00008cff, 0x4a1000ff, 0x00208cff, 0x521000ff, 0x00418cff, 0x5a1000ff, 0x00618cff, 1718 0x631000ff, 0x00828cff, 0x6b1000ff, 0x00a28cff, 0x731000ff, 0x00c38cff, 0x7b1000ff, 0x00e38cff, 1719 0x841000ff, 0x000094ff, 0x8c1000ff, 0x002094ff, 0x941000ff, 0x004194ff, 0x9c1000ff, 0x006194ff, 1720 0xa51000ff, 0x008294ff, 0xad1000ff, 0x00a294ff, 0xb51000ff, 0x00c394ff, 0xbd1000ff, 0x00e394ff, 1721 0xc61000ff, 0x00009cff, 0xce1000ff, 0x00209cff, 0xd61000ff, 0x00419cff, 0xde1000ff, 0x00619cff, 1722 0xe71000ff, 0x00829cff, 0xef1000ff, 0x00a29cff, 0xf71000ff, 0x00c39cff, 0xff1000ff, 0x00e39cff, 1723 0x001400ff, 0x0000a5ff, 0x081400ff, 0x0020a5ff, 0x101400ff, 0x0041a5ff, 0x181400ff, 0x0061a5ff, 1724 0x211400ff, 0x0082a5ff, 0x291400ff, 0x00a2a5ff, 0x311400ff, 0x00c3a5ff, 0x391400ff, 0x00e3a5ff, 1725 0x421400ff, 0x0000adff, 0x4a1400ff, 0x0020adff, 0x521400ff, 0x0041adff, 0x5a1400ff, 0x0061adff, 1726 0x631400ff, 0x0082adff, 0x6b1400ff, 0x00a2adff, 0x731400ff, 0x00c3adff, 0x7b1400ff, 0x00e3adff, 1727 0x841400ff, 0x0000b5ff, 0x8c1400ff, 0x0020b5ff, 0x941400ff, 0x0041b5ff, 0x9c1400ff, 0x0061b5ff, 1728 0xa51400ff, 0x0082b5ff, 0xad1400ff, 0x00a2b5ff, 0xb51400ff, 0x00c3b5ff, 0xbd1400ff, 0x00e3b5ff, 1729 0xc61400ff, 0x0000bdff, 0xce1400ff, 0x0020bdff, 0xd61400ff, 0x0041bdff, 0xde1400ff, 0x0061bdff, 1730 0xe71400ff, 0x0082bdff, 0xef1400ff, 0x00a2bdff, 0xf71400ff, 0x00c3bdff, 0xff1400ff, 
0x00e3bdff, 1731 0x001800ff, 0x0000c6ff, 0x081800ff, 0x0020c6ff, 0x101800ff, 0x0041c6ff, 0x181800ff, 0x0061c6ff, 1732 0x211800ff, 0x0082c6ff, 0x291800ff, 0x00a2c6ff, 0x311800ff, 0x00c3c6ff, 0x391800ff, 0x00e3c6ff, 1733 0x421800ff, 0x0000ceff, 0x4a1800ff, 0x0020ceff, 0x521800ff, 0x0041ceff, 0x5a1800ff, 0x0061ceff, 1734 0x631800ff, 0x0082ceff, 0x6b1800ff, 0x00a2ceff, 0x731800ff, 0x00c3ceff, 0x7b1800ff, 0x00e3ceff, 1735 0x841800ff, 0x0000d6ff, 0x8c1800ff, 0x0020d6ff, 0x941800ff, 0x0041d6ff, 0x9c1800ff, 0x0061d6ff, 1736 0xa51800ff, 0x0082d6ff, 0xad1800ff, 0x00a2d6ff, 0xb51800ff, 0x00c3d6ff, 0xbd1800ff, 0x00e3d6ff, 1737 0xc61800ff, 0x0000deff, 0xce1800ff, 0x0020deff, 0xd61800ff, 0x0041deff, 0xde1800ff, 0x0061deff, 1738 0xe71800ff, 0x0082deff, 0xef1800ff, 0x00a2deff, 0xf71800ff, 0x00c3deff, 0xff1800ff, 0x00e3deff, 1739 0x001c00ff, 0x0000e7ff, 0x081c00ff, 0x0020e7ff, 0x101c00ff, 0x0041e7ff, 0x181c00ff, 0x0061e7ff, 1740 0x211c00ff, 0x0082e7ff, 0x291c00ff, 0x00a2e7ff, 0x311c00ff, 0x00c3e7ff, 0x391c00ff, 0x00e3e7ff, 1741 0x421c00ff, 0x0000efff, 0x4a1c00ff, 0x0020efff, 0x521c00ff, 0x0041efff, 0x5a1c00ff, 0x0061efff, 1742 0x631c00ff, 0x0082efff, 0x6b1c00ff, 0x00a2efff, 0x731c00ff, 0x00c3efff, 0x7b1c00ff, 0x00e3efff, 1743 0x841c00ff, 0x0000f7ff, 0x8c1c00ff, 0x0020f7ff, 0x941c00ff, 0x0041f7ff, 0x9c1c00ff, 0x0061f7ff, 1744 0xa51c00ff, 0x0082f7ff, 0xad1c00ff, 0x00a2f7ff, 0xb51c00ff, 0x00c3f7ff, 0xbd1c00ff, 0x00e3f7ff, 1745 0xc61c00ff, 0x0000ffff, 0xce1c00ff, 0x0020ffff, 0xd61c00ff, 0x0041ffff, 0xde1c00ff, 0x0061ffff, 1746 0xe71c00ff, 0x0082ffff, 0xef1c00ff, 0x00a2ffff, 0xf71c00ff, 0x00c3ffff, 0xff1c00ff, 0x00e3ffff 1747}; 1748 1749static void Blit_RGB565_BGRA8888(SDL_BlitInfo * info) 1750{ 1751 Blit_RGB565_32(info, RGB565_BGRA8888_LUT); 1752} 1753 1754/* *INDENT-ON* */ // clang-format on 1755 1756#endif // SDL_HAVE_BLIT_N_RGB565 1757 1758// blits 16 bit RGB<->RGBA with both surfaces having the same R,G,B fields 1759static void Blit2to2MaskAlpha(SDL_BlitInfo *info) 1760{ 1761 int 
width = info->dst_w;
    int height = info->dst_h;
    Uint16 *src = (Uint16 *)info->src;
    int srcskip = info->src_skip; // skips are in bytes, not pixels
    Uint16 *dst = (Uint16 *)info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;

    if (dstfmt->Amask) {
        // RGB->RGBA, SET_ALPHA: inject the constant per-surface alpha into the
        // destination alpha field. The 8-bit modulation value is narrowed to
        // Abits before shifting (assumes Abits <= 8 for 16bpp formats).
        Uint16 mask = ((Uint32)info->a >> (8 - dstfmt->Abits)) << dstfmt->Ashift;

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP_TRIVIAL(
            {
                *dst = *src | mask;
                ++dst;
                ++src;
            },
            width);
            /* *INDENT-ON* */ // clang-format on
            // advance by the byte-valued skips
            src = (Uint16 *)((Uint8 *)src + srcskip);
            dst = (Uint16 *)((Uint8 *)dst + dstskip);
        }
    } else {
        // RGBA->RGB, NO_ALPHA: strip the alpha bits; valid because both
        // formats share the same R,G,B field layout.
        Uint16 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP_TRIVIAL(
            {
                *dst = *src & mask;
                ++dst;
                ++src;
            },
            width);
            /* *INDENT-ON* */ // clang-format on
            src = (Uint16 *)((Uint8 *)src + srcskip);
            dst = (Uint16 *)((Uint8 *)dst + dstskip);
        }
    }
}

// blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields
static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint32 *src = (Uint32 *)info->src;
    int srcskip = info->src_skip;
    Uint32 *dst = (Uint32 *)info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;

    if (dstfmt->Amask) {
        // RGB->RGBA, SET_ALPHA (same narrowing as the 16bpp variant above)
        Uint32 mask = ((Uint32)info->a >> (8 - dstfmt->Abits)) << dstfmt->Ashift;

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP_TRIVIAL(
            {
                *dst = *src | mask;
                ++dst;
                ++src;
            },
width);
            /* *INDENT-ON* */ // clang-format on
            src = (Uint32 *)((Uint8 *)src + srcskip);
            dst = (Uint32 *)((Uint8 *)dst + dstskip);
        }
    } else {
        // RGBA->RGB, NO_ALPHA: mask off the alpha bits
        Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP_TRIVIAL(
            {
                *dst = *src & mask;
                ++dst;
                ++src;
            },
            width);
            /* *INDENT-ON* */ // clang-format on
            src = (Uint32 *)((Uint8 *)src + srcskip);
            dst = (Uint32 *)((Uint8 *)dst + dstskip);
        }
    }
}

// permutation for mapping srcfmt to dstfmt, overloading or not the alpha channel
//
// Computes, for each destination byte, which source byte feeds it
// (*_p0..*_p3, written as 0-based offsets) by pushing the marker pixel
// 0x04030201 through the format pack/unpack macros. *_alpha_channel
// (optional, may be NULL) receives the index of the destination byte that
// no source byte feeds — the caller overwrites it with a constant alpha.
static void get_permutation(const SDL_PixelFormatDetails *srcfmt, const SDL_PixelFormatDetails *dstfmt,
                            int *_p0, int *_p1, int *_p2, int *_p3, int *_alpha_channel)
{
    int alpha_channel = 0, p0, p1, p2, p3;
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    int Pixel = 0x04030201; // identity permutation
#else
    int Pixel = 0x01020304; // identity permutation
    int srcbpp = srcfmt->bytes_per_pixel;
    int dstbpp = dstfmt->bytes_per_pixel;
#endif

    // Unpack the marker through the source format...
    if (srcfmt->Amask) {
        RGBA_FROM_PIXEL(Pixel, srcfmt, p0, p1, p2, p3);
    } else {
        RGB_FROM_PIXEL(Pixel, srcfmt, p0, p1, p2);
        p3 = 0;
    }

    // ...and repack it through the destination format.
    if (dstfmt->Amask) {
        if (srcfmt->Amask) {
            PIXEL_FROM_RGBA(Pixel, dstfmt, p0, p1, p2, p3);
        } else {
            PIXEL_FROM_RGBA(Pixel, dstfmt, p0, p1, p2, 0);
        }
    } else {
        PIXEL_FROM_RGB(Pixel, dstfmt, p0, p1, p2);
    }

    // Split the repacked marker into per-byte indices (still 1-based here;
    // 0 means "no source byte lands in this destination byte").
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    p0 = Pixel & 0xFF;
    p1 = (Pixel >> 8) & 0xFF;
    p2 = (Pixel >> 16) & 0xFF;
    p3 = (Pixel >> 24) & 0xFF;
#else
    p3 = Pixel & 0xFF;
    p2 = (Pixel >> 8) & 0xFF;
    p1 = (Pixel >> 16) & 0xFF;
    p0 = (Pixel >> 24) & 0xFF;
#endif

    // A zero index marks the destination's alpha byte: point it at a harmless
    // source byte (1) and remember its position for the caller.
    if (p0 == 0) {
        p0 = 1;
        alpha_channel = 0;
    } else if (p1 == 0) {
        p1 = 1;
alpha_channel = 1;
    } else if (p2 == 0) {
        p2 = 1;
        alpha_channel = 2;
    } else if (p3 == 0) {
        p3 = 1;
        alpha_channel = 3;
    }

#if SDL_BYTEORDER == SDL_LIL_ENDIAN
#else
    // Big-endian only: re-align the indices when pixel widths differ.
    if (srcbpp == 3 && dstbpp == 4) {
        if (p0 != 1) {
            p0--;
        }
        if (p1 != 1) {
            p1--;
        }
        if (p2 != 1) {
            p2--;
        }
        if (p3 != 1) {
            p3--;
        }
    } else if (srcbpp == 4 && dstbpp == 3) {
        p0 = p1;
        p1 = p2;
        p2 = p3;
    }
#endif
    // Convert from 1-based to 0-based byte offsets.
    *_p0 = p0 - 1;
    *_p1 = p1 - 1;
    *_p2 = p2 - 1;
    *_p3 = p3 - 1;

    if (_alpha_channel) {
        *_alpha_channel = alpha_channel;
    }
}

// General N-byte to N-byte blit (no colorkey). Uses a byte-permutation fast
// path when both formats are one byte per channel, otherwise falls back to
// the generic per-pixel unpack/repack loop.
static void BlitNtoN(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->bytes_per_pixel;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->bytes_per_pixel;
    unsigned alpha = dstfmt->Amask ?
info->a : 0;

#if HAVE_FAST_WRITE_INT8
    // Blit with permutation: 4->4 (10-bit-per-channel formats excluded — the
    // permutation is byte-granular)
    if (srcbpp == 4 && dstbpp == 4 &&
        !SDL_ISPIXELFORMAT_10BIT(srcfmt->format) &&
        !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {

        // Find the appropriate permutation
        int alpha_channel, p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                dst[0] = src[p0];
                dst[1] = src[p1];
                dst[2] = src[p2];
                dst[3] = src[p3];
                dst[alpha_channel] = (Uint8)alpha; // overwrite the dst alpha byte
                src += 4;
                dst += 4;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }
#endif

    // Blit with permutation: 4->3
    if (srcbpp == 4 && dstbpp == 3 &&
        !SDL_ISPIXELFORMAT_10BIT(srcfmt->format)) {

        // Find the appropriate permutation
        int p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                dst[0] = src[p0];
                dst[1] = src[p1];
                dst[2] = src[p2];
                src += 4;
                dst += 3;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }

#if HAVE_FAST_WRITE_INT8
    // Blit with permutation: 3->4
    if (srcbpp == 3 && dstbpp == 4 &&
        !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {

        // Find the appropriate permutation
        int alpha_channel, p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                dst[0] = src[p0];
                dst[1] = src[p1];
                dst[2] = src[p2];
                dst[3] = src[p3];
                dst[alpha_channel] = (Uint8)alpha;
                src += 3;
                dst += 4;
            }, width);
            /* *INDENT-ON* */ // clang-format on
src += srcskip;
            dst += dstskip;
        }
        return;
    }
#endif

    // Fallback: fully general per-pixel unpack/repack.
    while (height--) {
        /* *INDENT-OFF* */ // clang-format off
        DUFFS_LOOP(
        {
            Uint32 Pixel;
            unsigned sR;
            unsigned sG;
            unsigned sB;
            DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
            ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
            dst += dstbpp;
            src += srcbpp;
        },
        width);
        /* *INDENT-ON* */ // clang-format on
        src += srcskip;
        dst += dstskip;
    }
}

// Like BlitNtoN, but the per-pixel source alpha is carried over to the
// destination instead of being replaced by a constant.
static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->bytes_per_pixel;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->bytes_per_pixel;
    int c;

#if HAVE_FAST_WRITE_INT8
    // Blit with permutation: 4->4 (no alpha_channel overwrite here — the
    // permutation already moves the source alpha byte into place)
    if (srcbpp == 4 && dstbpp == 4 &&
        !SDL_ISPIXELFORMAT_10BIT(srcfmt->format) &&
        !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {

        // Find the appropriate permutation
        int p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                dst[0] = src[p0];
                dst[1] = src[p1];
                dst[2] = src[p2];
                dst[3] = src[p3];
                src += 4;
                dst += 4;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }
#endif

    // Generic fallback, copying all four channels per pixel.
    while (height--) {
        for (c = width; c; --c) {
            Uint32 Pixel;
            unsigned sR, sG, sB, sA;
            DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
            ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
            dst += dstbpp;
            src += srcbpp;
        }
        src += srcskip;
dst += dstskip;
    }
}

// 16bpp -> 16bpp colorkeyed copy; the source alpha bits are masked out of
// the key comparison.
static void Blit2to2Key(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint16 *srcp = (Uint16 *)info->src;
    int srcskip = info->src_skip;
    Uint16 *dstp = (Uint16 *)info->dst;
    int dstskip = info->dst_skip;
    Uint32 ckey = info->colorkey;
    Uint32 rgbmask = ~info->src_fmt->Amask;

    // Set up some basic variables
    srcskip /= 2; // convert byte skips to 16-bit element counts
    dstskip /= 2;
    ckey &= rgbmask;

    while (height--) {
        /* *INDENT-OFF* */ // clang-format off
        DUFFS_LOOP_TRIVIAL(
        {
            if ( (*srcp & rgbmask) != ckey ) {
                *dstp = *srcp;
            }
            dstp++;
            srcp++;
        },
        width);
        /* *INDENT-ON* */ // clang-format on
        srcp += srcskip;
        dstp += dstskip;
    }
}

// General colorkeyed N->N blit, with several byte-level fast paths keyed on
// the exact source/destination format pairing.
static void BlitNtoNKey(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    Uint32 ckey = info->colorkey;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    int srcbpp = srcfmt->bytes_per_pixel;
    int dstbpp = dstfmt->bytes_per_pixel;
    unsigned alpha = dstfmt->Amask ?
info->a : 0;
    Uint32 rgbmask = ~srcfmt->Amask;
    int sfmt = srcfmt->format;
    int dfmt = dstfmt->format;

    // Set up some basic variables
    ckey &= rgbmask; // compare only the R,G,B bits against the key

    // BPP 4, same rgb
    if (srcbpp == 4 && dstbpp == 4 && srcfmt->Rmask == dstfmt->Rmask && srcfmt->Gmask == dstfmt->Gmask && srcfmt->Bmask == dstfmt->Bmask) {
        Uint32 *src32 = (Uint32 *)src;
        Uint32 *dst32 = (Uint32 *)dst;

        if (dstfmt->Amask) {
            // RGB->RGBA, SET_ALPHA
            // NOTE(review): unlike Blit4to4MaskAlpha, info->a is not narrowed
            // to Abits before shifting here — presumably Abits == 8 on this
            // path; verify for destinations with smaller alpha fields.
            Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift;
            while (height--) {
                /* *INDENT-OFF* */ // clang-format off
                DUFFS_LOOP_TRIVIAL(
                {
                    if ((*src32 & rgbmask) != ckey) {
                        *dst32 = *src32 | mask;
                    }
                    ++dst32;
                    ++src32;
                }, width);
                /* *INDENT-ON* */ // clang-format on
                src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
                dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
            }
            return;
        } else {
            // RGBA->RGB, NO_ALPHA
            Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
            while (height--) {
                /* *INDENT-OFF* */ // clang-format off
                DUFFS_LOOP_TRIVIAL(
                {
                    if ((*src32 & rgbmask) != ckey) {
                        *dst32 = *src32 & mask;
                    }
                    ++dst32;
                    ++src32;
                }, width);
                /* *INDENT-ON* */ // clang-format on
                src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
                dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
            }
            return;
        }
    }

#if HAVE_FAST_WRITE_INT8
    // Blit with permutation: 4->4
    if (srcbpp == 4 && dstbpp == 4 &&
        !SDL_ISPIXELFORMAT_10BIT(srcfmt->format) &&
        !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {

        // Find the appropriate permutation
        int alpha_channel, p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint32 *src32 = (Uint32 *)src;

                if ((*src32 & rgbmask) != ckey) {
                    dst[0] = src[p0];
dst[1] = src[p1];
                    dst[2] = src[p2];
                    dst[3] = src[p3];
                    dst[alpha_channel] = (Uint8)alpha;
                }
                src += 4;
                dst += 4;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }
#endif

    // BPP 3, same rgb triplet
    if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_RGB24) ||
        (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_BGR24)) {

        // Split the key into its three bytes in memory order.
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
        Uint8 k0 = ckey & 0xFF;
        Uint8 k1 = (ckey >> 8) & 0xFF;
        Uint8 k2 = (ckey >> 16) & 0xFF;
#else
        Uint8 k0 = (ckey >> 16) & 0xFF;
        Uint8 k1 = (ckey >> 8) & 0xFF;
        Uint8 k2 = ckey & 0xFF;
#endif

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint8 s0 = src[0];
                Uint8 s1 = src[1];
                Uint8 s2 = src[2];

                if (k0 != s0 || k1 != s1 || k2 != s2) {
                    dst[0] = s0;
                    dst[1] = s1;
                    dst[2] = s2;
                }
                src += 3;
                dst += 3;
            },
            width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }

    // BPP 3, inversed rgb triplet
    if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_BGR24) ||
        (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_RGB24)) {

#if SDL_BYTEORDER == SDL_LIL_ENDIAN
        Uint8 k0 = ckey & 0xFF;
        Uint8 k1 = (ckey >> 8) & 0xFF;
        Uint8 k2 = (ckey >> 16) & 0xFF;
#else
        Uint8 k0 = (ckey >> 16) & 0xFF;
        Uint8 k1 = (ckey >> 8) & 0xFF;
        Uint8 k2 = ckey & 0xFF;
#endif

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint8 s0 = src[0];
                Uint8 s1 = src[1];
                Uint8 s2 = src[2];
                if (k0 != s0 || k1 != s1 || k2 != s2) {
                    // Inversed RGB
                    dst[0] = s2;
                    dst[1] = s1;
                    dst[2] = s0;
                }
                src += 3;
                dst += 3;
            },
            width);
            /*
*INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }

    // Blit with permutation: 4->3
    if (srcbpp == 4 && dstbpp == 3 &&
        !SDL_ISPIXELFORMAT_10BIT(srcfmt->format)) {

        // Find the appropriate permutation
        int p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint32 *src32 = (Uint32 *)src;
                if ((*src32 & rgbmask) != ckey) {
                    dst[0] = src[p0];
                    dst[1] = src[p1];
                    dst[2] = src[p2];
                }
                src += 4;
                dst += 3;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }

#if HAVE_FAST_WRITE_INT8
    // Blit with permutation: 3->4 (3-byte source: key compared byte-by-byte)
    if (srcbpp == 3 && dstbpp == 4 &&
        !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {

#if SDL_BYTEORDER == SDL_LIL_ENDIAN
        Uint8 k0 = ckey & 0xFF;
        Uint8 k1 = (ckey >> 8) & 0xFF;
        Uint8 k2 = (ckey >> 16) & 0xFF;
#else
        Uint8 k0 = (ckey >> 16) & 0xFF;
        Uint8 k1 = (ckey >> 8) & 0xFF;
        Uint8 k2 = ckey & 0xFF;
#endif

        // Find the appropriate permutation
        int alpha_channel, p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint8 s0 = src[0];
                Uint8 s1 = src[1];
                Uint8 s2 = src[2];

                if (k0 != s0 || k1 != s1 || k2 != s2) {
                    dst[0] = src[p0];
                    dst[1] = src[p1];
                    dst[2] = src[p2];
                    dst[3] = src[p3];
                    dst[alpha_channel] = (Uint8)alpha;
                }
                src += 3;
                dst += 4;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }
#endif

    // Fallback: general per-pixel unpack/repack with key test.
    while (height--) {
        /* *INDENT-OFF* */ // clang-format off
        DUFFS_LOOP(
        {
            Uint32
Pixel;
            unsigned sR;
            unsigned sG;
            unsigned sB;
            RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
            if ( (Pixel & rgbmask) != ckey ) {
                RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
                ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
            }
            dst += dstbpp;
            src += srcbpp;
        },
        width);
        /* *INDENT-ON* */ // clang-format on
        src += srcskip;
        dst += dstskip;
    }
}

// Colorkeyed N->N blit that preserves the per-pixel source alpha.
static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    Uint32 ckey = info->colorkey;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    Uint32 rgbmask = ~srcfmt->Amask;

    Uint8 srcbpp;
    Uint8 dstbpp;
    Uint32 Pixel;
    unsigned sR, sG, sB, sA;

    // Set up some basic variables
    srcbpp = srcfmt->bytes_per_pixel;
    dstbpp = dstfmt->bytes_per_pixel;
    ckey &= rgbmask;

    // Fastpath: same source/destination format, with Amask, bpp 32, loop is vectorized.
// ~10x faster
    if (srcfmt->format == dstfmt->format) {

        if (srcfmt->format == SDL_PIXELFORMAT_ARGB8888 ||
            srcfmt->format == SDL_PIXELFORMAT_ABGR8888 ||
            srcfmt->format == SDL_PIXELFORMAT_BGRA8888 ||
            srcfmt->format == SDL_PIXELFORMAT_RGBA8888) {

            Uint32 *src32 = (Uint32 *)src;
            Uint32 *dst32 = (Uint32 *)dst;
            while (height--) {
                /* *INDENT-OFF* */ // clang-format off
                DUFFS_LOOP_TRIVIAL(
                {
                    if ((*src32 & rgbmask) != ckey) {
                        *dst32 = *src32;
                    }
                    ++src32;
                    ++dst32;
                },
                width);
                /* *INDENT-ON* */ // clang-format on
                src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
                dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
            }
        }
        // NOTE(review): this returns for ANY matching format pair, not only
        // the four 8888 layouts above — a same-format pair outside that list
        // would silently blit nothing. Verify no such pairing can reach this
        // blitter.
        return;
    }

#if HAVE_FAST_WRITE_INT8
    // Blit with permutation: 4->4
    if (srcbpp == 4 && dstbpp == 4 &&
        !SDL_ISPIXELFORMAT_10BIT(srcfmt->format) &&
        !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {

        // Find the appropriate permutation
        int p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint32 *src32 = (Uint32 *)src;
                if ((*src32 & rgbmask) != ckey) {
                    dst[0] = src[p0];
                    dst[1] = src[p1];
                    dst[2] = src[p2];
                    dst[3] = src[p3];
                }
                src += 4;
                dst += 4;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }
#endif

    // Generic fallback with key test, copying all four channels.
    while (height--) {
        /* *INDENT-OFF* */ // clang-format off
        DUFFS_LOOP(
        {
            DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
            if ( (Pixel & rgbmask) != ckey ) {
                ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
            }
            dst += dstbpp;
            src += srcbpp;
        },
        width);
        /* *INDENT-ON* */ // clang-format on
        src += srcskip;
        dst += dstskip;
    }
}

// Convert between two 8888 pixels with
// differing formats. Source alpha is carried over unchanged.
#define SWIZZLE_8888_SRC_ALPHA(src, dst, srcfmt, dstfmt)               \
    do {                                                               \
        dst = (((src >> srcfmt->Rshift) & 0xFF) << dstfmt->Rshift) |   \
              (((src >> srcfmt->Gshift) & 0xFF) << dstfmt->Gshift) |   \
              (((src >> srcfmt->Bshift) & 0xFF) << dstfmt->Bshift) |   \
              (((src >> srcfmt->Ashift) & 0xFF) << dstfmt->Ashift);    \
    } while (0)

// Same conversion, but the destination alpha bits are forced to dstAmask
// (i.e. fully opaque) instead of being taken from the source.
#define SWIZZLE_8888_DST_ALPHA(src, dst, srcfmt, dstfmt, dstAmask)     \
    do {                                                               \
        dst = (((src >> srcfmt->Rshift) & 0xFF) << dstfmt->Rshift) |   \
              (((src >> srcfmt->Gshift) & 0xFF) << dstfmt->Gshift) |   \
              (((src >> srcfmt->Bshift) & 0xFF) << dstfmt->Bshift) |   \
              dstAmask;                                                \
    } while (0)

#ifdef SDL_SSE4_1_INTRINSICS

// 8888 -> 8888 channel swizzle, 4 pixels per iteration via a byte shuffle.
static void SDL_TARGETING("sse4.1") Blit8888to8888PixelSwizzleSSE41(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    bool fill_alpha = (!srcfmt->Amask || !dstfmt->Amask);
    Uint32 srcAmask, srcAshift;
    Uint32 dstAmask, dstAshift;

    SDL_Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift);
    SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift);

    // The byte offsets for the start of each pixel
    const __m128i mask_offsets = _mm_set_epi8(
        12, 12, 12, 12, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0);

    // Per-byte shuffle control: for each destination byte, the index of the
    // source byte that feeds it (channel shifts converted from bits to bytes).
    const __m128i convert_mask = _mm_add_epi32(
        _mm_set1_epi32(
            ((srcfmt->Rshift >> 3) << dstfmt->Rshift) |
            ((srcfmt->Gshift >> 3) << dstfmt->Gshift) |
            ((srcfmt->Bshift >> 3) << dstfmt->Bshift) |
            ((srcAshift >> 3) << dstAshift)),
        mask_offsets);

    const __m128i alpha_fill_mask = _mm_set1_epi32((int)dstAmask);

    while (height--) {
        int i = 0;

        for (; i + 4 <= width; i += 4) {
// Load 4 src pixels
            __m128i src128 = _mm_loadu_si128((__m128i *)src);

            // Convert to dst format
            // This is an SSSE3 instruction
            src128 = _mm_shuffle_epi8(src128, convert_mask);

            if (fill_alpha) {
                // Set the alpha channels of src to 255
                src128 = _mm_or_si128(src128, alpha_fill_mask);
            }

            // Save the result
            _mm_storeu_si128((__m128i *)dst, src128);

            src += 16;
            dst += 16;
        }

        // Scalar tail for the trailing 0-3 pixels of the row.
        for (; i < width; ++i) {
            Uint32 src32 = *(Uint32 *)src;
            Uint32 dst32;
            if (fill_alpha) {
                SWIZZLE_8888_DST_ALPHA(src32, dst32, srcfmt, dstfmt, dstAmask);
            } else {
                SWIZZLE_8888_SRC_ALPHA(src32, dst32, srcfmt, dstfmt);
            }
            *(Uint32 *)dst = dst32;
            src += 4;
            dst += 4;
        }

        src += srcskip;
        dst += dstskip;
    }
}

#endif

#ifdef SDL_AVX2_INTRINSICS

// 8888 -> 8888 channel swizzle, 8 pixels per iteration.
static void SDL_TARGETING("avx2") Blit8888to8888PixelSwizzleAVX2(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    bool fill_alpha = (!srcfmt->Amask || !dstfmt->Amask);
    Uint32 srcAmask, srcAshift;
    Uint32 dstAmask, dstAshift;

    SDL_Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift);
    SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift);

    // The byte offsets for the start of each pixel
    const __m256i mask_offsets = _mm256_set_epi8(
        28, 28, 28, 28, 24, 24, 24, 24, 20, 20, 20, 20, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0);

    // Same shuffle-control construction as the SSE4.1 variant above.
    const __m256i convert_mask = _mm256_add_epi32(
        _mm256_set1_epi32(
            ((srcfmt->Rshift >> 3) << dstfmt->Rshift) |
            ((srcfmt->Gshift >> 3) << dstfmt->Gshift) |
((srcfmt->Bshift >> 3) << dstfmt->Bshift) |
            ((srcAshift >> 3) << dstAshift)),
        mask_offsets);

    const __m256i alpha_fill_mask = _mm256_set1_epi32((int)dstAmask);

    while (height--) {
        int i = 0;

        for (; i + 8 <= width; i += 8) {
            // Load 8 src pixels
            __m256i src256 = _mm256_loadu_si256((__m256i *)src);

            // Convert to dst format
            src256 = _mm256_shuffle_epi8(src256, convert_mask);

            if (fill_alpha) {
                // Set the alpha channels of src to 255
                src256 = _mm256_or_si256(src256, alpha_fill_mask);
            }

            // Save the result
            _mm256_storeu_si256((__m256i *)dst, src256);

            src += 32;
            dst += 32;
        }

        // Scalar tail for the trailing 0-7 pixels of the row.
        for (; i < width; ++i) {
            Uint32 src32 = *(Uint32 *)src;
            Uint32 dst32;
            if (fill_alpha) {
                SWIZZLE_8888_DST_ALPHA(src32, dst32, srcfmt, dstfmt, dstAmask);
            } else {
                SWIZZLE_8888_SRC_ALPHA(src32, dst32, srcfmt, dstfmt);
            }
            *(Uint32 *)dst = dst32;
            src += 4;
            dst += 4;
        }

        src += srcskip;
        dst += dstskip;
    }
}

#endif

#if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))

// 8888 -> 8888 channel swizzle, 4 pixels per iteration via a table lookup.
static void Blit8888to8888PixelSwizzleNEON(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    bool fill_alpha = (!srcfmt->Amask || !dstfmt->Amask);
    Uint32 srcAmask, srcAshift;
    Uint32 dstAmask, dstAshift;

    SDL_Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift);
    SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift);

    // The byte offsets for the start of each pixel
    const uint8x16_t mask_offsets = vreinterpretq_u8_u64(vcombine_u64(
vcreate_u64(0x0404040400000000), vcreate_u64(0x0c0c0c0c08080808)));

    // Per-byte table-lookup indices, same construction as the SSE variant.
    const uint8x16_t convert_mask = vreinterpretq_u8_u32(vaddq_u32(
        vreinterpretq_u32_u8(mask_offsets),
        vdupq_n_u32(
            ((srcfmt->Rshift >> 3) << dstfmt->Rshift) |
            ((srcfmt->Gshift >> 3) << dstfmt->Gshift) |
            ((srcfmt->Bshift >> 3) << dstfmt->Bshift) |
            ((srcAshift >> 3) << dstAshift))));

    const uint8x16_t alpha_fill_mask = vreinterpretq_u8_u32(vdupq_n_u32(dstAmask));

    while (height--) {
        int i = 0;

        for (; i + 4 <= width; i += 4) {
            // Load 4 src pixels
            uint8x16_t src128 = vld1q_u8(src);

            // Convert to dst format
            src128 = vqtbl1q_u8(src128, convert_mask);

            if (fill_alpha) {
                // Set the alpha channels of src to 255
                src128 = vorrq_u8(src128, alpha_fill_mask);
            }

            // Save the result
            vst1q_u8(dst, src128);

            src += 16;
            dst += 16;
        }

        // Process 1 pixel per iteration, max 3 iterations, same calculations as above
        for (; i < width; ++i) {
            // Top 32-bits will be not used in src32
            uint8x8_t src32 = vreinterpret_u8_u32(vld1_dup_u32((Uint32 *)src));

            // Convert to dst format
            src32 = vtbl1_u8(src32, vget_low_u8(convert_mask));

            if (fill_alpha) {
                // Set the alpha channels of src to 255
                src32 = vorr_u8(src32, vget_low_u8(alpha_fill_mask));
            }

            // Save the result, only low 32-bits
            vst1_lane_u32((Uint32 *)dst, vreinterpret_u32_u8(src32), 0);

            src += 4;
            dst += 4;
        }

        src += srcskip;
        dst += dstskip;
    }
}

#endif

// Blit_3or4_to_3or4__same_rgb: 3 or 4 bpp, same RGB triplet
static void Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const
SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->bytes_per_pixel;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->bytes_per_pixel;

    if (dstfmt->Amask) {
        // SET_ALPHA: destination is 4 bpp, written as a whole 32-bit word
        // with a constant alpha field.
        Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift;
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
        int i0 = 0, i1 = 1, i2 = 2;
#else
        // big-endian: read the triplet back-to-front
        int i0 = srcbpp - 1 - 0;
        int i1 = srcbpp - 1 - 1;
        int i2 = srcbpp - 1 - 2;
#endif
        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint32 *dst32 = (Uint32 *)dst;
                Uint8 s0 = src[i0];
                Uint8 s1 = src[i1];
                Uint8 s2 = src[i2];
                *dst32 = (s0) | (s1 << 8) | (s2 << 16) | mask;
                dst += 4;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
    } else {
        // NO_ALPHA: plain three-byte copy, indices pre-computed per
        // endianness and pixel width.
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
        int i0 = 0, i1 = 1, i2 = 2;
        int j0 = 0, j1 = 1, j2 = 2;
#else
        int i0 = srcbpp - 1 - 0;
        int i1 = srcbpp - 1 - 1;
        int i2 = srcbpp - 1 - 2;
        int j0 = dstbpp - 1 - 0;
        int j1 = dstbpp - 1 - 1;
        int j2 = dstbpp - 1 - 2;
#endif
        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint8 s0 = src[i0];
                Uint8 s1 = src[i1];
                Uint8 s2 = src[i2];
                dst[j0] = s0;
                dst[j1] = s1;
                dst[j2] = s2;
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
    }
}

// Blit_3or4_to_3or4__inversed_rgb: 3 or 4 bpp, inversed RGB triplet
static void Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt =
info->src_fmt;
    int srcbpp = srcfmt->bytes_per_pixel;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->bytes_per_pixel;

    if (dstfmt->Amask) {
        if (srcfmt->Amask) {
            // COPY_ALPHA
            // Only to switch ABGR8888 <-> ARGB8888
            while (height--) {
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
                int i0 = 0, i1 = 1, i2 = 2, i3 = 3;
#else
                int i0 = 3, i1 = 2, i2 = 1, i3 = 0;
#endif
                /* *INDENT-OFF* */ // clang-format off
                DUFFS_LOOP(
                {
                    Uint32 *dst32 = (Uint32 *)dst;
                    Uint8 s0 = src[i0];
                    Uint8 s1 = src[i1];
                    Uint8 s2 = src[i2];
                    Uint32 alphashift = ((Uint32)src[i3]) << dstfmt->Ashift;
                    // inversed, compared to Blit_3or4_to_3or4__same_rgb
                    *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
                    dst += 4;
                    src += 4;
                }, width);
                /* *INDENT-ON* */ // clang-format on
                src += srcskip;
                dst += dstskip;
            }
        } else {
            // SET_ALPHA: constant alpha, source triplet written reversed.
            Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift;
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
            int i0 = 0, i1 = 1, i2 = 2;
#else
            int i0 = srcbpp - 1 - 0;
            int i1 = srcbpp - 1 - 1;
            int i2 = srcbpp - 1 - 2;
#endif
            while (height--) {
                /* *INDENT-OFF* */ // clang-format off
                DUFFS_LOOP(
                {
                    Uint32 *dst32 = (Uint32 *)dst;
                    Uint8 s0 = src[i0];
                    Uint8 s1 = src[i1];
                    Uint8 s2 = src[i2];
                    // inversed, compared to Blit_3or4_to_3or4__same_rgb
                    *dst32 = (s0 << 16) | (s1 << 8) | (s2) | mask;
                    dst += 4;
                    src += srcbpp;
                }, width);
                /* *INDENT-ON* */ // clang-format on
                src += srcskip;
                dst += dstskip;
            }
        }
    } else {
        // NO_ALPHA: byte copy with reversed destination indices.
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
        int i0 = 0, i1 = 1, i2 = 2;
        int j0 = 2, j1 = 1, j2 = 0;
#else
        int i0 = srcbpp - 1 - 0;
        int i1 = srcbpp - 1 - 1;
        int i2 = srcbpp - 1 - 2;
        int j0 = dstbpp - 1 - 2;
        int j1 = dstbpp - 1 - 1;
        int j2 = dstbpp - 1 -
0; 2928#endif 2929 while (height--) { 2930 /* *INDENT-OFF* */ // clang-format off 2931 DUFFS_LOOP( 2932 { 2933 Uint8 s0 = src[i0]; 2934 Uint8 s1 = src[i1]; 2935 Uint8 s2 = src[i2]; 2936 // inversed, compared to Blit_3or4_to_3or4__same_rgb 2937 dst[j0] = s0; 2938 dst[j1] = s1; 2939 dst[j2] = s2; 2940 dst += dstbpp; 2941 src += srcbpp; 2942 }, width); 2943 /* *INDENT-ON* */ // clang-format on 2944 src += srcskip; 2945 dst += dstskip; 2946 } 2947 } 2948} 2949 2950// Normal N to N optimized blitters 2951#define NO_ALPHA 1 2952#define SET_ALPHA 2 2953#define COPY_ALPHA 4 2954struct blit_table 2955{ 2956 Uint32 srcR, srcG, srcB; 2957 int dstbpp; 2958 Uint32 dstR, dstG, dstB; 2959 Uint32 blit_features; 2960 SDL_BlitFunc blitfunc; 2961 Uint32 alpha; // bitwise NO_ALPHA, SET_ALPHA, COPY_ALPHA 2962}; 2963static const struct blit_table normal_blit_1[] = { 2964 // Default for 8-bit RGB source, never optimized 2965 { 0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0 } 2966}; 2967 2968static const struct blit_table normal_blit_2[] = { 2969#ifdef SDL_ALTIVEC_BLITTERS 2970#ifdef BROKEN_ALTIVEC_BLITTERS 2971 // has-altivec 2972 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000, 2973 BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, 2974 { 0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000, 2975 BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, 2976#endif // BROKEN_ALTIVEC_BLITTERS 2977#endif 2978#ifdef SDL_SSE4_1_INTRINSICS 2979 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF, 2980 BLIT_FEATURE_HAS_SSE41, Blit_RGB565_32_SSE41, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, 2981 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000, 2982 BLIT_FEATURE_HAS_SSE41, Blit_RGB565_32_SSE41, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, 2983 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000, 0x0000FF00, 2984 BLIT_FEATURE_HAS_SSE41, 
Blit_RGB565_32_SSE41, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, 2985 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000, 2986 BLIT_FEATURE_HAS_SSE41, Blit_RGB565_32_SSE41, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, 2987#endif 2988#ifdef SDL_HAVE_BLIT_N_RGB565 2989 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF, 2990 0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, 2991 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000, 2992 0, Blit_RGB565_ABGR8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, 2993 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000, 0x0000FF00, 2994 0, Blit_RGB565_RGBA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, 2995 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000, 2996 0, Blit_RGB565_BGRA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, 2997#endif 2998 // Default for 16-bit RGB source, used if no other blitter matches 2999 { 0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0 } 3000}; 3001 3002static const struct blit_table normal_blit_3[] = { 3003 // 3->4 with same rgb triplet 3004 { 0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x000000FF, 0x0000FF00, 0x00FF0000, 3005 0, Blit_3or4_to_3or4__same_rgb, 3006#if HAVE_FAST_WRITE_INT8 3007 NO_ALPHA | 3008#endif 3009 SET_ALPHA }, 3010 { 0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x00FF0000, 0x0000FF00, 0x000000FF, 3011 0, Blit_3or4_to_3or4__same_rgb, 3012#if HAVE_FAST_WRITE_INT8 3013 NO_ALPHA | 3014#endif 3015 SET_ALPHA }, 3016 // 3->4 with inversed rgb triplet 3017 { 0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF, 3018 0, Blit_3or4_to_3or4__inversed_rgb, 3019#if HAVE_FAST_WRITE_INT8 3020 NO_ALPHA | 3021#endif 3022 SET_ALPHA }, 3023 { 0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000, 3024 0, Blit_3or4_to_3or4__inversed_rgb, 3025#if HAVE_FAST_WRITE_INT8 3026 NO_ALPHA | 3027#endif 3028 SET_ALPHA }, 3029 // 3->3 to switch RGB 24 <-> BGR 24 3030 { 0x000000FF, 0x0000FF00, 
0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF, 3031 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA }, 3032 { 0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000, 3033 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA }, 3034 // Default for 24-bit RGB source, never optimized 3035 { 0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0 } 3036}; 3037 3038static const struct blit_table normal_blit_4[] = { 3039#ifdef SDL_ALTIVEC_BLITTERS 3040 // has-altivec | dont-use-prefetch 3041 { 0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000, 3042 BLIT_FEATURE_HAS_ALTIVEC | BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, 3043 // has-altivec 3044 { 0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000, 3045 BLIT_FEATURE_HAS_ALTIVEC, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, 3046 // has-altivec 3047 { 0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F, 3048 BLIT_FEATURE_HAS_ALTIVEC, Blit_XRGB8888_RGB565Altivec, NO_ALPHA }, 3049#endif 3050 // 4->3 with same rgb triplet 3051 { 0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000, 3052 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA }, 3053 { 0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00, 0x000000FF, 3054 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA }, 3055 // 4->3 with inversed rgb triplet 3056 { 0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF, 3057 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA }, 3058 { 0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000, 3059 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA }, 3060 // 4->4 with inversed rgb triplet, and COPY_ALPHA to switch ABGR8888 <-> ARGB8888 3061 { 0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF, 3062 0, Blit_3or4_to_3or4__inversed_rgb, 3063#if HAVE_FAST_WRITE_INT8 3064 NO_ALPHA | 
3065#endif 3066 SET_ALPHA | COPY_ALPHA }, 3067 { 0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000, 3068 0, Blit_3or4_to_3or4__inversed_rgb, 3069#if HAVE_FAST_WRITE_INT8 3070 NO_ALPHA | 3071#endif 3072 SET_ALPHA | COPY_ALPHA }, 3073 // RGB 888 and RGB 565 3074 { 0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F, 3075 0, Blit_XRGB8888_RGB565, NO_ALPHA }, 3076 { 0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F, 3077 0, Blit_XRGB8888_RGB555, NO_ALPHA }, 3078 // Default for 32-bit RGB source, used if no other blitter matches 3079 { 0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0 } 3080}; 3081 3082static const struct blit_table *const normal_blit[] = { 3083 normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4 3084}; 3085 3086// Mask matches table, or table entry is zero 3087#define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000)) 3088 3089SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface *surface) 3090{ 3091 const SDL_PixelFormatDetails *srcfmt; 3092 const SDL_PixelFormatDetails *dstfmt; 3093 const struct blit_table *table; 3094 int which; 3095 SDL_BlitFunc blitfun; 3096 3097 // Set up data for choosing the blit 3098 srcfmt = surface->fmt; 3099 dstfmt = surface->map.info.dst_fmt; 3100 3101 // We don't support destinations less than 8-bits 3102 if (dstfmt->bits_per_pixel < 8) { 3103 return NULL; 3104 } 3105 3106 switch (surface->map.info.flags & ~SDL_COPY_RLE_MASK) { 3107 case 0: 3108 if (SDL_PIXELLAYOUT(srcfmt->format) == SDL_PACKEDLAYOUT_8888 && 3109 SDL_PIXELLAYOUT(dstfmt->format) == SDL_PACKEDLAYOUT_8888) { 3110#ifdef SDL_AVX2_INTRINSICS 3111 if (SDL_HasAVX2()) { 3112 return Blit8888to8888PixelSwizzleAVX2; 3113 } 3114#endif 3115#ifdef SDL_SSE4_1_INTRINSICS 3116 if (SDL_HasSSE41()) { 3117 return Blit8888to8888PixelSwizzleSSE41; 3118 } 3119#endif 3120#if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64)) 3121 return Blit8888to8888PixelSwizzleNEON; 3122#endif 
3123 } 3124 3125 blitfun = NULL; 3126 if (dstfmt->bits_per_pixel > 8) { 3127 Uint32 a_need = NO_ALPHA; 3128 if (dstfmt->Amask) { 3129 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA; 3130 } 3131 if (srcfmt->bytes_per_pixel > 0 && 3132 srcfmt->bytes_per_pixel <= SDL_arraysize(normal_blit)) { 3133 table = normal_blit[srcfmt->bytes_per_pixel - 1]; 3134 for (which = 0; table[which].dstbpp; ++which) { 3135 if (MASKOK(srcfmt->Rmask, table[which].srcR) && 3136 MASKOK(srcfmt->Gmask, table[which].srcG) && 3137 MASKOK(srcfmt->Bmask, table[which].srcB) && 3138 MASKOK(dstfmt->Rmask, table[which].dstR) && 3139 MASKOK(dstfmt->Gmask, table[which].dstG) && 3140 MASKOK(dstfmt->Bmask, table[which].dstB) && 3141 dstfmt->bytes_per_pixel == table[which].dstbpp && 3142 (a_need & table[which].alpha) == a_need && 3143 ((table[which].blit_features & GetBlitFeatures()) == 3144 table[which].blit_features)) { 3145 break; 3146 } 3147 } 3148 blitfun = table[which].blitfunc; 3149 } 3150 3151 if (blitfun == BlitNtoN) { // default C fallback catch-all. Slow! 
3152 if (srcfmt->bytes_per_pixel == dstfmt->bytes_per_pixel && 3153 srcfmt->Rmask == dstfmt->Rmask && 3154 srcfmt->Gmask == dstfmt->Gmask && 3155 srcfmt->Bmask == dstfmt->Bmask) { 3156 if (a_need == COPY_ALPHA) { 3157 if (srcfmt->Amask == dstfmt->Amask) { 3158 // Fastpath C fallback: RGBA<->RGBA blit with matching RGBA 3159 blitfun = SDL_BlitCopy; 3160 } else { 3161 blitfun = BlitNtoNCopyAlpha; 3162 } 3163 } else { 3164 if (srcfmt->bytes_per_pixel == 4) { 3165 // Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB 3166 blitfun = Blit4to4MaskAlpha; 3167 } else if (srcfmt->bytes_per_pixel == 2) { 3168 // Fastpath C fallback: 16bit RGB<->RGBA blit with matching RGB 3169 blitfun = Blit2to2MaskAlpha; 3170 } 3171 } 3172 } else if (a_need == COPY_ALPHA) { 3173 blitfun = BlitNtoNCopyAlpha; 3174 } 3175 } 3176 } 3177 return blitfun; 3178 3179 case SDL_COPY_COLORKEY: 3180 /* colorkey blit: Here we don't have too many options, mostly 3181 because RLE is the preferred fast way to deal with this. 3182 If a particular case turns out to be useful we'll add it. */ 3183 3184 if (srcfmt->bytes_per_pixel == 2 && surface->map.identity != 0) { 3185 return Blit2to2Key; 3186 } else { 3187#ifdef SDL_ALTIVEC_BLITTERS 3188 if ((srcfmt->bytes_per_pixel == 4) && (dstfmt->bytes_per_pixel == 4) && SDL_HasAltiVec()) { 3189 return Blit32to32KeyAltivec; 3190 } else 3191#endif 3192 if (srcfmt->Amask && dstfmt->Amask) { 3193 return BlitNtoNKeyCopyAlpha; 3194 } else { 3195 return BlitNtoNKey; 3196 } 3197 } 3198 } 3199 3200 return NULL; 3201} 3202 3203#endif // SDL_HAVE_BLIT_N 3204
[FILE END]
(C) 2025 0x4248. (C) 2025 4248 Media and 4248 Systems, all part of 0x4248. See the LICENCE files for more information. Not all files are by 0x4248 — always check the licensing.