Atlas - yuv_rgb_sse.c
Home / ext / SDL / src / video / yuv2rgb Lines: 1 | Size: 16257 bytes [Download] [Show on GitHub] [Search similar files] [Raw] [Raw (proxy)][FILE BEGIN]1// Copyright 2016 Adrien Descamps 2// Distributed under BSD 3-Clause License 3#include "SDL_internal.h" 4 5#ifdef SDL_HAVE_YUV 6#include "yuv_rgb_internal.h" 7 8#ifdef SDL_SSE2_INTRINSICS 9 10/* SDL doesn't use these atm and compiling them adds seconds onto the build. --ryan. 11#define SSE_FUNCTION_NAME yuv420_rgb565_sse 12#define STD_FUNCTION_NAME yuv420_rgb565_std 13#define YUV_FORMAT YUV_FORMAT_420 14#define RGB_FORMAT RGB_FORMAT_RGB565 15#define SSE_ALIGNED 16#include "yuv_rgb_sse_func.h" 17 18#define SSE_FUNCTION_NAME yuv420_rgb24_sse 19#define STD_FUNCTION_NAME yuv420_rgb24_std 20#define YUV_FORMAT YUV_FORMAT_420 21#define RGB_FORMAT RGB_FORMAT_RGB24 22#define SSE_ALIGNED 23#include "yuv_rgb_sse_func.h" 24 25#define SSE_FUNCTION_NAME yuv420_rgba_sse 26#define STD_FUNCTION_NAME yuv420_rgba_std 27#define YUV_FORMAT YUV_FORMAT_420 28#define RGB_FORMAT RGB_FORMAT_RGBA 29#define SSE_ALIGNED 30#include "yuv_rgb_sse_func.h" 31 32#define SSE_FUNCTION_NAME yuv420_bgra_sse 33#define STD_FUNCTION_NAME yuv420_bgra_std 34#define YUV_FORMAT YUV_FORMAT_420 35#define RGB_FORMAT RGB_FORMAT_BGRA 36#define SSE_ALIGNED 37#include "yuv_rgb_sse_func.h" 38 39#define SSE_FUNCTION_NAME yuv420_argb_sse 40#define STD_FUNCTION_NAME yuv420_argb_std 41#define YUV_FORMAT YUV_FORMAT_420 42#define RGB_FORMAT RGB_FORMAT_ARGB 43#define SSE_ALIGNED 44#include "yuv_rgb_sse_func.h" 45 46#define SSE_FUNCTION_NAME yuv420_abgr_sse 47#define STD_FUNCTION_NAME yuv420_abgr_std 48#define YUV_FORMAT YUV_FORMAT_420 49#define RGB_FORMAT RGB_FORMAT_ABGR 50#define SSE_ALIGNED 51#include "yuv_rgb_sse_func.h" 52 53#define SSE_FUNCTION_NAME yuv422_rgb565_sse 54#define STD_FUNCTION_NAME yuv422_rgb565_std 55#define YUV_FORMAT YUV_FORMAT_422 56#define RGB_FORMAT RGB_FORMAT_RGB565 57#define SSE_ALIGNED 58#include "yuv_rgb_sse_func.h" 59 60#define SSE_FUNCTION_NAME yuv422_rgb24_sse 61#define STD_FUNCTION_NAME yuv422_rgb24_std 62#define YUV_FORMAT YUV_FORMAT_422 63#define RGB_FORMAT RGB_FORMAT_RGB24 64#define SSE_ALIGNED 65#include "yuv_rgb_sse_func.h" 66 67#define SSE_FUNCTION_NAME yuv422_rgba_sse 68#define STD_FUNCTION_NAME yuv422_rgba_std 69#define YUV_FORMAT YUV_FORMAT_422 70#define RGB_FORMAT RGB_FORMAT_RGBA 71#define SSE_ALIGNED 72#include "yuv_rgb_sse_func.h" 73 74#define SSE_FUNCTION_NAME yuv422_bgra_sse 75#define STD_FUNCTION_NAME yuv422_bgra_std 76#define YUV_FORMAT YUV_FORMAT_422 77#define RGB_FORMAT RGB_FORMAT_BGRA 78#define SSE_ALIGNED 79#include "yuv_rgb_sse_func.h" 80 81#define SSE_FUNCTION_NAME yuv422_argb_sse 82#define STD_FUNCTION_NAME yuv422_argb_std 83#define YUV_FORMAT YUV_FORMAT_422 84#define RGB_FORMAT RGB_FORMAT_ARGB 85#define SSE_ALIGNED 86#include "yuv_rgb_sse_func.h" 87 88#define SSE_FUNCTION_NAME yuv422_abgr_sse 89#define STD_FUNCTION_NAME yuv422_abgr_std 90#define YUV_FORMAT YUV_FORMAT_422 91#define RGB_FORMAT RGB_FORMAT_ABGR 92#define SSE_ALIGNED 93#include "yuv_rgb_sse_func.h" 94 95#define SSE_FUNCTION_NAME yuvnv12_rgb565_sse 96#define STD_FUNCTION_NAME yuvnv12_rgb565_std 97#define YUV_FORMAT YUV_FORMAT_NV12 98#define RGB_FORMAT RGB_FORMAT_RGB565 99#define SSE_ALIGNED 100#include "yuv_rgb_sse_func.h" 101 102#define SSE_FUNCTION_NAME yuvnv12_rgb24_sse 103#define STD_FUNCTION_NAME yuvnv12_rgb24_std 104#define YUV_FORMAT YUV_FORMAT_NV12 105#define RGB_FORMAT RGB_FORMAT_RGB24 106#define SSE_ALIGNED 107#include "yuv_rgb_sse_func.h" 108 109#define SSE_FUNCTION_NAME yuvnv12_rgba_sse 110#define STD_FUNCTION_NAME yuvnv12_rgba_std 111#define YUV_FORMAT YUV_FORMAT_NV12 112#define RGB_FORMAT RGB_FORMAT_RGBA 113#define SSE_ALIGNED 114#include "yuv_rgb_sse_func.h" 115 116#define SSE_FUNCTION_NAME yuvnv12_bgra_sse 117#define STD_FUNCTION_NAME yuvnv12_bgra_std 118#define YUV_FORMAT YUV_FORMAT_NV12 119#define RGB_FORMAT RGB_FORMAT_BGRA 120#define SSE_ALIGNED 121#include "yuv_rgb_sse_func.h" 122 123#define SSE_FUNCTION_NAME yuvnv12_argb_sse 124#define STD_FUNCTION_NAME yuvnv12_argb_std 125#define YUV_FORMAT YUV_FORMAT_NV12 126#define RGB_FORMAT RGB_FORMAT_ARGB 127#define SSE_ALIGNED 128#include "yuv_rgb_sse_func.h" 129 130#define SSE_FUNCTION_NAME yuvnv12_abgr_sse 131#define STD_FUNCTION_NAME yuvnv12_abgr_std 132#define YUV_FORMAT YUV_FORMAT_NV12 133#define RGB_FORMAT RGB_FORMAT_ABGR 134#define SSE_ALIGNED 135#include "yuv_rgb_sse_func.h" 136*/ 137 138#define SSE_FUNCTION_NAME yuv420_rgb565_sseu 139#define STD_FUNCTION_NAME yuv420_rgb565_std 140#define YUV_FORMAT YUV_FORMAT_420 141#define RGB_FORMAT RGB_FORMAT_RGB565 142#include "yuv_rgb_sse_func.h" 143 144#define SSE_FUNCTION_NAME yuv420_rgb24_sseu 145#define STD_FUNCTION_NAME yuv420_rgb24_std 146#define YUV_FORMAT YUV_FORMAT_420 147#define RGB_FORMAT RGB_FORMAT_RGB24 148#include "yuv_rgb_sse_func.h" 149 150#define SSE_FUNCTION_NAME yuv420_rgba_sseu 151#define STD_FUNCTION_NAME yuv420_rgba_std 152#define YUV_FORMAT YUV_FORMAT_420 153#define RGB_FORMAT RGB_FORMAT_RGBA 154#include "yuv_rgb_sse_func.h" 155 156#define SSE_FUNCTION_NAME yuv420_bgra_sseu 157#define STD_FUNCTION_NAME yuv420_bgra_std 158#define YUV_FORMAT YUV_FORMAT_420 159#define RGB_FORMAT RGB_FORMAT_BGRA 160#include "yuv_rgb_sse_func.h" 161 162#define SSE_FUNCTION_NAME yuv420_argb_sseu 163#define STD_FUNCTION_NAME yuv420_argb_std 164#define YUV_FORMAT YUV_FORMAT_420 165#define RGB_FORMAT RGB_FORMAT_ARGB 166#include "yuv_rgb_sse_func.h" 167 168#define SSE_FUNCTION_NAME yuv420_abgr_sseu 169#define STD_FUNCTION_NAME yuv420_abgr_std 170#define YUV_FORMAT YUV_FORMAT_420 171#define RGB_FORMAT RGB_FORMAT_ABGR 172#include "yuv_rgb_sse_func.h" 173 174#define SSE_FUNCTION_NAME yuv422_rgb565_sseu 175#define STD_FUNCTION_NAME yuv422_rgb565_std 176#define YUV_FORMAT YUV_FORMAT_422 177#define RGB_FORMAT RGB_FORMAT_RGB565 178#include "yuv_rgb_sse_func.h" 179 180#define SSE_FUNCTION_NAME yuv422_rgb24_sseu 181#define STD_FUNCTION_NAME yuv422_rgb24_std 182#define YUV_FORMAT YUV_FORMAT_422 183#define RGB_FORMAT RGB_FORMAT_RGB24 184#include "yuv_rgb_sse_func.h" 185 186#define SSE_FUNCTION_NAME yuv422_rgba_sseu 187#define STD_FUNCTION_NAME yuv422_rgba_std 188#define YUV_FORMAT YUV_FORMAT_422 189#define RGB_FORMAT RGB_FORMAT_RGBA 190#include "yuv_rgb_sse_func.h" 191 192#define SSE_FUNCTION_NAME yuv422_bgra_sseu 193#define STD_FUNCTION_NAME yuv422_bgra_std 194#define YUV_FORMAT YUV_FORMAT_422 195#define RGB_FORMAT RGB_FORMAT_BGRA 196#include "yuv_rgb_sse_func.h" 197 198#define SSE_FUNCTION_NAME yuv422_argb_sseu 199#define STD_FUNCTION_NAME yuv422_argb_std 200#define YUV_FORMAT YUV_FORMAT_422 201#define RGB_FORMAT RGB_FORMAT_ARGB 202#include "yuv_rgb_sse_func.h" 203 204#define SSE_FUNCTION_NAME yuv422_abgr_sseu 205#define STD_FUNCTION_NAME yuv422_abgr_std 206#define YUV_FORMAT YUV_FORMAT_422 207#define RGB_FORMAT RGB_FORMAT_ABGR 208#include "yuv_rgb_sse_func.h" 209 210#define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu 211#define STD_FUNCTION_NAME yuvnv12_rgb565_std 212#define YUV_FORMAT YUV_FORMAT_NV12 213#define RGB_FORMAT RGB_FORMAT_RGB565 214#include "yuv_rgb_sse_func.h" 215 216#define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu 217#define STD_FUNCTION_NAME yuvnv12_rgb24_std 218#define YUV_FORMAT YUV_FORMAT_NV12 219#define RGB_FORMAT RGB_FORMAT_RGB24 220#include "yuv_rgb_sse_func.h" 221 222#define SSE_FUNCTION_NAME yuvnv12_rgba_sseu 223#define STD_FUNCTION_NAME yuvnv12_rgba_std 224#define YUV_FORMAT YUV_FORMAT_NV12 225#define RGB_FORMAT RGB_FORMAT_RGBA 226#include "yuv_rgb_sse_func.h" 227 228#define SSE_FUNCTION_NAME yuvnv12_bgra_sseu 229#define STD_FUNCTION_NAME yuvnv12_bgra_std 230#define YUV_FORMAT YUV_FORMAT_NV12 231#define RGB_FORMAT RGB_FORMAT_BGRA 232#include "yuv_rgb_sse_func.h" 233 234#define SSE_FUNCTION_NAME yuvnv12_argb_sseu 235#define STD_FUNCTION_NAME yuvnv12_argb_std 236#define YUV_FORMAT YUV_FORMAT_NV12 237#define RGB_FORMAT RGB_FORMAT_ARGB 238#include "yuv_rgb_sse_func.h" 239 240#define SSE_FUNCTION_NAME yuvnv12_abgr_sseu 241#define STD_FUNCTION_NAME yuvnv12_abgr_std 242#define YUV_FORMAT YUV_FORMAT_NV12 243#define RGB_FORMAT RGB_FORMAT_ABGR 244#include "yuv_rgb_sse_func.h" 245 246 247/* SDL doesn't use these atm and compiling them adds seconds onto the build. --ryan. 248#define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 249R1 = _mm_unpacklo_epi8(RGB1, RGB4); \ 250R2 = _mm_unpackhi_epi8(RGB1, RGB4); \ 251G1 = _mm_unpacklo_epi8(RGB2, RGB5); \ 252G2 = _mm_unpackhi_epi8(RGB2, RGB5); \ 253B1 = _mm_unpacklo_epi8(RGB3, RGB6); \ 254B2 = _mm_unpackhi_epi8(RGB3, RGB6); 255 256#define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 257RGB1 = _mm_unpacklo_epi8(R1, G2); \ 258RGB2 = _mm_unpackhi_epi8(R1, G2); \ 259RGB3 = _mm_unpacklo_epi8(R2, B1); \ 260RGB4 = _mm_unpackhi_epi8(R2, B1); \ 261RGB5 = _mm_unpacklo_epi8(G1, B2); \ 262RGB6 = _mm_unpackhi_epi8(G1, B2); \ 263 264#define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 265UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 266UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 267UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 268UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 269UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 270 271#define RGB2YUV_16(R, G, B, Y, U, V) \ 272Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[0][0])), \ 273 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \ 274Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \ 275Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<<PRECISION)); \ 276Y = _mm_srai_epi16(Y, PRECISION); \ 277U = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[1][0])), \ 278 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \ 279U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \ 280U = _mm_add_epi16(U, _mm_set1_epi16(128<<PRECISION)); \ 281U = _mm_srai_epi16(U, PRECISION); \ 282V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \ 283 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \ 284V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \ 285V = _mm_add_epi16(V, _mm_set1_epi16(128<<PRECISION)); \ 286V = _mm_srai_epi16(V, PRECISION); 287*/ 288 289#if 0 // SDL doesn't use these atm and compiling them adds seconds onto the build. --ryan. 290#define RGB2YUV_32 \ 291 __m128i r1, r2, b1, b2, g1, g2; \ 292 __m128i r_16, g_16, b_16; \ 293 __m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \ 294 __m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \ 295 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \ 296 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \ 297 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \ 298 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \ 299 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \ 300 /* unpack rgb24 data to r, g and b data in separate channels*/ \ 301 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \ 302 /* process pixels of first line */ \ 303 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \ 304 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \ 305 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \ 306 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 307 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \ 308 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \ 309 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \ 310 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 311 y = _mm_packus_epi16(y1_16, y2_16); \ 312 u1 = _mm_packus_epi16(u1_16, u2_16); \ 313 v1 = _mm_packus_epi16(v1_16, v2_16); \ 314 /* save Y values */ \ 315 SAVE_SI128((__m128i*)(y_ptr1), y); \ 316 /* process pixels of second line */ \ 317 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \ 318 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \ 319 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \ 320 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 321 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \ 322 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \ 323 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \ 324 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 325 y = _mm_packus_epi16(y1_16, y2_16); \ 326 u2 = _mm_packus_epi16(u1_16, u2_16); \ 327 v2 = _mm_packus_epi16(v1_16, v2_16); \ 328 /* save Y values */ \ 329 SAVE_SI128((__m128i*)(y_ptr2), y); \ 330 /* vertical subsampling of u/v values */ \ 331 u1_tmp = _mm_avg_epu8(u1, u2); \ 332 v1_tmp = _mm_avg_epu8(v1, v2); \ 333 /* do the same again with next data */ \ 334 rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)); \ 335 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)); \ 336 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)); \ 337 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \ 338 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)); \ 339 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \ 340 /* unpack rgb24 data to r, g and b data in separate channels*/ \ 341 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \ 342 /* process pixels of first line */ \ 343 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \ 344 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \ 345 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \ 346 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 347 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \ 348 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \ 349 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \ 350 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 351 y = _mm_packus_epi16(y1_16, y2_16); \ 352 u1 = _mm_packus_epi16(u1_16, u2_16); \ 353 v1 = _mm_packus_epi16(v1_16, v2_16); \ 354 /* save Y values */ \ 355 SAVE_SI128((__m128i*)(y_ptr1+16), y); \ 356 /* process pixels of second line */ \ 357 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \ 358 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \ 359 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \ 360 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 361 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \ 362 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \ 363 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \ 364 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 365 y = _mm_packus_epi16(y1_16, y2_16); \ 366 u2 = _mm_packus_epi16(u1_16, u2_16); \ 367 v2 = _mm_packus_epi16(v1_16, v2_16); \ 368 /* save Y values */ \ 369 SAVE_SI128((__m128i*)(y_ptr2+16), y); \ 370 /* vertical subsampling of u/v values */ \ 371 u2_tmp = _mm_avg_epu8(u1, u2); \ 372 v2_tmp = _mm_avg_epu8(v1, v2); \ 373 /* horizontal subsampling of u/v values */ \ 374 u1 = _mm_packus_epi16(_mm_srl_epi16(u1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(u2_tmp, _mm_cvtsi32_si128(8))); \ 375 v1 = _mm_packus_epi16(_mm_srl_epi16(v1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(v2_tmp, _mm_cvtsi32_si128(8))); \ 376 u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \ 377 v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \ 378 u1 = _mm_avg_epu8(u1, u2); \ 379 v1 = _mm_avg_epu8(v1, v2); \ 380 SAVE_SI128((__m128i*)(u_ptr), u1); \ 381 SAVE_SI128((__m128i*)(v_ptr), v1); 382#endif 383 384/* SDL doesn't use these atm and compiling them adds seconds onto the build. --ryan. 385void SDL_TARGETING("sse2") rgb24_yuv420_sse(uint32_t width, uint32_t height, 386 const uint8_t *RGB, uint32_t RGB_stride, 387 uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 388 YCbCrType yuv_type) 389{ 390 #define LOAD_SI128 _mm_load_si128 391 #define SAVE_SI128 _mm_stream_si128 392 const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); 393 394 uint32_t xpos, ypos; 395 for(ypos=0; ypos<(height-1); ypos+=2) 396 { 397 const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride, 398 *rgb_ptr2=RGB+(ypos+1)*RGB_stride; 399 400 uint8_t *y_ptr1=Y+ypos*Y_stride, 401 *y_ptr2=Y+(ypos+1)*Y_stride, 402 *u_ptr=U+(ypos/2)*UV_stride, 403 *v_ptr=V+(ypos/2)*UV_stride; 404 405 for(xpos=0; xpos<(width-31); xpos+=32) 406 { 407 RGB2YUV_32 408 409 rgb_ptr1+=96; 410 rgb_ptr2+=96; 411 y_ptr1+=32; 412 y_ptr2+=32; 413 u_ptr+=16; 414 v_ptr+=16; 415 } 416 } 417 #undef LOAD_SI128 418 #undef SAVE_SI128 419} 420 421void SDL_TARGETING("sse2") rgb24_yuv420_sseu(uint32_t width, uint32_t height, 422 const uint8_t *RGB, uint32_t RGB_stride, 423 uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 424 YCbCrType yuv_type) 425{ 426 #define LOAD_SI128 _mm_loadu_si128 427 #define SAVE_SI128 _mm_storeu_si128 428 const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); 429 430 uint32_t xpos, ypos; 431 for(ypos=0; ypos<(height-1); ypos+=2) 432 { 433 const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride, 434 *rgb_ptr2=RGB+(ypos+1)*RGB_stride; 435 436 uint8_t *y_ptr1=Y+ypos*Y_stride, 437 *y_ptr2=Y+(ypos+1)*Y_stride, 438 *u_ptr=U+(ypos/2)*UV_stride, 439 *v_ptr=V+(ypos/2)*UV_stride; 440 441 for(xpos=0; xpos<(width-31); xpos+=32) 442 { 443 RGB2YUV_32 444 445 rgb_ptr1+=96; 446 rgb_ptr2+=96; 447 y_ptr1+=32; 448 y_ptr2+=32; 449 u_ptr+=16; 450 v_ptr+=16; 451 } 452 } 453 #undef LOAD_SI128 454 #undef SAVE_SI128 455} 456*/ 457 458#endif // SDL_SSE2_INTRINSICS 459 460#endif // SDL_HAVE_YUV 461[FILE END](C) 2025 0x4248 (C) 2025 4248 Media and 4248 Systems, All part of 0x4248 See LICENCE files for more information. Not all files are by 0x4248 always check Licencing.