Atlas - yuv_rgb.c
Home / ext / SDL2 / src / video / yuv2rgb Lines: 1 | Size: 25629 bytes [Download] [Show on GitHub] [Search similar files] [Raw] [Raw (proxy)][FILE BEGIN]1// Copyright 2016 Adrien Descamps 2// Distributed under BSD 3-Clause License 3#include "../../SDL_internal.h" 4 5#include "yuv_rgb.h" 6 7#include "SDL_cpuinfo.h" 8/*#include <x86intrin.h>*/ 9 10#define PRECISION 6 11#define PRECISION_FACTOR (1<<PRECISION) 12 13typedef struct 14{ 15 uint8_t y_shift; 16 int16_t matrix[3][3]; 17} RGB2YUVParam; 18// |Y| |y_shift| |matrix[0][0] matrix[0][1] matrix[0][2]| |R| 19// |U| = | 128 | + 1/PRECISION_FACTOR * |matrix[1][0] matrix[1][1] matrix[1][2]| * |G| 20// |V| | 128 | |matrix[2][0] matrix[2][1] matrix[2][2]| |B| 21 22typedef struct 23{ 24 uint8_t y_shift; 25 int16_t y_factor; 26 int16_t v_r_factor; 27 int16_t u_g_factor; 28 int16_t v_g_factor; 29 int16_t u_b_factor; 30} YUV2RGBParam; 31// |R| |y_factor 0 v_r_factor| |Y-y_shift| 32// |G| = 1/PRECISION_FACTOR * |y_factor u_g_factor v_g_factor| * | U-128 | 33// |B| |y_factor u_b_factor 0 | | V-128 | 34 35#define V(value) (int16_t)((value*PRECISION_FACTOR)+0.5) 36 37// for ITU-T T.871, values can be found in section 7 38// for ITU-R BT.601-7 values are derived from equations in sections 2.5.1-2.5.3, assuming RGB is encoded using full range ([0-1]<->[0-255]) 39// for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255]) 40// all values are rounded to the fourth decimal 41 42static const YUV2RGBParam YUV2RGB[3] = { 43 // ITU-T T.871 (JPEG) 44 {/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)}, 45 // ITU-R BT.601-7 46 {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)}, 47 // ITU-R BT.709-6 48 {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)} 49}; 50 51static const RGB2YUVParam RGB2YUV[3] = { 52 // ITU-T T.871 (JPEG) 53 {/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}}, 54 // ITU-R BT.601-7 55 {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}}, 56 // ITU-R BT.709-6 57 {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}} 58}; 59 60/* The various layouts of YUV data we support */ 61#define YUV_FORMAT_420 1 62#define YUV_FORMAT_422 2 63#define YUV_FORMAT_NV12 3 64 65/* The various formats of RGB pixel that we support */ 66#define RGB_FORMAT_RGB565 1 67#define RGB_FORMAT_RGB24 2 68#define RGB_FORMAT_RGBA 3 69#define RGB_FORMAT_BGRA 4 70#define RGB_FORMAT_ARGB 5 71#define RGB_FORMAT_ABGR 6 72 73// divide by PRECISION_FACTOR and clamp to [0:255] interval 74// input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range 75static uint8_t clampU8(int32_t v) 76{ 77 static const uint8_t lut[512] = 78 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 79 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 80 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46, 81 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90, 82 91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125, 83 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158, 84 159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, 85 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224, 86 225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, 87 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 88 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 89 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 90 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 91 }; 92 return lut[(v+128*PRECISION_FACTOR)>>PRECISION]; 93} 94 95 96#define STD_FUNCTION_NAME yuv420_rgb565_std 97#define YUV_FORMAT YUV_FORMAT_420 98#define RGB_FORMAT RGB_FORMAT_RGB565 99#include "yuv_rgb_std_func.h" 100 101#define STD_FUNCTION_NAME yuv420_rgb24_std 102#define YUV_FORMAT YUV_FORMAT_420 103#define RGB_FORMAT RGB_FORMAT_RGB24 104#include "yuv_rgb_std_func.h" 105 106#define STD_FUNCTION_NAME yuv420_rgba_std 107#define YUV_FORMAT YUV_FORMAT_420 108#define RGB_FORMAT RGB_FORMAT_RGBA 109#include "yuv_rgb_std_func.h" 110 111#define STD_FUNCTION_NAME yuv420_bgra_std 112#define YUV_FORMAT YUV_FORMAT_420 113#define RGB_FORMAT RGB_FORMAT_BGRA 114#include "yuv_rgb_std_func.h" 115 116#define STD_FUNCTION_NAME yuv420_argb_std 117#define YUV_FORMAT YUV_FORMAT_420 118#define RGB_FORMAT RGB_FORMAT_ARGB 119#include "yuv_rgb_std_func.h" 120 121#define STD_FUNCTION_NAME yuv420_abgr_std 122#define YUV_FORMAT YUV_FORMAT_420 123#define RGB_FORMAT RGB_FORMAT_ABGR 124#include "yuv_rgb_std_func.h" 125 126#define STD_FUNCTION_NAME yuv422_rgb565_std 127#define YUV_FORMAT YUV_FORMAT_422 128#define RGB_FORMAT RGB_FORMAT_RGB565 129#include "yuv_rgb_std_func.h" 130 131#define STD_FUNCTION_NAME yuv422_rgb24_std 132#define YUV_FORMAT YUV_FORMAT_422 133#define RGB_FORMAT RGB_FORMAT_RGB24 134#include "yuv_rgb_std_func.h" 135 136#define STD_FUNCTION_NAME yuv422_rgba_std 137#define YUV_FORMAT YUV_FORMAT_422 138#define RGB_FORMAT RGB_FORMAT_RGBA 139#include "yuv_rgb_std_func.h" 140 141#define STD_FUNCTION_NAME yuv422_bgra_std 142#define YUV_FORMAT YUV_FORMAT_422 143#define RGB_FORMAT RGB_FORMAT_BGRA 144#include "yuv_rgb_std_func.h" 145 146#define STD_FUNCTION_NAME yuv422_argb_std 147#define YUV_FORMAT YUV_FORMAT_422 148#define RGB_FORMAT RGB_FORMAT_ARGB 149#include "yuv_rgb_std_func.h" 150 151#define STD_FUNCTION_NAME yuv422_abgr_std 152#define YUV_FORMAT YUV_FORMAT_422 153#define RGB_FORMAT RGB_FORMAT_ABGR 154#include "yuv_rgb_std_func.h" 155 156#define STD_FUNCTION_NAME yuvnv12_rgb565_std 157#define YUV_FORMAT YUV_FORMAT_NV12 158#define RGB_FORMAT RGB_FORMAT_RGB565 159#include "yuv_rgb_std_func.h" 160 161#define STD_FUNCTION_NAME yuvnv12_rgb24_std 162#define YUV_FORMAT YUV_FORMAT_NV12 163#define RGB_FORMAT RGB_FORMAT_RGB24 164#include "yuv_rgb_std_func.h" 165 166#define STD_FUNCTION_NAME yuvnv12_rgba_std 167#define YUV_FORMAT YUV_FORMAT_NV12 168#define RGB_FORMAT RGB_FORMAT_RGBA 169#include "yuv_rgb_std_func.h" 170 171#define STD_FUNCTION_NAME yuvnv12_bgra_std 172#define YUV_FORMAT YUV_FORMAT_NV12 173#define RGB_FORMAT RGB_FORMAT_BGRA 174#include "yuv_rgb_std_func.h" 175 176#define STD_FUNCTION_NAME yuvnv12_argb_std 177#define YUV_FORMAT YUV_FORMAT_NV12 178#define RGB_FORMAT RGB_FORMAT_ARGB 179#include "yuv_rgb_std_func.h" 180 181#define STD_FUNCTION_NAME yuvnv12_abgr_std 182#define YUV_FORMAT YUV_FORMAT_NV12 183#define RGB_FORMAT RGB_FORMAT_ABGR 184#include "yuv_rgb_std_func.h" 185 186void rgb24_yuv420_std( 187 uint32_t width, uint32_t height, 188 const uint8_t *RGB, uint32_t RGB_stride, 189 uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 190 YCbCrType yuv_type) 191{ 192 const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); 193 194 uint32_t x, y; 195 for(y=0; y<(height-1); y+=2) 196 { 197 const uint8_t *rgb_ptr1=RGB+y*RGB_stride, 198 *rgb_ptr2=RGB+(y+1)*RGB_stride; 199 200 uint8_t *y_ptr1=Y+y*Y_stride, 201 *y_ptr2=Y+(y+1)*Y_stride, 202 *u_ptr=U+(y/2)*UV_stride, 203 *v_ptr=V+(y/2)*UV_stride; 204 205 for(x=0; x<(width-1); x+=2) 206 { 207 // compute yuv for the four pixels, u and v values are summed 208 int32_t y_tmp, u_tmp, v_tmp; 209 210 y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2]; 211 u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2]; 212 v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2]; 213 y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); 214 215 y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5]; 216 u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5]; 217 v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5]; 218 y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); 219 220 y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2]; 221 u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2]; 222 v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2]; 223 y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); 224 225 y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5]; 226 u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5]; 227 v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5]; 228 y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); 229 230 u_ptr[0] = clampU8(u_tmp/4+(128<<PRECISION)); 231 v_ptr[0] = clampU8(v_tmp/4+(128<<PRECISION)); 232 233 rgb_ptr1 += 6; 234 rgb_ptr2 += 6; 235 y_ptr1 += 2; 236 y_ptr2 += 2; 237 u_ptr += 1; 238 v_ptr += 1; 239 } 240 } 241} 242 243#ifdef __SSE2__ 244 245#define SSE_FUNCTION_NAME yuv420_rgb565_sse 246#define STD_FUNCTION_NAME yuv420_rgb565_std 247#define YUV_FORMAT YUV_FORMAT_420 248#define RGB_FORMAT RGB_FORMAT_RGB565 249#define SSE_ALIGNED 250#include "yuv_rgb_sse_func.h" 251 252#define SSE_FUNCTION_NAME yuv420_rgb565_sseu 253#define STD_FUNCTION_NAME yuv420_rgb565_std 254#define YUV_FORMAT YUV_FORMAT_420 255#define RGB_FORMAT RGB_FORMAT_RGB565 256#include "yuv_rgb_sse_func.h" 257 258#define SSE_FUNCTION_NAME yuv420_rgb24_sse 259#define STD_FUNCTION_NAME yuv420_rgb24_std 260#define YUV_FORMAT YUV_FORMAT_420 261#define RGB_FORMAT RGB_FORMAT_RGB24 262#define SSE_ALIGNED 263#include "yuv_rgb_sse_func.h" 264 265#define SSE_FUNCTION_NAME yuv420_rgb24_sseu 266#define STD_FUNCTION_NAME yuv420_rgb24_std 267#define YUV_FORMAT YUV_FORMAT_420 268#define RGB_FORMAT RGB_FORMAT_RGB24 269#include "yuv_rgb_sse_func.h" 270 271#define SSE_FUNCTION_NAME yuv420_rgba_sse 272#define STD_FUNCTION_NAME yuv420_rgba_std 273#define YUV_FORMAT YUV_FORMAT_420 274#define RGB_FORMAT RGB_FORMAT_RGBA 275#define SSE_ALIGNED 276#include "yuv_rgb_sse_func.h" 277 278#define SSE_FUNCTION_NAME yuv420_rgba_sseu 279#define STD_FUNCTION_NAME yuv420_rgba_std 280#define YUV_FORMAT YUV_FORMAT_420 281#define RGB_FORMAT RGB_FORMAT_RGBA 282#include "yuv_rgb_sse_func.h" 283 284#define SSE_FUNCTION_NAME yuv420_bgra_sse 285#define STD_FUNCTION_NAME yuv420_bgra_std 286#define YUV_FORMAT YUV_FORMAT_420 287#define RGB_FORMAT RGB_FORMAT_BGRA 288#define SSE_ALIGNED 289#include "yuv_rgb_sse_func.h" 290 291#define SSE_FUNCTION_NAME yuv420_bgra_sseu 292#define STD_FUNCTION_NAME yuv420_bgra_std 293#define YUV_FORMAT YUV_FORMAT_420 294#define RGB_FORMAT RGB_FORMAT_BGRA 295#include "yuv_rgb_sse_func.h" 296 297#define SSE_FUNCTION_NAME yuv420_argb_sse 298#define STD_FUNCTION_NAME yuv420_argb_std 299#define YUV_FORMAT YUV_FORMAT_420 300#define RGB_FORMAT RGB_FORMAT_ARGB 301#define SSE_ALIGNED 302#include "yuv_rgb_sse_func.h" 303 304#define SSE_FUNCTION_NAME yuv420_argb_sseu 305#define STD_FUNCTION_NAME yuv420_argb_std 306#define YUV_FORMAT YUV_FORMAT_420 307#define RGB_FORMAT RGB_FORMAT_ARGB 308#include "yuv_rgb_sse_func.h" 309 310#define SSE_FUNCTION_NAME yuv420_abgr_sse 311#define STD_FUNCTION_NAME yuv420_abgr_std 312#define YUV_FORMAT YUV_FORMAT_420 313#define RGB_FORMAT RGB_FORMAT_ABGR 314#define SSE_ALIGNED 315#include "yuv_rgb_sse_func.h" 316 317#define SSE_FUNCTION_NAME yuv420_abgr_sseu 318#define STD_FUNCTION_NAME yuv420_abgr_std 319#define YUV_FORMAT YUV_FORMAT_420 320#define RGB_FORMAT RGB_FORMAT_ABGR 321#include "yuv_rgb_sse_func.h" 322 323#define SSE_FUNCTION_NAME yuv422_rgb565_sse 324#define STD_FUNCTION_NAME yuv422_rgb565_std 325#define YUV_FORMAT YUV_FORMAT_422 326#define RGB_FORMAT RGB_FORMAT_RGB565 327#define SSE_ALIGNED 328#include "yuv_rgb_sse_func.h" 329 330#define SSE_FUNCTION_NAME yuv422_rgb565_sseu 331#define STD_FUNCTION_NAME yuv422_rgb565_std 332#define YUV_FORMAT YUV_FORMAT_422 333#define RGB_FORMAT RGB_FORMAT_RGB565 334#include "yuv_rgb_sse_func.h" 335 336#define SSE_FUNCTION_NAME yuv422_rgb24_sse 337#define STD_FUNCTION_NAME yuv422_rgb24_std 338#define YUV_FORMAT YUV_FORMAT_422 339#define RGB_FORMAT RGB_FORMAT_RGB24 340#define SSE_ALIGNED 341#include "yuv_rgb_sse_func.h" 342 343#define SSE_FUNCTION_NAME yuv422_rgb24_sseu 344#define STD_FUNCTION_NAME yuv422_rgb24_std 345#define YUV_FORMAT YUV_FORMAT_422 346#define RGB_FORMAT RGB_FORMAT_RGB24 347#include "yuv_rgb_sse_func.h" 348 349#define SSE_FUNCTION_NAME yuv422_rgba_sse 350#define STD_FUNCTION_NAME yuv422_rgba_std 351#define YUV_FORMAT YUV_FORMAT_422 352#define RGB_FORMAT RGB_FORMAT_RGBA 353#define SSE_ALIGNED 354#include "yuv_rgb_sse_func.h" 355 356#define SSE_FUNCTION_NAME yuv422_rgba_sseu 357#define STD_FUNCTION_NAME yuv422_rgba_std 358#define YUV_FORMAT YUV_FORMAT_422 359#define RGB_FORMAT RGB_FORMAT_RGBA 360#include "yuv_rgb_sse_func.h" 361 362#define SSE_FUNCTION_NAME yuv422_bgra_sse 363#define STD_FUNCTION_NAME yuv422_bgra_std 364#define YUV_FORMAT YUV_FORMAT_422 365#define RGB_FORMAT RGB_FORMAT_BGRA 366#define SSE_ALIGNED 367#include "yuv_rgb_sse_func.h" 368 369#define SSE_FUNCTION_NAME yuv422_bgra_sseu 370#define STD_FUNCTION_NAME yuv422_bgra_std 371#define YUV_FORMAT YUV_FORMAT_422 372#define RGB_FORMAT RGB_FORMAT_BGRA 373#include "yuv_rgb_sse_func.h" 374 375#define SSE_FUNCTION_NAME yuv422_argb_sse 376#define STD_FUNCTION_NAME yuv422_argb_std 377#define YUV_FORMAT YUV_FORMAT_422 378#define RGB_FORMAT RGB_FORMAT_ARGB 379#define SSE_ALIGNED 380#include "yuv_rgb_sse_func.h" 381 382#define SSE_FUNCTION_NAME yuv422_argb_sseu 383#define STD_FUNCTION_NAME yuv422_argb_std 384#define YUV_FORMAT YUV_FORMAT_422 385#define RGB_FORMAT RGB_FORMAT_ARGB 386#include "yuv_rgb_sse_func.h" 387 388#define SSE_FUNCTION_NAME yuv422_abgr_sse 389#define STD_FUNCTION_NAME yuv422_abgr_std 390#define YUV_FORMAT YUV_FORMAT_422 391#define RGB_FORMAT RGB_FORMAT_ABGR 392#define SSE_ALIGNED 393#include "yuv_rgb_sse_func.h" 394 395#define SSE_FUNCTION_NAME yuv422_abgr_sseu 396#define STD_FUNCTION_NAME yuv422_abgr_std 397#define YUV_FORMAT YUV_FORMAT_422 398#define RGB_FORMAT RGB_FORMAT_ABGR 399#include "yuv_rgb_sse_func.h" 400 401#define SSE_FUNCTION_NAME yuvnv12_rgb565_sse 402#define STD_FUNCTION_NAME yuvnv12_rgb565_std 403#define YUV_FORMAT YUV_FORMAT_NV12 404#define RGB_FORMAT RGB_FORMAT_RGB565 405#define SSE_ALIGNED 406#include "yuv_rgb_sse_func.h" 407 408#define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu 409#define STD_FUNCTION_NAME yuvnv12_rgb565_std 410#define YUV_FORMAT YUV_FORMAT_NV12 411#define RGB_FORMAT RGB_FORMAT_RGB565 412#include "yuv_rgb_sse_func.h" 413 414#define SSE_FUNCTION_NAME yuvnv12_rgb24_sse 415#define STD_FUNCTION_NAME yuvnv12_rgb24_std 416#define YUV_FORMAT YUV_FORMAT_NV12 417#define RGB_FORMAT RGB_FORMAT_RGB24 418#define SSE_ALIGNED 419#include "yuv_rgb_sse_func.h" 420 421#define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu 422#define STD_FUNCTION_NAME yuvnv12_rgb24_std 423#define YUV_FORMAT YUV_FORMAT_NV12 424#define RGB_FORMAT RGB_FORMAT_RGB24 425#include "yuv_rgb_sse_func.h" 426 427#define SSE_FUNCTION_NAME yuvnv12_rgba_sse 428#define STD_FUNCTION_NAME yuvnv12_rgba_std 429#define YUV_FORMAT YUV_FORMAT_NV12 430#define RGB_FORMAT RGB_FORMAT_RGBA 431#define SSE_ALIGNED 432#include "yuv_rgb_sse_func.h" 433 434#define SSE_FUNCTION_NAME yuvnv12_rgba_sseu 435#define STD_FUNCTION_NAME yuvnv12_rgba_std 436#define YUV_FORMAT YUV_FORMAT_NV12 437#define RGB_FORMAT RGB_FORMAT_RGBA 438#include "yuv_rgb_sse_func.h" 439 440#define SSE_FUNCTION_NAME yuvnv12_bgra_sse 441#define STD_FUNCTION_NAME yuvnv12_bgra_std 442#define YUV_FORMAT YUV_FORMAT_NV12 443#define RGB_FORMAT RGB_FORMAT_BGRA 444#define SSE_ALIGNED 445#include "yuv_rgb_sse_func.h" 446 447#define SSE_FUNCTION_NAME yuvnv12_bgra_sseu 448#define STD_FUNCTION_NAME yuvnv12_bgra_std 449#define YUV_FORMAT YUV_FORMAT_NV12 450#define RGB_FORMAT RGB_FORMAT_BGRA 451#include "yuv_rgb_sse_func.h" 452 453#define SSE_FUNCTION_NAME yuvnv12_argb_sse 454#define STD_FUNCTION_NAME yuvnv12_argb_std 455#define YUV_FORMAT YUV_FORMAT_NV12 456#define RGB_FORMAT RGB_FORMAT_ARGB 457#define SSE_ALIGNED 458#include "yuv_rgb_sse_func.h" 459 460#define SSE_FUNCTION_NAME yuvnv12_argb_sseu 461#define STD_FUNCTION_NAME yuvnv12_argb_std 462#define YUV_FORMAT YUV_FORMAT_NV12 463#define RGB_FORMAT RGB_FORMAT_ARGB 464#include "yuv_rgb_sse_func.h" 465 466#define SSE_FUNCTION_NAME yuvnv12_abgr_sse 467#define STD_FUNCTION_NAME yuvnv12_abgr_std 468#define YUV_FORMAT YUV_FORMAT_NV12 469#define RGB_FORMAT RGB_FORMAT_ABGR 470#define SSE_ALIGNED 471#include "yuv_rgb_sse_func.h" 472 473#define SSE_FUNCTION_NAME yuvnv12_abgr_sseu 474#define STD_FUNCTION_NAME yuvnv12_abgr_std 475#define YUV_FORMAT YUV_FORMAT_NV12 476#define RGB_FORMAT RGB_FORMAT_ABGR 477#include "yuv_rgb_sse_func.h" 478 479 480#define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 481R1 = _mm_unpacklo_epi8(RGB1, RGB4); \ 482R2 = _mm_unpackhi_epi8(RGB1, RGB4); \ 483G1 = _mm_unpacklo_epi8(RGB2, RGB5); \ 484G2 = _mm_unpackhi_epi8(RGB2, RGB5); \ 485B1 = _mm_unpacklo_epi8(RGB3, RGB6); \ 486B2 = _mm_unpackhi_epi8(RGB3, RGB6); 487 488#define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 489RGB1 = _mm_unpacklo_epi8(R1, G2); \ 490RGB2 = _mm_unpackhi_epi8(R1, G2); \ 491RGB3 = _mm_unpacklo_epi8(R2, B1); \ 492RGB4 = _mm_unpackhi_epi8(R2, B1); \ 493RGB5 = _mm_unpacklo_epi8(G1, B2); \ 494RGB6 = _mm_unpackhi_epi8(G1, B2); \ 495 496#define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 497UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 498UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 499UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 500UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 501UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 502 503#define RGB2YUV_16(R, G, B, Y, U, V) \ 504Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[0][0])), \ 505 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \ 506Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \ 507Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<<PRECISION)); \ 508Y = _mm_srai_epi16(Y, PRECISION); \ 509U = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[1][0])), \ 510 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \ 511U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \ 512U = _mm_add_epi16(U, _mm_set1_epi16(128<<PRECISION)); \ 513U = _mm_srai_epi16(U, PRECISION); \ 514V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \ 515 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \ 516V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \ 517V = _mm_add_epi16(V, _mm_set1_epi16(128<<PRECISION)); \ 518V = _mm_srai_epi16(V, PRECISION); 519 520#define RGB2YUV_32 \ 521 __m128i r1, r2, b1, b2, g1, g2; \ 522 __m128i r_16, g_16, b_16; \ 523 __m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \ 524 __m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \ 525 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \ 526 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \ 527 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \ 528 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \ 529 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \ 530 /* unpack rgb24 data to r, g and b data in separate channels*/ \ 531 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \ 532 /* process pixels of first line */ \ 533 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \ 534 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \ 535 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \ 536 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 537 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \ 538 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \ 539 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \ 540 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 541 y = _mm_packus_epi16(y1_16, y2_16); \ 542 u1 = _mm_packus_epi16(u1_16, u2_16); \ 543 v1 = _mm_packus_epi16(v1_16, v2_16); \ 544 /* save Y values */ \ 545 SAVE_SI128((__m128i*)(y_ptr1), y); \ 546 /* process pixels of second line */ \ 547 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \ 548 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \ 549 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \ 550 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 551 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \ 552 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \ 553 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \ 554 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 555 y = _mm_packus_epi16(y1_16, y2_16); \ 556 u2 = _mm_packus_epi16(u1_16, u2_16); \ 557 v2 = _mm_packus_epi16(v1_16, v2_16); \ 558 /* save Y values */ \ 559 SAVE_SI128((__m128i*)(y_ptr2), y); \ 560 /* vertical subsampling of u/v values */ \ 561 u1_tmp = _mm_avg_epu8(u1, u2); \ 562 v1_tmp = _mm_avg_epu8(v1, v2); \ 563 /* do the same again with next data */ \ 564 rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)); \ 565 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)); \ 566 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)); \ 567 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \ 568 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)); \ 569 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \ 570 /* unpack rgb24 data to r, g and b data in separate channels*/ \ 571 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \ 572 /* process pixels of first line */ \ 573 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \ 574 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \ 575 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \ 576 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 577 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \ 578 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \ 579 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \ 580 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 581 y = _mm_packus_epi16(y1_16, y2_16); \ 582 u1 = _mm_packus_epi16(u1_16, u2_16); \ 583 v1 = _mm_packus_epi16(v1_16, v2_16); \ 584 /* save Y values */ \ 585 SAVE_SI128((__m128i*)(y_ptr1+16), y); \ 586 /* process pixels of second line */ \ 587 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \ 588 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \ 589 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \ 590 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 591 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \ 592 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \ 593 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \ 594 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 595 y = _mm_packus_epi16(y1_16, y2_16); \ 596 u2 = _mm_packus_epi16(u1_16, u2_16); \ 597 v2 = _mm_packus_epi16(v1_16, v2_16); \ 598 /* save Y values */ \ 599 SAVE_SI128((__m128i*)(y_ptr2+16), y); \ 600 /* vertical subsampling of u/v values */ \ 601 u2_tmp = _mm_avg_epu8(u1, u2); \ 602 v2_tmp = _mm_avg_epu8(v1, v2); \ 603 /* horizontal subsampling of u/v values */ \ 604 u1 = _mm_packus_epi16(_mm_srl_epi16(u1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(u2_tmp, _mm_cvtsi32_si128(8))); \ 605 v1 = _mm_packus_epi16(_mm_srl_epi16(v1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(v2_tmp, _mm_cvtsi32_si128(8))); \ 606 u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \ 607 v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \ 608 u1 = _mm_avg_epu8(u1, u2); \ 609 v1 = _mm_avg_epu8(v1, v2); \ 610 SAVE_SI128((__m128i*)(u_ptr), u1); \ 611 SAVE_SI128((__m128i*)(v_ptr), v1); 612 613void rgb24_yuv420_sse(uint32_t width, uint32_t height, 614 const uint8_t *RGB, uint32_t RGB_stride, 615 uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 616 YCbCrType yuv_type) 617{ 618 #define LOAD_SI128 _mm_load_si128 619 #define SAVE_SI128 _mm_stream_si128 620 const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); 621 622 uint32_t xpos, ypos; 623 for(ypos=0; ypos<(height-1); ypos+=2) 624 { 625 const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride, 626 *rgb_ptr2=RGB+(ypos+1)*RGB_stride; 627 628 uint8_t *y_ptr1=Y+ypos*Y_stride, 629 *y_ptr2=Y+(ypos+1)*Y_stride, 630 *u_ptr=U+(ypos/2)*UV_stride, 631 *v_ptr=V+(ypos/2)*UV_stride; 632 633 for(xpos=0; xpos<(width-31); xpos+=32) 634 { 635 RGB2YUV_32 636 637 rgb_ptr1+=96; 638 rgb_ptr2+=96; 639 y_ptr1+=32; 640 y_ptr2+=32; 641 u_ptr+=16; 642 v_ptr+=16; 643 } 644 } 645 #undef LOAD_SI128 646 #undef SAVE_SI128 647} 648 649void rgb24_yuv420_sseu(uint32_t width, uint32_t height, 650 const uint8_t *RGB, uint32_t RGB_stride, 651 uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 652 YCbCrType yuv_type) 653{ 654 #define LOAD_SI128 _mm_loadu_si128 655 #define SAVE_SI128 _mm_storeu_si128 656 const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); 657 658 uint32_t xpos, ypos; 659 for(ypos=0; ypos<(height-1); ypos+=2) 660 { 661 const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride, 662 *rgb_ptr2=RGB+(ypos+1)*RGB_stride; 663 664 uint8_t *y_ptr1=Y+ypos*Y_stride, 665 *y_ptr2=Y+(ypos+1)*Y_stride, 666 *u_ptr=U+(ypos/2)*UV_stride, 667 *v_ptr=V+(ypos/2)*UV_stride; 668 669 for(xpos=0; xpos<(width-31); xpos+=32) 670 { 671 RGB2YUV_32 672 673 rgb_ptr1+=96; 674 rgb_ptr2+=96; 675 y_ptr1+=32; 676 y_ptr2+=32; 677 u_ptr+=16; 678 v_ptr+=16; 679 } 680 } 681 #undef LOAD_SI128 682 #undef SAVE_SI128 683} 684 685 686#endif //__SSE2__ 687 688[FILE END](C) 2025 0x4248 (C) 2025 4248 Media and 4248 Systems, All part of 0x4248 See LICENCE files for more information. Not all files are by 0x4248 always check Licencing.