Atlas - yuv_rgb_lsx_func.h

Home / ext / SDL / src / video / yuv2rgb Lines: 1 | Size: 18412 bytes [Download] [Show on GitHub] [Search similar files] [Raw] [Raw (proxy)]
[FILE BEGIN]
1// Copyright 2016 Adrien Descamps 2// // Distributed under BSD 3-Clause License 3 4#include <lsxintrin.h> 5 6#if YUV_FORMAT == YUV_FORMAT_420 7 8#define READ_Y(y_ptr) \ 9 y = __lsx_vld(y_ptr, 0); \ 10 11#define READ_UV \ 12 u_temp = __lsx_vld(u_ptr, 0); \ 13 v_temp = __lsx_vld(v_ptr, 0); \ 14 15#else 16#error READ_UV unimplemented 17#endif 18 19#define PACK_RGBA_32(R1, R2, G1, G2, B1, B2, A1, A2, RGB1, RGB2, \ 20 RGB3, RGB4, RGB5, RGB6, RGB7, RGB8) \ 21{ \ 22 __m128i ab_l, ab_h, gr_l, gr_h; \ 23 ab_l = __lsx_vilvl_b(B1, A1); \ 24 ab_h = __lsx_vilvh_b(B1, A1); \ 25 gr_l = __lsx_vilvl_b(R1, G1); \ 26 gr_h = __lsx_vilvh_b(R1, G1); \ 27 RGB1 = __lsx_vilvl_h(gr_l, ab_l); \ 28 RGB2 = __lsx_vilvh_h(gr_l, ab_l); \ 29 RGB3 = __lsx_vilvl_h(gr_h, ab_h); \ 30 RGB4 = __lsx_vilvh_h(gr_h, ab_h); \ 31 ab_l = __lsx_vilvl_b(B2, A2); \ 32 ab_h = __lsx_vilvh_b(B2, A2); \ 33 gr_l = __lsx_vilvl_b(R2, G2); \ 34 gr_h = __lsx_vilvh_b(R2, G2); \ 35 RGB5 = __lsx_vilvl_h(gr_l, ab_l); \ 36 RGB6 = __lsx_vilvh_h(gr_l, ab_l); \ 37 RGB7 = __lsx_vilvl_h(gr_h, ab_h); \ 38 RGB8 = __lsx_vilvh_h(gr_h, ab_h); \ 39} 40 41#define PACK_RGB24_32_STEP(R, G, B, RGB1, RGB2, RGB3) \ 42 RGB1 = __lsx_vilvl_b(G, R); \ 43 RGB1 = __lsx_vshuf_b(B, RGB1, mask1); \ 44 RGB2 = __lsx_vshuf_b(B, G, mask2); \ 45 RGB2 = __lsx_vshuf_b(R, RGB2, mask3); \ 46 RGB3 = __lsx_vshuf_b(R, B, mask4); \ 47 RGB3 = __lsx_vshuf_b(G, RGB3, mask5); \ 48 49#define PACK_RGB24_32(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \ 50 PACK_RGB24_32_STEP(R1, G1, B1, RGB1, RGB2, RGB3); \ 51 PACK_RGB24_32_STEP(R2, G2, B2, RGB4, RGB5, RGB6); \ 52 53#if RGB_FORMAT == RGB_FORMAT_RGB24 54 55#define PACK_PIXEL \ 56 __m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6; \ 57 __m128i rgb_7, rgb_8, rgb_9, rgb_10, rgb_11, rgb_12; \ 58 PACK_RGB24_32(r_8_11, r_8_12, g_8_11, g_8_12, b_8_11, b_8_12, \ 59 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6) \ 60 PACK_RGB24_32(r_8_21, r_8_22, g_8_21, g_8_22, b_8_21, b_8_22, \ 61 rgb_7, rgb_8, rgb_9, rgb_10, rgb_11, 
rgb_12) \ 62 63#elif RGB_FORMAT == RGB_FORMAT_RGBA 64 65#define PACK_PIXEL \ 66 __m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \ 67 __m128i rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16; \ 68 __m128i a = __lsx_vldi(0xFF); \ 69 PACK_RGBA_32(r_8_11, r_8_12, g_8_11, g_8_12, b_8_11, b_8_12, a, a, \ 70 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8) \ 71 PACK_RGBA_32(r_8_21, r_8_22, g_8_21, g_8_22, b_8_21, b_8_22, a, a, \ 72 rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16) \ 73 74#elif RGB_FORMAT == RGB_FORMAT_BGRA 75 76#define PACK_PIXEL \ 77 __m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \ 78 __m128i rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16; \ 79 __m128i a = __lsx_vldi(0xFF); \ 80 PACK_RGBA_32(b_8_11, b_8_12, g_8_11, g_8_12, r_8_11, r_8_12, a, a, \ 81 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8) \ 82 PACK_RGBA_32(b_8_21, b_8_22, g_8_21, g_8_22, r_8_21, r_8_22, a, a, \ 83 rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16) \ 84 85#elif RGB_FORMAT == RGB_FORMAT_ARGB 86 87#define PACK_PIXEL \ 88 __m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \ 89 __m128i rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16; \ 90 __m128i a = __lsx_vldi(0xFF); \ 91 PACK_RGBA_32(a, a, r_8_11, r_8_12, g_8_11, g_8_12, b_8_11, b_8_12, \ 92 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8) \ 93 PACK_RGBA_32(a, a, r_8_21, r_8_22, g_8_21, g_8_22, b_8_21, b_8_22, \ 94 rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16) \ 95 96#elif RGB_FORMAT == RGB_FORMAT_ABGR 97 98#define PACK_PIXEL \ 99 __m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \ 100 __m128i rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16; \ 101 __m128i a = __lsx_vldi(0xFF); \ 102 PACK_RGBA_32(a, a, b_8_11, b_8_12, g_8_11, g_8_12, r_8_11, r_8_12, \ 103 rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8) \ 104 PACK_RGBA_32(a, a, b_8_21, b_8_22, 
g_8_21, g_8_22, r_8_21, r_8_22, \ 105 rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16) \ 106 107#else 108#error PACK_PIXEL unimplemented 109#endif 110 111#define LSX_ST_UB2(in0, in1, pdst, stride) \ 112{ \ 113 __lsx_vst(in0, pdst, 0); \ 114 __lsx_vst(in1, pdst + stride, 0); \ 115} 116 117#if RGB_FORMAT == RGB_FORMAT_RGB24 \ 118 119#define SAVE_LINE1 \ 120 LSX_ST_UB2(rgb_1, rgb_2, rgb_ptr1, 16); \ 121 LSX_ST_UB2(rgb_3, rgb_4, rgb_ptr1 + 32, 16); \ 122 LSX_ST_UB2(rgb_5, rgb_6, rgb_ptr1 + 64, 16); \ 123 124#define SAVE_LINE2 \ 125 LSX_ST_UB2(rgb_7, rgb_8, rgb_ptr2, 16); \ 126 LSX_ST_UB2(rgb_9, rgb_10, rgb_ptr2 + 32, 16); \ 127 LSX_ST_UB2(rgb_11, rgb_12, rgb_ptr2 + 64, 16); \ 128 129#elif RGB_FORMAT == RGB_FORMAT_RGBA || RGB_FORMAT == RGB_FORMAT_BGRA || \ 130 RGB_FORMAT == RGB_FORMAT_ARGB || RGB_FORMAT == RGB_FORMAT_ABGR \ 131 132#define SAVE_LINE1 \ 133 LSX_ST_UB2(rgb_1, rgb_2, rgb_ptr1, 16); \ 134 LSX_ST_UB2(rgb_3, rgb_4, rgb_ptr1 + 32, 16); \ 135 LSX_ST_UB2(rgb_5, rgb_6, rgb_ptr1 + 64, 16); \ 136 LSX_ST_UB2(rgb_7, rgb_8, rgb_ptr1 + 96, 16); \ 137 138#define SAVE_LINE2 \ 139 LSX_ST_UB2(rgb_9, rgb_10, rgb_ptr2, 16); \ 140 LSX_ST_UB2(rgb_11, rgb_12, rgb_ptr2 + 32, 16); \ 141 LSX_ST_UB2(rgb_13, rgb_14, rgb_ptr2 + 64, 16); \ 142 LSX_ST_UB2(rgb_15, rgb_16, rgb_ptr2 + 96, 16); \ 143 144#else 145#error SAVE_LINE unimplemented 146#endif 147 148// = u*vr g=u*ug+v*vg b=u*ub 149#define UV2RGB_16(U, V, R1, G1, B1, R2, G2, B2) \ 150 r_temp = __lsx_vmul_h(V, v2r); \ 151 g_temp = __lsx_vmul_h(U, u2g); \ 152 g_temp = __lsx_vmadd_h(g_temp, V, v2g); \ 153 b_temp = __lsx_vmul_h(U, u2b); \ 154 R1 = __lsx_vilvl_h(r_temp, r_temp); \ 155 G1 = __lsx_vilvl_h(g_temp, g_temp); \ 156 B1 = __lsx_vilvl_h(b_temp, b_temp); \ 157 R2 = __lsx_vilvh_h(r_temp, r_temp); \ 158 G2 = __lsx_vilvh_h(g_temp, g_temp); \ 159 B2 = __lsx_vilvh_h(b_temp, b_temp); \ 160 161// Y=(Y-shift)*shift R=(Y+R)>>6,G=(Y+G)>>6,B=(B+Y)>>6 162#define ADD_Y2RGB_16(Y1, Y2, R1, G1, B1, R2, G2, B2) \ 163 Y1 = 
__lsx_vsub_h(Y1, shift); \ 164 Y2 = __lsx_vsub_h(Y2, shift); \ 165 Y1 = __lsx_vmul_h(Y1, yf); \ 166 Y2 = __lsx_vmul_h(Y2, yf); \ 167 R1 = __lsx_vadd_h(R1, Y1); \ 168 G1 = __lsx_vadd_h(G1, Y1); \ 169 B1 = __lsx_vadd_h(B1, Y1); \ 170 R2 = __lsx_vadd_h(R2, Y2); \ 171 G2 = __lsx_vadd_h(G2, Y2); \ 172 B2 = __lsx_vadd_h(B2, Y2); \ 173 R1 = __lsx_vsrai_h(R1, PRECISION); \ 174 G1 = __lsx_vsrai_h(G1, PRECISION); \ 175 B1 = __lsx_vsrai_h(B1, PRECISION); \ 176 R2 = __lsx_vsrai_h(R2, PRECISION); \ 177 G2 = __lsx_vsrai_h(G2, PRECISION); \ 178 B2 = __lsx_vsrai_h(B2, PRECISION); \ 179 180#define CLIP(in0, in1, in2, in3, in4, in5) \ 181{ \ 182 in0 = __lsx_vmaxi_h(in0, 0); \ 183 in1 = __lsx_vmaxi_h(in1, 0); \ 184 in2 = __lsx_vmaxi_h(in2, 0); \ 185 in3 = __lsx_vmaxi_h(in3, 0); \ 186 in4 = __lsx_vmaxi_h(in4, 0); \ 187 in5 = __lsx_vmaxi_h(in5, 0); \ 188 in0 = __lsx_vsat_hu(in0, 7); \ 189 in1 = __lsx_vsat_hu(in1, 7); \ 190 in2 = __lsx_vsat_hu(in2, 7); \ 191 in3 = __lsx_vsat_hu(in3, 7); \ 192 in4 = __lsx_vsat_hu(in4, 7); \ 193 in5 = __lsx_vsat_hu(in5, 7); \ 194} 195 196#define YUV2RGB_32 \ 197 __m128i y, u_temp, v_temp; \ 198 __m128i r_8_11, g_8_11, b_8_11, r_8_21, g_8_21, b_8_21; \ 199 __m128i r_8_12, g_8_12, b_8_12, r_8_22, g_8_22, b_8_22; \ 200 __m128i u, v, r_temp, g_temp, b_temp; \ 201 __m128i r_1, g_1, b_1, r_2, g_2, b_2; \ 202 __m128i y_1, y_2; \ 203 __m128i r_uv_1, g_uv_1, b_uv_1, r_uv_2, g_uv_2, b_uv_2; \ 204 \ 205 READ_UV \ 206 \ 207 /* process first 16 pixels of first line */ \ 208 u = __lsx_vilvl_b(zero, u_temp); \ 209 v = __lsx_vilvl_b(zero, v_temp); \ 210 u = __lsx_vsub_h(u, bias); \ 211 v = __lsx_vsub_h(v, bias); \ 212 UV2RGB_16(u, v, r_1, g_1, b_1, r_2, g_2, b_2); \ 213 r_uv_1 = r_1; g_uv_1 = g_1; b_uv_1 = b_1; \ 214 r_uv_2 = r_2; g_uv_2 = g_2; b_uv_2 = b_2; \ 215 READ_Y(y_ptr1) \ 216 y_1 = __lsx_vilvl_b(zero, y); \ 217 y_2 = __lsx_vilvh_b(zero, y); \ 218 ADD_Y2RGB_16(y_1, y_2, r_1, g_1, b_1, r_2, g_2, b_2) \ 219 CLIP(r_1, g_1, b_1, r_2, g_2, b_2); \ 220 r_8_11 = 
__lsx_vpickev_b(r_2, r_1); \ 221 g_8_11 = __lsx_vpickev_b(g_2, g_1); \ 222 b_8_11 = __lsx_vpickev_b(b_2, b_1); \ 223 \ 224 /* process first 16 pixels of second line */ \ 225 r_1 = r_uv_1; g_1 = g_uv_1; b_1 = b_uv_1; \ 226 r_2 = r_uv_2; g_2 = g_uv_2; b_2 = b_uv_2; \ 227 \ 228 READ_Y(y_ptr2) \ 229 y_1 = __lsx_vilvl_b(zero, y); \ 230 y_2 = __lsx_vilvh_b(zero, y); \ 231 ADD_Y2RGB_16(y_1, y_2, r_1, g_1, b_1, r_2, g_2, b_2) \ 232 CLIP(r_1, g_1, b_1, r_2, g_2, b_2); \ 233 r_8_21 = __lsx_vpickev_b(r_2, r_1); \ 234 g_8_21 = __lsx_vpickev_b(g_2, g_1); \ 235 b_8_21 = __lsx_vpickev_b(b_2, b_1); \ 236 \ 237 /* process last 16 pixels of first line */ \ 238 u = __lsx_vilvh_b(zero, u_temp); \ 239 v = __lsx_vilvh_b(zero, v_temp); \ 240 u = __lsx_vsub_h(u, bias); \ 241 v = __lsx_vsub_h(v, bias); \ 242 UV2RGB_16(u, v, r_1, g_1, b_1, r_2, g_2, b_2); \ 243 r_uv_1 = r_1; g_uv_1 = g_1; b_uv_1 = b_1; \ 244 r_uv_2 = r_2; g_uv_2 = g_2; b_uv_2 = b_2; \ 245 READ_Y(y_ptr1 + 16 * y_pixel_stride) \ 246 y_1 = __lsx_vilvl_b(zero, y); \ 247 y_2 = __lsx_vilvh_b(zero, y); \ 248 ADD_Y2RGB_16(y_1, y_2, r_1, g_1, b_1, r_2, g_2, b_2) \ 249 CLIP(r_1, g_1, b_1, r_2, g_2, b_2); \ 250 r_8_12 = __lsx_vpickev_b(r_2, r_1); \ 251 g_8_12 = __lsx_vpickev_b(g_2, g_1); \ 252 b_8_12 = __lsx_vpickev_b(b_2, b_1); \ 253 \ 254 /* process last 16 pixels of second line */ \ 255 r_1 = r_uv_1; g_1 = g_uv_1; b_1 = b_uv_1; \ 256 r_2 = r_uv_2; g_2 = g_uv_2; b_2 = b_uv_2; \ 257 \ 258 READ_Y(y_ptr2 + 16 * y_pixel_stride) \ 259 y_1 = __lsx_vilvl_b(zero, y); \ 260 y_2 = __lsx_vilvh_b(zero, y); \ 261 ADD_Y2RGB_16(y_1, y_2, r_1, g_1, b_1, r_2, g_2, b_2) \ 262 CLIP(r_1, g_1, b_1, r_2, g_2, b_2); \ 263 r_8_22 = __lsx_vpickev_b(r_2, r_1); \ 264 g_8_22 = __lsx_vpickev_b(g_2, g_1); \ 265 b_8_22 = __lsx_vpickev_b(b_2, b_1); \ 266 \ 267 268void LSX_FUNCTION_NAME(uint32_t width, uint32_t height, const uint8_t *Y, 269 const uint8_t *U, const uint8_t *V, uint32_t Y_stride, 270 uint32_t UV_stride, uint8_t *RGB, uint32_t RGB_stride, 271 
YCbCrType yuv_type) 272{ 273 const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]); 274#if YUV_FORMAT == YUV_FORMAT_420 275 const int y_pixel_stride = 1; 276 const int uv_pixel_stride = 1; 277 const int uv_x_sample_interval = 2; 278 const int uv_y_sample_interval = 2; 279#endif 280 281#if RGB_FORMAT == RGB_FORMAT_RGB565 282 const int rgb_pixel_stride = 2; 283#elif RGB_FORMAT == RGB_FORMAT_RGB24 284 const int rgb_pixel_stride = 3; 285 __m128i mask1 = {0x0504110302100100, 0x0A14090813070612}; 286 __m128i mask2 = {0x1808170716061505, 0x00000000000A1909}; 287 __m128i mask3 = {0x0504170302160100, 0x0A1A090819070618}; 288 __m128i mask4 = {0x1E0D1D0C1C0B1B0A, 0x00000000000F1F0E}; 289 __m128i mask5 = {0x05041C03021B0100, 0x0A1F09081E07061D}; 290#elif RGB_FORMAT == RGB_FORMAT_RGBA || RGB_FORMAT_BGRA || \ 291 RGB_FORMAT == RGB_FORMAT_ARGB || RGB_FORMAT_ABGR 292 const int rgb_pixel_stride = 4; 293#else 294#error Unknown RGB pixel size 295#endif 296 297 uint32_t xpos, ypos; 298 __m128i v2r = __lsx_vreplgr2vr_h(param->v_r_factor); 299 __m128i v2g = __lsx_vreplgr2vr_h(param->v_g_factor); 300 __m128i u2g = __lsx_vreplgr2vr_h(param->u_g_factor); 301 __m128i u2b = __lsx_vreplgr2vr_h(param->u_b_factor); 302 __m128i bias = __lsx_vreplgr2vr_h(128); 303 __m128i shift = __lsx_vreplgr2vr_h(param->y_shift); 304 __m128i yf = __lsx_vreplgr2vr_h(param->y_factor); 305 __m128i zero = __lsx_vldi(0); 306 307 if (width >= 32) { 308 for (ypos = 0; ypos < (height - (uv_y_sample_interval - 1)); ypos += uv_y_sample_interval) { 309 const uint8_t *y_ptr1 = Y + ypos * Y_stride, 310 *y_ptr2 = Y + (ypos + 1) * Y_stride, 311 *u_ptr = U + (ypos/uv_y_sample_interval) * UV_stride, 312 *v_ptr = V + (ypos/uv_y_sample_interval) * UV_stride; 313 uint8_t *rgb_ptr1 = RGB + ypos * RGB_stride, 314 *rgb_ptr2 = RGB + (ypos + 1) * RGB_stride; 315 316 for (xpos = 0; xpos < (width - 31); xpos += 32){ 317 YUV2RGB_32 318 { 319 PACK_PIXEL 320 SAVE_LINE1 321 if (uv_y_sample_interval > 1) 322 { 323 SAVE_LINE2 324 } 325 } 326 
y_ptr1 += 32 * y_pixel_stride; 327 y_ptr2 += 32 * y_pixel_stride; 328 u_ptr += 32 * uv_pixel_stride/uv_x_sample_interval; 329 v_ptr += 32 * uv_pixel_stride/uv_x_sample_interval; 330 rgb_ptr1 += 32 * rgb_pixel_stride; 331 rgb_ptr2 += 32 * rgb_pixel_stride; 332 } 333 } 334 if (uv_y_sample_interval == 2 && ypos == (height - 1)) { 335 const uint8_t *y_ptr = Y + ypos * Y_stride, 336 *u_ptr = U + (ypos/uv_y_sample_interval) * UV_stride, 337 *v_ptr = V + (ypos/uv_y_sample_interval) * UV_stride; 338 uint8_t *rgb_ptr = RGB + ypos * RGB_stride; 339 STD_FUNCTION_NAME(width, 1, y_ptr, u_ptr, v_ptr, Y_stride, UV_stride, rgb_ptr, RGB_stride, yuv_type); 340 } 341 } 342 { 343 int converted = (width & ~31); 344 if (converted != width) 345 { 346 const uint8_t *y_ptr = Y + converted * y_pixel_stride, 347 *u_ptr = U + converted * uv_pixel_stride / uv_x_sample_interval, 348 *v_ptr = V + converted * uv_pixel_stride / uv_x_sample_interval; 349 uint8_t *rgb_ptr = RGB + converted * rgb_pixel_stride; 350 351 STD_FUNCTION_NAME(width-converted, height, y_ptr, u_ptr, v_ptr, Y_stride, UV_stride, rgb_ptr, RGB_stride, yuv_type); 352 } 353 } 354} 355 356#undef LSX_FUNCTION_NAME 357#undef STD_FUNCTION_NAME 358#undef YUV_FORMAT 359#undef RGB_FORMAT 360#undef LSX_ALIGNED 361#undef LSX_ST_UB2 362#undef UV2RGB_16 363#undef ADD_Y2RGB_16 364#undef PACK_RGB24_32_STEP 365#undef PACK_RGB24_32 366#undef PACK_PIXEL 367#undef PACK_RGBA_32 368#undef SAVE_LINE1 369#undef SAVE_LINE2 370#undef READ_Y 371#undef READ_UV 372#undef YUV2RGB_32 373
[FILE END]
(C) 2025 0x4248. (C) 2025 4248 Media and 4248 Systems, all part of 0x4248. See LICENCE files for more information. Not all files are by 0x4248; always check licensing.