00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
/*
 * Instruction selection for this instantiation of the template.
 * The three names are reset first so the section can be processed more
 * than once with different HAVE_* settings.
 */
#undef SFENCE
#undef MOVNTQ
#undef EMMS

/*
 * Store instruction used for the output pixels, and the fence issued once
 * after all rows are written. Without MMX2 a plain movq is used and the
 * fence string starts with '#', so it carries no instruction mnemonic.
 */
#if !HAVE_MMX2
#    define MOVNTQ "movq"
#    define SFENCE " # nop"
#else
#    define MOVNTQ "movntq"
#    define SFENCE "sfence"
#endif

/* Instruction that leaves MMX state: femms on 3DNow! builds, emms otherwise. */
#if !HAVE_AMD3DNOW
#    define EMMS "emms"
#else
#    define EMMS "femms"
#endif
00047
/*
 * YUV2RGB: inline-asm template fragment that converts 8 horizontally
 * adjacent pixels from YUV to planar 8-bit R, G and B.
 *
 * Expected on entry (set up by YUV2RGB_INIT / the packing macros):
 *   mm0 = 4 U bytes, mm1 = 4 V bytes (low dword), mm6 = 8 Y bytes, mm4 = 0
 *   %4  = &c->redDither; U_OFFSET, V_OFFSET, Y_OFFSET and the *_COEFF
 *         strings are displacements relative to it (defined elsewhere).
 * On exit:
 *   mm0 = 8 blue bytes, mm1 = 8 red bytes, mm2 = 8 green bytes, in pixel
 *   order. mm3..mm7 are clobbered.
 *
 * Every line of the body, including comment-only lines, must end in a
 * line-continuation backslash; a line without one terminates the #define
 * and leaves the following string literals at file scope.
 */
#define YUV2RGB \
    /* ---- chroma: one U/V sample is shared by an even/odd pixel pair ---- */ \
    /* widen the 4 U and 4 V bytes to 16-bit words (mm4 is zero) */ \
    "punpcklbw %%mm4, %%mm0;" \
    "punpcklbw %%mm4, %%mm1;" \
    \
    /* scale by 8 so that pmulhw (keeps the high 16 bits) retains precision */ \
    "psllw $3, %%mm0;" \
    "psllw $3, %%mm1;" \
    \
    /* remove the chroma bias */ \
    "psubsw "U_OFFSET"(%4), %%mm0;" \
    "psubsw "V_OFFSET"(%4), %%mm1;" \
    \
    /* keep copies of U and V for the green term */ \
    "movq %%mm0, %%mm2;" \
    "movq %%mm1, %%mm3;" \
    \
    /* green chroma contributions: U*UG and V*VG */ \
    "pmulhw "UG_COEFF"(%4), %%mm2;" \
    "pmulhw "VG_COEFF"(%4), %%mm3;" \
    \
    /* blue chroma term U*UB in mm0, red chroma term V*VR in mm1 */ \
    "pmulhw "UB_COEFF"(%4), %%mm0;" \
    "pmulhw "VR_COEFF"(%4), %%mm1;" \
    \
    /* mm2 = total green chroma term */ \
    "paddsw %%mm3, %%mm2;" \
    \
    /* ---- luma: split the 8 Y bytes into even and odd pixels ---- */ \
    "movq %%mm6, %%mm7;" \
    /* mm6 = even-position Y as words (mask presumably 0x00ff per word) */ \
    "pand "MANGLE(mmx_00ffw)", %%mm6;" \
    \
    /* mm7 = odd-position Y as words */ \
    "psrlw $8, %%mm7;" \
    \
    /* same x8 pre-scale as for chroma */ \
    "psllw $3, %%mm6;" \
    "psllw $3, %%mm7;" \
    \
    /* remove the luma bias */ \
    "psubw "Y_OFFSET"(%4), %%mm6;" \
    "psubw "Y_OFFSET"(%4), %%mm7;" \
    \
    /* apply the luma gain */ \
    "pmulhw "Y_COEFF"(%4), %%mm6;" \
    "pmulhw "Y_COEFF"(%4), %%mm7;" \
    \
    /* ---- combine: even pixels in mm0/mm1/mm2, odd pixels in mm3/mm4/mm5 ---- */ \
    "movq %%mm0, %%mm3;" \
    "movq %%mm1, %%mm4;" \
    "movq %%mm2, %%mm5;" \
    \
    /* blue = chroma term + Y (even, odd) */ \
    "paddsw %%mm6, %%mm0;" \
    "paddsw %%mm7, %%mm3;" \
    \
    /* red */ \
    "paddsw %%mm6, %%mm1;" \
    "paddsw %%mm7, %%mm4;" \
    \
    /* green */ \
    "paddsw %%mm6, %%mm2;" \
    "paddsw %%mm7, %%mm5;" \
    \
    /* saturate the even-pixel words to unsigned bytes */ \
    "packuswb %%mm0, %%mm0;" \
    "packuswb %%mm1, %%mm1;" \
    "packuswb %%mm2, %%mm2;" \
    \
    /* saturate the odd-pixel words to unsigned bytes */ \
    "packuswb %%mm3, %%mm3;" \
    "packuswb %%mm4, %%mm4;" \
    "packuswb %%mm5, %%mm5;" \
    \
    /* interleave even and odd bytes back into pixel order */ \
    "punpcklbw %%mm3, %%mm0;" \
    "punpcklbw %%mm4, %%mm1;" \
    "punpcklbw %%mm5, %%mm2;"
00123
/*
 * YUV422_UNSHIFT: statement fragment placed at the top of the converters.
 * When the source format is PIX_FMT_YUV422P it doubles the two chroma
 * strides. The doubling is done in place on the srcStride[] array that was
 * passed to the function, so the caller's array is modified.
 * Expects `c` and `srcStride` to be in scope; used without a trailing ';'.
 */
#define YUV422_UNSHIFT \
    if(c->srcFormat == PIX_FMT_YUV422P) {\
        srcStride[1] *= 2; \
        srcStride[2] *= 2; \
    } \
00129
/*
 * YUV2RGB_LOOP(depth): opens the per-row loop of a converter.
 * `depth` is the number of output bytes per pixel.
 *
 * Expects c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride and the
 * locals y and h_size to be in scope. It leaves the `for` body open; the
 * matching '}' is supplied by YUV2RGB_OPERANDS / YUV2RGB_OPERANDS_ALPHA.
 */
#define YUV2RGB_LOOP(depth) \
    /* width rounded up to a multiple of 8 pixels (one asm iteration) */ \
    h_size= (c->dstW+7)&~7; \
    /* drop the last group of 8 if the rounded row would exceed |dstStride[0]| bytes */ \
    if(h_size*depth > FFABS(dstStride[0])) h_size-=8; \
    \
    /* YUV2RGB relies on mm4 being zero when it starts */ \
    __asm__ volatile ("pxor %mm4, %mm4;" ); \
    for (y= 0; y<srcSliceH; y++ ) { \
        /* output row, offset by the slice start */ \
        uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0]; \
        const uint8_t *py = src[0] + y*srcStride[0]; \
        /* chroma rows are indexed with y>>1 */ \
        const uint8_t *pu = src[1] + (y>>1)*srcStride[1]; \
        const uint8_t *pv = src[2] + (y>>1)*srcStride[2]; \
        /* negative chroma-sample counter that runs up towards zero; the */ \
        /* operand macros pass the row pointers biased by -index to match  */ \
        x86_reg index= -h_size/2; \
00141
/*
 * YUV2RGB_INIT: opens the `__asm__ volatile (` statement for one row, loads
 * the first group of samples and places the loop label.
 *
 * The statement is intentionally left open: the caller appends further
 * template strings (YUV2RGB, a packing sequence, YUV2RGB_ENDLOOP) and one
 * of the YUV2RGB_OPERANDS macros supplies the operand lists and the
 * closing ");".
 *
 * Operands used here: %0 = index, %2/%3 = biased U/V row pointers,
 * %5 = biased Y row pointer (see YUV2RGB_OPERANDS).
 *
 * Every line of the body, including comment-only lines, must end in a
 * line-continuation backslash; otherwise the #define ends early and the
 * macro expands to nothing.
 */
#define YUV2RGB_INIT \
    /* each pass of the asm loop handles one row, 8 pixels per iteration */ \
    __asm__ volatile ( \
    /* first group of the row: 4 U bytes, 4 V bytes, 8 Y bytes */ \
    "movd (%2, %0), %%mm0;" \
    "movd (%3, %0), %%mm1;" \
    "movq (%5, %0, 2), %%mm6;" \
    \
    /* loop head; YUV2RGB_ENDLOOP branches back here with "js 1b" */ \
    "1: \n\t"
00160
/*
 * YUV2RGB_ENDLOOP(depth): tail of the asm loop body.
 * Advances the output pointer (%1) by depth*8 bytes, i.e. 8 pixels of
 * `depth` bytes each, advances the sample counter (%0) by 4 and jumps back
 * to label 1 while the counter is still negative. The product depth*8 is
 * stringified as written and evaluated by the assembler.
 */
#define YUV2RGB_ENDLOOP(depth) \
    "add $"AV_STRINGIFY(depth*8)", %1 \n\t" \
    "add $4, %0 \n\t" \
    " js 1b \n\t" \
00165
/*
 * YUV2RGB_OPERANDS: closes the asm statement opened by YUV2RGB_INIT, closes
 * the row loop opened by YUV2RGB_LOOP, and finishes the function.
 *
 * Operand map:
 *   %0 index (in/out)          %1 image (in/out)
 *   %2 pu - index              %3 pv - index
 *   %4 &c->redDither           %5 py - 2*index
 * The row pointers are biased by the (negative) index so that
 * "(%2, %0)" / "(%5, %0, 2)" address the start of the row when the loop begins.
 *
 * The "memory" clobber is required: the asm stores pixels through %1 and
 * reads the source planes and the context fields through plain pointers,
 * none of which are visible to the compiler as memory operands. Without it
 * the compiler may move or cache memory accesses across the statement,
 * including the c->*Dither stores made immediately before it.
 *
 * After the last row the store fence and EMMS/FEMMS are issued, and the
 * function returns the number of rows processed (srcSliceH).
 */
#define YUV2RGB_OPERANDS \
        : "+r" (index), "+r" (image) \
        : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index) \
        : "memory" \
        ); \
    } \
    __asm__ volatile (SFENCE"\n\t"EMMS); \
    return srcSliceH;
00173
/*
 * YUV2RGB_OPERANDS_ALPHA: variant of YUV2RGB_OPERANDS for sources with an
 * alpha plane. It closes the asm statement and the row loop and finishes
 * the function, and additionally passes the alpha row pointer.
 *
 * Operand map:
 *   %0 index (in/out)          %1 image (in/out)
 *   %2 pu - index              %3 pv - index
 *   %4 &c->redDither           %5 py - 2*index
 *   %6 pa - 2*index            (`pa` must be declared by the caller)
 *
 * The "memory" clobber is required: the asm stores pixels through %1 and
 * reads the source planes and the context fields through plain pointers,
 * none of which are visible to the compiler as memory operands.
 *
 * After the last row the store fence and EMMS/FEMMS are issued, and the
 * function returns the number of rows processed (srcSliceH).
 */
#define YUV2RGB_OPERANDS_ALPHA \
        : "+r" (index), "+r" (image) \
        : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index), "r" (pa - 2*index) \
        : "memory" \
        ); \
    } \
    __asm__ volatile (SFENCE"\n\t"EMMS); \
    return srcSliceH;
00181
/**
 * Convert a slice of planar YUV to 16-bit packed pixels.
 *
 * Each output word is built as (red & mask) << 8 | (green & mask) << 3 |
 * (blue & mask) >> 3, which is a 5-6-5 layout if mmx_redmask/mmx_grnmask
 * keep the top 5/6 bits of each byte (mask values are defined elsewhere).
 *
 * @param c          conversion context; its dither fields are rewritten for
 *                   every row and read by the asm through operand %4
 * @param src        source planes: [0] = Y, [1] = U, [2] = V
 * @param srcStride  source strides; entries 1 and 2 are doubled in place
 *                   when c->srcFormat is PIX_FMT_YUV422P
 * @param srcSliceY  first output row of this slice
 * @param srcSliceH  number of rows to convert
 * @param dst        destination planes; only dst[0] is written
 * @param dstStride  destination strides; only dstStride[0] is used
 * @return srcSliceH
 */
static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
                                       int srcSliceH, uint8_t* dst[], int dstStride[])
{
    int y, h_size;

    YUV422_UNSHIFT
    YUV2RGB_LOOP(2)

        /* alternate the dither tables between rows; red uses the opposite phase */
        c->blueDither= ff_dither8[y&1];
        c->greenDither= ff_dither4[y&1];
        c->redDither= ff_dither8[(y+1)&1];

        YUV2RGB_INIT
        YUV2RGB

        /* here: mm0 = 8 blue bytes, mm1 = 8 red, mm2 = 8 green */
#ifdef DITHER1XBPP
        /* add the per-row dither with unsigned saturation */
        "paddusb "BLUE_DITHER"(%4), %%mm0;"
        "paddusb "GREEN_DITHER"(%4), %%mm2;"
        "paddusb "RED_DITHER"(%4), %%mm1;"
#endif

        /* drop the low bits that do not fit into the 16-bit pixel */
        "pand "MANGLE(mmx_redmask)", %%mm0;"
        "pand "MANGLE(mmx_grnmask)", %%mm2;"
        "pand "MANGLE(mmx_redmask)", %%mm1;"

        /* move blue down to the low bits of its byte */
        "psrlw $3, %%mm0;"
        "pxor %%mm4, %%mm4;"

        /* keep copies of blue and green for pixels 4-7 */
        "movq %%mm0, %%mm5;"
        "movq %%mm2, %%mm7;"

        /* pixels 0-3 */
        "punpcklbw %%mm4, %%mm2;" /* green widened to words */
        "punpcklbw %%mm1, %%mm0;" /* word = red in the high byte, blue in the low byte */

        "psllw $3, %%mm2;"        /* shift green into place */
        "por %%mm2, %%mm0;"

        "movq 8 (%5, %0, 2), %%mm6;" /* preload the next 8 Y bytes */
        MOVNTQ " %%mm0, (%1);"       /* store pixels 0-3 */

        /* pixels 4-7, same steps on the high halves */
        "punpckhbw %%mm4, %%mm7;"
        "punpckhbw %%mm1, %%mm5;"

        "psllw $3, %%mm7;"
        "movd 4 (%2, %0), %%mm0;"    /* preload the next 4 U bytes */

        "por %%mm7, %%mm5;"
        "movd 4 (%3, %0), %%mm1;"    /* preload the next 4 V bytes */

        MOVNTQ " %%mm5, 8 (%1);"     /* store pixels 4-7 */

    YUV2RGB_ENDLOOP(2)
    YUV2RGB_OPERANDS
}
00238
/**
 * Convert a slice of planar YUV to 15-bit packed pixels stored in 16-bit
 * words.
 *
 * All three channels are masked with mmx_redmask. Each output word is
 * built as (red >> 1) << 8 | green << 2 | blue >> 3, which is a 5-5-5
 * layout if mmx_redmask keeps the top 5 bits of each byte (mask value is
 * defined elsewhere).
 *
 * @param c          conversion context; its dither fields are rewritten for
 *                   every row and read by the asm through operand %4
 * @param src        source planes: [0] = Y, [1] = U, [2] = V
 * @param srcStride  source strides; entries 1 and 2 are doubled in place
 *                   when c->srcFormat is PIX_FMT_YUV422P
 * @param srcSliceY  first output row of this slice
 * @param srcSliceH  number of rows to convert
 * @param dst        destination planes; only dst[0] is written
 * @param dstStride  destination strides; only dstStride[0] is used
 * @return srcSliceH
 */
static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
                                       int srcSliceH, uint8_t* dst[], int dstStride[])
{
    int y, h_size;

    YUV422_UNSHIFT
    YUV2RGB_LOOP(2)

        /* alternate the dither tables between rows; red uses the opposite phase */
        c->blueDither= ff_dither8[y&1];
        c->greenDither= ff_dither8[y&1];
        c->redDither= ff_dither8[(y+1)&1];

        YUV2RGB_INIT
        YUV2RGB

        /* here: mm0 = 8 blue bytes, mm1 = 8 red, mm2 = 8 green */
#ifdef DITHER1XBPP
        /* add the per-row dither with unsigned saturation */
        "paddusb "BLUE_DITHER"(%4), %%mm0 \n\t"
        "paddusb "GREEN_DITHER"(%4), %%mm2 \n\t"
        "paddusb "RED_DITHER"(%4), %%mm1 \n\t"
#endif

        /* drop the low bits that do not fit into the 15-bit pixel */
        "pand "MANGLE(mmx_redmask)", %%mm0;"
        "pand "MANGLE(mmx_redmask)", %%mm2;"
        "pand "MANGLE(mmx_redmask)", %%mm1;"

        "psrlw $3, %%mm0;" /* blue down to the low bits of its byte */
        "psrlw $1, %%mm1;" /* red one bit lower than in the 16-bit variant */
        "pxor %%mm4, %%mm4;"

        /* keep copies of blue and green for pixels 4-7 */
        "movq %%mm0, %%mm5;"
        "movq %%mm2, %%mm7;"

        /* pixels 0-3 */
        "punpcklbw %%mm4, %%mm2;" /* green widened to words */
        "punpcklbw %%mm1, %%mm0;" /* word = red in the high byte, blue in the low byte */

        "psllw $2, %%mm2;"        /* shift green into place */
        "por %%mm2, %%mm0;"

        "movq 8 (%5, %0, 2), %%mm6;" /* preload the next 8 Y bytes */
        MOVNTQ " %%mm0, (%1);"       /* store pixels 0-3 */

        /* pixels 4-7, same steps on the high halves */
        "punpckhbw %%mm4, %%mm7;"
        "punpckhbw %%mm1, %%mm5;"

        "psllw $2, %%mm7;"
        "movd 4 (%2, %0), %%mm0;"    /* preload the next 4 U bytes */

        "por %%mm7, %%mm5;"
        "movd 4 (%3, %0), %%mm1;"    /* preload the next 4 V bytes */

        MOVNTQ " %%mm5, 8 (%1);"     /* store pixels 4-7 */

    YUV2RGB_ENDLOOP(2)
    YUV2RGB_OPERANDS
}
00297
/*
 * RGB_PLANAR2PACKED24(red, blue): asm-template fragment that packs the 8
 * pixels left in mm0/mm1/mm2 by YUV2RGB into 24 bytes at (%1), 8(%1) and
 * 16(%1).
 *
 * `red` and `blue` are register-number strings ("0" or "1"); green is
 * always taken from mm2. In the non-MMX2 variant each pixel is emitted as
 * the bytes <blue>, green, <red> in increasing address order. The MMX2
 * variant appears to produce the same order, assuming ff_M24A/B/C are the
 * byte-select masks their use suggests (they are defined elsewhere).
 *
 * Both variants also prepare the next loop iteration: they reload mm0 with
 * the next 4 U bytes, mm1 with the next 4 V bytes, mm6 with the next 8 Y
 * bytes, and clear mm4. All of mm0..mm7 are overwritten.
 */
#undef RGB_PLANAR2PACKED24
#if HAVE_MMX2
#define RGB_PLANAR2PACKED24(red, blue)\
    /* pshufw replicates source bytes; the masks keep one byte per slot */ \
    "movq "MANGLE(ff_M24A)", %%mm4 \n\t"\
    "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
    "pshufw $0x50, %%mm"blue", %%mm5 \n\t" \
    "pshufw $0x50, %%mm2, %%mm3 \n\t" \
    "pshufw $0x00, %%mm"red", %%mm6 \n\t" \
\
    "pand %%mm4, %%mm5 \n\t" \
    "pand %%mm4, %%mm3 \n\t" \
    "pand %%mm7, %%mm6 \n\t" \
\
    "psllq $8, %%mm3 \n\t" \
    "por %%mm5, %%mm6 \n\t"\
    "por %%mm3, %%mm6 \n\t"\
    /* output bytes 0-7 */ \
    MOVNTQ" %%mm6, (%1) \n\t"\
\
    "psrlq $8, %%mm2 \n\t" \
    "pshufw $0xA5, %%mm"blue", %%mm5\n\t" \
    "pshufw $0x55, %%mm2, %%mm3 \n\t" \
    "pshufw $0xA5, %%mm"red", %%mm6 \n\t" \
\
    "pand "MANGLE(ff_M24B)", %%mm5 \n\t" \
    "pand %%mm7, %%mm3 \n\t" \
    "pand %%mm4, %%mm6 \n\t" \
\
    "por %%mm5, %%mm3 \n\t" \
    "por %%mm3, %%mm6 \n\t"\
    /* output bytes 8-15 */ \
    MOVNTQ" %%mm6, 8(%1) \n\t"\
\
    "pshufw $0xFF, %%mm"blue", %%mm5\n\t" \
    "pshufw $0xFA, %%mm2, %%mm3 \n\t" \
    "pshufw $0xFA, %%mm"red", %%mm6 \n\t" \
    /* mm0 has been read for the last time: reload the next 4 U bytes */ \
    "movd 4 (%2, %0), %%mm0;" \
\
    "pand %%mm7, %%mm5 \n\t" \
    "pand %%mm4, %%mm3 \n\t" \
    "pand "MANGLE(ff_M24B)", %%mm6 \n\t" \
    /* mm1 has been read for the last time: reload the next 4 V bytes */ \
    "movd 4 (%3, %0), %%mm1;" \
\
    "por %%mm5, %%mm3 \n\t"\
    "por %%mm3, %%mm6 \n\t"\
    /* output bytes 16-23 */ \
    MOVNTQ" %%mm6, 16(%1) \n\t"\
    /* next 8 Y bytes, and mm4 back to zero for YUV2RGB */ \
    "movq 8 (%5, %0, 2), %%mm6;" \
    "pxor %%mm4, %%mm4 \n\t"
#else
#define RGB_PLANAR2PACKED24(red, blue)\
    "pxor %%mm4, %%mm4 \n\t"\
    "movq %%mm"blue", %%mm5\n\t" \
    "movq %%mm"red", %%mm6 \n\t" \
    /* byte pairs: <blue>,green and <red>,0 for the low and high 4 pixels */ \
    "punpcklbw %%mm2, %%mm"blue"\n\t" \
    "punpcklbw %%mm4, %%mm"red" \n\t" \
    "punpckhbw %%mm2, %%mm5 \n\t" \
    "punpckhbw %%mm4, %%mm6 \n\t" \
    "movq %%mm"blue", %%mm7\n\t" \
    "movq %%mm5, %%mm3 \n\t" \
    /* four registers of two 4-byte pixels each: <blue>,green,<red>,0 */ \
    "punpcklwd %%mm"red", %%mm7 \n\t" \
    "punpckhwd %%mm"red", %%mm"blue"\n\t" \
    "punpcklwd %%mm6, %%mm5 \n\t" \
    "punpckhwd %%mm6, %%mm3 \n\t" \
\
    "movq %%mm7, %%mm2 \n\t" \
    "movq %%mm"blue", %%mm6\n\t" \
    "movq %%mm5, %%mm"red" \n\t" \
    "movq %%mm3, %%mm4 \n\t" \
\
    /* squeeze out the zero pad byte between the two pixels of each register */ \
    "psllq $40, %%mm7 \n\t" \
    "psllq $40, %%mm"blue"\n\t" \
    "psllq $40, %%mm5 \n\t" \
    "psllq $40, %%mm3 \n\t" \
\
    "punpckhdq %%mm2, %%mm7 \n\t" \
    "punpckhdq %%mm6, %%mm"blue"\n\t" \
    "punpckhdq %%mm"red", %%mm5 \n\t" \
    "punpckhdq %%mm4, %%mm3 \n\t" \
\
    "psrlq $8, %%mm7 \n\t" \
    "movq %%mm"blue", %%mm6\n\t" \
    "psllq $40, %%mm"blue"\n\t" \
    "por %%mm"blue", %%mm7\n\t" \
    /* output bytes 0-7 */ \
    MOVNTQ" %%mm7, (%1) \n\t"\
\
    "psrlq $24, %%mm6 \n\t" \
    "movq %%mm5, %%mm"red" \n\t" \
    "psllq $24, %%mm5 \n\t" \
    "por %%mm5, %%mm6 \n\t" \
    /* output bytes 8-15 */ \
    MOVNTQ" %%mm6, 8(%1) \n\t"\
\
    /* next 8 Y bytes */ \
    "movq 8 (%5, %0, 2), %%mm6;" \
\
    "psrlq $40, %%mm"red" \n\t" \
    "psllq $8, %%mm3 \n\t" \
    "por %%mm3, %%mm"red" \n\t" \
    /* output bytes 16-23 */ \
    MOVNTQ" %%mm"red", 16(%1)\n\t"\
\
    /* next 4 V and 4 U bytes, and mm4 back to zero for YUV2RGB */ \
    "movd 4 (%3, %0), %%mm1;" \
    "movd 4 (%2, %0), %%mm0;" \
    "pxor %%mm4, %%mm4 \n\t"
#endif
00398
/**
 * Convert a slice of planar YUV to packed 24-bit pixels (3 bytes each).
 *
 * The packing macro is invoked with red = mm0 and blue = mm1. Combined
 * with YUV2RGB's register use (mm0 = blue channel, mm1 = red channel) the
 * bytes of each pixel are expected to appear in memory as R, G, B.
 *
 * @param c          conversion context (coefficients are read through it)
 * @param src        source planes: [0] = Y, [1] = U, [2] = V
 * @param srcStride  source strides; entries 1 and 2 are doubled in place
 *                   when c->srcFormat is PIX_FMT_YUV422P
 * @param srcSliceY  first output row of this slice
 * @param srcSliceH  number of rows to convert
 * @param dst        destination planes; only dst[0] is written
 * @param dstStride  destination strides; only dstStride[0] is used
 * @return srcSliceH
 */
static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
                                       int srcSliceH, uint8_t* dst[], int dstStride[])
{
    int y, h_size;

    YUV422_UNSHIFT
    YUV2RGB_LOOP(3)

        YUV2RGB_INIT
        YUV2RGB
        /* pack 8 pixels into 24 bytes and preload the next group of samples */
        RGB_PLANAR2PACKED24("0", "1")

    YUV2RGB_ENDLOOP(3)
    YUV2RGB_OPERANDS
}
00415
/**
 * Convert a slice of planar YUV to packed 24-bit pixels (3 bytes each),
 * with the first and third byte swapped relative to yuv420_rgb24.
 *
 * The packing macro is invoked with red = mm1 and blue = mm0. Combined
 * with YUV2RGB's register use (mm0 = blue channel, mm1 = red channel) the
 * bytes of each pixel are expected to appear in memory as B, G, R.
 *
 * @param c          conversion context (coefficients are read through it)
 * @param src        source planes: [0] = Y, [1] = U, [2] = V
 * @param srcStride  source strides; entries 1 and 2 are doubled in place
 *                   when c->srcFormat is PIX_FMT_YUV422P
 * @param srcSliceY  first output row of this slice
 * @param srcSliceH  number of rows to convert
 * @param dst        destination planes; only dst[0] is written
 * @param dstStride  destination strides; only dstStride[0] is used
 * @return srcSliceH
 */
static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
                                       int srcSliceH, uint8_t* dst[], int dstStride[])
{
    int y, h_size;

    YUV422_UNSHIFT
    YUV2RGB_LOOP(3)

        YUV2RGB_INIT
        YUV2RGB
        /* pack 8 pixels into 24 bytes and preload the next group of samples */
        RGB_PLANAR2PACKED24("1", "0")

    YUV2RGB_ENDLOOP(3)
    YUV2RGB_OPERANDS
}
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
/*
 * MMX register numbers (as strings, for pasting after "%%mm") holding each
 * channel when the 32-bit packing macro runs. Blue, red and green match
 * the registers YUV2RGB leaves its results in; alpha (mm3) is filled by
 * the caller just before packing.
 */
#define REG_BLUE "0"
#define REG_RED "1"
#define REG_GREEN "2"
#define REG_ALPHA "3"
00452
/*
 * RGB_PLANAR2PACKED32(red, green, blue, alpha): asm-template fragment that
 * interleaves four planar 8-byte channel registers into 8 four-byte pixels
 * and stores them at (%1) .. 24(%1).
 *
 * The arguments are register-number strings. Each pixel is written as the
 * bytes <blue>, <green>, <red>, <alpha> in increasing address order.
 * The four argument registers are only read until the final reloads;
 * mm4..mm7 are used as scratch.
 *
 * After the stores the macro prepares the next loop iteration: mm0 = next
 * 4 U bytes, mm1 = next 4 V bytes, mm6 = next 8 Y bytes, mm4 = 0.
 *
 * Every line of the body, including comment-only lines, must end in a
 * line-continuation backslash; a line without one terminates the #define.
 */
#define RGB_PLANAR2PACKED32(red,green,blue,alpha) \
    /* working copies: mm6/mm7 serve pixels 0-3, mm4/mm5 serve pixels 4-7 */ \
    "movq %%mm" blue ", %%mm6;" \
    "movq %%mm" red ", %%mm7;" \
    \
    "movq %%mm" blue ", %%mm4;" \
    "movq %%mm" red ", %%mm5;" \
    \
    /* low halves: mm6 = <blue>,<green> pairs, mm7 = <red>,<alpha> pairs */ \
    "punpcklbw %%mm" green ", %%mm6;" \
    "punpcklbw %%mm" alpha ", %%mm7;" \
    \
    /* pixels 0-1 */ \
    "punpcklwd %%mm7, %%mm6;" \
    MOVNTQ " %%mm6, (%1);" \
    \
    /* rebuild the <blue>,<green> pairs consumed by the previous unpack */ \
    "movq %%mm" blue ", %%mm6;" \
    "punpcklbw %%mm" green ", %%mm6;" \
    \
    /* pixels 2-3 */ \
    "punpckhwd %%mm7, %%mm6;" \
    MOVNTQ " %%mm6, 8 (%1);" \
    \
    /* high halves: mm4 = <blue>,<green> pairs, mm5 = <red>,<alpha> pairs */ \
    "punpckhbw %%mm" green ", %%mm4;" \
    "punpckhbw %%mm" alpha ", %%mm5;" \
    \
    /* pixels 4-5 */ \
    "punpcklwd %%mm5, %%mm4;" \
    MOVNTQ " %%mm4, 16 (%1);" \
    \
    "movq %%mm" blue ", %%mm4;" \
    "punpckhbw %%mm" green ", %%mm4;" \
    \
    /* pixels 6-7 */ \
    "punpckhwd %%mm5, %%mm4;" \
    MOVNTQ " %%mm4, 24 (%1);" \
    \
    /* next 4 U and 4 V bytes */ \
    "movd 4 (%2, %0), %%mm0;" \
    "movd 4 (%3, %0), %%mm1;" \
    \
    /* mm4 back to zero for YUV2RGB, and the next 8 Y bytes */ \
    "pxor %%mm4, %%mm4;" \
    "movq 8 (%5, %0, 2), %%mm6;"
00493
/**
 * Convert a slice of planar YUV to packed 32-bit pixels with a constant,
 * fully set alpha byte.
 *
 * With the argument order used here each pixel is stored as the bytes
 * B, G, R, A in increasing address order.
 *
 * @param c          conversion context (coefficients are read through it)
 * @param src        source planes: [0] = Y, [1] = U, [2] = V
 * @param srcStride  source strides; entries 1 and 2 are doubled in place
 *                   when c->srcFormat is PIX_FMT_YUV422P
 * @param srcSliceY  first output row of this slice
 * @param srcSliceH  number of rows to convert
 * @param dst        destination planes; only dst[0] is written
 * @param dstStride  destination strides; only dstStride[0] is used
 * @return srcSliceH
 */
static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
                                       int srcSliceH, uint8_t* dst[], int dstStride[])
{
    int y, h_size;

    YUV422_UNSHIFT
    YUV2RGB_LOOP(4)

        YUV2RGB_INIT
        YUV2RGB
        "pcmpeqd %%mm3, %%mm3;" /* mm3 = all ones: alpha 0xFF for all 8 pixels */
        RGB_PLANAR2PACKED32(REG_RED,REG_GREEN,REG_BLUE,REG_ALPHA)

    YUV2RGB_ENDLOOP(4)
    YUV2RGB_OPERANDS
}
00510
00511 static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
00512 int srcSliceH, uint8_t* dst[], int dstStride[])
00513 {
00514 #if HAVE_7REGS
00515 int y, h_size;
00516
00517 YUV2RGB_LOOP(4)
00518
00519 const uint8_t *pa = src[3] + y*srcStride[3];
00520 YUV2RGB_INIT
00521 YUV2RGB
00522 "movq (%6, %0, 2), %%mm3;"
00523 RGB_PLANAR2PACKED32(REG_RED,REG_GREEN,REG_BLUE,REG_ALPHA)
00524
00525 YUV2RGB_ENDLOOP(4)
00526 YUV2RGB_OPERANDS_ALPHA
00527 #endif
00528 }
00529
/**
 * Convert a slice of planar YUV to packed 32-bit pixels with a constant,
 * fully set alpha byte, with red and blue swapped relative to
 * yuv420_rgb32.
 *
 * With the argument order used here each pixel is stored as the bytes
 * R, G, B, A in increasing address order.
 *
 * @param c          conversion context (coefficients are read through it)
 * @param src        source planes: [0] = Y, [1] = U, [2] = V
 * @param srcStride  source strides; entries 1 and 2 are doubled in place
 *                   when c->srcFormat is PIX_FMT_YUV422P
 * @param srcSliceY  first output row of this slice
 * @param srcSliceH  number of rows to convert
 * @param dst        destination planes; only dst[0] is written
 * @param dstStride  destination strides; only dstStride[0] is used
 * @return srcSliceH
 */
static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
                                       int srcSliceH, uint8_t* dst[], int dstStride[])
{
    int y, h_size;

    YUV422_UNSHIFT
    YUV2RGB_LOOP(4)

        YUV2RGB_INIT
        YUV2RGB
        "pcmpeqd %%mm3, %%mm3;" /* mm3 = all ones: alpha 0xFF for all 8 pixels */
        RGB_PLANAR2PACKED32(REG_BLUE,REG_GREEN,REG_RED,REG_ALPHA)

    YUV2RGB_ENDLOOP(4)
    YUV2RGB_OPERANDS
}
00546
00547 static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
00548 int srcSliceH, uint8_t* dst[], int dstStride[])
00549 {
00550 #if HAVE_7REGS
00551 int y, h_size;
00552
00553 YUV2RGB_LOOP(4)
00554
00555 const uint8_t *pa = src[3] + y*srcStride[3];
00556 YUV2RGB_INIT
00557 YUV2RGB
00558 "movq (%6, %0, 2), %%mm3;"
00559 RGB_PLANAR2PACKED32(REG_BLUE,REG_GREEN,REG_RED,REG_ALPHA)
00560
00561 YUV2RGB_ENDLOOP(4)
00562 YUV2RGB_OPERANDS_ALPHA
00563 #endif
00564 }