00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
/*
 * Helper macros for the inline assembly in this section.
 * Earlier definitions are removed first (presumably because this template
 * is processed more than once with different COMPILE_TEMPLATE_MMX2 values).
 *
 *  PREFETCH    - "prefetchnta" when COMPILE_TEMPLATE_MMX2 is set, otherwise
 *                an assembler comment (" # nop").
 *  MOVNTQ(a,b) - store instruction text: "movntq" when COMPILE_TEMPLATE_MMX2
 *                is set, plain "movq" otherwise. It forwards to REAL_MOVNTQ so
 *                that macro arguments are expanded before being stringified.
 *  MOVNTQ2     - only the mnemonic of that store, for code that writes the
 *                operand text itself.
 */
00021 #undef REAL_MOVNTQ
00022 #undef MOVNTQ
00023 #undef MOVNTQ2
00024 #undef PREFETCH
00025
00026 #if COMPILE_TEMPLATE_MMX2
00027 #define PREFETCH "prefetchnta"
00028 #else
00029 #define PREFETCH " # nop"
00030 #endif
00031
00032 #if COMPILE_TEMPLATE_MMX2
00033 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
00034 #define MOVNTQ2 "movntq "
00035 #else
00036 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
00037 #define MOVNTQ2 "movq "
00038 #endif
00039 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
00040
00041 #if !COMPILE_TEMPLATE_MMX2
/*
 * Load eight dither bytes and widen them to 16-bit words in MMX registers.
 *
 * srcDither: address of the 8 bytes that are read with movq.
 * rot:       when non-zero, the 64-bit value is rotated right by 24 bits
 *            (psrlq $24 / psllq $40 / por), i.e. by three bytes, before
 *            it is widened.
 *
 * Result: mm3 holds bytes 0..3 and mm4 holds bytes 4..7 of the (possibly
 * rotated) value, each zero-extended to a word; mm0 is left zeroed.
 * The asm statements declare no outputs and no clobbers, so the values are
 * handed over purely through the MMX register state; the caller must consume
 * them in its own asm statement (yuv2yuvX does this).
 *
 * Only defined when COMPILE_TEMPLATE_MMX2 is 0.
 */
00042 static av_always_inline void
00043 dither_8to16(const uint8_t *srcDither, int rot)
00044 {
00045 if (rot) {
00046 __asm__ volatile("pxor %%mm0, %%mm0\n\t"
00047 "movq (%0), %%mm3\n\t"
00048 "movq %%mm3, %%mm4\n\t"
00049 "psrlq $24, %%mm3\n\t"
00050 "psllq $40, %%mm4\n\t"
00051 "por %%mm4, %%mm3\n\t"
00052 "movq %%mm3, %%mm4\n\t"
00053 "punpcklbw %%mm0, %%mm3\n\t"
00054 "punpckhbw %%mm0, %%mm4\n\t"
00055 :: "r"(srcDither)
00056 );
00057 } else {
00058 __asm__ volatile("pxor %%mm0, %%mm0\n\t"
00059 "movq (%0), %%mm3\n\t"
00060 "movq %%mm3, %%mm4\n\t"
00061 "punpcklbw %%mm0, %%mm3\n\t"
00062 "punpckhbw %%mm0, %%mm4\n\t"
00063 :: "r"(srcDither)
00064 );
00065 }
00066 }
00067 #endif
00068
/*
 * Vertical filter that writes 8-bit samples for one plane.
 *
 * filter:      list of 16-byte entries. Each entry has a source-line pointer
 *              at offset 0 and a coefficient quadword at offset 8; the list
 *              ends at an entry whose pointer is zero (test / jnz).
 * dest, dstW:  8 bytes are stored per outer iteration at dest - offset + index.
 *              The index starts at offset, advances by 8 and the loop repeats
 *              while index < dstW + offset (unsigned compare, jb).
 * dither, offset: forwarded to dither_8to16(); offset is also the start index.
 * filterSize, src: not referenced in the body.
 *
 * The asm depends on mm3/mm4 still holding what dither_8to16() left there:
 * they are shifted right by 4, saved in mm6/mm7 and restored as the initial
 * accumulators for every group of 8 output samples. Per filter entry the
 * source words are multiplied with pmulhw and added; the sums are shifted
 * right by 3 and packed to unsigned bytes before the store (MOVNTQ2).
 * Source samples are read as 16-bit words at (line pointer + 2 * index).
 */
00069 static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
00070 const int16_t **src, uint8_t *dest, int dstW,
00071 const uint8_t *dither, int offset)
00072 {
00073 dither_8to16(dither, offset);
00074 __asm__ volatile(\
00075 "psraw $4, %%mm3\n\t"
00076 "psraw $4, %%mm4\n\t"
00077 "movq %%mm3, %%mm6\n\t"
00078 "movq %%mm4, %%mm7\n\t"
00079 "movl %3, %%ecx\n\t"
00080 "mov %0, %%"REG_d" \n\t"\
00081 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00082 ".p2align 4 \n\t" \
00083 "1: \n\t"\
00084 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00085 "movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" \
00086 "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" \
00087 "add $16, %%"REG_d" \n\t"\
00088 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00089 "test %%"REG_S", %%"REG_S" \n\t"\
00090 "pmulhw %%mm0, %%mm2 \n\t"\
00091 "pmulhw %%mm0, %%mm5 \n\t"\
00092 "paddw %%mm2, %%mm3 \n\t"\
00093 "paddw %%mm5, %%mm4 \n\t"\
00094 " jnz 1b \n\t"\
00095 "psraw $3, %%mm3 \n\t"\
00096 "psraw $3, %%mm4 \n\t"\
00097 "packuswb %%mm4, %%mm3 \n\t"
00098 MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t"
00099 "add $8, %%"REG_c" \n\t"\
00100 "cmp %2, %%"REG_c" \n\t"\
00101 "movq %%mm6, %%mm3\n\t"
00102 "movq %%mm7, %%mm4\n\t"
00103 "mov %0, %%"REG_d" \n\t"\
00104 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00105 "jb 1b \n\t"\
00106 :: "g" (filter),
00107 "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
00108 : "%"REG_d, "%"REG_S, "%"REG_c
00109 );
00110 }
00111
/*
 * Opens an asm statement (it is left unterminated; see YSCALEYUV2PACKEDX_END)
 * and emits the chroma part of the vertical filter.
 *
 * REG_a is zeroed and used as the running index; label "1" is the start of
 * the per-8-pixel loop that the WRITE* macros jump back to.
 * The filter list at CHR_MMX_FILTER_OFFSET(%0) is walked in 16-byte entries
 * (line pointer at +0, coefficient at +8) until a zero pointer is found.
 * For each entry one quadword is read at pointer + index and a second one
 * %6 bytes further on (operand %6 is uv_off in the users visible here).
 * mm3 and mm4 start from the value at VROUNDER_OFFSET(%0) and accumulate the
 * pmulhw products of the first and the second quadword respectively.
 */
00112 #define YSCALEYUV2PACKEDX_UV \
00113 __asm__ volatile(\
00114 "xor %%"REG_a", %%"REG_a" \n\t"\
00115 ".p2align 4 \n\t"\
00116 "nop \n\t"\
00117 "1: \n\t"\
00118 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00119 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00120 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00121 "movq %%mm3, %%mm4 \n\t"\
00122 ".p2align 4 \n\t"\
00123 "2: \n\t"\
00124 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00125 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00126 "add %6, %%"REG_S" \n\t" \
00127 "movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t" \
00128 "add $16, %%"REG_d" \n\t"\
00129 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00130 "pmulhw %%mm0, %%mm2 \n\t"\
00131 "pmulhw %%mm0, %%mm5 \n\t"\
00132 "paddw %%mm2, %%mm3 \n\t"\
00133 "paddw %%mm5, %%mm4 \n\t"\
00134 "test %%"REG_S", %%"REG_S" \n\t"\
00135 " jnz 2b \n\t"\
00136
/*
 * Vertical filter loop for a full-resolution plane (used for luma and alpha
 * in this file); must be placed inside an already opened asm statement.
 *
 * offset:      string with the displacement of the filter list relative to %0.
 * coeff:       MMX register that receives the coefficient of each entry.
 * src1, src2:  MMX scratch registers for the two source quadwords, read at
 *              pointer + 2 * REG_a and 8 bytes after that.
 * dst1, dst2:  MMX accumulators; both start from VROUNDER_OFFSET(%0) and
 *              receive the pmulhw products of src1 and src2.
 *
 * The list uses 16-byte entries (pointer at +0, coefficient at +8) and ends at
 * a zero pointer. REG_d and REG_S are overwritten; local label "2" is used.
 */
00137 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
00138 "lea "offset"(%0), %%"REG_d" \n\t"\
00139 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00140 "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
00141 "movq "#dst1", "#dst2" \n\t"\
00142 ".p2align 4 \n\t"\
00143 "2: \n\t"\
00144 "movq 8(%%"REG_d"), "#coeff" \n\t" \
00145 "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" \
00146 "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" \
00147 "add $16, %%"REG_d" \n\t"\
00148 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00149 "pmulhw "#coeff", "#src1" \n\t"\
00150 "pmulhw "#coeff", "#src2" \n\t"\
00151 "paddw "#src1", "#dst1" \n\t"\
00152 "paddw "#src2", "#dst2" \n\t"\
00153 "test %%"REG_S", %%"REG_S" \n\t"\
00154 " jnz 2b \n\t"\
00155
/*
 * Chroma pass followed by the luma pass of the vertical filter.
 * After expansion mm3/mm4 hold the chroma sums and mm1/mm7 the luma sums
 * (the luma pass uses mm0, mm2 and mm5 as scratch).
 */
00156 #define YSCALEYUV2PACKEDX \
00157 YSCALEYUV2PACKEDX_UV \
00158 YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
00159
/*
 * Operand and clobber lists that terminate an asm statement opened by
 * YSCALEYUV2PACKEDX_UV or YSCALEYUV2PACKEDX_ACCURATE_UV.
 *
 * Inputs: %0 = &c->redDither, %1..%3 = dummy (placeholders that keep the
 * later operand numbers fixed), %4 = dest, %5 = dstW_reg, %6 = uv_off.
 * Clobbers: REG_a, REG_d, REG_S.
 * The expanding function must have c, dummy, dest, dstW_reg and uv_off in scope.
 */
00160 #define YSCALEYUV2PACKEDX_END \
00161 :: "r" (&c->redDither), \
00162 "m" (dummy), "m" (dummy), "m" (dummy),\
00163 "r" (dest), "m" (dstW_reg), "m"(uv_off) \
00164 : "%"REG_a, "%"REG_d, "%"REG_S \
00165 );
00166
/*
 * Higher-precision variant of YSCALEYUV2PACKEDX_UV; it also opens the asm
 * statement and defines loop label "1".
 *
 * Two source lines are handled per inner iteration: words of the line at
 * entry offset 0 and of the line at APCK_PTR2 are interleaved and multiplied
 * with the coefficient pair at APCK_COEF using pmaddwd, giving 32-bit sums.
 * mm4/mm5 accumulate the plane read at pointer + REG_a, mm6/mm7 the plane
 * %6 bytes further on. Entries are APCK_SIZE bytes apart and the list ends
 * when the pointer at the next entry is zero.
 * After the loop the sums are shifted right by 16, packed to signed words,
 * the value at VROUNDER_OFFSET(%0) is added, and the two results are stored
 * to U_TEMP(%0) and V_TEMP(%0) so that the registers are free for the next pass.
 */
00167 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
00168 __asm__ volatile(\
00169 "xor %%"REG_a", %%"REG_a" \n\t"\
00170 ".p2align 4 \n\t"\
00171 "nop \n\t"\
00172 "1: \n\t"\
00173 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00174 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00175 "pxor %%mm4, %%mm4 \n\t"\
00176 "pxor %%mm5, %%mm5 \n\t"\
00177 "pxor %%mm6, %%mm6 \n\t"\
00178 "pxor %%mm7, %%mm7 \n\t"\
00179 ".p2align 4 \n\t"\
00180 "2: \n\t"\
00181 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" \
00182 "add %6, %%"REG_S" \n\t" \
00183 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00184 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00185 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" \
00186 "movq %%mm0, %%mm3 \n\t"\
00187 "punpcklwd %%mm1, %%mm0 \n\t"\
00188 "punpckhwd %%mm1, %%mm3 \n\t"\
00189 "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" \
00190 "pmaddwd %%mm1, %%mm0 \n\t"\
00191 "pmaddwd %%mm1, %%mm3 \n\t"\
00192 "paddd %%mm0, %%mm4 \n\t"\
00193 "paddd %%mm3, %%mm5 \n\t"\
00194 "add %6, %%"REG_S" \n\t" \
00195 "movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t" \
00196 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00197 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00198 "test %%"REG_S", %%"REG_S" \n\t"\
00199 "movq %%mm2, %%mm0 \n\t"\
00200 "punpcklwd %%mm3, %%mm2 \n\t"\
00201 "punpckhwd %%mm3, %%mm0 \n\t"\
00202 "pmaddwd %%mm1, %%mm2 \n\t"\
00203 "pmaddwd %%mm1, %%mm0 \n\t"\
00204 "paddd %%mm2, %%mm6 \n\t"\
00205 "paddd %%mm0, %%mm7 \n\t"\
00206 " jnz 2b \n\t"\
00207 "psrad $16, %%mm4 \n\t"\
00208 "psrad $16, %%mm5 \n\t"\
00209 "psrad $16, %%mm6 \n\t"\
00210 "psrad $16, %%mm7 \n\t"\
00211 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00212 "packssdw %%mm5, %%mm4 \n\t"\
00213 "packssdw %%mm7, %%mm6 \n\t"\
00214 "paddw %%mm0, %%mm4 \n\t"\
00215 "paddw %%mm0, %%mm6 \n\t"\
00216 "movq %%mm4, "U_TEMP"(%0) \n\t"\
00217 "movq %%mm6, "V_TEMP"(%0) \n\t"\
00218
/*
 * Higher-precision vertical filter for a full-resolution plane; `offset` is
 * the displacement string of the filter list relative to %0.
 *
 * Works like YSCALEYUV2PACKEDX_ACCURATE_UV (pairs of lines, pmaddwd, 32-bit
 * sums in mm1/mm5 and mm7/mm6), but reads two consecutive quadwords at
 * pointer + 2 * REG_a instead of two planes.
 * Result: mm1 and mm7 hold the two packed word results with the value at
 * VROUNDER_OFFSET(%0) added.
 * Side effect: the last two instructions reload mm3 from U_TEMP(%0) and mm4
 * from V_TEMP(%0), overwriting whatever those registers held before.
 */
00219 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
00220 "lea "offset"(%0), %%"REG_d" \n\t"\
00221 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00222 "pxor %%mm1, %%mm1 \n\t"\
00223 "pxor %%mm5, %%mm5 \n\t"\
00224 "pxor %%mm7, %%mm7 \n\t"\
00225 "pxor %%mm6, %%mm6 \n\t"\
00226 ".p2align 4 \n\t"\
00227 "2: \n\t"\
00228 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" \
00229 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00230 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00231 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" \
00232 "movq %%mm0, %%mm3 \n\t"\
00233 "punpcklwd %%mm4, %%mm0 \n\t"\
00234 "punpckhwd %%mm4, %%mm3 \n\t"\
00235 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" \
00236 "pmaddwd %%mm4, %%mm0 \n\t"\
00237 "pmaddwd %%mm4, %%mm3 \n\t"\
00238 "paddd %%mm0, %%mm1 \n\t"\
00239 "paddd %%mm3, %%mm5 \n\t"\
00240 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" \
00241 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00242 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00243 "test %%"REG_S", %%"REG_S" \n\t"\
00244 "movq %%mm2, %%mm0 \n\t"\
00245 "punpcklwd %%mm3, %%mm2 \n\t"\
00246 "punpckhwd %%mm3, %%mm0 \n\t"\
00247 "pmaddwd %%mm4, %%mm2 \n\t"\
00248 "pmaddwd %%mm4, %%mm0 \n\t"\
00249 "paddd %%mm2, %%mm7 \n\t"\
00250 "paddd %%mm0, %%mm6 \n\t"\
00251 " jnz 2b \n\t"\
00252 "psrad $16, %%mm1 \n\t"\
00253 "psrad $16, %%mm5 \n\t"\
00254 "psrad $16, %%mm7 \n\t"\
00255 "psrad $16, %%mm6 \n\t"\
00256 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00257 "packssdw %%mm5, %%mm1 \n\t"\
00258 "packssdw %%mm6, %%mm7 \n\t"\
00259 "paddw %%mm0, %%mm1 \n\t"\
00260 "paddw %%mm0, %%mm7 \n\t"\
00261 "movq "U_TEMP"(%0), %%mm3 \n\t"\
00262 "movq "V_TEMP"(%0), %%mm4 \n\t"\
00263
/*
 * Higher-precision chroma pass followed by the luma pass.
 * After expansion mm3/mm4 hold the chroma results (reloaded from U_TEMP and
 * V_TEMP) and mm1/mm7 the luma results.
 */
00264 #define YSCALEYUV2PACKEDX_ACCURATE \
00265 YSCALEYUV2PACKEDX_ACCURATE_UV \
00266 YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
00267
/*
 * YUV -> RGB conversion on the filter results; all constants are read
 * relative to %0.
 *
 * Inputs:  mm3 (U), mm4 (V), mm1 and mm7 (two luma quadwords).
 * Steps:   U_OFFSET/V_OFFSET are subtracted from the chroma and Y_OFFSET from
 *          the luma; the products with UG_COEFF, VG_COEFF, UB_COEFF, VR_COEFF
 *          and Y_COEFF are formed with pmulhw. Every chroma-derived word is
 *          duplicated (punpck?wd of a register with itself) before the luma
 *          is added, so one chroma sample serves two luma samples.
 * Outputs: packed unsigned bytes in mm2 (UB term -> blue), mm4 (UG+VG term ->
 *          green) and mm5 (VR term -> red).
 * mm0, mm1, mm3, mm6 and mm7 are overwritten.
 */
00268 #define YSCALEYUV2RGBX \
00269 "psubw "U_OFFSET"(%0), %%mm3 \n\t" \
00270 "psubw "V_OFFSET"(%0), %%mm4 \n\t" \
00271 "movq %%mm3, %%mm2 \n\t" \
00272 "movq %%mm4, %%mm5 \n\t" \
00273 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
00274 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
00275 \
00276 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
00277 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
00278 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" \
00279 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" \
00280 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
00281 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
00282 \
00283 "paddw %%mm3, %%mm4 \n\t"\
00284 "movq %%mm2, %%mm0 \n\t"\
00285 "movq %%mm5, %%mm6 \n\t"\
00286 "movq %%mm4, %%mm3 \n\t"\
00287 "punpcklwd %%mm2, %%mm2 \n\t"\
00288 "punpcklwd %%mm5, %%mm5 \n\t"\
00289 "punpcklwd %%mm4, %%mm4 \n\t"\
00290 "paddw %%mm1, %%mm2 \n\t"\
00291 "paddw %%mm1, %%mm5 \n\t"\
00292 "paddw %%mm1, %%mm4 \n\t"\
00293 "punpckhwd %%mm0, %%mm0 \n\t"\
00294 "punpckhwd %%mm6, %%mm6 \n\t"\
00295 "punpckhwd %%mm3, %%mm3 \n\t"\
00296 "paddw %%mm7, %%mm0 \n\t"\
00297 "paddw %%mm7, %%mm6 \n\t"\
00298 "paddw %%mm7, %%mm3 \n\t"\
00299 \
00300 "packuswb %%mm0, %%mm2 \n\t"\
00301 "packuswb %%mm6, %%mm5 \n\t"\
00302 "packuswb %%mm3, %%mm4 \n\t"\
00303
/*
 * Interleave four byte planes into 4-byte pixels, store 8 pixels and close
 * the pixel loop.
 *
 * dst, dstw, index: destination base, loop bound and running pixel index
 *                   (operand texts). Four quadwords are stored with MOVNTQ at
 *                   dst + 4 * index (+0, +8, +16, +24).
 * b, g, r, a:       MMX registers with 8 packed bytes each; within every
 *                   stored pixel the byte order in memory is b, g, r, a.
 * q0, q2, q3, t:    MMX scratch registers. b and r are overwritten as well.
 *
 * Afterwards index is advanced by 8 and control returns to label "1" while
 * index is below dstw (unsigned compare).
 * WRITEBGR32 is the wrapper that lets macro arguments expand before they are
 * stringified.
 */
00304 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
00305 "movq "#b", "#q2" \n\t" \
00306 "movq "#r", "#t" \n\t" \
00307 "punpcklbw "#g", "#b" \n\t" \
00308 "punpcklbw "#a", "#r" \n\t" \
00309 "punpckhbw "#g", "#q2" \n\t" \
00310 "punpckhbw "#a", "#t" \n\t" \
00311 "movq "#b", "#q0" \n\t" \
00312 "movq "#q2", "#q3" \n\t" \
00313 "punpcklwd "#r", "#q0" \n\t" \
00314 "punpckhwd "#r", "#b" \n\t" \
00315 "punpcklwd "#t", "#q2" \n\t" \
00316 "punpckhwd "#t", "#q3" \n\t" \
00317 \
00318 MOVNTQ( q0, (dst, index, 4))\
00319 MOVNTQ( b, 8(dst, index, 4))\
00320 MOVNTQ( q2, 16(dst, index, 4))\
00321 MOVNTQ( q3, 24(dst, index, 4))\
00322 \
00323 "add $8, "#index" \n\t"\
00324 "cmp "#dstw", "#index" \n\t"\
00325 " jb 1b \n\t"
00326 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
00327
/*
 * Vertical filter + YUV->RGB conversion to 32-bit pixels using the
 * higher-precision (pmaddwd based) filter macros. The "_ar" suffix presumably
 * stands for "accurate rounding" - confirm against the caller.
 *
 * Only c, dest and dstW are referenced by the C code; the filter lists and
 * constants are read by the asm relative to &c->redDither (operand %0), and
 * the chroma plane distance comes from c->uv_offx2. The remaining parameters
 * are not referenced in the body.
 *
 * With CONFIG_SWSCALE_ALPHA and c->alpPixBuf set, the alpha plane is filtered
 * too: the RGB bytes are parked in U_TEMP/V_TEMP/Y_TEMP because the alpha pass
 * needs the registers; that pass reloads mm3/mm4 from U_TEMP/V_TEMP itself,
 * which is why blue is passed to WRITEBGR32 in mm3 here. Otherwise the alpha
 * bytes are set to all ones with pcmpeqd.
 */
00328 static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
00329 const int16_t **lumSrc, int lumFilterSize,
00330 const int16_t *chrFilter, const int16_t **chrUSrc,
00331 const int16_t **chrVSrc,
00332 int chrFilterSize, const int16_t **alpSrc,
00333 uint8_t *dest, int dstW, int dstY)
00334 {
00335 x86_reg dummy=0;
00336 x86_reg dstW_reg = dstW;
00337 x86_reg uv_off = c->uv_offx2;
00338
00339 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00340 YSCALEYUV2PACKEDX_ACCURATE
00341 YSCALEYUV2RGBX
00342 "movq %%mm2, "U_TEMP"(%0) \n\t"
00343 "movq %%mm4, "V_TEMP"(%0) \n\t"
00344 "movq %%mm5, "Y_TEMP"(%0) \n\t"
00345 YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
00346 "movq "Y_TEMP"(%0), %%mm5 \n\t"
00347 "psraw $3, %%mm1 \n\t"
00348 "psraw $3, %%mm7 \n\t"
00349 "packuswb %%mm7, %%mm1 \n\t"
00350 WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
00351 YSCALEYUV2PACKEDX_END
00352 } else {
00353 YSCALEYUV2PACKEDX_ACCURATE
00354 YSCALEYUV2RGBX
00355 "pcmpeqd %%mm7, %%mm7 \n\t"
00356 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00357 YSCALEYUV2PACKEDX_END
00358 }
00359 }
00360
/*
 * Vertical filter + YUV->RGB conversion to 32-bit pixels using the pmulhw
 * based filter macros (YSCALEYUV2PACKEDX).
 *
 * Only c, dest and dstW are referenced by the C code; everything else is read
 * by the asm relative to &c->redDither (operand %0), and the chroma plane
 * distance comes from c->uv_offx2.
 *
 * With CONFIG_SWSCALE_ALPHA and c->alpPixBuf set, the alpha plane is filtered
 * with YSCALEYUV2PACKEDX_YA using mm0/mm3/mm6 as scratch, so the RGB bytes in
 * mm2/mm4/mm5 survive; the sums are shifted right by 3 and packed into mm1.
 * Otherwise the alpha bytes are set to all ones with pcmpeqd.
 */
00361 static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
00362 const int16_t **lumSrc, int lumFilterSize,
00363 const int16_t *chrFilter, const int16_t **chrUSrc,
00364 const int16_t **chrVSrc,
00365 int chrFilterSize, const int16_t **alpSrc,
00366 uint8_t *dest, int dstW, int dstY)
00367 {
00368 x86_reg dummy=0;
00369 x86_reg dstW_reg = dstW;
00370 x86_reg uv_off = c->uv_offx2;
00371
00372 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00373 YSCALEYUV2PACKEDX
00374 YSCALEYUV2RGBX
00375 YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
00376 "psraw $3, %%mm1 \n\t"
00377 "psraw $3, %%mm7 \n\t"
00378 "packuswb %%mm7, %%mm1 \n\t"
00379 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00380 YSCALEYUV2PACKEDX_END
00381 } else {
00382 YSCALEYUV2PACKEDX
00383 YSCALEYUV2RGBX
00384 "pcmpeqd %%mm7, %%mm7 \n\t"
00385 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00386 YSCALEYUV2PACKEDX_END
00387 }
00388 }
00389
/*
 * Pack 8 pixels into 16-bit words, store them and close the pixel loop.
 *
 * Inputs: mm2, mm4, mm5 = packed bytes (blue, green, red as produced by the
 * conversion macros in this file); mm7 must be zero (the users clear it with
 * pxor before expanding this macro).
 * The bytes are masked with bF8 / bFC / bF8. Each output word is built as
 * (mm2 byte >> 3) in bits 0..4, (mm4 byte << 3) in bits 5..10 and the masked
 * mm5 byte in the high byte (bits 11..15).
 * Two quadwords are stored with MOVNTQ at dst + 2 * index; index is advanced
 * by 8 and control returns to label "1" while index is below dstw.
 * mm1 and mm3 are used as scratch.
 */
00390 #define REAL_WRITERGB16(dst, dstw, index) \
00391 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00392 "pand "MANGLE(bFC)", %%mm4 \n\t" \
00393 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00394 "psrlq $3, %%mm2 \n\t"\
00395 \
00396 "movq %%mm2, %%mm1 \n\t"\
00397 "movq %%mm4, %%mm3 \n\t"\
00398 \
00399 "punpcklbw %%mm7, %%mm3 \n\t"\
00400 "punpcklbw %%mm5, %%mm2 \n\t"\
00401 "punpckhbw %%mm7, %%mm4 \n\t"\
00402 "punpckhbw %%mm5, %%mm1 \n\t"\
00403 \
00404 "psllq $3, %%mm3 \n\t"\
00405 "psllq $3, %%mm4 \n\t"\
00406 \
00407 "por %%mm3, %%mm2 \n\t"\
00408 "por %%mm4, %%mm1 \n\t"\
00409 \
00410 MOVNTQ(%%mm2, (dst, index, 2))\
00411 MOVNTQ(%%mm1, 8(dst, index, 2))\
00412 \
00413 "add $8, "#index" \n\t"\
00414 "cmp "#dstw", "#index" \n\t"\
00415 " jb 1b \n\t"
00416 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
00417
/*
 * Vertical filter + YUV->RGB conversion to 16-bit pixels (WRITERGB16) using
 * the higher-precision filter macros.
 *
 * Only c, dest and dstW are referenced by the C code; the asm reads the filter
 * lists and constants relative to &c->redDither (operand %0).
 * mm7 is cleared because WRITERGB16 expects it to be zero. When DITHER1XBPP
 * is defined, the values at BLUE_DITHER/GREEN_DITHER/RED_DITHER(%0) are added
 * with unsigned saturation before packing.
 */
00418 static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
00419 const int16_t **lumSrc, int lumFilterSize,
00420 const int16_t *chrFilter, const int16_t **chrUSrc,
00421 const int16_t **chrVSrc,
00422 int chrFilterSize, const int16_t **alpSrc,
00423 uint8_t *dest, int dstW, int dstY)
00424 {
00425 x86_reg dummy=0;
00426 x86_reg dstW_reg = dstW;
00427 x86_reg uv_off = c->uv_offx2;
00428
00429 YSCALEYUV2PACKEDX_ACCURATE
00430 YSCALEYUV2RGBX
00431 "pxor %%mm7, %%mm7 \n\t"
00432
00433 #ifdef DITHER1XBPP
00434 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
00435 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
00436 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
00437 #endif
00438 WRITERGB16(%4, %5, %%REGa)
00439 YSCALEYUV2PACKEDX_END
00440 }
00441
/*
 * Vertical filter + YUV->RGB conversion to 16-bit pixels (WRITERGB16) using
 * the pmulhw based filter macros.
 *
 * Only c, dest and dstW are referenced by the C code; the asm reads the filter
 * lists and constants relative to &c->redDither (operand %0).
 * mm7 is cleared because WRITERGB16 expects it to be zero. When DITHER1XBPP
 * is defined, the per-channel dither values are added with unsigned
 * saturation before packing.
 */
00442 static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
00443 const int16_t **lumSrc, int lumFilterSize,
00444 const int16_t *chrFilter, const int16_t **chrUSrc,
00445 const int16_t **chrVSrc,
00446 int chrFilterSize, const int16_t **alpSrc,
00447 uint8_t *dest, int dstW, int dstY)
00448 {
00449 x86_reg dummy=0;
00450 x86_reg dstW_reg = dstW;
00451 x86_reg uv_off = c->uv_offx2;
00452
00453 YSCALEYUV2PACKEDX
00454 YSCALEYUV2RGBX
00455 "pxor %%mm7, %%mm7 \n\t"
00456
00457 #ifdef DITHER1XBPP
00458 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
00459 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
00460 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
00461 #endif
00462 WRITERGB16(%4, %5, %%REGa)
00463 YSCALEYUV2PACKEDX_END
00464 }
00465
/*
 * Pack 8 pixels into 15-bit (5-5-5) words, store them and close the pixel loop.
 *
 * Same structure as REAL_WRITERGB16 with different masks and shifts: all three
 * inputs (mm2, mm4, mm5) are masked with bF8, the mm5 bytes are additionally
 * shifted right by 1 and the widened mm4 words are shifted left by 2. Each
 * output word therefore carries the mm2 value in bits 0..4, mm4 in bits 5..9
 * and mm5 in bits 10..14.
 * mm7 must be zero on entry; mm1 and mm3 are used as scratch.
 * Two quadwords are stored with MOVNTQ at dst + 2 * index; index is advanced
 * by 8 and control returns to label "1" while index is below dstw.
 */
00466 #define REAL_WRITERGB15(dst, dstw, index) \
00467 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00468 "pand "MANGLE(bF8)", %%mm4 \n\t" \
00469 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00470 "psrlq $3, %%mm2 \n\t"\
00471 "psrlq $1, %%mm5 \n\t"\
00472 \
00473 "movq %%mm2, %%mm1 \n\t"\
00474 "movq %%mm4, %%mm3 \n\t"\
00475 \
00476 "punpcklbw %%mm7, %%mm3 \n\t"\
00477 "punpcklbw %%mm5, %%mm2 \n\t"\
00478 "punpckhbw %%mm7, %%mm4 \n\t"\
00479 "punpckhbw %%mm5, %%mm1 \n\t"\
00480 \
00481 "psllq $2, %%mm3 \n\t"\
00482 "psllq $2, %%mm4 \n\t"\
00483 \
00484 "por %%mm3, %%mm2 \n\t"\
00485 "por %%mm4, %%mm1 \n\t"\
00486 \
00487 MOVNTQ(%%mm2, (dst, index, 2))\
00488 MOVNTQ(%%mm1, 8(dst, index, 2))\
00489 \
00490 "add $8, "#index" \n\t"\
00491 "cmp "#dstw", "#index" \n\t"\
00492 " jb 1b \n\t"
00493 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
00494
/*
 * Vertical filter + YUV->RGB conversion to 15-bit pixels (WRITERGB15) using
 * the higher-precision filter macros.
 *
 * Only c, dest and dstW are referenced by the C code; the asm reads the filter
 * lists and constants relative to &c->redDither (operand %0).
 * mm7 is cleared because WRITERGB15 expects it to be zero. When DITHER1XBPP
 * is defined, the per-channel dither values are added with unsigned
 * saturation before packing.
 */
00495 static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
00496 const int16_t **lumSrc, int lumFilterSize,
00497 const int16_t *chrFilter, const int16_t **chrUSrc,
00498 const int16_t **chrVSrc,
00499 int chrFilterSize, const int16_t **alpSrc,
00500 uint8_t *dest, int dstW, int dstY)
00501 {
00502 x86_reg dummy=0;
00503 x86_reg dstW_reg = dstW;
00504 x86_reg uv_off = c->uv_offx2;
00505
00506 YSCALEYUV2PACKEDX_ACCURATE
00507 YSCALEYUV2RGBX
00508 "pxor %%mm7, %%mm7 \n\t"
00509
00510 #ifdef DITHER1XBPP
00511 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
00512 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
00513 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
00514 #endif
00515 WRITERGB15(%4, %5, %%REGa)
00516 YSCALEYUV2PACKEDX_END
00517 }
00518
/*
 * Vertical filter + YUV->RGB conversion to 15-bit pixels (WRITERGB15) using
 * the pmulhw based filter macros.
 *
 * Only c, dest and dstW are referenced by the C code; the asm reads the filter
 * lists and constants relative to &c->redDither (operand %0).
 * mm7 is cleared because WRITERGB15 expects it to be zero. When DITHER1XBPP
 * is defined, the per-channel dither values are added with unsigned
 * saturation before packing.
 */
00519 static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
00520 const int16_t **lumSrc, int lumFilterSize,
00521 const int16_t *chrFilter, const int16_t **chrUSrc,
00522 const int16_t **chrVSrc,
00523 int chrFilterSize, const int16_t **alpSrc,
00524 uint8_t *dest, int dstW, int dstY)
00525 {
00526 x86_reg dummy=0;
00527 x86_reg dstW_reg = dstW;
00528 x86_reg uv_off = c->uv_offx2;
00529
00530 YSCALEYUV2PACKEDX
00531 YSCALEYUV2RGBX
00532 "pxor %%mm7, %%mm7 \n\t"
00533
00534 #ifdef DITHER1XBPP
00535 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
00536 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
00537 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
00538 #endif
00539 WRITERGB15(%4, %5, %%REGa)
00540 YSCALEYUV2PACKEDX_END
00541 }
00542
/*
 * Store 8 pixels as 24 bytes (3 bytes per pixel) using plain MMX instructions
 * and close the pixel loop.
 *
 * Inputs: mm2, mm4, mm5 = packed bytes (blue, green, red as produced by the
 * conversion macros in this file); mm7 must be zero on entry.
 * The bytes are first interleaved into 4-byte groups (mm2, mm4, mm5, mm7
 * byte order) and then shifted and OR-ed together so that the fourth byte of
 * every group is dropped. Three quadwords are stored with MOVNTQ at dst,
 * dst + 8 and dst + 16.
 * dst is advanced by 24 inside the macro, so it has to name a register the
 * asm is allowed to modify. index is advanced by 8 and control returns to
 * label "1" while index is below dstw. All of mm0..mm7 are overwritten.
 */
00543 #define WRITEBGR24MMX(dst, dstw, index) \
00544 \
00545 "movq %%mm2, %%mm1 \n\t" \
00546 "movq %%mm5, %%mm6 \n\t" \
00547 "punpcklbw %%mm4, %%mm2 \n\t" \
00548 "punpcklbw %%mm7, %%mm5 \n\t" \
00549 "punpckhbw %%mm4, %%mm1 \n\t" \
00550 "punpckhbw %%mm7, %%mm6 \n\t" \
00551 "movq %%mm2, %%mm0 \n\t" \
00552 "movq %%mm1, %%mm3 \n\t" \
00553 "punpcklwd %%mm5, %%mm0 \n\t" \
00554 "punpckhwd %%mm5, %%mm2 \n\t" \
00555 "punpcklwd %%mm6, %%mm1 \n\t" \
00556 "punpckhwd %%mm6, %%mm3 \n\t" \
00557 \
00558 "movq %%mm0, %%mm4 \n\t" \
00559 "movq %%mm2, %%mm6 \n\t" \
00560 "movq %%mm1, %%mm5 \n\t" \
00561 "movq %%mm3, %%mm7 \n\t" \
00562 \
00563 "psllq $40, %%mm0 \n\t" \
00564 "psllq $40, %%mm2 \n\t" \
00565 "psllq $40, %%mm1 \n\t" \
00566 "psllq $40, %%mm3 \n\t" \
00567 \
00568 "punpckhdq %%mm4, %%mm0 \n\t" \
00569 "punpckhdq %%mm6, %%mm2 \n\t" \
00570 "punpckhdq %%mm5, %%mm1 \n\t" \
00571 "punpckhdq %%mm7, %%mm3 \n\t" \
00572 \
00573 "psrlq $8, %%mm0 \n\t" \
00574 "movq %%mm2, %%mm6 \n\t" \
00575 "psllq $40, %%mm2 \n\t" \
00576 "por %%mm2, %%mm0 \n\t" \
00577 MOVNTQ(%%mm0, (dst))\
00578 \
00579 "psrlq $24, %%mm6 \n\t" \
00580 "movq %%mm1, %%mm5 \n\t" \
00581 "psllq $24, %%mm1 \n\t" \
00582 "por %%mm1, %%mm6 \n\t" \
00583 MOVNTQ(%%mm6, 8(dst))\
00584 \
00585 "psrlq $40, %%mm5 \n\t" \
00586 "psllq $8, %%mm3 \n\t" \
00587 "por %%mm3, %%mm5 \n\t" \
00588 MOVNTQ(%%mm5, 16(dst))\
00589 \
00590 "add $24, "#dst" \n\t"\
00591 \
00592 "add $8, "#index" \n\t"\
00593 "cmp "#dstw", "#index" \n\t"\
00594 " jb 1b \n\t"
00595
/*
 * Store 8 pixels as 24 bytes using pshufw and the constant masks ff_M24A,
 * ff_M24B and ff_M24C, then close the pixel loop.
 *
 * Inputs: mm2, mm4, mm5 = packed bytes (same roles as in WRITEBGR24MMX).
 * mm0 and mm7 are loaded with ff_M24A and ff_M24C; mm1, mm3 and mm6 are
 * scratch, and mm4 is shifted right by 8 bits after the first store.
 * Each of the three output quadwords is assembled by shuffling the inputs,
 * masking and OR-ing, and is stored with MOVNTQ at dst, dst + 8, dst + 16.
 * NOTE(review): this is expected to yield the same byte layout as
 * WRITEBGR24MMX; that equivalence has not been verified here.
 * dst is advanced by 24 inside the macro (it must name a modifiable register);
 * index is advanced by 8 and control returns to label "1" while index is
 * below dstw.
 */
00596 #define WRITEBGR24MMX2(dst, dstw, index) \
00597 \
00598 "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
00599 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
00600 "pshufw $0x50, %%mm2, %%mm1 \n\t" \
00601 "pshufw $0x50, %%mm4, %%mm3 \n\t" \
00602 "pshufw $0x00, %%mm5, %%mm6 \n\t" \
00603 \
00604 "pand %%mm0, %%mm1 \n\t" \
00605 "pand %%mm0, %%mm3 \n\t" \
00606 "pand %%mm7, %%mm6 \n\t" \
00607 \
00608 "psllq $8, %%mm3 \n\t" \
00609 "por %%mm1, %%mm6 \n\t"\
00610 "por %%mm3, %%mm6 \n\t"\
00611 MOVNTQ(%%mm6, (dst))\
00612 \
00613 "psrlq $8, %%mm4 \n\t" \
00614 "pshufw $0xA5, %%mm2, %%mm1 \n\t" \
00615 "pshufw $0x55, %%mm4, %%mm3 \n\t" \
00616 "pshufw $0xA5, %%mm5, %%mm6 \n\t" \
00617 \
00618 "pand "MANGLE(ff_M24B)", %%mm1 \n\t" \
00619 "pand %%mm7, %%mm3 \n\t" \
00620 "pand %%mm0, %%mm6 \n\t" \
00621 \
00622 "por %%mm1, %%mm3 \n\t" \
00623 "por %%mm3, %%mm6 \n\t"\
00624 MOVNTQ(%%mm6, 8(dst))\
00625 \
00626 "pshufw $0xFF, %%mm2, %%mm1 \n\t" \
00627 "pshufw $0xFA, %%mm4, %%mm3 \n\t" \
00628 "pshufw $0xFA, %%mm5, %%mm6 \n\t" \
00629 \
00630 "pand %%mm7, %%mm1 \n\t" \
00631 "pand %%mm0, %%mm3 \n\t" \
00632 "pand "MANGLE(ff_M24B)", %%mm6 \n\t" \
00633 \
00634 "por %%mm1, %%mm3 \n\t"\
00635 "por %%mm3, %%mm6 \n\t"\
00636 MOVNTQ(%%mm6, 16(dst))\
00637 \
00638 "add $24, "#dst" \n\t"\
00639 \
00640 "add $8, "#index" \n\t"\
00641 "cmp "#dstw", "#index" \n\t"\
00642 " jb 1b \n\t"
00643
/*
 * Select the 24-bit writer for this instantiation: the pshufw based version
 * when COMPILE_TEMPLATE_MMX2 is set, the plain MMX version otherwise.
 * Any earlier definition of WRITEBGR24 is dropped first.
 */
00644 #if COMPILE_TEMPLATE_MMX2
00645 #undef WRITEBGR24
00646 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
00647 #else
00648 #undef WRITEBGR24
00649 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
00650 #endif
00651
/*
 * Vertical filter + YUV->RGB conversion to 24-bit pixels (WRITEBGR24) using
 * the higher-precision filter macros.
 *
 * Only c, dest and dstW are referenced by the C code; the asm reads the filter
 * lists and constants relative to &c->redDither (operand %0).
 * WRITEBGR24 advances its destination register, so the write address
 * dest + 3 * index is computed into REG_c on every pass of loop "1"
 * (lea + add %4). For that reason the operand/clobber list is spelled out
 * here with REG_c added instead of using YSCALEYUV2PACKEDX_END.
 * mm7 is cleared before the writer is expanded.
 */
00652 static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
00653 const int16_t **lumSrc, int lumFilterSize,
00654 const int16_t *chrFilter, const int16_t **chrUSrc,
00655 const int16_t **chrVSrc,
00656 int chrFilterSize, const int16_t **alpSrc,
00657 uint8_t *dest, int dstW, int dstY)
00658 {
00659 x86_reg dummy=0;
00660 x86_reg dstW_reg = dstW;
00661 x86_reg uv_off = c->uv_offx2;
00662
00663 YSCALEYUV2PACKEDX_ACCURATE
00664 YSCALEYUV2RGBX
00665 "pxor %%mm7, %%mm7 \n\t"
00666 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t"
00667 "add %4, %%"REG_c" \n\t"
00668 WRITEBGR24(%%REGc, %5, %%REGa)
00669 :: "r" (&c->redDither),
00670 "m" (dummy), "m" (dummy), "m" (dummy),
00671 "r" (dest), "m" (dstW_reg), "m"(uv_off)
00672 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
00673 );
00674 }
00675
/*
 * Vertical filter + YUV->RGB conversion to 24-bit pixels (WRITEBGR24) using
 * the pmulhw based filter macros.
 *
 * Only c, dest and dstW are referenced by the C code; the asm reads the filter
 * lists and constants relative to &c->redDither (operand %0).
 * The write address dest + 3 * index is computed into REG_c on every pass of
 * loop "1" because WRITEBGR24 advances its destination register; REG_c is
 * therefore listed as an additional clobber in the explicit operand list.
 * mm7 is cleared before the writer is expanded.
 */
00676 static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
00677 const int16_t **lumSrc, int lumFilterSize,
00678 const int16_t *chrFilter, const int16_t **chrUSrc,
00679 const int16_t **chrVSrc,
00680 int chrFilterSize, const int16_t **alpSrc,
00681 uint8_t *dest, int dstW, int dstY)
00682 {
00683 x86_reg dummy=0;
00684 x86_reg dstW_reg = dstW;
00685 x86_reg uv_off = c->uv_offx2;
00686
00687 YSCALEYUV2PACKEDX
00688 YSCALEYUV2RGBX
00689 "pxor %%mm7, %%mm7 \n\t"
00690 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t"
00691 "add %4, %%"REG_c" \n\t"
00692 WRITEBGR24(%%REGc, %5, %%REGa)
00693 :: "r" (&c->redDither),
00694 "m" (dummy), "m" (dummy), "m" (dummy),
00695 "r" (dest), "m" (dstW_reg), "m"(uv_off)
00696 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
00697 );
00698 }
00699
/*
 * Pack 8 pixels as interleaved luma/chroma bytes, store them and close the
 * pixel loop.
 *
 * Inputs (16-bit words): mm1 and mm7 = luma, mm3 and mm4 = the two chroma
 * planes (U and V in the users of this macro).
 * The words are packed to unsigned bytes, the chroma bytes are interleaved
 * (mm3 byte, mm4 byte, ...) and then merged with the luma bytes so that the
 * memory order is: luma, mm3 chroma, luma, mm4 chroma.
 * Two quadwords are stored with MOVNTQ at dst + 2 * index; index is advanced
 * by 8 and control returns to label "1" while index is below dstw.
 */
00700 #define REAL_WRITEYUY2(dst, dstw, index) \
00701 "packuswb %%mm3, %%mm3 \n\t"\
00702 "packuswb %%mm4, %%mm4 \n\t"\
00703 "packuswb %%mm7, %%mm1 \n\t"\
00704 "punpcklbw %%mm4, %%mm3 \n\t"\
00705 "movq %%mm1, %%mm7 \n\t"\
00706 "punpcklbw %%mm3, %%mm1 \n\t"\
00707 "punpckhbw %%mm3, %%mm7 \n\t"\
00708 \
00709 MOVNTQ(%%mm1, (dst, index, 2))\
00710 MOVNTQ(%%mm7, 8(dst, index, 2))\
00711 \
00712 "add $8, "#index" \n\t"\
00713 "cmp "#dstw", "#index" \n\t"\
00714 " jb 1b \n\t"
00715 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
00716
/*
 * Vertical filter with interleaved luma/chroma output (WRITEYUY2) using the
 * higher-precision filter macros; no colour conversion is done.
 *
 * Only c, dest and dstW are referenced by the C code; the asm reads the filter
 * lists relative to &c->redDither (operand %0).
 * The four filter results are shifted right by 3 before they are packed.
 */
00717 static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
00718 const int16_t **lumSrc, int lumFilterSize,
00719 const int16_t *chrFilter, const int16_t **chrUSrc,
00720 const int16_t **chrVSrc,
00721 int chrFilterSize, const int16_t **alpSrc,
00722 uint8_t *dest, int dstW, int dstY)
00723 {
00724 x86_reg dummy=0;
00725 x86_reg dstW_reg = dstW;
00726 x86_reg uv_off = c->uv_offx2;
00727
00728 YSCALEYUV2PACKEDX_ACCURATE
00729
00730 "psraw $3, %%mm3 \n\t"
00731 "psraw $3, %%mm4 \n\t"
00732 "psraw $3, %%mm1 \n\t"
00733 "psraw $3, %%mm7 \n\t"
00734 WRITEYUY2(%4, %5, %%REGa)
00735 YSCALEYUV2PACKEDX_END
00736 }
00737
/*
 * Vertical filter with interleaved luma/chroma output (WRITEYUY2) using the
 * pmulhw based filter macros; no colour conversion is done.
 *
 * Only c, dest and dstW are referenced by the C code; the asm reads the filter
 * lists relative to &c->redDither (operand %0).
 * The four filter results are shifted right by 3 before they are packed.
 */
00738 static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
00739 const int16_t **lumSrc, int lumFilterSize,
00740 const int16_t *chrFilter, const int16_t **chrUSrc,
00741 const int16_t **chrVSrc,
00742 int chrFilterSize, const int16_t **alpSrc,
00743 uint8_t *dest, int dstW, int dstY)
00744 {
00745 x86_reg dummy=0;
00746 x86_reg dstW_reg = dstW;
00747 x86_reg uv_off = c->uv_offx2;
00748
00749 YSCALEYUV2PACKEDX
00750
00751 "psraw $3, %%mm3 \n\t"
00752 "psraw $3, %%mm4 \n\t"
00753 "psraw $3, %%mm1 \n\t"
00754 "psraw $3, %%mm7 \n\t"
00755 WRITEYUY2(%4, %5, %%REGa)
00756 YSCALEYUV2PACKEDX_END
00757 }
00758
/*
 * Chroma part of the two-line blend followed by the start of the RGB
 * conversion.
 *
 * index: register used as running index; it is zeroed here and label "1"
 *        (the loop head the WRITE* macros jump back to) is defined.
 * c:     base register for the context-relative constants.
 *
 * Operands %2 and %3 are the two chroma source lines. One quadword is read
 * from each at index and another one UV_OFF_BYTE(c) bytes further on (index
 * is temporarily advanced and restored). For both pairs the result is
 * ((line %2 - line %3) * coefficient, high word) + (line %3 >> 4), with the
 * coefficient taken from CHR_MMX_FILTER_OFFSET+8(c).
 * U_OFFSET/V_OFFSET are then subtracted; copies are kept in mm2/mm5 for the
 * blue/red terms while mm3/mm4 are multiplied with UG_COEFF/VG_COEFF.
 */
00759 #define REAL_YSCALEYUV2RGB_UV(index, c) \
00760 "xor "#index", "#index" \n\t"\
00761 ".p2align 4 \n\t"\
00762 "1: \n\t"\
00763 "movq (%2, "#index"), %%mm2 \n\t" \
00764 "movq (%3, "#index"), %%mm3 \n\t" \
00765 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
00766 "movq (%2, "#index"), %%mm5 \n\t" \
00767 "movq (%3, "#index"), %%mm4 \n\t" \
00768 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
00769 "psubw %%mm3, %%mm2 \n\t" \
00770 "psubw %%mm4, %%mm5 \n\t" \
00771 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00772 "pmulhw %%mm0, %%mm2 \n\t" \
00773 "pmulhw %%mm0, %%mm5 \n\t" \
00774 "psraw $4, %%mm3 \n\t" \
00775 "psraw $4, %%mm4 \n\t" \
00776 "paddw %%mm2, %%mm3 \n\t" \
00777 "paddw %%mm5, %%mm4 \n\t" \
00778 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00779 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00780 "movq %%mm3, %%mm2 \n\t" \
00781 "movq %%mm4, %%mm5 \n\t" \
00782 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00783 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00784 \
00785
/*
 * Blend two full-resolution lines (luma, or alpha when called with the alpha
 * buffers).
 *
 * index:  running index register; samples are read at base + 2 * index.
 * c:      base register for the context-relative constants.
 * b1, b2: registers holding the addresses of the two source lines.
 *
 * Two quadwords are processed; the result is
 * ((b1 - b2) * coefficient, high word) + (b2 >> 4), with the coefficient
 * taken from LUM_MMX_FILTER_OFFSET+8(c). Results are left in mm1 and mm7;
 * mm0 and mm6 are scratch.
 */
00786 #define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
00787 "movq ("#b1", "#index", 2), %%mm0 \n\t" \
00788 "movq ("#b2", "#index", 2), %%mm1 \n\t" \
00789 "movq 8("#b1", "#index", 2), %%mm6 \n\t" \
00790 "movq 8("#b2", "#index", 2), %%mm7 \n\t" \
00791 "psubw %%mm1, %%mm0 \n\t" \
00792 "psubw %%mm7, %%mm6 \n\t" \
00793 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
00794 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
00795 "psraw $4, %%mm1 \n\t" \
00796 "psraw $4, %%mm7 \n\t" \
00797 "paddw %%mm0, %%mm1 \n\t" \
00798 "paddw %%mm6, %%mm7 \n\t" \
00799
/*
 * Remaining part of the YUV->RGB conversion; `c` is the base register for the
 * constants.
 *
 * Inputs:  mm2/mm5 = offset-corrected chroma copies, mm3/mm4 = chroma already
 *          multiplied with UG_COEFF/VG_COEFF, mm1/mm7 = blended luma.
 * Steps:   mm2 and mm5 are multiplied with UB_COEFF and VR_COEFF, Y_OFFSET is
 *          subtracted from the luma which is then multiplied with Y_COEFF;
 *          the two green terms are summed. Every chroma-derived word is
 *          duplicated before the luma is added, so one chroma sample serves
 *          two luma samples.
 * Outputs: packed unsigned bytes in mm2 (blue), mm4 (green) and mm5 (red).
 * mm0, mm3 and mm6 are overwritten.
 */
00800 #define REAL_YSCALEYUV2RGB_COEFF(c) \
00801 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00802 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00803 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00804 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00805 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00806 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00807 \
00808 "paddw %%mm3, %%mm4 \n\t"\
00809 "movq %%mm2, %%mm0 \n\t"\
00810 "movq %%mm5, %%mm6 \n\t"\
00811 "movq %%mm4, %%mm3 \n\t"\
00812 "punpcklwd %%mm2, %%mm2 \n\t"\
00813 "punpcklwd %%mm5, %%mm5 \n\t"\
00814 "punpcklwd %%mm4, %%mm4 \n\t"\
00815 "paddw %%mm1, %%mm2 \n\t"\
00816 "paddw %%mm1, %%mm5 \n\t"\
00817 "paddw %%mm1, %%mm4 \n\t"\
00818 "punpckhwd %%mm0, %%mm0 \n\t"\
00819 "punpckhwd %%mm6, %%mm6 \n\t"\
00820 "punpckhwd %%mm3, %%mm3 \n\t"\
00821 "paddw %%mm7, %%mm0 \n\t"\
00822 "paddw %%mm7, %%mm6 \n\t"\
00823 "paddw %%mm7, %%mm3 \n\t"\
00824 \
00825 "packuswb %%mm0, %%mm2 \n\t"\
00826 "packuswb %%mm6, %%mm5 \n\t"\
00827 "packuswb %%mm3, %%mm4 \n\t"\
00828
/*
 * YSCALEYUV2RGB_YA: wrapper that lets its arguments expand before
 * REAL_YSCALEYUV2RGB_YA stringifies them.
 * YSCALEYUV2RGB: complete two-line blend and RGB conversion - chroma blend,
 * luma blend of operands %0 and %1, then the coefficient stage. It defines
 * loop label "1" and leaves packed bytes in mm2, mm4 and mm5.
 */
00829 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
00830
00831 #define YSCALEYUV2RGB(index, c) \
00832 REAL_YSCALEYUV2RGB_UV(index, c) \
00833 REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
00834 REAL_YSCALEYUV2RGB_COEFF(c)
00835
/*
 * Blend two input lines, convert to RGB and write 32-bit pixels.
 *
 * Operands: %0/%1 = buf[0]/buf[1] (luma lines), %2/%3 = ubuf[0]/ubuf[1],
 * %4 = dest, %5 = &c->redDither. vbuf, dstW, yalpha, uvalpha and y are not
 * referenced in the body: the second chroma plane is read UV_OFF_BYTE(%5)
 * bytes after the ubuf lines, the blend coefficients come from the context,
 * and the loop bound is the memory operand 8280(%5).
 * NOTE(review): 8280 is a hard-coded displacement relative to &c->redDither,
 * presumably the destination width field - confirm against the SwsContext
 * layout.
 *
 * With CONFIG_SWSCALE_ALPHA and c->alpPixBuf set, the alpha lines are blended
 * as well. On x86-64 they are passed in extra registers and r8 is the index.
 * Otherwise the alpha pointers are stored in c->u_temp / c->v_temp, and %0/%1
 * are pushed and temporarily reused to hold them. In that path and in the
 * non-alpha path REG_b is saved to ESP_OFFSET(%5) by hand, used as the
 * destination pointer and restored at the end; REG_BP is pushed and used as
 * the index. Neither register is declared as a clobber.
 * Without alpha the alpha bytes are set to all ones with pcmpeqd.
 */
00839 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
00840 const int16_t *ubuf[2], const int16_t *vbuf[2],
00841 const int16_t *abuf[2], uint8_t *dest,
00842 int dstW, int yalpha, int uvalpha, int y)
00843 {
00844 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00845 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00846
00847 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00848 const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
00849 #if ARCH_X86_64
00850 __asm__ volatile(
00851 YSCALEYUV2RGB(%%r8, %5)
00852 YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
00853 "psraw $3, %%mm1 \n\t"
00854 "psraw $3, %%mm7 \n\t"
00855 "packuswb %%mm7, %%mm1 \n\t"
00856 WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00857 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
00858 "a" (&c->redDither),
00859 "r" (abuf0), "r" (abuf1)
00860 : "%r8"
00861 );
00862 #else
00863 c->u_temp=(intptr_t)abuf0;
00864 c->v_temp=(intptr_t)abuf1;
00865 __asm__ volatile(
00866 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00867 "mov %4, %%"REG_b" \n\t"
00868 "push %%"REG_BP" \n\t"
00869 YSCALEYUV2RGB(%%REGBP, %5)
00870 "push %0 \n\t"
00871 "push %1 \n\t"
00872 "mov "U_TEMP"(%5), %0 \n\t"
00873 "mov "V_TEMP"(%5), %1 \n\t"
00874 YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
00875 "psraw $3, %%mm1 \n\t"
00876 "psraw $3, %%mm7 \n\t"
00877 "packuswb %%mm7, %%mm1 \n\t"
00878 "pop %1 \n\t"
00879 "pop %0 \n\t"
00880 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00881 "pop %%"REG_BP" \n\t"
00882 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00883 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00884 "a" (&c->redDither)
00885 );
00886 #endif
00887 } else {
00888 __asm__ volatile(
00889 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00890 "mov %4, %%"REG_b" \n\t"
00891 "push %%"REG_BP" \n\t"
00892 YSCALEYUV2RGB(%%REGBP, %5)
00893 "pcmpeqd %%mm7, %%mm7 \n\t"
00894 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00895 "pop %%"REG_BP" \n\t"
00896 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00897 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00898 "a" (&c->redDither)
00899 );
00900 }
00901 }
00902
/*
 * Blend two input lines, convert to RGB and write 24-bit pixels (WRITEBGR24).
 *
 * Operands: %0/%1 = buf[0]/buf[1], %2/%3 = ubuf[0]/ubuf[1], %4 = dest,
 * %5 = &c->redDither. vbuf, abuf, dstW, yalpha, uvalpha and y are not
 * referenced in the body; the loop bound is read from 8280(%5)
 * (NOTE(review): hard-coded displacement, presumably the destination width -
 * confirm against the SwsContext layout).
 * REG_b is saved to ESP_OFFSET(%5) by hand, loaded with dest (WRITEBGR24
 * advances it) and restored at the end; REG_BP is pushed and used as the
 * index. mm7 is cleared before the writer is expanded.
 */
00903 static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
00904 const int16_t *ubuf[2], const int16_t *vbuf[2],
00905 const int16_t *abuf[2], uint8_t *dest,
00906 int dstW, int yalpha, int uvalpha, int y)
00907 {
00908 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00909 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00910
00911
00912 __asm__ volatile(
00913 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00914 "mov %4, %%"REG_b" \n\t"
00915 "push %%"REG_BP" \n\t"
00916 YSCALEYUV2RGB(%%REGBP, %5)
00917 "pxor %%mm7, %%mm7 \n\t"
00918 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
00919 "pop %%"REG_BP" \n\t"
00920 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00921 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00922 "a" (&c->redDither)
00923 );
00924 }
00925
/*
 * Blend two input lines, convert to RGB and write 15-bit pixels (WRITERGB15).
 *
 * Operands: %0/%1 = buf[0]/buf[1], %2/%3 = ubuf[0]/ubuf[1], %4 = dest,
 * %5 = &c->redDither. vbuf, abuf, dstW, yalpha, uvalpha and y are not
 * referenced in the body; the loop bound is read from 8280(%5)
 * (NOTE(review): hard-coded displacement, presumably the destination width -
 * confirm against the SwsContext layout).
 * REG_b is saved to ESP_OFFSET(%5) by hand, used as the destination pointer
 * and restored at the end; REG_BP is pushed and used as the index.
 * mm7 is cleared because WRITERGB15 expects it to be zero. When DITHER1XBPP
 * is defined, the per-channel dither values at %5 are added with unsigned
 * saturation before packing.
 */
00926 static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
00927 const int16_t *ubuf[2], const int16_t *vbuf[2],
00928 const int16_t *abuf[2], uint8_t *dest,
00929 int dstW, int yalpha, int uvalpha, int y)
00930 {
00931 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00932 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00933
00934
00935 __asm__ volatile(
00936 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00937 "mov %4, %%"REG_b" \n\t"
00938 "push %%"REG_BP" \n\t"
00939 YSCALEYUV2RGB(%%REGBP, %5)
00940 "pxor %%mm7, %%mm7 \n\t"
00941
00942 #ifdef DITHER1XBPP
00943 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
00944 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
00945 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
00946 #endif
00947 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
00948 "pop %%"REG_BP" \n\t"
00949 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00950 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00951 "a" (&c->redDither)
00952 );
00953 }
00954
/*
 * Blend two input lines, convert to RGB and write 16-bit pixels (WRITERGB16).
 *
 * Operands: %0/%1 = buf[0]/buf[1], %2/%3 = ubuf[0]/ubuf[1], %4 = dest,
 * %5 = &c->redDither. vbuf, abuf, dstW, yalpha, uvalpha and y are not
 * referenced in the body; the loop bound is read from 8280(%5)
 * (NOTE(review): hard-coded displacement, presumably the destination width -
 * confirm against the SwsContext layout).
 * REG_b is saved to ESP_OFFSET(%5) by hand, used as the destination pointer
 * and restored at the end; REG_BP is pushed and used as the index.
 * mm7 is cleared because WRITERGB16 expects it to be zero. When DITHER1XBPP
 * is defined, the per-channel dither values at %5 are added with unsigned
 * saturation before packing.
 */
00955 static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
00956 const int16_t *ubuf[2], const int16_t *vbuf[2],
00957 const int16_t *abuf[2], uint8_t *dest,
00958 int dstW, int yalpha, int uvalpha, int y)
00959 {
00960 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00961 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00962
00963
00964 __asm__ volatile(
00965 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00966 "mov %4, %%"REG_b" \n\t"
00967 "push %%"REG_BP" \n\t"
00968 YSCALEYUV2RGB(%%REGBP, %5)
00969 "pxor %%mm7, %%mm7 \n\t"
00970
00971 #ifdef DITHER1XBPP
00972 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
00973 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
00974 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
00975 #endif
00976 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
00977 "pop %%"REG_BP" \n\t"
00978 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00979 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00980 "a" (&c->redDither)
00981 );
00982 }
00983
/*
 * Two-line blend of chroma and luma without colour conversion, for packed
 * YUV output.
 *
 * index: running index register (zeroed here; loop label "1" is defined).
 * c:     base register for the context-relative constants.
 *
 * Side effect: before the loop, the coefficients at CHR_MMX_FILTER_OFFSET+8(c)
 * and LUM_MMX_FILTER_OFFSET+8(c) are shifted right by 3 and written back to
 * memory, i.e. the stored coefficients are modified in place each time this
 * code runs.
 * Inside the loop chroma is read from operands %2/%3 (at index and
 * UV_OFF_BYTE(c) bytes further on) and luma from %0/%1 (at 2 * index); each
 * result is ((first - second) * coefficient, high word) + (second >> 7).
 * Results: mm3/mm4 = chroma, mm1/mm7 = luma. mm0, mm2, mm5 and mm6 are scratch.
 * YSCALEYUV2PACKED is the argument-expanding wrapper.
 */
00984 #define REAL_YSCALEYUV2PACKED(index, c) \
00985 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00986 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
00987 "psraw $3, %%mm0 \n\t"\
00988 "psraw $3, %%mm1 \n\t"\
00989 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00990 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00991 "xor "#index", "#index" \n\t"\
00992 ".p2align 4 \n\t"\
00993 "1: \n\t"\
00994 "movq (%2, "#index"), %%mm2 \n\t" \
00995 "movq (%3, "#index"), %%mm3 \n\t" \
00996 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
00997 "movq (%2, "#index"), %%mm5 \n\t" \
00998 "movq (%3, "#index"), %%mm4 \n\t" \
00999 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01000 "psubw %%mm3, %%mm2 \n\t" \
01001 "psubw %%mm4, %%mm5 \n\t" \
01002 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
01003 "pmulhw %%mm0, %%mm2 \n\t" \
01004 "pmulhw %%mm0, %%mm5 \n\t" \
01005 "psraw $7, %%mm3 \n\t" \
01006 "psraw $7, %%mm4 \n\t" \
01007 "paddw %%mm2, %%mm3 \n\t" \
01008 "paddw %%mm5, %%mm4 \n\t" \
01009 "movq (%0, "#index", 2), %%mm0 \n\t" \
01010 "movq (%1, "#index", 2), %%mm1 \n\t" \
01011 "movq 8(%0, "#index", 2), %%mm6 \n\t" \
01012 "movq 8(%1, "#index", 2), %%mm7 \n\t" \
01013 "psubw %%mm1, %%mm0 \n\t" \
01014 "psubw %%mm7, %%mm6 \n\t" \
01015 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
01016 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
01017 "psraw $7, %%mm1 \n\t" \
01018 "psraw $7, %%mm7 \n\t" \
01019 "paddw %%mm0, %%mm1 \n\t" \
01020 "paddw %%mm6, %%mm7 \n\t" \
01021
01022 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
01023
/**
 * Two-line vertical blend of planar YUV to packed YUYV422, done in inline MMX asm.
 *
 * Only buf[0], buf[1], ubuf[0] and ubuf[1] are extracted in C; vbuf, abuf,
 * dstW, yalpha, uvalpha and y are not referenced by the C code here.
 *
 * Asm operands: %0 = buf0 (REG_c), %1 = buf1 (REG_d), %2 = ubuf0 (REG_S),
 * %3 = ubuf1 (REG_D), %4 = dest (memory), %5 = &c->redDither (REG_a).
 *
 * NOTE(review): YSCALEYUV2PACKED rewrites two coefficient slots in the
 * context (see that macro), and the asm declares no outputs or clobbers.
 */
01024 static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
01025 const int16_t *ubuf[2], const int16_t *vbuf[2],
01026 const int16_t *abuf[2], uint8_t *dest,
01027 int dstW, int yalpha, int uvalpha, int y)
01028 {
01029 const int16_t *buf0 = buf[0], *buf1 = buf[1],
01030 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
01031
01032
01033 __asm__ volatile(
/* Save REG_b in the context slot at ESP_OFFSET(%5), then reuse it for dest. */
01034 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01035 "mov %4, %%"REG_b" \n\t"
/* REG_BP is preserved on the stack because it is used as the loop index. */
01036 "push %%"REG_BP" \n\t"
01037 YSCALEYUV2PACKED(%%REGBP, %5)
/* NOTE(review): 8280(%5) is a hard-coded byte offset from &c->redDither;
 * presumably the output width field - confirm against the SwsContext layout. */
01038 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01039 "pop %%"REG_BP" \n\t"
01040 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01041 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01042 "a" (&c->redDither)
01043 );
01044 }
01045
/*
 * Asm fragment: start of a loop converting one luma line (%0) and one chroma
 * line (%2) to three packed 8-bit colour channels for 8 pixels.
 *
 * index is the loop-counter register, c the base register for the
 * context-relative constants. Operands %1 and %3 are not used here.
 *
 * Per iteration (label "1:"):
 *   - loads 4 chroma words at (%2,index) -> mm3 and 4 more at index plus
 *     UV_OFF_BYTE(c) -> mm4; shifts both right by 4 and subtracts
 *     U_OFFSET / V_OFFSET;
 *   - mm2 = pmulhw(mm3 copy, UB_COEFF), mm5 = pmulhw(mm4 copy, VR_COEFF),
 *     mm4 = pmulhw(.., UG_COEFF) + pmulhw(.., VG_COEFF);
 *   - loads 8 luma words -> mm1/mm7, shifts right by 4, subtracts Y_OFFSET
 *     and multiplies by Y_COEFF (high word);
 *   - duplicates each chroma term for two adjacent pixels (punpck*wd), adds
 *     the luma term and packs with unsigned saturation.
 *
 * Results: mm2 (UB term), mm4 (UG+VG term), mm5 (VR term), 8 bytes each;
 * going by the coefficient names these are presumably B, G and R.
 * mm0, mm1, mm3, mm6 and mm7 are overwritten as scratch.
 *
 * The fragment opens the loop label but contains no backward branch;
 * presumably the store macro that follows it closes the loop - confirm.
 */
01046 #define REAL_YSCALEYUV2RGB1(index, c) \
01047 "xor "#index", "#index" \n\t"\
01048 ".p2align 4 \n\t"\
01049 "1: \n\t"\
01050 "movq (%2, "#index"), %%mm3 \n\t" \
01051 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01052 "movq (%2, "#index"), %%mm4 \n\t" \
01053 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01054 "psraw $4, %%mm3 \n\t" \
01055 "psraw $4, %%mm4 \n\t" \
01056 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
01057 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
01058 "movq %%mm3, %%mm2 \n\t" \
01059 "movq %%mm4, %%mm5 \n\t" \
01060 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
01061 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
01062 \
01063 "movq (%0, "#index", 2), %%mm1 \n\t" \
01064 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01065 "psraw $4, %%mm1 \n\t" \
01066 "psraw $4, %%mm7 \n\t" \
01067 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
01068 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
01069 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
01070 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
01071 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
01072 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
01073 \
01074 "paddw %%mm3, %%mm4 \n\t"\
01075 "movq %%mm2, %%mm0 \n\t"\
01076 "movq %%mm5, %%mm6 \n\t"\
01077 "movq %%mm4, %%mm3 \n\t"\
01078 "punpcklwd %%mm2, %%mm2 \n\t"\
01079 "punpcklwd %%mm5, %%mm5 \n\t"\
01080 "punpcklwd %%mm4, %%mm4 \n\t"\
01081 "paddw %%mm1, %%mm2 \n\t"\
01082 "paddw %%mm1, %%mm5 \n\t"\
01083 "paddw %%mm1, %%mm4 \n\t"\
01084 "punpckhwd %%mm0, %%mm0 \n\t"\
01085 "punpckhwd %%mm6, %%mm6 \n\t"\
01086 "punpckhwd %%mm3, %%mm3 \n\t"\
01087 "paddw %%mm7, %%mm0 \n\t"\
01088 "paddw %%mm7, %%mm6 \n\t"\
01089 "paddw %%mm7, %%mm3 \n\t"\
01090 \
01091 "packuswb %%mm0, %%mm2 \n\t"\
01092 "packuswb %%mm6, %%mm5 \n\t"\
01093 "packuswb %%mm3, %%mm4 \n\t"\
01094
/* Extra expansion level so that macro arguments are expanded before "#" stringifies them. */
01095 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
01096
01097
/*
 * Asm fragment: same as REAL_YSCALEYUV2RGB1 except that chroma is taken from
 * two lines (%2 and %3) instead of one.
 *
 * The two chroma lines are added word-wise and the sum is shifted right
 * logically by 5 (psrlw) before U_OFFSET / V_OFFSET are subtracted; this
 * replaces the single-line "psraw $4" of the non-b variant. The luma path
 * (%0 only) and the multiply/unpack/pack steps are identical.
 *
 * Results: mm2 (UB term), mm4 (UG+VG term), mm5 (VR term), 8 bytes each;
 * mm0, mm1, mm3, mm6 and mm7 are overwritten as scratch.
 *
 * NOTE(review): paddw wraps on 16-bit overflow and psrlw is an unsigned
 * shift, so this presumably relies on both chroma inputs being non-negative
 * and small enough that their sum fits in 16 bits - confirm the input range.
 *
 * The fragment opens the loop label "1:" but contains no backward branch;
 * presumably the store macro that follows it closes the loop - confirm.
 */
01098 #define REAL_YSCALEYUV2RGB1b(index, c) \
01099 "xor "#index", "#index" \n\t"\
01100 ".p2align 4 \n\t"\
01101 "1: \n\t"\
01102 "movq (%2, "#index"), %%mm2 \n\t" \
01103 "movq (%3, "#index"), %%mm3 \n\t" \
01104 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01105 "movq (%2, "#index"), %%mm5 \n\t" \
01106 "movq (%3, "#index"), %%mm4 \n\t" \
01107 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01108 "paddw %%mm2, %%mm3 \n\t" \
01109 "paddw %%mm5, %%mm4 \n\t" \
01110 "psrlw $5, %%mm3 \n\t" \
01111 "psrlw $5, %%mm4 \n\t" \
01112 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
01113 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
01114 "movq %%mm3, %%mm2 \n\t" \
01115 "movq %%mm4, %%mm5 \n\t" \
01116 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
01117 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
01118 \
01119 "movq (%0, "#index", 2), %%mm1 \n\t" \
01120 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01121 "psraw $4, %%mm1 \n\t" \
01122 "psraw $4, %%mm7 \n\t" \
01123 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
01124 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
01125 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
01126 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
01127 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
01128 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
01129 \
01130 "paddw %%mm3, %%mm4 \n\t"\
01131 "movq %%mm2, %%mm0 \n\t"\
01132 "movq %%mm5, %%mm6 \n\t"\
01133 "movq %%mm4, %%mm3 \n\t"\
01134 "punpcklwd %%mm2, %%mm2 \n\t"\
01135 "punpcklwd %%mm5, %%mm5 \n\t"\
01136 "punpcklwd %%mm4, %%mm4 \n\t"\
01137 "paddw %%mm1, %%mm2 \n\t"\
01138 "paddw %%mm1, %%mm5 \n\t"\
01139 "paddw %%mm1, %%mm4 \n\t"\
01140 "punpckhwd %%mm0, %%mm0 \n\t"\
01141 "punpckhwd %%mm6, %%mm6 \n\t"\
01142 "punpckhwd %%mm3, %%mm3 \n\t"\
01143 "paddw %%mm7, %%mm0 \n\t"\
01144 "paddw %%mm7, %%mm6 \n\t"\
01145 "paddw %%mm7, %%mm3 \n\t"\
01146 \
01147 "packuswb %%mm0, %%mm2 \n\t"\
01148 "packuswb %%mm6, %%mm5 \n\t"\
01149 "packuswb %%mm3, %%mm4 \n\t"\
01150
/* Extra expansion level so that macro arguments are expanded before "#" stringifies them. */
01151 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
01152
/*
 * Asm fragment: loads 8 16-bit samples from asm operand %1 at (index * 2)
 * (two movq loads, at offsets 0 and 8), shifts each right arithmetically by
 * 7 and packs them with unsigned saturation into 8 bytes in mm7.
 * mm1 is overwritten as scratch.
 */
01153 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
01154 "movq (%1, "#index", 2), %%mm7 \n\t" \
01155 "movq 8(%1, "#index", 2), %%mm1 \n\t" \
01156 "psraw $7, %%mm7 \n\t" \
01157 "psraw $7, %%mm1 \n\t" \
01158 "packuswb %%mm1, %%mm7 \n\t"
/* Extra expansion level so that the macro argument is expanded before "#" stringifies it. */
01159 #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
01160
/**
 * Single-luma-line conversion of planar YUV to packed 32-bit RGB, done in
 * inline MMX asm.
 *
 * Four asm variants are selected at run time:
 *   - uvalpha < 2048 uses YSCALEYUV2RGB1 (chroma from ubuf0 only), otherwise
 *     YSCALEYUV2RGB1b (chroma from ubuf0 and ubuf1 combined);
 *   - if CONFIG_SWSCALE_ALPHA is set and c->alpPixBuf is non-NULL, operand %1
 *     is abuf0 and YSCALEYUV2RGB1_ALPHA fills mm7 from it; otherwise operand
 *     %1 is buf1 (an alias of buf0) and mm7 is set to all ones by pcmpeqd.
 *
 * Asm operands: %0 = buf0 (REG_c), %1 = abuf0 or buf1 (REG_d),
 * %2 = ubuf0 (REG_S), %3 = ubuf1 (REG_D), %4 = dest (memory),
 * %5 = &c->redDither (REG_a).
 *
 * bguf, dstW and y are not referenced by the C code in this function.
 *
 * NOTE(review): 8280(%5) is a hard-coded byte offset from &c->redDither;
 * presumably the output width field - confirm against the SwsContext layout.
 * The asm statements declare no outputs or clobbers.
 */
01164 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
01165 const int16_t *ubuf[2], const int16_t *bguf[2],
01166 const int16_t *abuf0, uint8_t *dest,
01167 int dstW, int uvalpha, int y)
01168 {
01169 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
/* buf1 only fills operand slot %1 in the variants that do not read alpha. */
01170 const int16_t *buf1= buf0;
01171
01172 if (uvalpha < 2048) {
01173 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
/* Single chroma line, alpha read from abuf0. */
01174 __asm__ volatile(
01175 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01176 "mov %4, %%"REG_b" \n\t"
01177 "push %%"REG_BP" \n\t"
01178 YSCALEYUV2RGB1(%%REGBP, %5)
01179 YSCALEYUV2RGB1_ALPHA(%%REGBP)
01180 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01181 "pop %%"REG_BP" \n\t"
01182 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01183 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01184 "a" (&c->redDither)
01185 );
01186 } else {
/* Single chroma line, fourth channel forced to 0xFF. */
01187 __asm__ volatile(
01188 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01189 "mov %4, %%"REG_b" \n\t"
01190 "push %%"REG_BP" \n\t"
01191 YSCALEYUV2RGB1(%%REGBP, %5)
01192 "pcmpeqd %%mm7, %%mm7 \n\t"
01193 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01194 "pop %%"REG_BP" \n\t"
01195 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01196 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01197 "a" (&c->redDither)
01198 );
01199 }
01200 } else {
01201 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
/* Two chroma lines combined, alpha read from abuf0. */
01202 __asm__ volatile(
01203 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01204 "mov %4, %%"REG_b" \n\t"
01205 "push %%"REG_BP" \n\t"
01206 YSCALEYUV2RGB1b(%%REGBP, %5)
01207 YSCALEYUV2RGB1_ALPHA(%%REGBP)
01208 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01209 "pop %%"REG_BP" \n\t"
01210 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01211 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01212 "a" (&c->redDither)
01213 );
01214 } else {
/* Two chroma lines combined, fourth channel forced to 0xFF. */
01215 __asm__ volatile(
01216 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01217 "mov %4, %%"REG_b" \n\t"
01218 "push %%"REG_BP" \n\t"
01219 YSCALEYUV2RGB1b(%%REGBP, %5)
01220 "pcmpeqd %%mm7, %%mm7 \n\t"
01221 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01222 "pop %%"REG_BP" \n\t"
01223 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01224 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01225 "a" (&c->redDither)
01226 );
01227 }
01228 }
01229 }
01230
/**
 * Single-luma-line conversion of planar YUV to packed BGR24, done in inline
 * MMX asm.
 *
 * uvalpha < 2048 selects YSCALEYUV2RGB1 (chroma from ubuf0 only); otherwise
 * YSCALEYUV2RGB1b (chroma from ubuf0 and ubuf1 combined) is used. In both
 * cases mm7 is zeroed before WRITEBGR24.
 *
 * Asm operands: %0 = buf0 (REG_c), %1 = buf1 (alias of buf0, REG_d),
 * %2 = ubuf0 (REG_S), %3 = ubuf1 (REG_D), %4 = dest (memory),
 * %5 = &c->redDither (REG_a).
 *
 * bguf, abuf0, dstW and y are not referenced by the C code in this function.
 *
 * NOTE(review): 8280(%5) is a hard-coded byte offset from &c->redDither;
 * presumably the output width field - confirm against the SwsContext layout.
 */
01231 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
01232 const int16_t *ubuf[2], const int16_t *bguf[2],
01233 const int16_t *abuf0, uint8_t *dest,
01234 int dstW, int uvalpha, int y)
01235 {
01236 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
/* buf1 only fills operand slot %1. */
01237 const int16_t *buf1= buf0;
01238
01239 if (uvalpha < 2048) {
01240 __asm__ volatile(
/* Save REG_b in the context, reuse it for dest; REG_BP becomes the loop index. */
01241 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01242 "mov %4, %%"REG_b" \n\t"
01243 "push %%"REG_BP" \n\t"
01244 YSCALEYUV2RGB1(%%REGBP, %5)
01245 "pxor %%mm7, %%mm7 \n\t"
01246 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01247 "pop %%"REG_BP" \n\t"
01248 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01249 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01250 "a" (&c->redDither)
01251 );
01252 } else {
01253 __asm__ volatile(
01254 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01255 "mov %4, %%"REG_b" \n\t"
01256 "push %%"REG_BP" \n\t"
01257 YSCALEYUV2RGB1b(%%REGBP, %5)
01258 "pxor %%mm7, %%mm7 \n\t"
01259 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01260 "pop %%"REG_BP" \n\t"
01261 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01262 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01263 "a" (&c->redDither)
01264 );
01265 }
01266 }
01267
/**
 * Single-luma-line conversion of planar YUV to packed RGB555, done in inline
 * MMX asm.
 *
 * uvalpha < 2048 selects YSCALEYUV2RGB1 (chroma from ubuf0 only); otherwise
 * YSCALEYUV2RGB1b (chroma from ubuf0 and ubuf1 combined) is used. When
 * DITHER1XBPP is defined, the per-channel dither values stored in the context
 * are added with unsigned saturation before WRITERGB15.
 *
 * Asm operands: %0 = buf0 (REG_c), %1 = buf1 (alias of buf0, REG_d),
 * %2 = ubuf0 (REG_S), %3 = ubuf1 (REG_D), %4 = dest (memory),
 * %5 = &c->redDither (REG_a).
 *
 * bguf, abuf0, dstW and y are not referenced by the C code in this function.
 *
 * NOTE(review): 8280(%5) is a hard-coded byte offset from &c->redDither;
 * presumably the output width field - confirm against the SwsContext layout.
 */
01268 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
01269 const int16_t *ubuf[2], const int16_t *bguf[2],
01270 const int16_t *abuf0, uint8_t *dest,
01271 int dstW, int uvalpha, int y)
01272 {
01273 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
/* buf1 only fills operand slot %1. */
01274 const int16_t *buf1= buf0;
01275
01276 if (uvalpha < 2048) {
01277 __asm__ volatile(
/* Save REG_b in the context, reuse it for dest; REG_BP becomes the loop index. */
01278 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01279 "mov %4, %%"REG_b" \n\t"
01280 "push %%"REG_BP" \n\t"
01281 YSCALEYUV2RGB1(%%REGBP, %5)
01282 "pxor %%mm7, %%mm7 \n\t"
01283
01284 #ifdef DITHER1XBPP
01285 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01286 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01287 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01288 #endif
01289 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01290 "pop %%"REG_BP" \n\t"
01291 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01292 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01293 "a" (&c->redDither)
01294 );
01295 } else {
01296 __asm__ volatile(
01297 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01298 "mov %4, %%"REG_b" \n\t"
01299 "push %%"REG_BP" \n\t"
01300 YSCALEYUV2RGB1b(%%REGBP, %5)
01301 "pxor %%mm7, %%mm7 \n\t"
01302
01303 #ifdef DITHER1XBPP
01304 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01305 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01306 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01307 #endif
01308 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01309 "pop %%"REG_BP" \n\t"
01310 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01311 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01312 "a" (&c->redDither)
01313 );
01314 }
01315 }
01316
/**
 * Single-luma-line conversion of planar YUV to packed RGB565, done in inline
 * MMX asm.
 *
 * uvalpha < 2048 selects YSCALEYUV2RGB1 (chroma from ubuf0 only); otherwise
 * YSCALEYUV2RGB1b (chroma from ubuf0 and ubuf1 combined) is used. When
 * DITHER1XBPP is defined, the per-channel dither values stored in the context
 * are added with unsigned saturation before WRITERGB16.
 *
 * Asm operands: %0 = buf0 (REG_c), %1 = buf1 (alias of buf0, REG_d),
 * %2 = ubuf0 (REG_S), %3 = ubuf1 (REG_D), %4 = dest (memory),
 * %5 = &c->redDither (REG_a).
 *
 * bguf, abuf0, dstW and y are not referenced by the C code in this function.
 *
 * NOTE(review): 8280(%5) is a hard-coded byte offset from &c->redDither;
 * presumably the output width field - confirm against the SwsContext layout.
 */
01317 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
01318 const int16_t *ubuf[2], const int16_t *bguf[2],
01319 const int16_t *abuf0, uint8_t *dest,
01320 int dstW, int uvalpha, int y)
01321 {
01322 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
/* buf1 only fills operand slot %1. */
01323 const int16_t *buf1= buf0;
01324
01325 if (uvalpha < 2048) {
01326 __asm__ volatile(
/* Save REG_b in the context, reuse it for dest; REG_BP becomes the loop index. */
01327 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01328 "mov %4, %%"REG_b" \n\t"
01329 "push %%"REG_BP" \n\t"
01330 YSCALEYUV2RGB1(%%REGBP, %5)
01331 "pxor %%mm7, %%mm7 \n\t"
01332
01333 #ifdef DITHER1XBPP
01334 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01335 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01336 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01337 #endif
01338 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01339 "pop %%"REG_BP" \n\t"
01340 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01341 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01342 "a" (&c->redDither)
01343 );
01344 } else {
01345 __asm__ volatile(
01346 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01347 "mov %4, %%"REG_b" \n\t"
01348 "push %%"REG_BP" \n\t"
01349 YSCALEYUV2RGB1b(%%REGBP, %5)
01350 "pxor %%mm7, %%mm7 \n\t"
01351
01352 #ifdef DITHER1XBPP
01353 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01354 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01355 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01356 #endif
01357 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01358 "pop %%"REG_BP" \n\t"
01359 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01360 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01361 "a" (&c->redDither)
01362 );
01363 }
01364 }
01365
/*
 * Asm fragment: start of a loop that prepares one luma line (%0) and one
 * chroma line (%2) for packed YUV output.
 *
 * Per iteration (label "1:"): loads 4 chroma words at (%2,index) -> mm3 and
 * 4 more at index plus UV_OFF_BYTE(c) -> mm4, loads 8 luma words from
 * (%0,index,2) -> mm1/mm7, and shifts all four registers right
 * arithmetically by 7. Operands %1 and %3 are not used.
 *
 * The fragment opens the loop label but contains no backward branch;
 * presumably the store macro that follows it closes the loop - confirm.
 */
01366 #define REAL_YSCALEYUV2PACKED1(index, c) \
01367 "xor "#index", "#index" \n\t"\
01368 ".p2align 4 \n\t"\
01369 "1: \n\t"\
01370 "movq (%2, "#index"), %%mm3 \n\t" \
01371 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01372 "movq (%2, "#index"), %%mm4 \n\t" \
01373 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01374 "psraw $7, %%mm3 \n\t" \
01375 "psraw $7, %%mm4 \n\t" \
01376 "movq (%0, "#index", 2), %%mm1 \n\t" \
01377 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01378 "psraw $7, %%mm1 \n\t" \
01379 "psraw $7, %%mm7 \n\t" \
01380
/* Extra expansion level so that macro arguments are expanded before "#" stringifies them. */
01381 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
01382
/*
 * Asm fragment: same as REAL_YSCALEYUV2PACKED1 except that chroma is taken
 * from two lines (%2 and %3).
 *
 * The two chroma lines are added word-wise and the sum is shifted right
 * logically by 8 (psrlw) -> mm3 and mm4; mm2 and mm5 are overwritten as
 * scratch. Luma is loaded from %0 only and shifted right arithmetically
 * by 7 -> mm1/mm7.
 *
 * NOTE(review): paddw wraps on 16-bit overflow and psrlw is an unsigned
 * shift, so this presumably relies on both chroma inputs being non-negative
 * and small enough that their sum fits in 16 bits - confirm the input range.
 *
 * The fragment opens the loop label "1:" but contains no backward branch;
 * presumably the store macro that follows it closes the loop - confirm.
 */
01383 #define REAL_YSCALEYUV2PACKED1b(index, c) \
01384 "xor "#index", "#index" \n\t"\
01385 ".p2align 4 \n\t"\
01386 "1: \n\t"\
01387 "movq (%2, "#index"), %%mm2 \n\t" \
01388 "movq (%3, "#index"), %%mm3 \n\t" \
01389 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01390 "movq (%2, "#index"), %%mm5 \n\t" \
01391 "movq (%3, "#index"), %%mm4 \n\t" \
01392 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01393 "paddw %%mm2, %%mm3 \n\t" \
01394 "paddw %%mm5, %%mm4 \n\t" \
01395 "psrlw $8, %%mm3 \n\t" \
01396 "psrlw $8, %%mm4 \n\t" \
01397 "movq (%0, "#index", 2), %%mm1 \n\t" \
01398 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01399 "psraw $7, %%mm1 \n\t" \
01400 "psraw $7, %%mm7 \n\t"
/* Extra expansion level so that macro arguments are expanded before "#" stringifies them. */
01401 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
01402
/**
 * Single-luma-line conversion of planar YUV to packed YUYV422, done in inline
 * MMX asm.
 *
 * uvalpha < 2048 selects YSCALEYUV2PACKED1 (chroma from ubuf0 only);
 * otherwise YSCALEYUV2PACKED1b (chroma from ubuf0 and ubuf1 combined).
 *
 * Asm operands: %0 = buf0 (REG_c), %1 = buf1 (alias of buf0, REG_d),
 * %2 = ubuf0 (REG_S), %3 = ubuf1 (REG_D), %4 = dest (memory),
 * %5 = &c->redDither (REG_a).
 *
 * bguf, abuf0, dstW and y are not referenced by the C code in this function.
 *
 * NOTE(review): 8280(%5) is a hard-coded byte offset from &c->redDither;
 * presumably the output width field - confirm against the SwsContext layout.
 */
01403 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
01404 const int16_t *ubuf[2], const int16_t *bguf[2],
01405 const int16_t *abuf0, uint8_t *dest,
01406 int dstW, int uvalpha, int y)
01407 {
01408 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
/* buf1 only fills operand slot %1. */
01409 const int16_t *buf1= buf0;
01410
01411 if (uvalpha < 2048) {
01412 __asm__ volatile(
/* Save REG_b in the context, reuse it for dest; REG_BP becomes the loop index. */
01413 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01414 "mov %4, %%"REG_b" \n\t"
01415 "push %%"REG_BP" \n\t"
01416 YSCALEYUV2PACKED1(%%REGBP, %5)
01417 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01418 "pop %%"REG_BP" \n\t"
01419 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01420 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01421 "a" (&c->redDither)
01422 );
01423 } else {
01424 __asm__ volatile(
01425 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01426 "mov %4, %%"REG_b" \n\t"
01427 "push %%"REG_BP" \n\t"
01428 YSCALEYUV2PACKED1b(%%REGBP, %5)
01429 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01430 "pop %%"REG_BP" \n\t"
01431 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01432 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01433 "a" (&c->redDither)
01434 );
01435 }
01436 }
01437
/**
 * Convert packed 24-bit BGR or RGB pixels to 16-bit luma samples with MMX.
 *
 * @param dst       output buffer of int16_t samples
 * @param src       packed 3-bytes-per-pixel input
 * @param width     number of samples to produce
 * @param srcFormat PIX_FMT_BGR24 selects the ff_bgr24toY* coefficients; any
 *                  other value selects the ff_rgb24toY* coefficients
 *
 * Each loop iteration consumes 12 source bytes and stores 8 bytes (four
 * int16_t results) to dst. The loop index starts at -2*width bytes relative
 * to dst+width and the loop repeats while the index is still negative after
 * adding 8, so the body runs at least once and the number of samples written
 * is width rounded up to a multiple of 4 (4 when width <= 0).
 *
 * NOTE(review): callers presumably provide buffers padded for the rounding
 * above - confirm. The coefficient registers mm5/mm6 are loaded in a separate
 * asm statement and assumed to survive until the main one, and the main asm
 * writes to dst without a "memory" clobber - confirm this is intended.
 */
01438 static av_always_inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src,
01439 int width, enum PixelFormat srcFormat)
01440 {
01441
/* Load the two coefficient vectors matching the channel order into mm5/mm6. */
01442 if(srcFormat == PIX_FMT_BGR24) {
01443 __asm__ volatile(
01444 "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t"
01445 "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t"
01446 :
01447 );
01448 } else {
01449 __asm__ volatile(
01450 "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t"
01451 "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t"
01452 :
01453 );
01454 }
01455
01456 __asm__ volatile(
/* mm4 = rounding/offset constant, REG_a = negative byte index, mm7 = 0 for unpacking. */
01457 "movq "MANGLE(ff_bgr24toYOffset)", %%mm4 \n\t"
01458 "mov %2, %%"REG_a" \n\t"
01459 "pxor %%mm7, %%mm7 \n\t"
01460 "1: \n\t"
01461 PREFETCH" 64(%0) \n\t"
/* Four overlapping 4-byte loads covering source bytes 0..11. */
01462 "movd (%0), %%mm0 \n\t"
01463 "movd 2(%0), %%mm1 \n\t"
01464 "movd 6(%0), %%mm2 \n\t"
01465 "movd 8(%0), %%mm3 \n\t"
01466 "add $12, %0 \n\t"
/* Zero-extend bytes to words, then multiply-accumulate with the coefficients. */
01467 "punpcklbw %%mm7, %%mm0 \n\t"
01468 "punpcklbw %%mm7, %%mm1 \n\t"
01469 "punpcklbw %%mm7, %%mm2 \n\t"
01470 "punpcklbw %%mm7, %%mm3 \n\t"
01471 "pmaddwd %%mm5, %%mm0 \n\t"
01472 "pmaddwd %%mm6, %%mm1 \n\t"
01473 "pmaddwd %%mm5, %%mm2 \n\t"
01474 "pmaddwd %%mm6, %%mm3 \n\t"
01475 "paddd %%mm1, %%mm0 \n\t"
01476 "paddd %%mm3, %%mm2 \n\t"
/* Add the offset, scale down by 2^9 and narrow to four signed 16-bit results. */
01477 "paddd %%mm4, %%mm0 \n\t"
01478 "paddd %%mm4, %%mm2 \n\t"
01479 "psrad $9, %%mm0 \n\t"
01480 "psrad $9, %%mm2 \n\t"
01481 "packssdw %%mm2, %%mm0 \n\t"
01482 "movq %%mm0, (%1, %%"REG_a") \n\t"
01483 "add $8, %%"REG_a" \n\t"
01484 " js 1b \n\t"
01485 : "+r" (src)
01486 : "r" (dst+width), "g" ((x86_reg)-2*width)
01487 : "%"REG_a
01488 );
01489 }
01490
01491 static void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
01492 int width, uint32_t *unused)
01493 {
01494 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
01495 }
01496
01497 static void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
01498 int width, uint32_t *unused)
01499 {
01500 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
01501 }
01502
/**
 * Convert packed 24-bit BGR or RGB pixels to 16-bit U and V samples with MMX.
 *
 * @param dstU      output buffer for the first chroma plane (int16_t)
 * @param dstV      output buffer for the second chroma plane (int16_t)
 * @param src       packed 3-bytes-per-pixel input
 * @param width     number of samples to produce per plane
 * @param srcFormat selects the coefficient table:
 *                  ff_bgr24toUV[srcFormat == PIX_FMT_RGB24]
 *
 * The selected table (operand %4) is read at byte offsets 0, 8, 16 and 24:
 * offsets 0 and 8 feed the dstU results, offsets 16 and 24 (the latter
 * cached in mm6) feed the dstV results.
 *
 * Each loop iteration consumes 12 source bytes and stores four int16_t
 * results to each of dstU and dstV. The loop index starts at -2*width bytes
 * relative to dstU+width / dstV+width and the loop repeats while the index is
 * still negative after adding 8, so the body runs at least once and the
 * number of samples written is width rounded up to a multiple of 4.
 *
 * NOTE(review): callers presumably provide buffers padded for the rounding
 * above - confirm. The asm writes to dstU/dstV without a "memory" clobber.
 */
01503 static av_always_inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV,
01504 const uint8_t *src, int width,
01505 enum PixelFormat srcFormat)
01506 {
01507 __asm__ volatile(
/* mm6 = fourth coefficient vector, REG_a = negative byte index, mm7 = 0 for unpacking. */
01508 "movq 24(%4), %%mm6 \n\t"
01509 "mov %3, %%"REG_a" \n\t"
01510 "pxor %%mm7, %%mm7 \n\t"
01511 "1: \n\t"
01512 PREFETCH" 64(%0) \n\t"
/* First half: source bytes 0..5 -> mm0 (U sums) and mm2 (V sums). */
01513 "movd (%0), %%mm0 \n\t"
01514 "movd 2(%0), %%mm1 \n\t"
01515 "punpcklbw %%mm7, %%mm0 \n\t"
01516 "punpcklbw %%mm7, %%mm1 \n\t"
01517 "movq %%mm0, %%mm2 \n\t"
01518 "movq %%mm1, %%mm3 \n\t"
01519 "pmaddwd (%4), %%mm0 \n\t"
01520 "pmaddwd 8(%4), %%mm1 \n\t"
01521 "pmaddwd 16(%4), %%mm2 \n\t"
01522 "pmaddwd %%mm6, %%mm3 \n\t"
01523 "paddd %%mm1, %%mm0 \n\t"
01524 "paddd %%mm3, %%mm2 \n\t"
01525
/* Second half: source bytes 6..11 -> mm1 (U sums) and mm4 (V sums). */
01526 "movd 6(%0), %%mm1 \n\t"
01527 "movd 8(%0), %%mm3 \n\t"
01528 "add $12, %0 \n\t"
01529 "punpcklbw %%mm7, %%mm1 \n\t"
01530 "punpcklbw %%mm7, %%mm3 \n\t"
01531 "movq %%mm1, %%mm4 \n\t"
01532 "movq %%mm3, %%mm5 \n\t"
01533 "pmaddwd (%4), %%mm1 \n\t"
01534 "pmaddwd 8(%4), %%mm3 \n\t"
01535 "pmaddwd 16(%4), %%mm4 \n\t"
01536 "pmaddwd %%mm6, %%mm5 \n\t"
01537 "paddd %%mm3, %%mm1 \n\t"
01538 "paddd %%mm5, %%mm4 \n\t"
01539
/* Add the offset, scale down by 2^9 and narrow to signed 16-bit results. */
01540 "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3 \n\t"
01541 "paddd %%mm3, %%mm0 \n\t"
01542 "paddd %%mm3, %%mm2 \n\t"
01543 "paddd %%mm3, %%mm1 \n\t"
01544 "paddd %%mm3, %%mm4 \n\t"
01545 "psrad $9, %%mm0 \n\t"
01546 "psrad $9, %%mm2 \n\t"
01547 "psrad $9, %%mm1 \n\t"
01548 "psrad $9, %%mm4 \n\t"
01549 "packssdw %%mm1, %%mm0 \n\t"
01550 "packssdw %%mm4, %%mm2 \n\t"
01551 "movq %%mm0, (%1, %%"REG_a") \n\t"
01552 "movq %%mm2, (%2, %%"REG_a") \n\t"
01553 "add $8, %%"REG_a" \n\t"
01554 " js 1b \n\t"
01555 : "+r" (src)
01556 : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-2*width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
01557 : "%"REG_a
01558 );
01559 }
01560
01561 static void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV,
01562 const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
01563 int width, uint32_t *unused)
01564 {
01565 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
01566 assert(src1 == src2);
01567 }
01568
01569 static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV,
01570 const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
01571 int width, uint32_t *unused)
01572 {
01573 assert(src1==src2);
01574 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
01575 }
01576
01577 #if COMPILE_TEMPLATE_MMX2
/**
 * Fast horizontal luma scaling that runs the code stored in
 * c->lumMmx2FilterCode.
 *
 * @param c        scaler context; hLumFilterPos, hLumFilter and
 *                 lumMmx2FilterCode are read from it
 * @param dst      output samples (int16_t)
 * @param dstWidth number of output samples
 * @param src      8-bit input line
 * @param srcW     number of input samples
 * @param xInc     horizontal step; (i*xInc)>>16 is used as the source
 *                 position of output sample i in the tail loop below
 *
 * The asm loads src, dst, filter and filterPos into REG_c, REG_D, REG_d and
 * REG_b, zeroes REG_a and mm7, and then runs CALL_MMX2_FILTER_CODE eight
 * times. What the called code does is not visible here; presumably it
 * produces one chunk of output per call - confirm against the code generator.
 *
 * Register handling: under PIC, REG_b is saved to and restored from ebxsave
 * instead of being listed as clobbered. On x86-64 the quadword at -8(%rsp) is
 * saved to retsave and restored afterwards; "call" pushes its return address
 * into that slot, so this presumably protects data the compiler keeps below
 * the stack pointer (red zone) - confirm.
 *
 * NOTE(review): ebxsave/retsave are written by the asm but are declared as
 * input "m" operands, and no "memory" clobber is given - confirm intended.
 */
01578 static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
01579 int dstWidth, const uint8_t *src,
01580 int srcW, int xInc)
01581 {
01582 int32_t *filterPos = c->hLumFilterPos;
01583 int16_t *filter = c->hLumFilter;
01584 void *mmx2FilterCode= c->lumMmx2FilterCode;
01585 int i;
01586 #if defined(PIC)
01587 uint64_t ebxsave;
01588 #endif
01589 #if ARCH_X86_64
01590 uint64_t retsave;
01591 #endif
01592
01593 __asm__ volatile(
/* Save REG_b (PIC only) and the stack slot at -8(%rsp) (x86-64 only). */
01594 #if defined(PIC)
01595 "mov %%"REG_b", %5 \n\t"
01596 #if ARCH_X86_64
01597 "mov -8(%%rsp), %%"REG_a" \n\t"
01598 "mov %%"REG_a", %6 \n\t"
01599 #endif
01600 #else
01601 #if ARCH_X86_64
01602 "mov -8(%%rsp), %%"REG_a" \n\t"
01603 "mov %%"REG_a", %5 \n\t"
01604 #endif
01605 #endif
/* Register set-up for the called code: REG_c = src, REG_D = dst,
 * REG_d = filter, REG_b = filterPos, REG_a = 0, mm7 = 0. */
01606 "pxor %%mm7, %%mm7 \n\t"
01607 "mov %0, %%"REG_c" \n\t"
01608 "mov %1, %%"REG_D" \n\t"
01609 "mov %2, %%"REG_d" \n\t"
01610 "mov %3, %%"REG_b" \n\t"
01611 "xor %%"REG_a", %%"REG_a" \n\t"
01612 PREFETCH" (%%"REG_c") \n\t"
01613 PREFETCH" 32(%%"REG_c") \n\t"
01614 PREFETCH" 64(%%"REG_c") \n\t"
01615
/* CALL_MMX2_FILTER_CODE: load esi from (REG_b), call the code at operand %4,
 * then advance REG_c by the 32-bit value at (REG_b, REG_a) and REG_D by
 * REG_a, and reset REG_a to 0. The x86-64 variant goes through esi/REG_S for
 * the REG_c update; the 32-bit variant adds from memory directly.
 * The macro stays defined after this function and is reused further down. */
01616 #if ARCH_X86_64
01617 #define CALL_MMX2_FILTER_CODE \
01618 "movl (%%"REG_b"), %%esi \n\t"\
01619 "call *%4 \n\t"\
01620 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
01621 "add %%"REG_S", %%"REG_c" \n\t"\
01622 "add %%"REG_a", %%"REG_D" \n\t"\
01623 "xor %%"REG_a", %%"REG_a" \n\t"\
01624
01625 #else
01626 #define CALL_MMX2_FILTER_CODE \
01627 "movl (%%"REG_b"), %%esi \n\t"\
01628 "call *%4 \n\t"\
01629 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
01630 "add %%"REG_a", %%"REG_D" \n\t"\
01631 "xor %%"REG_a", %%"REG_a" \n\t"\
01632
01633 #endif
01634
01635 CALL_MMX2_FILTER_CODE
01636 CALL_MMX2_FILTER_CODE
01637 CALL_MMX2_FILTER_CODE
01638 CALL_MMX2_FILTER_CODE
01639 CALL_MMX2_FILTER_CODE
01640 CALL_MMX2_FILTER_CODE
01641 CALL_MMX2_FILTER_CODE
01642 CALL_MMX2_FILTER_CODE
01643
/* Restore what was saved at the top of the asm block. */
01644 #if defined(PIC)
01645 "mov %5, %%"REG_b" \n\t"
01646 #if ARCH_X86_64
01647 "mov %6, %%"REG_a" \n\t"
01648 "mov %%"REG_a", -8(%%rsp) \n\t"
01649 #endif
01650 #else
01651 #if ARCH_X86_64
01652 "mov %5, %%"REG_a" \n\t"
01653 "mov %%"REG_a", -8(%%rsp) \n\t"
01654 #endif
01655 #endif
01656 :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
01657 "m" (mmx2FilterCode)
01658 #if defined(PIC)
01659 ,"m" (ebxsave)
01660 #endif
01661 #if ARCH_X86_64
01662 ,"m"(retsave)
01663 #endif
01664 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
01665 #if !defined(PIC)
01666 ,"%"REG_b
01667 #endif
01668 );
01669
/* Overwrite the trailing outputs whose source position reaches the last
 * input sample with that sample scaled by 128. */
01670 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
01671 dst[i] = src[srcW-1]*128;
01672 }
01673
/**
 * Fast horizontal chroma scaling that runs the code stored in
 * c->chrMmx2FilterCode, once for each of the two chroma planes.
 *
 * @param c        scaler context; hChrFilterPos, hChrFilter and
 *                 chrMmx2FilterCode are read from it
 * @param dst1     output samples for the first plane (int16_t)
 * @param dst2     output samples for the second plane (int16_t)
 * @param dstWidth number of output samples per plane
 * @param src1     8-bit input line of the first plane
 * @param src2     8-bit input line of the second plane
 * @param srcW     number of input samples per plane
 * @param xInc     horizontal step; (i*xInc)>>16 is used as the source
 *                 position of output sample i in the tail loop below
 *
 * The asm runs CALL_MMX2_FILTER_CODE four times with REG_c = src1 and
 * REG_D = dst1, then reloads REG_c = src2 and REG_D = dst2 and runs it four
 * more times. REG_d (filter) and REG_b (filterPos) are loaded once and not
 * reloaded between the two planes. CALL_MMX2_FILTER_CODE is the macro
 * defined inside the asm block of hyscale_fast earlier in this file.
 *
 * Register handling: under PIC, REG_b is saved to and restored from ebxsave
 * instead of being listed as clobbered. On x86-64 the quadword at -8(%rsp) is
 * saved to retsave and restored afterwards; "call" pushes its return address
 * into that slot, so this presumably protects data the compiler keeps below
 * the stack pointer (red zone) - confirm.
 *
 * NOTE(review): ebxsave/retsave are written by the asm but are declared as
 * input "m" operands, and no "memory" clobber is given - confirm intended.
 */
01674 static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
01675 int dstWidth, const uint8_t *src1,
01676 const uint8_t *src2, int srcW, int xInc)
01677 {
01678 int32_t *filterPos = c->hChrFilterPos;
01679 int16_t *filter = c->hChrFilter;
01680 void *mmx2FilterCode= c->chrMmx2FilterCode;
01681 int i;
01682 #if defined(PIC)
01683 DECLARE_ALIGNED(8, uint64_t, ebxsave);
01684 #endif
01685 #if ARCH_X86_64
01686 DECLARE_ALIGNED(8, uint64_t, retsave);
01687 #endif
01688
01689 __asm__ volatile(
/* Save REG_b (PIC only) and the stack slot at -8(%rsp) (x86-64 only). */
01690 #if defined(PIC)
01691 "mov %%"REG_b", %7 \n\t"
01692 #if ARCH_X86_64
01693 "mov -8(%%rsp), %%"REG_a" \n\t"
01694 "mov %%"REG_a", %8 \n\t"
01695 #endif
01696 #else
01697 #if ARCH_X86_64
01698 "mov -8(%%rsp), %%"REG_a" \n\t"
01699 "mov %%"REG_a", %7 \n\t"
01700 #endif
01701 #endif
/* First plane: REG_c = src1, REG_D = dst1, REG_d = filter, REG_b = filterPos. */
01702 "pxor %%mm7, %%mm7 \n\t"
01703 "mov %0, %%"REG_c" \n\t"
01704 "mov %1, %%"REG_D" \n\t"
01705 "mov %2, %%"REG_d" \n\t"
01706 "mov %3, %%"REG_b" \n\t"
01707 "xor %%"REG_a", %%"REG_a" \n\t"
01708 PREFETCH" (%%"REG_c") \n\t"
01709 PREFETCH" 32(%%"REG_c") \n\t"
01710 PREFETCH" 64(%%"REG_c") \n\t"
01711
01712 CALL_MMX2_FILTER_CODE
01713 CALL_MMX2_FILTER_CODE
01714 CALL_MMX2_FILTER_CODE
01715 CALL_MMX2_FILTER_CODE
/* Second plane: only the source and destination pointers are reloaded. */
01716 "xor %%"REG_a", %%"REG_a" \n\t"
01717 "mov %5, %%"REG_c" \n\t"
01718 "mov %6, %%"REG_D" \n\t"
01719 PREFETCH" (%%"REG_c") \n\t"
01720 PREFETCH" 32(%%"REG_c") \n\t"
01721 PREFETCH" 64(%%"REG_c") \n\t"
01722
01723 CALL_MMX2_FILTER_CODE
01724 CALL_MMX2_FILTER_CODE
01725 CALL_MMX2_FILTER_CODE
01726 CALL_MMX2_FILTER_CODE
01727
/* Restore what was saved at the top of the asm block. */
01728 #if defined(PIC)
01729 "mov %7, %%"REG_b" \n\t"
01730 #if ARCH_X86_64
01731 "mov %8, %%"REG_a" \n\t"
01732 "mov %%"REG_a", -8(%%rsp) \n\t"
01733 #endif
01734 #else
01735 #if ARCH_X86_64
01736 "mov %7, %%"REG_a" \n\t"
01737 "mov %%"REG_a", -8(%%rsp) \n\t"
01738 #endif
01739 #endif
01740 :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
01741 "m" (mmx2FilterCode), "m" (src2), "m"(dst2)
01742 #if defined(PIC)
01743 ,"m" (ebxsave)
01744 #endif
01745 #if ARCH_X86_64
01746 ,"m"(retsave)
01747 #endif
01748 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
01749 #if !defined(PIC)
01750 ,"%"REG_b
01751 #endif
01752 );
01753
/* Overwrite the trailing outputs whose source position reaches the last
 * input sample with that sample scaled by 128, for both planes. */
01754 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
01755 dst1[i] = src1[srcW-1]*128;
01756 dst2[i] = src2[srcW-1]*128;
01757 }
01758 }
01759 #endif
01760
01761 static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
01762 {
01763 enum PixelFormat srcFormat = c->srcFormat,
01764 dstFormat = c->dstFormat;
01765 c->use_mmx_vfilter= 0;
01766 if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12
01767 && dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
01768 if (c->flags & SWS_ACCURATE_RND) {
01769 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
01770 switch (c->dstFormat) {
01771 case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
01772 case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
01773 case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
01774 case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
01775 case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
01776 default: break;
01777 }
01778 }
01779 } else {
01780 c->use_mmx_vfilter= 1;
01781 c->yuv2planeX = RENAME(yuv2yuvX );
01782 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
01783 switch (c->dstFormat) {
01784 case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
01785 case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
01786 case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
01787 case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
01788 case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
01789 default: break;
01790 }
01791 }
01792 }
01793 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
01794 switch (c->dstFormat) {
01795 case PIX_FMT_RGB32:
01796 c->yuv2packed1 = RENAME(yuv2rgb32_1);
01797 c->yuv2packed2 = RENAME(yuv2rgb32_2);
01798 break;
01799 case PIX_FMT_BGR24:
01800 c->yuv2packed1 = RENAME(yuv2bgr24_1);
01801 c->yuv2packed2 = RENAME(yuv2bgr24_2);
01802 break;
01803 case PIX_FMT_RGB555:
01804 c->yuv2packed1 = RENAME(yuv2rgb555_1);
01805 c->yuv2packed2 = RENAME(yuv2rgb555_2);
01806 break;
01807 case PIX_FMT_RGB565:
01808 c->yuv2packed1 = RENAME(yuv2rgb565_1);
01809 c->yuv2packed2 = RENAME(yuv2rgb565_2);
01810 break;
01811 case PIX_FMT_YUYV422:
01812 c->yuv2packed1 = RENAME(yuv2yuyv422_1);
01813 c->yuv2packed2 = RENAME(yuv2yuyv422_2);
01814 break;
01815 default:
01816 break;
01817 }
01818 }
01819 }
01820
01821 if (c->srcBpc == 8 && c->dstBpc <= 10) {
01822
01823 #if COMPILE_TEMPLATE_MMX2
01824 if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
01825 {
01826 c->hyscale_fast = RENAME(hyscale_fast);
01827 c->hcscale_fast = RENAME(hcscale_fast);
01828 } else {
01829 #endif
01830 c->hyscale_fast = NULL;
01831 c->hcscale_fast = NULL;
01832 #if COMPILE_TEMPLATE_MMX2
01833 }
01834 #endif
01835 }
01836
01837 if (!c->chrSrcHSubSample) {
01838 switch(srcFormat) {
01839 case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break;
01840 case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV); break;
01841 default: break;
01842 }
01843 }
01844
01845 switch (srcFormat) {
01846 case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break;
01847 case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break;
01848 default: break;
01849 }
01850 }