00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
/*
 * Store / prefetch instruction selection for this template instantiation.
 *
 * When COMPILE_TEMPLATE_MMX2 is non-zero, PREFETCH expands to "prefetchnta"
 * and the MOVNTQ family expands to "movntq"; otherwise PREFETCH becomes an
 * assembler comment (" # nop") and the MOVNTQ family falls back to "movq".
 *
 * The names are #undef'd first so the definitions below replace any earlier
 * ones (presumably because this template is included more than once --
 * confirm against the including file).
 *
 * MOVNTQ(a,b) forwards to REAL_MOVNTQ(a,b) so that macro arguments are
 * expanded before REAL_MOVNTQ stringifies them with '#'.
 * MOVNTQ2 is only the mnemonic string; the user appends the operands.
 */
00021 #undef REAL_MOVNTQ
00022 #undef MOVNTQ
00023 #undef MOVNTQ2
00024 #undef PREFETCH
00025
00026 #if COMPILE_TEMPLATE_MMX2
00027 #define PREFETCH "prefetchnta"
00028 #else
00029 #define PREFETCH " # nop"
00030 #endif
00031
00032 #if COMPILE_TEMPLATE_MMX2
00033 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
00034 #define MOVNTQ2 "movntq "
00035 #else
00036 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
00037 #define MOVNTQ2 "movq "
00038 #endif
00039 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
00040
/*
 * dither_8to16: load 8 dither bytes and widen them to 16-bit words in MMX
 * registers.
 *
 * srcDither  pointer to the 8 bytes that are read with a single movq.
 * rot        if non-zero, the 64-bit value is first rotated right by 24 bits
 *            (psrlq $24 OR psllq $40), i.e. by three bytes.
 *
 * There is no C-visible result. On exit mm0 is zero, mm3 holds the low four
 * bytes zero-extended to words and mm4 the high four bytes zero-extended to
 * words (interleaving with the zeroed mm0 does the extension).
 *
 * NOTE(review): the asm statements declare no outputs or clobbers; the
 * caller consumes mm3/mm4 in a separate asm statement and therefore relies
 * on the compiler not touching MMX registers in between.
 *
 * The definition is compiled only when COMPILE_TEMPLATE_MMX2 is zero and the
 * name is not wrapped in RENAME().
 */
00041 #if !COMPILE_TEMPLATE_MMX2
00042 static av_always_inline void
00043 dither_8to16(const uint8_t *srcDither, int rot)
00044 {
00045 if (rot) {
00046 __asm__ volatile("pxor %%mm0, %%mm0\n\t"
00047 "movq (%0), %%mm3\n\t"
00048 "movq %%mm3, %%mm4\n\t"
00049 "psrlq $24, %%mm3\n\t"
00050 "psllq $40, %%mm4\n\t"
00051 "por %%mm4, %%mm3\n\t"
00052 "movq %%mm3, %%mm4\n\t"
00053 "punpcklbw %%mm0, %%mm3\n\t"
00054 "punpckhbw %%mm0, %%mm4\n\t"
00055 :: "r"(srcDither)
00056 );
00057 } else {
00058 __asm__ volatile("pxor %%mm0, %%mm0\n\t"
00059 "movq (%0), %%mm3\n\t"
00060 "movq %%mm3, %%mm4\n\t"
00061 "punpcklbw %%mm0, %%mm3\n\t"
00062 "punpckhbw %%mm0, %%mm4\n\t"
00063 :: "r"(srcDither)
00064 );
00065 }
00066 }
00067 #endif
00068
/*
 * yuv2yuvX: vertical filter producing one row of 8-bit samples.
 *
 * filter      base of a table walked by the asm. Each entry is 16 bytes: a
 *             source-row pointer at +0 and a 64-bit coefficient vector at
 *             +8. The walk stops at the first entry whose pointer is NULL.
 * filterSize  not referenced in this body.
 * src         not referenced in this body (row pointers come from the table).
 * dest        output bytes; the asm is handed dest - offset and indexes it
 *             with a counter that starts at offset.
 * dstW        the counter runs (in steps of 8) while it is below
 *             dstW + offset (unsigned compare).
 * dither      8 dither bytes, widened by dither_8to16().
 * offset      initial counter value; also passed as the rotate flag of
 *             dither_8to16().
 *
 * Per 8 output samples: the accumulators mm3/mm4 start from the widened
 * dither values shifted right by 4 (kept in mm6/mm7 for reloading), each
 * table entry adds pmulhw(coefficient, 8 int16 source samples read at
 * row + 2*counter), the sums are shifted right by 3, packed to unsigned
 * bytes with saturation and stored with MOVNTQ2.
 *
 * NOTE(review): the asm starts by using mm3/mm4 left behind by the separate
 * asm statement inside dither_8to16(); nothing in the constraints expresses
 * that dependency. The store to dest is also not covered by a "memory"
 * clobber.
 */
00069 static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
00070 const int16_t **src, uint8_t *dest, int dstW,
00071 const uint8_t *dither, int offset)
00072 {
00073 dither_8to16(dither, offset);
00074 __asm__ volatile(\
00075 "psraw $4, %%mm3\n\t"
00076 "psraw $4, %%mm4\n\t"
00077 "movq %%mm3, %%mm6\n\t"
00078 "movq %%mm4, %%mm7\n\t"
00079 "movl %3, %%ecx\n\t"
00080 "mov %0, %%"REG_d" \n\t"\
00081 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00082 ".p2align 4 \n\t" \
00083 "1: \n\t"\
00084 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00085 "movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" \
00086 "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" \
00087 "add $16, %%"REG_d" \n\t"\
00088 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00089 "test %%"REG_S", %%"REG_S" \n\t"\
00090 "pmulhw %%mm0, %%mm2 \n\t"\
00091 "pmulhw %%mm0, %%mm5 \n\t"\
00092 "paddw %%mm2, %%mm3 \n\t"\
00093 "paddw %%mm5, %%mm4 \n\t"\
00094 " jnz 1b \n\t"\
00095 "psraw $3, %%mm3 \n\t"\
00096 "psraw $3, %%mm4 \n\t"\
00097 "packuswb %%mm4, %%mm3 \n\t"
00098 MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t"
00099 "add $8, %%"REG_c" \n\t"\
00100 "cmp %2, %%"REG_c" \n\t"\
00101 "movq %%mm6, %%mm3\n\t"
00102 "movq %%mm7, %%mm4\n\t"
00103 "mov %0, %%"REG_d" \n\t"\
00104 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00105 "jb 1b \n\t"\
00106 :: "g" (filter),
00107 "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
00108 : "%"REG_d, "%"REG_S, "%"REG_c
00109 );
00110 }
00111
/*
 * YSCALEYUV2PACKEDX_UV: opens an `__asm__ volatile(` statement and emits the
 * chroma half of the N-tap vertical filter. The statement is left open; it
 * must be closed by an operand list such as YSCALEYUV2PACKEDX_END.
 *
 * Operand usage: %0 is the base for CHR_MMX_FILTER_OFFSET / VROUNDER_OFFSET,
 * %6 is added to the row pointer to reach the second chroma plane.
 *
 * Code shape:
 *   - REG_a is zeroed once and serves as the byte index into the rows.
 *   - Label "1" is the per-8-pixel outer loop head; the matching "jb 1b"
 *     lives in the WRITE* macro that ends the asm body.
 *   - Label "2" walks the filter table at CHR_MMX_FILTER_OFFSET(%0):
 *     16-byte entries (row pointer at +0, coefficient qword at +8), ended by
 *     a NULL row pointer.
 *   - mm3 and mm4 start from the rounder at VROUNDER_OFFSET(%0) and collect
 *     pmulhw(coefficient, samples) for the first and second plane
 *     respectively (YSCALEYUV2RGBX later treats them as U and V).
 *
 * Uses REG_a, REG_d, REG_S and mm0, mm2..mm5.
 */
00112 #define YSCALEYUV2PACKEDX_UV \
00113 __asm__ volatile(\
00114 "xor %%"REG_a", %%"REG_a" \n\t"\
00115 ".p2align 4 \n\t"\
00116 "nop \n\t"\
00117 "1: \n\t"\
00118 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00119 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00120 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00121 "movq %%mm3, %%mm4 \n\t"\
00122 ".p2align 4 \n\t"\
00123 "2: \n\t"\
00124 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00125 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00126 "add %6, %%"REG_S" \n\t" \
00127 "movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t" \
00128 "add $16, %%"REG_d" \n\t"\
00129 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00130 "pmulhw %%mm0, %%mm2 \n\t"\
00131 "pmulhw %%mm0, %%mm5 \n\t"\
00132 "paddw %%mm2, %%mm3 \n\t"\
00133 "paddw %%mm5, %%mm4 \n\t"\
00134 "test %%"REG_S", %%"REG_S" \n\t"\
00135 " jnz 2b \n\t"\
00136
/*
 * YSCALEYUV2PACKEDX_YA: asm fragment for the N-tap vertical filter over one
 * full-resolution plane (used for luma and for alpha).
 *
 *   offset       string giving the displacement from %0 of the filter table
 *   coeff        MMX register receiving each entry's coefficient qword
 *   src1, src2   MMX scratch registers for the two source qwords
 *   dst1, dst2   MMX accumulators; on exit dst1 holds samples 0..3 and dst2
 *                samples 4..7 as 16-bit words
 *
 * The table layout is the same as in the chroma fragment: 16-byte entries
 * (row pointer at +0, coefficient at +8), terminated by a NULL pointer.
 * Rows are indexed with REG_a scaled by 2, so 16 bytes (8 words) are read
 * per entry. Both accumulators start from VROUNDER_OFFSET(%0).
 *
 * Defines a local label "2" and uses REG_d and REG_S.
 */
00137 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
00138 "lea "offset"(%0), %%"REG_d" \n\t"\
00139 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00140 "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
00141 "movq "#dst1", "#dst2" \n\t"\
00142 ".p2align 4 \n\t"\
00143 "2: \n\t"\
00144 "movq 8(%%"REG_d"), "#coeff" \n\t" \
00145 "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" \
00146 "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" \
00147 "add $16, %%"REG_d" \n\t"\
00148 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00149 "pmulhw "#coeff", "#src1" \n\t"\
00150 "pmulhw "#coeff", "#src2" \n\t"\
00151 "paddw "#src1", "#dst1" \n\t"\
00152 "paddw "#src2", "#dst2" \n\t"\
00153 "test %%"REG_S", %%"REG_S" \n\t"\
00154 " jnz 2b \n\t"\
00155
/*
 * YSCALEYUV2PACKEDX: chroma fragment followed by the luma fragment.
 * After it, mm3/mm4 hold the two filtered chroma planes and mm1/mm7 hold
 * filtered luma samples 0..3 / 4..7 (mm0, mm2, mm5 are scratch).
 * The asm statement opened by the chroma fragment is still open.
 */
00156 #define YSCALEYUV2PACKEDX \
00157 YSCALEYUV2PACKEDX_UV \
00158 YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
00159
/*
 * YSCALEYUV2PACKEDX_END: operand and clobber list that closes the asm
 * statement opened by YSCALEYUV2PACKEDX_UV / YSCALEYUV2PACKEDX_ACCURATE_UV.
 *
 * Input operands (there are no outputs):
 *   %0      &c->redDither, the base for all "...OFFSET(%0)" accesses
 *   %1..%3  dummy placeholders that keep the later operand numbers fixed
 *   %4      dest
 *   %5      dstW_reg (in memory)
 *   %6      uv_off   (in memory)
 *
 * The expanding function must therefore have locals/parameters named c,
 * dummy, dest, dstW_reg and uv_off in scope.
 *
 * NOTE(review): only REG_a, REG_d and REG_S are listed as clobbers; the asm
 * bodies also write MMX registers and memory (dest, temp slots) without a
 * "memory" clobber.
 */
00160 #define YSCALEYUV2PACKEDX_END \
00161 :: "r" (&c->redDither), \
00162 "m" (dummy), "m" (dummy), "m" (dummy),\
00163 "r" (dest), "m" (dstW_reg), "m"(uv_off) \
00164 : "%"REG_a, "%"REG_d, "%"REG_S \
00165 );
00166
/*
 * YSCALEYUV2PACKEDX_ACCURATE_UV: opens an `__asm__ volatile(` statement and
 * emits the chroma half of the N-tap vertical filter using 32-bit
 * accumulation (pmaddwd) instead of pmulhw.
 *
 * Filter entries are consumed two rows at a time: the first row pointer is
 * at +0 of the entry, the second at APCK_PTR2, the coefficient pair at
 * APCK_COEF, and the next entry starts APCK_SIZE bytes later. The walk ends
 * when the pointer at the start of the next entry is NULL.
 *
 * For each entry the samples of the two rows are interleaved
 * (punpcklwd/punpckhwd) so pmaddwd forms row1*c1 + row2*c2 per sample.
 * mm4/mm5 accumulate the first plane, mm6/mm7 the second plane (reached by
 * adding %6 to the row pointer).
 *
 * After the loop the sums are shifted right by 16, packed to signed words,
 * the rounder at VROUNDER_OFFSET(%0) is added, and the results are stored to
 * U_TEMP(%0) and V_TEMP(%0) so that every MMX register is free again.
 *
 * Defines labels "1" (outer loop head) and "2"; uses REG_a, REG_d, REG_S.
 */
00167 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
00168 __asm__ volatile(\
00169 "xor %%"REG_a", %%"REG_a" \n\t"\
00170 ".p2align 4 \n\t"\
00171 "nop \n\t"\
00172 "1: \n\t"\
00173 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00174 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00175 "pxor %%mm4, %%mm4 \n\t"\
00176 "pxor %%mm5, %%mm5 \n\t"\
00177 "pxor %%mm6, %%mm6 \n\t"\
00178 "pxor %%mm7, %%mm7 \n\t"\
00179 ".p2align 4 \n\t"\
00180 "2: \n\t"\
00181 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" \
00182 "add %6, %%"REG_S" \n\t" \
00183 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00184 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00185 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" \
00186 "movq %%mm0, %%mm3 \n\t"\
00187 "punpcklwd %%mm1, %%mm0 \n\t"\
00188 "punpckhwd %%mm1, %%mm3 \n\t"\
00189 "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" \
00190 "pmaddwd %%mm1, %%mm0 \n\t"\
00191 "pmaddwd %%mm1, %%mm3 \n\t"\
00192 "paddd %%mm0, %%mm4 \n\t"\
00193 "paddd %%mm3, %%mm5 \n\t"\
00194 "add %6, %%"REG_S" \n\t" \
00195 "movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t" \
00196 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00197 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00198 "test %%"REG_S", %%"REG_S" \n\t"\
00199 "movq %%mm2, %%mm0 \n\t"\
00200 "punpcklwd %%mm3, %%mm2 \n\t"\
00201 "punpckhwd %%mm3, %%mm0 \n\t"\
00202 "pmaddwd %%mm1, %%mm2 \n\t"\
00203 "pmaddwd %%mm1, %%mm0 \n\t"\
00204 "paddd %%mm2, %%mm6 \n\t"\
00205 "paddd %%mm0, %%mm7 \n\t"\
00206 " jnz 2b \n\t"\
00207 "psrad $16, %%mm4 \n\t"\
00208 "psrad $16, %%mm5 \n\t"\
00209 "psrad $16, %%mm6 \n\t"\
00210 "psrad $16, %%mm7 \n\t"\
00211 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00212 "packssdw %%mm5, %%mm4 \n\t"\
00213 "packssdw %%mm7, %%mm6 \n\t"\
00214 "paddw %%mm0, %%mm4 \n\t"\
00215 "paddw %%mm0, %%mm6 \n\t"\
00216 "movq %%mm4, "U_TEMP"(%0) \n\t"\
00217 "movq %%mm6, "V_TEMP"(%0) \n\t"\
00218
/*
 * YSCALEYUV2PACKEDX_ACCURATE_YA(offset): asm fragment for the 32-bit
 * accumulating N-tap vertical filter over one full-resolution plane (luma or
 * alpha), using the paired-entry table found at offset(%0) (layout given by
 * APCK_PTR2 / APCK_COEF / APCK_SIZE, NULL-terminated).
 *
 * Rows are indexed with REG_a scaled by 2, so each row contributes 8 words.
 * mm1/mm5 accumulate samples 0..3 and mm7/mm6 samples 4..7 as dwords.
 * After the loop the sums are shifted right by 16, packed to signed words
 * and the rounder at VROUNDER_OFFSET(%0) is added.
 *
 * On exit: mm1 = samples 0..3, mm7 = samples 4..7, and mm3/mm4 are reloaded
 * from U_TEMP(%0) / V_TEMP(%0). All of mm0..mm7 are overwritten, so callers
 * that need other values to survive must spill them to memory first.
 *
 * Defines a local label "2"; uses REG_d and REG_S.
 */
00219 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
00220 "lea "offset"(%0), %%"REG_d" \n\t"\
00221 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00222 "pxor %%mm1, %%mm1 \n\t"\
00223 "pxor %%mm5, %%mm5 \n\t"\
00224 "pxor %%mm7, %%mm7 \n\t"\
00225 "pxor %%mm6, %%mm6 \n\t"\
00226 ".p2align 4 \n\t"\
00227 "2: \n\t"\
00228 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" \
00229 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00230 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00231 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" \
00232 "movq %%mm0, %%mm3 \n\t"\
00233 "punpcklwd %%mm4, %%mm0 \n\t"\
00234 "punpckhwd %%mm4, %%mm3 \n\t"\
00235 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" \
00236 "pmaddwd %%mm4, %%mm0 \n\t"\
00237 "pmaddwd %%mm4, %%mm3 \n\t"\
00238 "paddd %%mm0, %%mm1 \n\t"\
00239 "paddd %%mm3, %%mm5 \n\t"\
00240 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" \
00241 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00242 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00243 "test %%"REG_S", %%"REG_S" \n\t"\
00244 "movq %%mm2, %%mm0 \n\t"\
00245 "punpcklwd %%mm3, %%mm2 \n\t"\
00246 "punpckhwd %%mm3, %%mm0 \n\t"\
00247 "pmaddwd %%mm4, %%mm2 \n\t"\
00248 "pmaddwd %%mm4, %%mm0 \n\t"\
00249 "paddd %%mm2, %%mm7 \n\t"\
00250 "paddd %%mm0, %%mm6 \n\t"\
00251 " jnz 2b \n\t"\
00252 "psrad $16, %%mm1 \n\t"\
00253 "psrad $16, %%mm5 \n\t"\
00254 "psrad $16, %%mm7 \n\t"\
00255 "psrad $16, %%mm6 \n\t"\
00256 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00257 "packssdw %%mm5, %%mm1 \n\t"\
00258 "packssdw %%mm6, %%mm7 \n\t"\
00259 "paddw %%mm0, %%mm1 \n\t"\
00260 "paddw %%mm0, %%mm7 \n\t"\
00261 "movq "U_TEMP"(%0), %%mm3 \n\t"\
00262 "movq "V_TEMP"(%0), %%mm4 \n\t"\
00263
/*
 * YSCALEYUV2PACKEDX_ACCURATE: accurate chroma fragment followed by the
 * accurate luma fragment. Leaves the same register contract as
 * YSCALEYUV2PACKEDX (chroma in mm3/mm4, luma in mm1/mm7) with the asm
 * statement still open.
 */
00264 #define YSCALEYUV2PACKEDX_ACCURATE \
00265 YSCALEYUV2PACKEDX_ACCURATE_UV \
00266 YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
00267
/*
 * YSCALEYUV2RGBX: asm fragment converting filtered YUV words to packed
 * 8-bit colour components, with all constants read relative to %0.
 *
 * In:  mm3 = first chroma plane (U_OFFSET is subtracted from it),
 *      mm4 = second chroma plane (V_OFFSET is subtracted from it),
 *      mm1 / mm7 = luma samples 0..3 / 4..7.
 *
 * Each of the four chroma words is duplicated (punpcklwd/punpckhwd with
 * itself) so it applies to two neighbouring luma samples.
 *
 * Out (8 unsigned bytes each, saturated by packuswb):
 *      mm2 = luma + UB_COEFF-scaled first chroma  (passed as "b" to WRITE*)
 *      mm4 = luma + UG_COEFF/VG_COEFF-scaled sum  (passed as "g")
 *      mm5 = luma + VR_COEFF-scaled second chroma (passed as "r")
 * mm0, mm1, mm3, mm6, mm7 are overwritten.
 */
00268 #define YSCALEYUV2RGBX \
00269 "psubw "U_OFFSET"(%0), %%mm3 \n\t" \
00270 "psubw "V_OFFSET"(%0), %%mm4 \n\t" \
00271 "movq %%mm3, %%mm2 \n\t" \
00272 "movq %%mm4, %%mm5 \n\t" \
00273 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
00274 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
00275 \
00276 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
00277 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
00278 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" \
00279 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" \
00280 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
00281 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
00282 \
00283 "paddw %%mm3, %%mm4 \n\t"\
00284 "movq %%mm2, %%mm0 \n\t"\
00285 "movq %%mm5, %%mm6 \n\t"\
00286 "movq %%mm4, %%mm3 \n\t"\
00287 "punpcklwd %%mm2, %%mm2 \n\t"\
00288 "punpcklwd %%mm5, %%mm5 \n\t"\
00289 "punpcklwd %%mm4, %%mm4 \n\t"\
00290 "paddw %%mm1, %%mm2 \n\t"\
00291 "paddw %%mm1, %%mm5 \n\t"\
00292 "paddw %%mm1, %%mm4 \n\t"\
00293 "punpckhwd %%mm0, %%mm0 \n\t"\
00294 "punpckhwd %%mm6, %%mm6 \n\t"\
00295 "punpckhwd %%mm3, %%mm3 \n\t"\
00296 "paddw %%mm7, %%mm0 \n\t"\
00297 "paddw %%mm7, %%mm6 \n\t"\
00298 "paddw %%mm7, %%mm3 \n\t"\
00299 \
00300 "packuswb %%mm0, %%mm2 \n\t"\
00301 "packuswb %%mm6, %%mm5 \n\t"\
00302 "packuswb %%mm3, %%mm4 \n\t"\
00303
/*
 * REAL_WRITEBGR32 / WRITEBGR32: asm fragment that interleaves four registers
 * of 8 bytes each into eight 32-bit pixels and closes the per-8-pixel loop.
 *
 *   dst, dstw, index   base register, width operand and pixel-index register
 *   b, g, r, a         MMX registers with the 8 bytes of each component
 *   q0, q2, q3, t      MMX scratch registers
 *
 * The unpack sequence yields the memory byte order b, g, r, a per pixel.
 * 32 bytes are stored at dst + index*4 through MOVNTQ, then index is
 * advanced by 8 and control returns to label "1" while index is below dstw
 * (unsigned compare). b and r are overwritten as well as the scratch
 * registers.
 *
 * WRITEBGR32 only forwards to REAL_WRITEBGR32 so that macro arguments are
 * expanded before they are stringified.
 */
00304 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
00305 "movq "#b", "#q2" \n\t" \
00306 "movq "#r", "#t" \n\t" \
00307 "punpcklbw "#g", "#b" \n\t" \
00308 "punpcklbw "#a", "#r" \n\t" \
00309 "punpckhbw "#g", "#q2" \n\t" \
00310 "punpckhbw "#a", "#t" \n\t" \
00311 "movq "#b", "#q0" \n\t" \
00312 "movq "#q2", "#q3" \n\t" \
00313 "punpcklwd "#r", "#q0" \n\t" \
00314 "punpckhwd "#r", "#b" \n\t" \
00315 "punpcklwd "#t", "#q2" \n\t" \
00316 "punpckhwd "#t", "#q3" \n\t" \
00317 \
00318 MOVNTQ( q0, (dst, index, 4))\
00319 MOVNTQ( b, 8(dst, index, 4))\
00320 MOVNTQ( q2, 16(dst, index, 4))\
00321 MOVNTQ( q3, 24(dst, index, 4))\
00322 \
00323 "add $8, "#index" \n\t"\
00324 "cmp "#dstw", "#index" \n\t"\
00325 " jb 1b \n\t"
00326 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
00327
/*
 * yuv2rgb32_X_ar: N-tap vertical scale + YUV->32-bit packed output using the
 * 32-bit accumulating ("accurate") filter fragments.
 *
 * Only c, dest and dstW are referenced by the C code; everything else the
 * asm needs (filter tables, coefficients, temp slots) is addressed relative
 * to &c->redDither, and the second chroma plane is reached through
 * c->uv_offx2. The remaining parameters are not referenced in this body.
 *
 * dummy, dstW_reg and uv_off must keep these names: they are consumed by
 * YSCALEYUV2PACKEDX_END.
 *
 * If CONFIG_SWSCALE_ALPHA is set and c->alpPixBuf is non-NULL the alpha
 * plane is filtered too; otherwise alpha bytes are all ones.
 */
00328 static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
00329 const int16_t **lumSrc, int lumFilterSize,
00330 const int16_t *chrFilter, const int16_t **chrUSrc,
00331 const int16_t **chrVSrc,
00332 int chrFilterSize, const int16_t **alpSrc,
00333 uint8_t *dest, int dstW, int dstY)
00334 {
00335 x86_reg dummy=0;
00336 x86_reg dstW_reg = dstW;
00337 x86_reg uv_off = c->uv_offx2;
00338
00339 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00340 YSCALEYUV2PACKEDX_ACCURATE
00341 YSCALEYUV2RGBX
/* The accurate alpha pass overwrites every MMX register, so the three colour
 * registers are spilled to the temp slots first. */
00342 "movq %%mm2, "U_TEMP"(%0) \n\t"
00343 "movq %%mm4, "V_TEMP"(%0) \n\t"
00344 "movq %%mm5, "Y_TEMP"(%0) \n\t"
00345 YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
/* The fragment above already reloaded U_TEMP -> mm3 and V_TEMP -> mm4; only
 * the third colour register still has to be fetched. */
00346 "movq "Y_TEMP"(%0), %%mm5 \n\t"
00347 "psraw $3, %%mm1 \n\t"
00348 "psraw $3, %%mm7 \n\t"
00349 "packuswb %%mm7, %%mm1 \n\t"
00350 WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
00351 YSCALEYUV2PACKEDX_END
00352 } else {
00353 YSCALEYUV2PACKEDX_ACCURATE
00354 YSCALEYUV2RGBX
/* No alpha plane: set every bit of mm7 so each alpha byte is 0xFF. */
00355 "pcmpeqd %%mm7, %%mm7 \n\t"
00356 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00357 YSCALEYUV2PACKEDX_END
00358 }
00359 }
00360
/*
 * yuv2rgb32_X: N-tap vertical scale + YUV->32-bit packed output using the
 * pmulhw-based (non-accurate) filter fragments.
 *
 * Only c, dest and dstW are referenced by the C code; the asm addresses its
 * tables relative to &c->redDither and uses c->uv_offx2 to reach the second
 * chroma plane. The remaining parameters are not referenced in this body.
 * dummy, dstW_reg and uv_off are consumed by YSCALEYUV2PACKEDX_END.
 *
 * In the alpha branch the alpha filter uses mm0/mm3/mm6 as scratch and
 * mm1/mm7 as accumulators, which leaves the colour bytes in mm2/mm4/mm5
 * untouched, so no spill is needed.
 */
00361 static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
00362 const int16_t **lumSrc, int lumFilterSize,
00363 const int16_t *chrFilter, const int16_t **chrUSrc,
00364 const int16_t **chrVSrc,
00365 int chrFilterSize, const int16_t **alpSrc,
00366 uint8_t *dest, int dstW, int dstY)
00367 {
00368 x86_reg dummy=0;
00369 x86_reg dstW_reg = dstW;
00370 x86_reg uv_off = c->uv_offx2;
00371
00372 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00373 YSCALEYUV2PACKEDX
00374 YSCALEYUV2RGBX
00375 YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
/* Scale the filtered alpha words down by 3 bits and pack them to bytes. */
00376 "psraw $3, %%mm1 \n\t"
00377 "psraw $3, %%mm7 \n\t"
00378 "packuswb %%mm7, %%mm1 \n\t"
00379 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00380 YSCALEYUV2PACKEDX_END
00381 } else {
00382 YSCALEYUV2PACKEDX
00383 YSCALEYUV2RGBX
/* No alpha plane: set every bit of mm7 so each alpha byte is 0xFF. */
00384 "pcmpeqd %%mm7, %%mm7 \n\t"
00385 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00386 YSCALEYUV2PACKEDX_END
00387 }
00388 }
00389
/*
 * REAL_WRITERGB16 / WRITERGB16: asm fragment packing 8 pixels into 16-bit
 * values and closing the per-8-pixel loop.
 *
 * In:  mm2, mm4, mm5 = the "b", "g", "r" byte vectors, mm7 = 0.
 *
 * mm2 and mm5 are masked with bF8 and mm4 with bFC (presumably 0xF8 / 0xFC
 * byte masks -- confirm against their definitions). The first component is
 * shifted right by 3, the second is widened to words and shifted left by 3,
 * the third ends up in the high byte of each word, giving a 5/6/5 bit split
 * (third component in bits 11..15, second in 5..10, first in 0..4).
 *
 * 16 bytes are stored at dst + index*2 via MOVNTQ, index is advanced by 8
 * and control returns to label "1" while index is below dstw.
 * mm1 and mm3 are used as scratch.
 */
00390 #define REAL_WRITERGB16(dst, dstw, index) \
00391 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00392 "pand "MANGLE(bFC)", %%mm4 \n\t" \
00393 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00394 "psrlq $3, %%mm2 \n\t"\
00395 \
00396 "movq %%mm2, %%mm1 \n\t"\
00397 "movq %%mm4, %%mm3 \n\t"\
00398 \
00399 "punpcklbw %%mm7, %%mm3 \n\t"\
00400 "punpcklbw %%mm5, %%mm2 \n\t"\
00401 "punpckhbw %%mm7, %%mm4 \n\t"\
00402 "punpckhbw %%mm5, %%mm1 \n\t"\
00403 \
00404 "psllq $3, %%mm3 \n\t"\
00405 "psllq $3, %%mm4 \n\t"\
00406 \
00407 "por %%mm3, %%mm2 \n\t"\
00408 "por %%mm4, %%mm1 \n\t"\
00409 \
00410 MOVNTQ(%%mm2, (dst, index, 2))\
00411 MOVNTQ(%%mm1, 8(dst, index, 2))\
00412 \
00413 "add $8, "#index" \n\t"\
00414 "cmp "#dstw", "#index" \n\t"\
00415 " jb 1b \n\t"
00416 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
00417
/*
 * yuv2rgb565_X_ar: N-tap vertical scale + YUV->16-bit (5/6/5) output using
 * the 32-bit accumulating ("accurate") filter fragments.
 *
 * Only c, dest and dstW are referenced by the C code; the asm addresses its
 * tables relative to &c->redDither. dummy, dstW_reg and uv_off are consumed
 * by YSCALEYUV2PACKEDX_END. mm7 is cleared because WRITERGB16 uses it as the
 * zero source when widening bytes.
 *
 * When DITHER1XBPP is defined, per-component dither bytes read from the
 * context are added with unsigned saturation before packing.
 */
00418 static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
00419 const int16_t **lumSrc, int lumFilterSize,
00420 const int16_t *chrFilter, const int16_t **chrUSrc,
00421 const int16_t **chrVSrc,
00422 int chrFilterSize, const int16_t **alpSrc,
00423 uint8_t *dest, int dstW, int dstY)
00424 {
00425 x86_reg dummy=0;
00426 x86_reg dstW_reg = dstW;
00427 x86_reg uv_off = c->uv_offx2;
00428
00429 YSCALEYUV2PACKEDX_ACCURATE
00430 YSCALEYUV2RGBX
00431 "pxor %%mm7, %%mm7 \n\t"
00432
00433 #ifdef DITHER1XBPP
00434 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
00435 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
00436 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
00437 #endif
00438 WRITERGB16(%4, %5, %%REGa)
00439 YSCALEYUV2PACKEDX_END
00440 }
00441
/*
 * yuv2rgb565_X: N-tap vertical scale + YUV->16-bit (5/6/5) output using the
 * pmulhw-based (non-accurate) filter fragments.
 *
 * Only c, dest and dstW are referenced by the C code; the asm addresses its
 * tables relative to &c->redDither. dummy, dstW_reg and uv_off are consumed
 * by YSCALEYUV2PACKEDX_END. mm7 is cleared because WRITERGB16 uses it as the
 * zero source when widening bytes.
 *
 * When DITHER1XBPP is defined, per-component dither bytes read from the
 * context are added with unsigned saturation before packing.
 */
00442 static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
00443 const int16_t **lumSrc, int lumFilterSize,
00444 const int16_t *chrFilter, const int16_t **chrUSrc,
00445 const int16_t **chrVSrc,
00446 int chrFilterSize, const int16_t **alpSrc,
00447 uint8_t *dest, int dstW, int dstY)
00448 {
00449 x86_reg dummy=0;
00450 x86_reg dstW_reg = dstW;
00451 x86_reg uv_off = c->uv_offx2;
00452
00453 YSCALEYUV2PACKEDX
00454 YSCALEYUV2RGBX
00455 "pxor %%mm7, %%mm7 \n\t"
00456
00457 #ifdef DITHER1XBPP
00458 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
00459 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
00460 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
00461 #endif
00462 WRITERGB16(%4, %5, %%REGa)
00463 YSCALEYUV2PACKEDX_END
00464 }
00465
/*
 * REAL_WRITERGB15 / WRITERGB15: asm fragment packing 8 pixels into 16-bit
 * values with a 5/5/5 bit split and closing the per-8-pixel loop.
 *
 * In:  mm2, mm4, mm5 = the "b", "g", "r" byte vectors, mm7 = 0.
 *
 * All three components are masked with bF8 (presumably a 0xF8 byte mask --
 * confirm against its definition). The first is shifted right by 3, the
 * third right by 1, the second is widened to words and shifted left by 2,
 * so the third component lands in bits 10..14, the second in 5..9 and the
 * first in 0..4 of each word.
 *
 * 16 bytes are stored at dst + index*2 via MOVNTQ, index is advanced by 8
 * and control returns to label "1" while index is below dstw.
 * mm1 and mm3 are used as scratch.
 */
00466 #define REAL_WRITERGB15(dst, dstw, index) \
00467 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00468 "pand "MANGLE(bF8)", %%mm4 \n\t" \
00469 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00470 "psrlq $3, %%mm2 \n\t"\
00471 "psrlq $1, %%mm5 \n\t"\
00472 \
00473 "movq %%mm2, %%mm1 \n\t"\
00474 "movq %%mm4, %%mm3 \n\t"\
00475 \
00476 "punpcklbw %%mm7, %%mm3 \n\t"\
00477 "punpcklbw %%mm5, %%mm2 \n\t"\
00478 "punpckhbw %%mm7, %%mm4 \n\t"\
00479 "punpckhbw %%mm5, %%mm1 \n\t"\
00480 \
00481 "psllq $2, %%mm3 \n\t"\
00482 "psllq $2, %%mm4 \n\t"\
00483 \
00484 "por %%mm3, %%mm2 \n\t"\
00485 "por %%mm4, %%mm1 \n\t"\
00486 \
00487 MOVNTQ(%%mm2, (dst, index, 2))\
00488 MOVNTQ(%%mm1, 8(dst, index, 2))\
00489 \
00490 "add $8, "#index" \n\t"\
00491 "cmp "#dstw", "#index" \n\t"\
00492 " jb 1b \n\t"
00493 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
00494
/*
 * yuv2rgb555_X_ar: N-tap vertical scale + YUV->16-bit (5/5/5) output using
 * the 32-bit accumulating ("accurate") filter fragments.
 *
 * Only c, dest and dstW are referenced by the C code; the asm addresses its
 * tables relative to &c->redDither. dummy, dstW_reg and uv_off are consumed
 * by YSCALEYUV2PACKEDX_END. mm7 is cleared because WRITERGB15 uses it as the
 * zero source when widening bytes.
 *
 * When DITHER1XBPP is defined, per-component dither bytes read from the
 * context are added with unsigned saturation before packing.
 */
00495 static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
00496 const int16_t **lumSrc, int lumFilterSize,
00497 const int16_t *chrFilter, const int16_t **chrUSrc,
00498 const int16_t **chrVSrc,
00499 int chrFilterSize, const int16_t **alpSrc,
00500 uint8_t *dest, int dstW, int dstY)
00501 {
00502 x86_reg dummy=0;
00503 x86_reg dstW_reg = dstW;
00504 x86_reg uv_off = c->uv_offx2;
00505
00506 YSCALEYUV2PACKEDX_ACCURATE
00507 YSCALEYUV2RGBX
00508 "pxor %%mm7, %%mm7 \n\t"
00509
00510 #ifdef DITHER1XBPP
00511 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
00512 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
00513 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
00514 #endif
00515 WRITERGB15(%4, %5, %%REGa)
00516 YSCALEYUV2PACKEDX_END
00517 }
00518
/*
 * yuv2rgb555_X: N-tap vertical scale + YUV->16-bit (5/5/5) output using the
 * pmulhw-based (non-accurate) filter fragments.
 *
 * Only c, dest and dstW are referenced by the C code; the asm addresses its
 * tables relative to &c->redDither. dummy, dstW_reg and uv_off are consumed
 * by YSCALEYUV2PACKEDX_END. mm7 is cleared because WRITERGB15 uses it as the
 * zero source when widening bytes.
 *
 * When DITHER1XBPP is defined, per-component dither bytes read from the
 * context are added with unsigned saturation before packing.
 */
00519 static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
00520 const int16_t **lumSrc, int lumFilterSize,
00521 const int16_t *chrFilter, const int16_t **chrUSrc,
00522 const int16_t **chrVSrc,
00523 int chrFilterSize, const int16_t **alpSrc,
00524 uint8_t *dest, int dstW, int dstY)
00525 {
00526 x86_reg dummy=0;
00527 x86_reg dstW_reg = dstW;
00528 x86_reg uv_off = c->uv_offx2;
00529
00530 YSCALEYUV2PACKEDX
00531 YSCALEYUV2RGBX
00532 "pxor %%mm7, %%mm7 \n\t"
00533
00534 #ifdef DITHER1XBPP
00535 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
00536 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
00537 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
00538 #endif
00539 WRITERGB15(%4, %5, %%REGa)
00540 YSCALEYUV2PACKEDX_END
00541 }
00542
/*
 * WRITEBGR24MMX: plain-MMX asm fragment writing 8 pixels as 24 packed bytes
 * and closing the per-8-pixel loop.
 *
 * In:  mm2, mm4, mm5 = the "b", "g", "r" byte vectors, mm7 = 0.
 *
 * The components are first interleaved into four registers of two 4-byte
 * pixels each (fourth byte zero, coming from mm7); the shift / punpckhdq /
 * por sequence then squeezes out the padding bytes so that three qwords
 * remain. These are stored at (dst), 8(dst) and 16(dst) via MOVNTQ.
 *
 * Unlike the 16/32-bit writers, dst is not indexed: it is a register that
 * this fragment itself advances by 24. index is advanced by 8 and control
 * returns to label "1" while index is below dstw.
 * All of mm0..mm7 are overwritten.
 */
00543 #define WRITEBGR24MMX(dst, dstw, index) \
00544 \
00545 "movq %%mm2, %%mm1 \n\t" \
00546 "movq %%mm5, %%mm6 \n\t" \
00547 "punpcklbw %%mm4, %%mm2 \n\t" \
00548 "punpcklbw %%mm7, %%mm5 \n\t" \
00549 "punpckhbw %%mm4, %%mm1 \n\t" \
00550 "punpckhbw %%mm7, %%mm6 \n\t" \
00551 "movq %%mm2, %%mm0 \n\t" \
00552 "movq %%mm1, %%mm3 \n\t" \
00553 "punpcklwd %%mm5, %%mm0 \n\t" \
00554 "punpckhwd %%mm5, %%mm2 \n\t" \
00555 "punpcklwd %%mm6, %%mm1 \n\t" \
00556 "punpckhwd %%mm6, %%mm3 \n\t" \
00557 \
00558 "movq %%mm0, %%mm4 \n\t" \
00559 "movq %%mm2, %%mm6 \n\t" \
00560 "movq %%mm1, %%mm5 \n\t" \
00561 "movq %%mm3, %%mm7 \n\t" \
00562 \
00563 "psllq $40, %%mm0 \n\t" \
00564 "psllq $40, %%mm2 \n\t" \
00565 "psllq $40, %%mm1 \n\t" \
00566 "psllq $40, %%mm3 \n\t" \
00567 \
00568 "punpckhdq %%mm4, %%mm0 \n\t" \
00569 "punpckhdq %%mm6, %%mm2 \n\t" \
00570 "punpckhdq %%mm5, %%mm1 \n\t" \
00571 "punpckhdq %%mm7, %%mm3 \n\t" \
00572 \
00573 "psrlq $8, %%mm0 \n\t" \
00574 "movq %%mm2, %%mm6 \n\t" \
00575 "psllq $40, %%mm2 \n\t" \
00576 "por %%mm2, %%mm0 \n\t" \
00577 MOVNTQ(%%mm0, (dst))\
00578 \
00579 "psrlq $24, %%mm6 \n\t" \
00580 "movq %%mm1, %%mm5 \n\t" \
00581 "psllq $24, %%mm1 \n\t" \
00582 "por %%mm1, %%mm6 \n\t" \
00583 MOVNTQ(%%mm6, 8(dst))\
00584 \
00585 "psrlq $40, %%mm5 \n\t" \
00586 "psllq $8, %%mm3 \n\t" \
00587 "por %%mm3, %%mm5 \n\t" \
00588 MOVNTQ(%%mm5, 16(dst))\
00589 \
00590 "add $24, "#dst" \n\t"\
00591 \
00592 "add $8, "#index" \n\t"\
00593 "cmp "#dstw", "#index" \n\t"\
00594 " jb 1b \n\t"
00595
/*
 * WRITEBGR24MMX2: MMX2 asm fragment writing 8 pixels as 24 packed bytes and
 * closing the per-8-pixel loop. Same interface as WRITEBGR24MMX.
 *
 * In:  mm2, mm4, mm5 = the "b", "g", "r" byte vectors.
 *
 * Each of the three output qwords is assembled by replicating the needed
 * source words with pshufw, masking out the wanted byte lanes with the
 * constants ff_M24A / ff_M24B / ff_M24C, and OR-ing the three components
 * together. mm4 is shifted right by 8 after the first qword so its bytes
 * line up for the remaining two.
 *
 * The qwords are stored at (dst), 8(dst), 16(dst) via MOVNTQ; dst is a
 * register advanced by 24 here, index is advanced by 8 and control returns
 * to label "1" while index is below dstw.
 * mm0, mm1, mm3, mm4, mm6, mm7 are overwritten.
 */
00596 #define WRITEBGR24MMX2(dst, dstw, index) \
00597 \
00598 "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
00599 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
00600 "pshufw $0x50, %%mm2, %%mm1 \n\t" \
00601 "pshufw $0x50, %%mm4, %%mm3 \n\t" \
00602 "pshufw $0x00, %%mm5, %%mm6 \n\t" \
00603 \
00604 "pand %%mm0, %%mm1 \n\t" \
00605 "pand %%mm0, %%mm3 \n\t" \
00606 "pand %%mm7, %%mm6 \n\t" \
00607 \
00608 "psllq $8, %%mm3 \n\t" \
00609 "por %%mm1, %%mm6 \n\t"\
00610 "por %%mm3, %%mm6 \n\t"\
00611 MOVNTQ(%%mm6, (dst))\
00612 \
00613 "psrlq $8, %%mm4 \n\t" \
00614 "pshufw $0xA5, %%mm2, %%mm1 \n\t" \
00615 "pshufw $0x55, %%mm4, %%mm3 \n\t" \
00616 "pshufw $0xA5, %%mm5, %%mm6 \n\t" \
00617 \
00618 "pand "MANGLE(ff_M24B)", %%mm1 \n\t" \
00619 "pand %%mm7, %%mm3 \n\t" \
00620 "pand %%mm0, %%mm6 \n\t" \
00621 \
00622 "por %%mm1, %%mm3 \n\t" \
00623 "por %%mm3, %%mm6 \n\t"\
00624 MOVNTQ(%%mm6, 8(dst))\
00625 \
00626 "pshufw $0xFF, %%mm2, %%mm1 \n\t" \
00627 "pshufw $0xFA, %%mm4, %%mm3 \n\t" \
00628 "pshufw $0xFA, %%mm5, %%mm6 \n\t" \
00629 \
00630 "pand %%mm7, %%mm1 \n\t" \
00631 "pand %%mm0, %%mm3 \n\t" \
00632 "pand "MANGLE(ff_M24B)", %%mm6 \n\t" \
00633 \
00634 "por %%mm1, %%mm3 \n\t"\
00635 "por %%mm3, %%mm6 \n\t"\
00636 MOVNTQ(%%mm6, 16(dst))\
00637 \
00638 "add $24, "#dst" \n\t"\
00639 \
00640 "add $8, "#index" \n\t"\
00641 "cmp "#dstw", "#index" \n\t"\
00642 " jb 1b \n\t"
00643
/*
 * WRITEBGR24: selects the 24-bit writer for this template instantiation --
 * the pshufw-based WRITEBGR24MMX2 when COMPILE_TEMPLATE_MMX2 is non-zero,
 * the plain-MMX WRITEBGR24MMX otherwise. Any earlier definition is removed
 * first.
 */
00644 #if COMPILE_TEMPLATE_MMX2
00645 #undef WRITEBGR24
00646 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
00647 #else
00648 #undef WRITEBGR24
00649 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
00650 #endif
00651
/*
 * yuv2bgr24_X_ar: N-tap vertical scale + YUV->24-bit packed output using the
 * 32-bit accumulating ("accurate") filter fragments.
 *
 * Only c, dest and dstW are referenced by the C code; the asm addresses its
 * tables relative to &c->redDither.
 *
 * The 24-bit writer needs a byte pointer rather than a scaled index, so
 * REG_c is set to dest + 3*REG_a on every pass of the outer loop. For that
 * reason the operand list is written out here (same operands as
 * YSCALEYUV2PACKEDX_END) with REG_c added to the clobbers.
 */
00652 static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
00653 const int16_t **lumSrc, int lumFilterSize,
00654 const int16_t *chrFilter, const int16_t **chrUSrc,
00655 const int16_t **chrVSrc,
00656 int chrFilterSize, const int16_t **alpSrc,
00657 uint8_t *dest, int dstW, int dstY)
00658 {
00659 x86_reg dummy=0;
00660 x86_reg dstW_reg = dstW;
00661 x86_reg uv_off = c->uv_offx2;
00662
00663 YSCALEYUV2PACKEDX_ACCURATE
00664 YSCALEYUV2RGBX
00665 "pxor %%mm7, %%mm7 \n\t"
/* REG_c = dest + 3 * pixel index: byte address of the current output pixel. */
00666 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t"
00667 "add %4, %%"REG_c" \n\t"
00668 WRITEBGR24(%%REGc, %5, %%REGa)
00669 :: "r" (&c->redDither),
00670 "m" (dummy), "m" (dummy), "m" (dummy),
00671 "r" (dest), "m" (dstW_reg), "m"(uv_off)
00672 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
00673 );
00674 }
00675
/*
 * yuv2bgr24_X: N-tap vertical scale + YUV->24-bit packed output using the
 * pmulhw-based (non-accurate) filter fragments.
 *
 * Only c, dest and dstW are referenced by the C code; the asm addresses its
 * tables relative to &c->redDither.
 *
 * The 24-bit writer needs a byte pointer rather than a scaled index, so
 * REG_c is set to dest + 3*REG_a on every pass of the outer loop. For that
 * reason the operand list is written out here (same operands as
 * YSCALEYUV2PACKEDX_END) with REG_c added to the clobbers.
 */
00676 static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
00677 const int16_t **lumSrc, int lumFilterSize,
00678 const int16_t *chrFilter, const int16_t **chrUSrc,
00679 const int16_t **chrVSrc,
00680 int chrFilterSize, const int16_t **alpSrc,
00681 uint8_t *dest, int dstW, int dstY)
00682 {
00683 x86_reg dummy=0;
00684 x86_reg dstW_reg = dstW;
00685 x86_reg uv_off = c->uv_offx2;
00686
00687 YSCALEYUV2PACKEDX
00688 YSCALEYUV2RGBX
00689 "pxor %%mm7, %%mm7 \n\t"
/* REG_c = dest + 3 * pixel index: byte address of the current output pixel. */
00690 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t"
00691 "add %4, %%"REG_c" \n\t"
00692 WRITEBGR24(%%REGc, %5, %%REGa)
00693 :: "r" (&c->redDither),
00694 "m" (dummy), "m" (dummy), "m" (dummy),
00695 "r" (dest), "m" (dstW_reg), "m"(uv_off)
00696 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
00697 );
00698 }
00699
/*
 * REAL_WRITEYUY2 / WRITEYUY2: asm fragment interleaving filtered samples
 * into packed 4:2:2 bytes and closing the per-8-pixel loop.
 *
 * In (16-bit words): mm3 = first chroma plane, mm4 = second chroma plane,
 *                    mm1 / mm7 = luma samples 0..3 / 4..7.
 *
 * The words are packed to unsigned bytes with saturation, the two chroma
 * planes are byte-interleaved, and the result is interleaved with the luma
 * bytes, giving the byte order Y0 C1 Y1 C2 Y2 C1' Y3 C2' ... in memory.
 *
 * 16 bytes are stored at dst + index*2 via MOVNTQ, index is advanced by 8
 * and control returns to label "1" while index is below dstw.
 */
00700 #define REAL_WRITEYUY2(dst, dstw, index) \
00701 "packuswb %%mm3, %%mm3 \n\t"\
00702 "packuswb %%mm4, %%mm4 \n\t"\
00703 "packuswb %%mm7, %%mm1 \n\t"\
00704 "punpcklbw %%mm4, %%mm3 \n\t"\
00705 "movq %%mm1, %%mm7 \n\t"\
00706 "punpcklbw %%mm3, %%mm1 \n\t"\
00707 "punpckhbw %%mm3, %%mm7 \n\t"\
00708 \
00709 MOVNTQ(%%mm1, (dst, index, 2))\
00710 MOVNTQ(%%mm7, 8(dst, index, 2))\
00711 \
00712 "add $8, "#index" \n\t"\
00713 "cmp "#dstw", "#index" \n\t"\
00714 " jb 1b \n\t"
00715 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
00716
/*
 * yuv2yuyv422_X_ar: N-tap vertical scale to packed 4:2:2 output using the
 * 32-bit accumulating ("accurate") filter fragments. No colour conversion is
 * done: the filtered luma and chroma words are shifted right by 3 and handed
 * straight to WRITEYUY2.
 *
 * Only c, dest and dstW are referenced by the C code; dummy, dstW_reg and
 * uv_off are consumed by YSCALEYUV2PACKEDX_END.
 */
00717 static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
00718 const int16_t **lumSrc, int lumFilterSize,
00719 const int16_t *chrFilter, const int16_t **chrUSrc,
00720 const int16_t **chrVSrc,
00721 int chrFilterSize, const int16_t **alpSrc,
00722 uint8_t *dest, int dstW, int dstY)
00723 {
00724 x86_reg dummy=0;
00725 x86_reg dstW_reg = dstW;
00726 x86_reg uv_off = c->uv_offx2;
00727
00728 YSCALEYUV2PACKEDX_ACCURATE
00729
00730 "psraw $3, %%mm3 \n\t"
00731 "psraw $3, %%mm4 \n\t"
00732 "psraw $3, %%mm1 \n\t"
00733 "psraw $3, %%mm7 \n\t"
00734 WRITEYUY2(%4, %5, %%REGa)
00735 YSCALEYUV2PACKEDX_END
00736 }
00737
/*
 * yuv2yuyv422_X: N-tap vertical scale to packed 4:2:2 output using the
 * pmulhw-based (non-accurate) filter fragments. No colour conversion is
 * done: the filtered luma and chroma words are shifted right by 3 and handed
 * straight to WRITEYUY2.
 *
 * Only c, dest and dstW are referenced by the C code; dummy, dstW_reg and
 * uv_off are consumed by YSCALEYUV2PACKEDX_END.
 */
00738 static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
00739 const int16_t **lumSrc, int lumFilterSize,
00740 const int16_t *chrFilter, const int16_t **chrUSrc,
00741 const int16_t **chrVSrc,
00742 int chrFilterSize, const int16_t **alpSrc,
00743 uint8_t *dest, int dstW, int dstY)
00744 {
00745 x86_reg dummy=0;
00746 x86_reg dstW_reg = dstW;
00747 x86_reg uv_off = c->uv_offx2;
00748
00749 YSCALEYUV2PACKEDX
00750
00751 "psraw $3, %%mm3 \n\t"
00752 "psraw $3, %%mm4 \n\t"
00753 "psraw $3, %%mm1 \n\t"
00754 "psraw $3, %%mm7 \n\t"
00755 WRITEYUY2(%4, %5, %%REGa)
00756 YSCALEYUV2PACKEDX_END
00757 }
00758
/*
 * REAL_YSCALEYUV2RGB_UV(index, c): asm fragment for the chroma part of the
 * two-row (bilinear) vertical blend followed by the start of the colour
 * conversion.
 *
 *   index   register used as byte index; zeroed here, before label "1"
 *   c       register holding the base for the "...OFFSET(c)" constants
 *
 * Operands %2 and %3 are the two chroma rows. The second chroma plane is
 * read from the same rows after temporarily adding UV_OFFx2(c) to index.
 *
 * Blend per sample: ((row %2 - row %3) * coefficient >> 16) + (row %3 >> 4),
 * with the coefficient taken from CHR_MMX_FILTER_OFFSET+8(c).
 *
 * On exit: mm2 / mm5 = blended first / second chroma plane minus
 * U_OFFSET / V_OFFSET; mm3 / mm4 = the same values multiplied (pmulhw) by
 * UG_COEFF / VG_COEFF. mm0 is scratch.
 */
00759 #define REAL_YSCALEYUV2RGB_UV(index, c) \
00760 "xor "#index", "#index" \n\t"\
00761 ".p2align 4 \n\t"\
00762 "1: \n\t"\
00763 "movq (%2, "#index"), %%mm2 \n\t" \
00764 "movq (%3, "#index"), %%mm3 \n\t" \
00765 "add "UV_OFFx2"("#c"), "#index" \n\t" \
00766 "movq (%2, "#index"), %%mm5 \n\t" \
00767 "movq (%3, "#index"), %%mm4 \n\t" \
00768 "sub "UV_OFFx2"("#c"), "#index" \n\t" \
00769 "psubw %%mm3, %%mm2 \n\t" \
00770 "psubw %%mm4, %%mm5 \n\t" \
00771 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00772 "pmulhw %%mm0, %%mm2 \n\t" \
00773 "pmulhw %%mm0, %%mm5 \n\t" \
00774 "psraw $4, %%mm3 \n\t" \
00775 "psraw $4, %%mm4 \n\t" \
00776 "paddw %%mm2, %%mm3 \n\t" \
00777 "paddw %%mm5, %%mm4 \n\t" \
00778 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00779 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00780 "movq %%mm3, %%mm2 \n\t" \
00781 "movq %%mm4, %%mm5 \n\t" \
00782 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00783 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00784 \
00785
/*
 * REAL_YSCALEYUV2RGB_YA(index, c, b1, b2): asm fragment for the two-row
 * vertical blend of a full-resolution plane (luma, or alpha when invoked
 * with the alpha rows).
 *
 *   index    byte-index register (scaled by 2 here: 8 words per pass)
 *   c        base register for LUM_MMX_FILTER_OFFSET
 *   b1, b2   registers holding the two row pointers
 *
 * Blend per sample: ((b1 - b2) * coefficient >> 16) + (b2 >> 4), with the
 * coefficient read from LUM_MMX_FILTER_OFFSET+8(c).
 *
 * On exit: mm1 = samples 0..3, mm7 = samples 4..7. mm0 and mm6 are scratch.
 */
00786 #define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
00787 "movq ("#b1", "#index", 2), %%mm0 \n\t" \
00788 "movq ("#b2", "#index", 2), %%mm1 \n\t" \
00789 "movq 8("#b1", "#index", 2), %%mm6 \n\t" \
00790 "movq 8("#b2", "#index", 2), %%mm7 \n\t" \
00791 "psubw %%mm1, %%mm0 \n\t" \
00792 "psubw %%mm7, %%mm6 \n\t" \
00793 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
00794 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
00795 "psraw $4, %%mm1 \n\t" \
00796 "psraw $4, %%mm7 \n\t" \
00797 "paddw %%mm0, %%mm1 \n\t" \
00798 "paddw %%mm6, %%mm7 \n\t" \
00799
/*
 * REAL_YSCALEYUV2RGB_COEFF(c): asm fragment finishing the colour conversion
 * started by REAL_YSCALEYUV2RGB_UV, with constants read relative to c.
 *
 * In:  mm2 / mm5 = offset-corrected first / second chroma plane,
 *      mm3 / mm4 = the same planes already scaled by UG_COEFF / VG_COEFF,
 *      mm1 / mm7 = luma samples 0..3 / 4..7.
 *
 * The chroma terms are scaled by UB_COEFF / VR_COEFF, luma has Y_OFFSET
 * subtracted and is scaled by Y_COEFF, each chroma word is duplicated to
 * cover two luma samples, and the sums are packed to unsigned bytes.
 *
 * Out: mm2, mm4, mm5 = the byte vectors the WRITE* macros take as
 * "b", "g", "r". mm0, mm1, mm3, mm6, mm7 are overwritten.
 */
00800 #define REAL_YSCALEYUV2RGB_COEFF(c) \
00801 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00802 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00803 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00804 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00805 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00806 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00807 \
00808 "paddw %%mm3, %%mm4 \n\t"\
00809 "movq %%mm2, %%mm0 \n\t"\
00810 "movq %%mm5, %%mm6 \n\t"\
00811 "movq %%mm4, %%mm3 \n\t"\
00812 "punpcklwd %%mm2, %%mm2 \n\t"\
00813 "punpcklwd %%mm5, %%mm5 \n\t"\
00814 "punpcklwd %%mm4, %%mm4 \n\t"\
00815 "paddw %%mm1, %%mm2 \n\t"\
00816 "paddw %%mm1, %%mm5 \n\t"\
00817 "paddw %%mm1, %%mm4 \n\t"\
00818 "punpckhwd %%mm0, %%mm0 \n\t"\
00819 "punpckhwd %%mm6, %%mm6 \n\t"\
00820 "punpckhwd %%mm3, %%mm3 \n\t"\
00821 "paddw %%mm7, %%mm0 \n\t"\
00822 "paddw %%mm7, %%mm6 \n\t"\
00823 "paddw %%mm7, %%mm3 \n\t"\
00824 \
00825 "packuswb %%mm0, %%mm2 \n\t"\
00826 "packuswb %%mm6, %%mm5 \n\t"\
00827 "packuswb %%mm3, %%mm4 \n\t"\
00828
/*
 * YSCALEYUV2RGB_YA forwards to REAL_YSCALEYUV2RGB_YA so that its arguments
 * are macro-expanded before being stringified.
 *
 * YSCALEYUV2RGB(index, c) is the complete two-row blend + colour conversion:
 * chroma blend, luma blend from operands %0 / %1, then the coefficient
 * stage. It leaves the packed "b", "g", "r" bytes in mm2, mm4, mm5 and has
 * defined the loop label "1".
 */
00829 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
00830
00831 #define YSCALEYUV2RGB(index, c) \
00832 REAL_YSCALEYUV2RGB_UV(index, c) \
00833 REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
00834 REAL_YSCALEYUV2RGB_COEFF(c)
00835
/*
 * yuv2rgb32_2: two-row (bilinear) vertical blend + YUV->32-bit packed
 * output.
 *
 * buf, ubuf, abuf   pairs of row pointers; element 0 and 1 are the two rows
 *                   being blended.
 * vbuf, dstW, yalpha, uvalpha, y
 *                   not referenced in this body. The second chroma plane is
 *                   read from the ubuf rows at an extra offset, and the
 *                   blend coefficients and width are read from the context
 *                   relative to &c->redDither.
 *
 * Operands common to all asm statements: %0/%1 = luma rows (REG_c/REG_d),
 * %2/%3 = chroma rows (REG_S/REG_D), %4 = dest, %5 = &c->redDither (REG_a).
 *
 * NOTE(review): the loop bound is the memory operand 8280(%5), a hard-coded
 * displacement from &c->redDither -- presumably the context's destination
 * width field; confirm it matches the struct layout.
 *
 * Three code paths:
 *   - alpha, x86-64: r8 is the index, the alpha rows are extra register
 *     operands %6/%7.
 *   - alpha, 32-bit: no spare registers, so the alpha row pointers are
 *     parked in c->u_temp / c->v_temp and temporarily swapped into %0/%1.
 *   - no alpha: alpha bytes are forced to 0xFF.
 * The paths that use REG_b and REG_BP save and restore them by hand and do
 * not list them as clobbers.
 */
00839 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
00840 const int16_t *ubuf[2], const int16_t *vbuf[2],
00841 const int16_t *abuf[2], uint8_t *dest,
00842 int dstW, int yalpha, int uvalpha, int y)
00843 {
00844 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00845 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00846
00847 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00848 const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
00849 #if ARCH_X86_64
00850 __asm__ volatile(
00851 YSCALEYUV2RGB(%%r8, %5)
00852 YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
00853 "psraw $3, %%mm1 \n\t"
00854 "psraw $3, %%mm7 \n\t"
00855 "packuswb %%mm7, %%mm1 \n\t"
00856 WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00857 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
00858 "a" (&c->redDither),
00859 "r" (abuf0), "r" (abuf1)
00860 : "%r8"
00861 );
00862 #else
/* Park the alpha row pointers in the context so the asm can fetch them
 * without needing two more register operands. */
00863 c->u_temp=(intptr_t)abuf0;
00864 c->v_temp=(intptr_t)abuf1;
00865 __asm__ volatile(
/* Save REG_b in the context, then use it for dest; REG_BP becomes the index. */
00866 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00867 "mov %4, %%"REG_b" \n\t"
00868 "push %%"REG_BP" \n\t"
00869 YSCALEYUV2RGB(%%REGBP, %5)
/* Borrow the luma row registers for the alpha rows; they are popped back
 * before the loop branch so the next pass sees the luma pointers again. */
00870 "push %0 \n\t"
00871 "push %1 \n\t"
00872 "mov "U_TEMP"(%5), %0 \n\t"
00873 "mov "V_TEMP"(%5), %1 \n\t"
00874 YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
00875 "psraw $3, %%mm1 \n\t"
00876 "psraw $3, %%mm7 \n\t"
00877 "packuswb %%mm7, %%mm1 \n\t"
00878 "pop %1 \n\t"
00879 "pop %0 \n\t"
00880 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00881 "pop %%"REG_BP" \n\t"
00882 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00883 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00884 "a" (&c->redDither)
00885 );
00886 #endif
00887 } else {
00888 __asm__ volatile(
/* Save REG_b in the context, then use it for dest; REG_BP becomes the index. */
00889 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00890 "mov %4, %%"REG_b" \n\t"
00891 "push %%"REG_BP" \n\t"
00892 YSCALEYUV2RGB(%%REGBP, %5)
/* No alpha plane: set every bit of mm7 so each alpha byte is 0xFF. */
00893 "pcmpeqd %%mm7, %%mm7 \n\t"
00894 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00895 "pop %%"REG_BP" \n\t"
00896 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00897 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00898 "a" (&c->redDither)
00899 );
00900 }
00901 }
00902
/*
 * yuv2bgr24_2: two-row (bilinear) vertical blend + YUV->24-bit packed
 * output.
 *
 * Only c, buf, ubuf and dest are referenced by the C code; vbuf, abuf, dstW,
 * yalpha, uvalpha and y are not. Coefficients and the loop bound are read by
 * the asm relative to &c->redDither (operand %5).
 *
 * REG_b is saved in the context at ESP_OFFSET(%5) and then carries the
 * output byte pointer (WRITEBGR24 advances it); REG_BP is pushed and used as
 * the pixel index. Both are restored before the statement ends.
 *
 * NOTE(review): 8280(%5) is a hard-coded displacement used as the loop
 * bound -- presumably the context's destination width; confirm.
 */
00903 static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
00904 const int16_t *ubuf[2], const int16_t *vbuf[2],
00905 const int16_t *abuf[2], uint8_t *dest,
00906 int dstW, int yalpha, int uvalpha, int y)
00907 {
00908 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00909 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00910
00911
00912 __asm__ volatile(
00913 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00914 "mov %4, %%"REG_b" \n\t"
00915 "push %%"REG_BP" \n\t"
00916 YSCALEYUV2RGB(%%REGBP, %5)
00917 "pxor %%mm7, %%mm7 \n\t"
00918 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
00919 "pop %%"REG_BP" \n\t"
00920 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00921 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00922 "a" (&c->redDither)
00923 );
00924 }
00925
/*
 * yuv2rgb555_2: two-row (bilinear) vertical blend + YUV->16-bit (5/5/5)
 * output.
 *
 * Only c, buf, ubuf and dest are referenced by the C code; vbuf, abuf, dstW,
 * yalpha, uvalpha and y are not. Coefficients, optional dither values and
 * the loop bound are read by the asm relative to &c->redDither (operand %5).
 *
 * REG_b is saved in the context at ESP_OFFSET(%5) and then holds dest;
 * REG_BP is pushed and used as the pixel index. Both are restored before the
 * statement ends. mm7 is cleared because WRITERGB15 needs a zero register.
 *
 * NOTE(review): 8280(%5) is a hard-coded displacement used as the loop
 * bound -- presumably the context's destination width; confirm.
 */
00926 static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
00927 const int16_t *ubuf[2], const int16_t *vbuf[2],
00928 const int16_t *abuf[2], uint8_t *dest,
00929 int dstW, int yalpha, int uvalpha, int y)
00930 {
00931 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00932 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00933
00934
00935 __asm__ volatile(
00936 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00937 "mov %4, %%"REG_b" \n\t"
00938 "push %%"REG_BP" \n\t"
00939 YSCALEYUV2RGB(%%REGBP, %5)
00940 "pxor %%mm7, %%mm7 \n\t"
00941
00942 #ifdef DITHER1XBPP
00943 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
00944 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
00945 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
00946 #endif
00947 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
00948 "pop %%"REG_BP" \n\t"
00949 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00950 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00951 "a" (&c->redDither)
00952 );
00953 }
00954
/*
 * yuv2rgb565_2: two-row (bilinear) vertical blend + YUV->16-bit (5/6/5)
 * output.
 *
 * Only c, buf, ubuf and dest are referenced by the C code; vbuf, abuf, dstW,
 * yalpha, uvalpha and y are not. Coefficients, optional dither values and
 * the loop bound are read by the asm relative to &c->redDither (operand %5).
 *
 * REG_b is saved in the context at ESP_OFFSET(%5) and then holds dest;
 * REG_BP is pushed and used as the pixel index. Both are restored before the
 * statement ends. mm7 is cleared because WRITERGB16 needs a zero register.
 *
 * NOTE(review): 8280(%5) is a hard-coded displacement used as the loop
 * bound -- presumably the context's destination width; confirm.
 */
00955 static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
00956 const int16_t *ubuf[2], const int16_t *vbuf[2],
00957 const int16_t *abuf[2], uint8_t *dest,
00958 int dstW, int yalpha, int uvalpha, int y)
00959 {
00960 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00961 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00962
00963
00964 __asm__ volatile(
00965 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00966 "mov %4, %%"REG_b" \n\t"
00967 "push %%"REG_BP" \n\t"
00968 YSCALEYUV2RGB(%%REGBP, %5)
00969 "pxor %%mm7, %%mm7 \n\t"
00970
00971 #ifdef DITHER1XBPP
00972 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
00973 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
00974 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
00975 #endif
00976 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
00977 "pop %%"REG_BP" \n\t"
00978 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00979 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00980 "a" (&c->redDither)
00981 );
00982 }
00983
/*
 * REAL_YSCALEYUV2PACKED / YSCALEYUV2PACKED(index, c): asm fragment for the
 * two-row (bilinear) vertical blend of luma and chroma without colour
 * conversion, for packed YUV output.
 *
 * Before the loop, the chroma and luma blend coefficients stored at
 * CHR_MMX_FILTER_OFFSET+8(c) and LUM_MMX_FILTER_OFFSET+8(c) are shifted
 * right by 3 and written back to the same locations.
 * NOTE(review): this modifies the stored coefficients in place, so the
 * fragment appears to rely on the caller refreshing them before every
 * invocation -- confirm.
 *
 * Loop body (label "1", index zeroed beforehand):
 *   chroma rows are operands %2/%3, the second plane being read at
 *   index + UV_OFFx2(c); luma rows are operands %0/%1 at index*2.
 *   Blend per sample: ((row0 - row1) * coefficient >> 16) + (row1 >> 7).
 *
 * On exit: mm3 / mm4 = first / second chroma plane, mm1 / mm7 = luma
 * samples 0..3 / 4..7, which is the input layout WRITEYUY2 expects.
 *
 * YSCALEYUV2PACKED forwards to the REAL_ variant so that its arguments are
 * macro-expanded before being stringified.
 */
00984 #define REAL_YSCALEYUV2PACKED(index, c) \
00985 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00986 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
00987 "psraw $3, %%mm0 \n\t"\
00988 "psraw $3, %%mm1 \n\t"\
00989 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00990 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00991 "xor "#index", "#index" \n\t"\
00992 ".p2align 4 \n\t"\
00993 "1: \n\t"\
00994 "movq (%2, "#index"), %%mm2 \n\t" \
00995 "movq (%3, "#index"), %%mm3 \n\t" \
00996 "add "UV_OFFx2"("#c"), "#index" \n\t" \
00997 "movq (%2, "#index"), %%mm5 \n\t" \
00998 "movq (%3, "#index"), %%mm4 \n\t" \
00999 "sub "UV_OFFx2"("#c"), "#index" \n\t" \
01000 "psubw %%mm3, %%mm2 \n\t" \
01001 "psubw %%mm4, %%mm5 \n\t" \
01002 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
01003 "pmulhw %%mm0, %%mm2 \n\t" \
01004 "pmulhw %%mm0, %%mm5 \n\t" \
01005 "psraw $7, %%mm3 \n\t" \
01006 "psraw $7, %%mm4 \n\t" \
01007 "paddw %%mm2, %%mm3 \n\t" \
01008 "paddw %%mm5, %%mm4 \n\t" \
01009 "movq (%0, "#index", 2), %%mm0 \n\t" \
01010 "movq (%1, "#index", 2), %%mm1 \n\t" \
01011 "movq 8(%0, "#index", 2), %%mm6 \n\t" \
01012 "movq 8(%1, "#index", 2), %%mm7 \n\t" \
01013 "psubw %%mm1, %%mm0 \n\t" \
01014 "psubw %%mm7, %%mm6 \n\t" \
01015 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
01016 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
01017 "psraw $7, %%mm1 \n\t" \
01018 "psraw $7, %%mm7 \n\t" \
01019 "paddw %%mm0, %%mm1 \n\t" \
01020 "paddw %%mm6, %%mm7 \n\t" \
01021
01022 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
01023
/*
 * Packed-output writer that takes two luma lines (buf[0], buf[1]) and two
 * chroma lines (ubuf[0], ubuf[1]), combines them with YSCALEYUV2PACKED and
 * stores the result through WRITEYUY2.
 *
 * Asm operands: %0 = buf0, %1 = buf1, %2 = ubuf0, %3 = ubuf1,
 * %4 = dest (memory operand), %5 = &c->redDither (context base address).
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced directly here.
 *
 * NOTE(review): YSCALEYUV2PACKED rewrites two filter words inside the
 * context on every call. WRITEYUY2 is defined outside this view; 8280(%5)
 * is presumably the dstW field relative to redDither -- confirm.
 */
01024 static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
01025 const int16_t *ubuf[2], const int16_t *vbuf[2],
01026 const int16_t *abuf[2], uint8_t *dest,
01027 int dstW, int yalpha, int uvalpha, int y)
01028 {
01029 const int16_t *buf0 = buf[0], *buf1 = buf[1],
01030 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
01031
01032
01033 __asm__ volatile(
/* REG_b is saved in the context so that it can carry dest during the asm */
01034 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01035 "mov %4, %%"REG_b" \n\t"
/* REG_BP is used as the index register inside the macros */
01036 "push %%"REG_BP" \n\t"
01037 YSCALEYUV2PACKED(%%REGBP, %5)
01038 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
/* restore the two registers borrowed above */
01039 "pop %%"REG_BP" \n\t"
01040 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01041 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01042 "a" (&c->redDither)
01043 );
01044 }
01045
/*
 * Loop head for the single-line RGB writers: turns 8 luma samples read from
 * %0 and 4 + 4 chroma samples read from %2 into three registers of 8
 * unsigned bytes each. Operands %1 and %3 are not read by this macro.
 *
 * index: register used as the running index (zeroed here).
 * c:     register holding the base address of the *_OFFSET / *_COEFF fields.
 *
 * Steps per pass:
 *  - chroma: (%2,index) and the same address with the value stored at
 *    UV_OFFx2(c) temporarily added to the index; >> 4 (arithmetic), then
 *    U_OFFSET / V_OFFSET are subtracted.
 *  - luma: (%0,index,2) and 8(%0,index,2); >> 4, minus Y_OFFSET, times
 *    Y_COEFF (pmulhw keeps the high 16 bits of each signed product).
 *  - chroma terms: mm2 = U * UB_COEFF, mm5 = V * VR_COEFF,
 *    mm4 = U * UG_COEFF + V * VG_COEFF.
 *  - punpcklwd / punpckhwd of a register with itself duplicates every
 *    chroma word, so one chroma sample is added to two adjacent luma samples.
 *  - packuswb saturates the sums to unsigned bytes.
 * Results: mm2, mm4 and mm5 hold the UB-, UG/VG- and VR-derived channels
 * (presumably blue, green and red). mm0, mm1, mm3, mm6, mm7 are clobbered.
 *
 * The label "1:" is opened here; the closing backward jump is not part of
 * this macro.
 */
01046 #define REAL_YSCALEYUV2RGB1(index, c) \
01047 "xor "#index", "#index" \n\t"\
01048 ".p2align 4 \n\t"\
01049 "1: \n\t"\
01050 "movq (%2, "#index"), %%mm3 \n\t" \
01051 "add "UV_OFFx2"("#c"), "#index" \n\t" \
01052 "movq (%2, "#index"), %%mm4 \n\t" \
01053 "sub "UV_OFFx2"("#c"), "#index" \n\t" \
01054 "psraw $4, %%mm3 \n\t" \
01055 "psraw $4, %%mm4 \n\t" \
01056 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
01057 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
01058 "movq %%mm3, %%mm2 \n\t" \
01059 "movq %%mm4, %%mm5 \n\t" \
01060 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
01061 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
01062 \
01063 "movq (%0, "#index", 2), %%mm1 \n\t" \
01064 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01065 "psraw $4, %%mm1 \n\t" \
01066 "psraw $4, %%mm7 \n\t" \
01067 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
01068 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
01069 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
01070 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
01071 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
01072 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
01073 \
01074 "paddw %%mm3, %%mm4 \n\t"\
01075 "movq %%mm2, %%mm0 \n\t"\
01076 "movq %%mm5, %%mm6 \n\t"\
01077 "movq %%mm4, %%mm3 \n\t"\
01078 "punpcklwd %%mm2, %%mm2 \n\t"\
01079 "punpcklwd %%mm5, %%mm5 \n\t"\
01080 "punpcklwd %%mm4, %%mm4 \n\t"\
01081 "paddw %%mm1, %%mm2 \n\t"\
01082 "paddw %%mm1, %%mm5 \n\t"\
01083 "paddw %%mm1, %%mm4 \n\t"\
01084 "punpckhwd %%mm0, %%mm0 \n\t"\
01085 "punpckhwd %%mm6, %%mm6 \n\t"\
01086 "punpckhwd %%mm3, %%mm3 \n\t"\
01087 "paddw %%mm7, %%mm0 \n\t"\
01088 "paddw %%mm7, %%mm6 \n\t"\
01089 "paddw %%mm7, %%mm3 \n\t"\
01090 \
01091 "packuswb %%mm0, %%mm2 \n\t"\
01092 "packuswb %%mm6, %%mm5 \n\t"\
01093 "packuswb %%mm3, %%mm4 \n\t"\
01094
/* Extra expansion level so the arguments are macro-expanded before the
 * REAL_ variant stringifies them with '#'. */
01095 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
01096
01097
/*
 * Variant of the single-luma-line RGB loop head that takes chroma from two
 * lines: the words from %2 and %3 are added and the sum is shifted right
 * logically (psrlw) by 5 before U_OFFSET / V_OFFSET are subtracted. That
 * is one more bit than a 4-bit shift of a single line would use.
 * Luma still comes from %0 only; %1 is not read by this macro.
 *
 * index: register used as the running index (zeroed here).
 * c:     register holding the base address of the *_OFFSET / *_COEFF fields.
 *
 * After the chroma sum the sequence is: luma >> 4, minus Y_OFFSET, times
 * Y_COEFF; mm2 = U * UB_COEFF, mm5 = V * VR_COEFF,
 * mm4 = U * UG_COEFF + V * VG_COEFF (pmulhw = high 16 bits of the signed
 * product); each chroma word is duplicated with punpck{l,h}wd so it applies
 * to two adjacent luma samples; packuswb saturates to unsigned bytes.
 * Results: mm2, mm4 and mm5 hold the UB-, UG/VG- and VR-derived channels
 * (presumably blue, green and red). mm0, mm1, mm3, mm6, mm7 are clobbered.
 *
 * NOTE(review): the 16-bit paddw of the two chroma lines can wrap if both
 * inputs are large -- check the value range the callers guarantee.
 * The label "1:" is opened here; the closing backward jump is not part of
 * this macro.
 */
01098 #define REAL_YSCALEYUV2RGB1b(index, c) \
01099 "xor "#index", "#index" \n\t"\
01100 ".p2align 4 \n\t"\
01101 "1: \n\t"\
01102 "movq (%2, "#index"), %%mm2 \n\t" \
01103 "movq (%3, "#index"), %%mm3 \n\t" \
01104 "add "UV_OFFx2"("#c"), "#index" \n\t" \
01105 "movq (%2, "#index"), %%mm5 \n\t" \
01106 "movq (%3, "#index"), %%mm4 \n\t" \
01107 "sub "UV_OFFx2"("#c"), "#index" \n\t" \
01108 "paddw %%mm2, %%mm3 \n\t" \
01109 "paddw %%mm5, %%mm4 \n\t" \
01110 "psrlw $5, %%mm3 \n\t" \
01111 "psrlw $5, %%mm4 \n\t" \
01112 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
01113 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
01114 "movq %%mm3, %%mm2 \n\t" \
01115 "movq %%mm4, %%mm5 \n\t" \
01116 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
01117 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
01118 \
01119 "movq (%0, "#index", 2), %%mm1 \n\t" \
01120 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01121 "psraw $4, %%mm1 \n\t" \
01122 "psraw $4, %%mm7 \n\t" \
01123 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
01124 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
01125 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
01126 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
01127 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
01128 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
01129 \
01130 "paddw %%mm3, %%mm4 \n\t"\
01131 "movq %%mm2, %%mm0 \n\t"\
01132 "movq %%mm5, %%mm6 \n\t"\
01133 "movq %%mm4, %%mm3 \n\t"\
01134 "punpcklwd %%mm2, %%mm2 \n\t"\
01135 "punpcklwd %%mm5, %%mm5 \n\t"\
01136 "punpcklwd %%mm4, %%mm4 \n\t"\
01137 "paddw %%mm1, %%mm2 \n\t"\
01138 "paddw %%mm1, %%mm5 \n\t"\
01139 "paddw %%mm1, %%mm4 \n\t"\
01140 "punpckhwd %%mm0, %%mm0 \n\t"\
01141 "punpckhwd %%mm6, %%mm6 \n\t"\
01142 "punpckhwd %%mm3, %%mm3 \n\t"\
01143 "paddw %%mm7, %%mm0 \n\t"\
01144 "paddw %%mm7, %%mm6 \n\t"\
01145 "paddw %%mm7, %%mm3 \n\t"\
01146 \
01147 "packuswb %%mm0, %%mm2 \n\t"\
01148 "packuswb %%mm6, %%mm5 \n\t"\
01149 "packuswb %%mm3, %%mm4 \n\t"\
01150
/* Extra expansion level so the arguments are macro-expanded before the
 * REAL_ variant stringifies them with '#'. */
01151 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
01152
/*
 * Loads 8 consecutive 16-bit samples from asm operand %1 at (index * 2),
 * shifts each right arithmetically by 7 and packs them with unsigned
 * saturation into the 8 bytes of mm7. mm1 is used as scratch.
 * The callers in this file pass the alpha line as operand %1.
 */
01153 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
01154 "movq (%1, "#index", 2), %%mm7 \n\t" \
01155 "movq 8(%1, "#index", 2), %%mm1 \n\t" \
01156 "psraw $7, %%mm7 \n\t" \
01157 "psraw $7, %%mm1 \n\t" \
01158 "packuswb %%mm1, %%mm7 \n\t"
/* Extra expansion level so the argument is macro-expanded before being
 * stringified with '#'. */
01159 #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
01160
/*
 * Packed-output writer for one luma line (buf0), stored through WRITEBGR32.
 *
 * Four asm variants are selected by two conditions:
 *  - uvalpha < 2048: YSCALEYUV2RGB1, which reads chroma from ubuf0 only;
 *    otherwise YSCALEYUV2RGB1b, which adds ubuf0 and ubuf1.
 *  - CONFIG_SWSCALE_ALPHA && c->alpPixBuf: operand %1 is abuf0 and mm7 is
 *    filled from it by YSCALEYUV2RGB1_ALPHA; otherwise mm7 is set to all
 *    ones with pcmpeqd. mm7 is then handed to WRITEBGR32 after mm2/mm4/mm5
 *    (presumably as the alpha channel -- WRITEBGR32 is outside this view).
 *
 * Asm operands: %0 = buf0, %1 = abuf0 or buf1, %2 = ubuf0, %3 = ubuf1,
 * %4 = dest (memory operand), %5 = &c->redDither (context base address).
 * buf1 is just an alias of buf0 used to fill operand %1 when no alpha is
 * read. bguf, dstW and y are not referenced directly here.
 * In every variant REG_b is saved to ESP_OFFSET(%5) and reused for dest,
 * and REG_BP is pushed because it serves as the index register.
 *
 * NOTE(review): 8280(%5) is presumably the dstW field relative to
 * redDither -- confirm.
 */
01164 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
01165 const int16_t *ubuf[2], const int16_t *bguf[2],
01166 const int16_t *abuf0, uint8_t *dest,
01167 int dstW, int uvalpha, int y)
01168 {
01169 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
01170 const int16_t *buf1= buf0;
01171
/* single chroma line variants */
01172 if (uvalpha < 2048) {
01173 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
01174 __asm__ volatile(
01175 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01176 "mov %4, %%"REG_b" \n\t"
01177 "push %%"REG_BP" \n\t"
01178 YSCALEYUV2RGB1(%%REGBP, %5)
01179 YSCALEYUV2RGB1_ALPHA(%%REGBP)
01180 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01181 "pop %%"REG_BP" \n\t"
01182 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01183 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01184 "a" (&c->redDither)
01185 );
01186 } else {
01187 __asm__ volatile(
01188 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01189 "mov %4, %%"REG_b" \n\t"
01190 "push %%"REG_BP" \n\t"
01191 YSCALEYUV2RGB1(%%REGBP, %5)
/* mm7 = all bits set */
01192 "pcmpeqd %%mm7, %%mm7 \n\t"
01193 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01194 "pop %%"REG_BP" \n\t"
01195 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01196 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01197 "a" (&c->redDither)
01198 );
01199 }
/* two chroma line variants */
01200 } else {
01201 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
01202 __asm__ volatile(
01203 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01204 "mov %4, %%"REG_b" \n\t"
01205 "push %%"REG_BP" \n\t"
01206 YSCALEYUV2RGB1b(%%REGBP, %5)
01207 YSCALEYUV2RGB1_ALPHA(%%REGBP)
01208 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01209 "pop %%"REG_BP" \n\t"
01210 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01211 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01212 "a" (&c->redDither)
01213 );
01214 } else {
01215 __asm__ volatile(
01216 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01217 "mov %4, %%"REG_b" \n\t"
01218 "push %%"REG_BP" \n\t"
01219 YSCALEYUV2RGB1b(%%REGBP, %5)
/* mm7 = all bits set */
01220 "pcmpeqd %%mm7, %%mm7 \n\t"
01221 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01222 "pop %%"REG_BP" \n\t"
01223 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01224 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01225 "a" (&c->redDither)
01226 );
01227 }
01228 }
01229 }
01230
/*
 * Packed-output writer for one luma line (buf0), stored through WRITEBGR24.
 *
 * uvalpha < 2048 selects YSCALEYUV2RGB1 (chroma read from ubuf0 only);
 * otherwise YSCALEYUV2RGB1b is used, which adds ubuf0 and ubuf1.
 * mm7 is zeroed before WRITEBGR24 in both variants.
 *
 * Asm operands: %0 = buf0, %1 = buf1 (alias of buf0), %2 = ubuf0,
 * %3 = ubuf1, %4 = dest (memory operand), %5 = &c->redDither (context base
 * address). bguf, abuf0, dstW and y are not referenced directly here.
 * REG_b is saved to ESP_OFFSET(%5) and reused for dest; REG_BP is pushed
 * because it serves as the index register.
 *
 * NOTE(review): WRITEBGR24 is defined outside this view; 8280(%5) is
 * presumably the dstW field relative to redDither -- confirm.
 */
01231 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
01232 const int16_t *ubuf[2], const int16_t *bguf[2],
01233 const int16_t *abuf0, uint8_t *dest,
01234 int dstW, int uvalpha, int y)
01235 {
01236 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
01237 const int16_t *buf1= buf0;
01238
/* single chroma line variant */
01239 if (uvalpha < 2048) {
01240 __asm__ volatile(
01241 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01242 "mov %4, %%"REG_b" \n\t"
01243 "push %%"REG_BP" \n\t"
01244 YSCALEYUV2RGB1(%%REGBP, %5)
01245 "pxor %%mm7, %%mm7 \n\t"
01246 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01247 "pop %%"REG_BP" \n\t"
01248 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01249 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01250 "a" (&c->redDither)
01251 );
/* two chroma line variant */
01252 } else {
01253 __asm__ volatile(
01254 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01255 "mov %4, %%"REG_b" \n\t"
01256 "push %%"REG_BP" \n\t"
01257 YSCALEYUV2RGB1b(%%REGBP, %5)
01258 "pxor %%mm7, %%mm7 \n\t"
01259 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01260 "pop %%"REG_BP" \n\t"
01261 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01262 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01263 "a" (&c->redDither)
01264 );
01265 }
01266 }
01267
/*
 * Packed-output writer for one luma line (buf0), stored through WRITERGB15.
 *
 * uvalpha < 2048 selects YSCALEYUV2RGB1 (chroma read from ubuf0 only);
 * otherwise YSCALEYUV2RGB1b is used, which adds ubuf0 and ubuf1.
 * When DITHER1XBPP is defined, BLUE_DITHER / GREEN_DITHER / RED_DITHER from
 * the context are added with unsigned byte saturation to mm2 / mm4 / mm5
 * before the store.
 *
 * Asm operands: %0 = buf0, %1 = buf1 (alias of buf0), %2 = ubuf0,
 * %3 = ubuf1, %4 = dest (memory operand), %5 = &c->redDither (context base
 * address). bguf, abuf0, dstW and y are not referenced directly here.
 * REG_b is saved to ESP_OFFSET(%5) and reused for dest; REG_BP is pushed
 * because it serves as the index register.
 *
 * NOTE(review): WRITERGB15 is defined outside this view; 8280(%5) is
 * presumably the dstW field relative to redDither -- confirm.
 */
01268 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
01269 const int16_t *ubuf[2], const int16_t *bguf[2],
01270 const int16_t *abuf0, uint8_t *dest,
01271 int dstW, int uvalpha, int y)
01272 {
01273 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
01274 const int16_t *buf1= buf0;
01275
/* single chroma line variant */
01276 if (uvalpha < 2048) {
01277 __asm__ volatile(
01278 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01279 "mov %4, %%"REG_b" \n\t"
01280 "push %%"REG_BP" \n\t"
01281 YSCALEYUV2RGB1(%%REGBP, %5)
01282 "pxor %%mm7, %%mm7 \n\t"
01283
01284 #ifdef DITHER1XBPP
01285 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01286 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01287 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01288 #endif
01289 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01290 "pop %%"REG_BP" \n\t"
01291 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01292 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01293 "a" (&c->redDither)
01294 );
/* two chroma line variant */
01295 } else {
01296 __asm__ volatile(
01297 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01298 "mov %4, %%"REG_b" \n\t"
01299 "push %%"REG_BP" \n\t"
01300 YSCALEYUV2RGB1b(%%REGBP, %5)
01301 "pxor %%mm7, %%mm7 \n\t"
01302
01303 #ifdef DITHER1XBPP
01304 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01305 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01306 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01307 #endif
01308 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01309 "pop %%"REG_BP" \n\t"
01310 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01311 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01312 "a" (&c->redDither)
01313 );
01314 }
01315 }
01316
/*
 * Packed-output writer for one luma line (buf0), stored through WRITERGB16.
 *
 * uvalpha < 2048 selects YSCALEYUV2RGB1 (chroma read from ubuf0 only);
 * otherwise YSCALEYUV2RGB1b is used, which adds ubuf0 and ubuf1.
 * When DITHER1XBPP is defined, BLUE_DITHER / GREEN_DITHER / RED_DITHER from
 * the context are added with unsigned byte saturation to mm2 / mm4 / mm5
 * before the store.
 *
 * Asm operands: %0 = buf0, %1 = buf1 (alias of buf0), %2 = ubuf0,
 * %3 = ubuf1, %4 = dest (memory operand), %5 = &c->redDither (context base
 * address). bguf, abuf0, dstW and y are not referenced directly here.
 * REG_b is saved to ESP_OFFSET(%5) and reused for dest; REG_BP is pushed
 * because it serves as the index register.
 *
 * NOTE(review): WRITERGB16 is defined outside this view; 8280(%5) is
 * presumably the dstW field relative to redDither -- confirm.
 */
01317 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
01318 const int16_t *ubuf[2], const int16_t *bguf[2],
01319 const int16_t *abuf0, uint8_t *dest,
01320 int dstW, int uvalpha, int y)
01321 {
01322 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
01323 const int16_t *buf1= buf0;
01324
/* single chroma line variant */
01325 if (uvalpha < 2048) {
01326 __asm__ volatile(
01327 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01328 "mov %4, %%"REG_b" \n\t"
01329 "push %%"REG_BP" \n\t"
01330 YSCALEYUV2RGB1(%%REGBP, %5)
01331 "pxor %%mm7, %%mm7 \n\t"
01332
01333 #ifdef DITHER1XBPP
01334 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01335 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01336 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01337 #endif
01338 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01339 "pop %%"REG_BP" \n\t"
01340 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01341 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01342 "a" (&c->redDither)
01343 );
/* two chroma line variant */
01344 } else {
01345 __asm__ volatile(
01346 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01347 "mov %4, %%"REG_b" \n\t"
01348 "push %%"REG_BP" \n\t"
01349 YSCALEYUV2RGB1b(%%REGBP, %5)
01350 "pxor %%mm7, %%mm7 \n\t"
01351
01352 #ifdef DITHER1XBPP
01353 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01354 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01355 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01356 #endif
01357 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01358 "pop %%"REG_BP" \n\t"
01359 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01360 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01361 "a" (&c->redDither)
01362 );
01363 }
01364 }
01365
/*
 * Loop head for the single-line packed writers: loads samples and only
 * rescales them with an arithmetic right shift by 7.
 *
 * index: register used as the running index (zeroed here).
 * c:     register holding the base address for UV_OFFx2.
 * Reads chroma from %2 (and again with the value stored at UV_OFFx2(c)
 * temporarily added to the index) and 8 luma samples from %0; %1 and %3
 * are not read.
 * Results: mm3 = first chroma plane, mm4 = second chroma plane,
 * mm1 / mm7 = luma 0..3 / 4..7.
 * The label "1:" is opened here; the closing backward jump is not part of
 * this macro.
 */
01366 #define REAL_YSCALEYUV2PACKED1(index, c) \
01367 "xor "#index", "#index" \n\t"\
01368 ".p2align 4 \n\t"\
01369 "1: \n\t"\
01370 "movq (%2, "#index"), %%mm3 \n\t" \
01371 "add "UV_OFFx2"("#c"), "#index" \n\t" \
01372 "movq (%2, "#index"), %%mm4 \n\t" \
01373 "sub "UV_OFFx2"("#c"), "#index" \n\t" \
01374 "psraw $7, %%mm3 \n\t" \
01375 "psraw $7, %%mm4 \n\t" \
01376 "movq (%0, "#index", 2), %%mm1 \n\t" \
01377 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01378 "psraw $7, %%mm1 \n\t" \
01379 "psraw $7, %%mm7 \n\t" \
01380
/* Extra expansion level so the arguments are macro-expanded before the
 * REAL_ variant stringifies them with '#'. */
01381 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
01382
/*
 * Variant of the single-luma-line packed loop head that takes chroma from
 * two lines: the words from %2 and %3 are added and the sum is shifted
 * right logically (psrlw) by 8. Luma is read from %0 only and shifted
 * right arithmetically by 7; %1 is not read.
 *
 * index: register used as the running index (zeroed here).
 * c:     register holding the base address for UV_OFFx2.
 * Results: mm3 = first chroma plane, mm4 = second chroma plane,
 * mm1 / mm7 = luma 0..3 / 4..7. mm2 and mm5 are scratch.
 * The label "1:" is opened here; the closing backward jump is not part of
 * this macro.
 */
01383 #define REAL_YSCALEYUV2PACKED1b(index, c) \
01384 "xor "#index", "#index" \n\t"\
01385 ".p2align 4 \n\t"\
01386 "1: \n\t"\
01387 "movq (%2, "#index"), %%mm2 \n\t" \
01388 "movq (%3, "#index"), %%mm3 \n\t" \
01389 "add "UV_OFFx2"("#c"), "#index" \n\t" \
01390 "movq (%2, "#index"), %%mm5 \n\t" \
01391 "movq (%3, "#index"), %%mm4 \n\t" \
01392 "sub "UV_OFFx2"("#c"), "#index" \n\t" \
01393 "paddw %%mm2, %%mm3 \n\t" \
01394 "paddw %%mm5, %%mm4 \n\t" \
01395 "psrlw $8, %%mm3 \n\t" \
01396 "psrlw $8, %%mm4 \n\t" \
01397 "movq (%0, "#index", 2), %%mm1 \n\t" \
01398 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01399 "psraw $7, %%mm1 \n\t" \
01400 "psraw $7, %%mm7 \n\t"
/* Extra expansion level so the arguments are macro-expanded before the
 * REAL_ variant stringifies them with '#'. */
01401 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
01402
/*
 * Packed-output writer for one luma line (buf0), stored through WRITEYUY2.
 *
 * uvalpha < 2048 selects YSCALEYUV2PACKED1 (chroma read from ubuf0 only);
 * otherwise YSCALEYUV2PACKED1b is used, which adds ubuf0 and ubuf1.
 *
 * Asm operands: %0 = buf0, %1 = buf1 (alias of buf0), %2 = ubuf0,
 * %3 = ubuf1, %4 = dest (memory operand), %5 = &c->redDither (context base
 * address). bguf, abuf0, dstW and y are not referenced directly here.
 * REG_b is saved to ESP_OFFSET(%5) and reused for dest; REG_BP is pushed
 * because it serves as the index register.
 *
 * NOTE(review): WRITEYUY2 is defined outside this view; 8280(%5) is
 * presumably the dstW field relative to redDither -- confirm.
 */
01403 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
01404 const int16_t *ubuf[2], const int16_t *bguf[2],
01405 const int16_t *abuf0, uint8_t *dest,
01406 int dstW, int uvalpha, int y)
01407 {
01408 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
01409 const int16_t *buf1= buf0;
01410
/* single chroma line variant */
01411 if (uvalpha < 2048) {
01412 __asm__ volatile(
01413 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01414 "mov %4, %%"REG_b" \n\t"
01415 "push %%"REG_BP" \n\t"
01416 YSCALEYUV2PACKED1(%%REGBP, %5)
01417 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01418 "pop %%"REG_BP" \n\t"
01419 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01420 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01421 "a" (&c->redDither)
01422 );
/* two chroma line variant */
01423 } else {
01424 __asm__ volatile(
01425 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01426 "mov %4, %%"REG_b" \n\t"
01427 "push %%"REG_BP" \n\t"
01428 YSCALEYUV2PACKED1b(%%REGBP, %5)
01429 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01430 "pop %%"REG_BP" \n\t"
01431 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01432 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01433 "a" (&c->redDither)
01434 );
01435 }
01436 }
01437
01438 #if !COMPILE_TEMPLATE_MMX2
01439
01440
/*
 * Extracts one byte out of every 2-byte pair of src into dst.
 *
 * Every 16-bit word of the input is ANDed with bm01010101 (presumably 0x00FF
 * per word, i.e. the first byte of each pair is kept -- confirm against the
 * constant's definition) and the words are packed down to bytes.
 *
 * dst:   receives width bytes.
 * src:   2 * width input bytes are consumed.
 * width: number of output bytes; unused1, unused2 and unused are ignored.
 *
 * The loop index runs from -width up towards zero in steps of 8, with both
 * pointers pre-advanced to their end, so the loop ends once the index is no
 * longer negative.
 * NOTE(review): the body runs at least once and always handles 8 outputs
 * per pass, so for widths that are not a multiple of 8 (or are 0) it reads
 * and writes past width -- buffers presumably carry padding; confirm.
 */
01441 static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
01442 int width, uint32_t *unused)
01443 {
01444 __asm__ volatile(
01445 "movq "MANGLE(bm01010101)", %%mm2 \n\t"
01446 "mov %0, %%"REG_a" \n\t"
01447 "1: \n\t"
/* 16 input bytes -> mask -> 8 output bytes */
01448 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
01449 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
01450 "pand %%mm2, %%mm0 \n\t"
01451 "pand %%mm2, %%mm1 \n\t"
01452 "packuswb %%mm1, %%mm0 \n\t"
01453 "movq %%mm0, (%2, %%"REG_a") \n\t"
01454 "add $8, %%"REG_a" \n\t"
01455 " js 1b \n\t"
01456 : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
01457 : "%"REG_a
01458 );
01459 }
01460
/*
 * Splits the second byte of every 2-byte pair of src1 alternately into dstU
 * and dstV.
 *
 * Per pass 16 input bytes are loaded; psrlw $8 keeps the high byte of each
 * 16-bit word and packuswb gathers those into 8 bytes. Of these, the bytes
 * selected by the bm01010101 mask go to dstU (operand %2) and the bytes
 * obtained by a further psrlw $8 go to dstV (operand %3), 4 bytes each.
 * NOTE(review): bm01010101 is presumably 0x00FF per 16-bit word -- confirm.
 *
 * dstU, dstV: receive width bytes each.
 * src1:       4 * width input bytes are consumed.
 * unused1 and unused are ignored; src2 is only used by the assert, which
 * requires it to equal src1.
 *
 * The loop index runs from -width up towards zero in steps of 4.
 * NOTE(review): the body runs at least once and handles 4 outputs per pass,
 * so widths that are not a multiple of 4 overrun width -- confirm padding.
 */
01461 static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV,
01462 const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
01463 int width, uint32_t *unused)
01464 {
01465 __asm__ volatile(
01466 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
01467 "mov %0, %%"REG_a" \n\t"
01468 "1: \n\t"
01469 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
01470 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
/* keep the high byte of every word, then pack to 8 bytes */
01471 "psrlw $8, %%mm0 \n\t"
01472 "psrlw $8, %%mm1 \n\t"
01473 "packuswb %%mm1, %%mm0 \n\t"
/* separate the two interleaved byte streams */
01474 "movq %%mm0, %%mm1 \n\t"
01475 "psrlw $8, %%mm0 \n\t"
01476 "pand %%mm4, %%mm1 \n\t"
01477 "packuswb %%mm0, %%mm0 \n\t"
01478 "packuswb %%mm1, %%mm1 \n\t"
01479 "movd %%mm0, (%3, %%"REG_a") \n\t"
01480 "movd %%mm1, (%2, %%"REG_a") \n\t"
01481 "add $4, %%"REG_a" \n\t"
01482 " js 1b \n\t"
01483 : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
01484 : "%"REG_a
01485 );
01486 assert(src1 == src2);
01487 }
01488
01489
01490
/*
 * Extracts the high byte of every 16-bit word (the second byte of each
 * 2-byte pair) of src into dst: psrlw $8 followed by packuswb.
 *
 * dst:   receives width bytes.
 * src:   2 * width input bytes are consumed.
 * width: number of output bytes; unused1, unused2 and unused are ignored.
 *
 * The loop index runs from -width up towards zero in steps of 8, with both
 * pointers pre-advanced to their end.
 * NOTE(review): the body runs at least once and always handles 8 outputs
 * per pass, so widths that are not a multiple of 8 overrun width -- buffers
 * presumably carry padding; confirm.
 */
01491 static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
01492 int width, uint32_t *unused)
01493 {
01494 __asm__ volatile(
01495 "mov %0, %%"REG_a" \n\t"
01496 "1: \n\t"
/* 16 input bytes -> high byte of each word -> 8 output bytes */
01497 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
01498 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
01499 "psrlw $8, %%mm0 \n\t"
01500 "psrlw $8, %%mm1 \n\t"
01501 "packuswb %%mm1, %%mm0 \n\t"
01502 "movq %%mm0, (%2, %%"REG_a") \n\t"
01503 "add $8, %%"REG_a" \n\t"
01504 " js 1b \n\t"
01505 : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
01506 : "%"REG_a
01507 );
01508 }
01509
/*
 * Splits the bytes selected by bm01010101 from every 2-byte pair of src1
 * alternately into dstU and dstV.
 *
 * Per pass 16 input bytes are loaded, masked with bm01010101 and packed to
 * 8 bytes. Of these, the bytes selected by the mask again go to dstU
 * (operand %2) and the bytes obtained by psrlw $8 go to dstV (operand %3),
 * 4 bytes each.
 * NOTE(review): bm01010101 is presumably 0x00FF per 16-bit word, i.e. the
 * first byte of each pair is used -- confirm.
 *
 * dstU, dstV: receive width bytes each.
 * src1:       4 * width input bytes are consumed.
 * unused1 and unused are ignored; src2 is only used by the assert, which
 * requires it to equal src1.
 *
 * The loop index runs from -width up towards zero in steps of 4.
 * NOTE(review): the body runs at least once and handles 4 outputs per pass,
 * so widths that are not a multiple of 4 overrun width -- confirm padding.
 */
01510 static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV,
01511 const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
01512 int width, uint32_t *unused)
01513 {
01514 __asm__ volatile(
01515 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
01516 "mov %0, %%"REG_a" \n\t"
01517 "1: \n\t"
01518 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
01519 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
/* keep the masked byte of every word, then pack to 8 bytes */
01520 "pand %%mm4, %%mm0 \n\t"
01521 "pand %%mm4, %%mm1 \n\t"
01522 "packuswb %%mm1, %%mm0 \n\t"
/* separate the two interleaved byte streams */
01523 "movq %%mm0, %%mm1 \n\t"
01524 "psrlw $8, %%mm0 \n\t"
01525 "pand %%mm4, %%mm1 \n\t"
01526 "packuswb %%mm0, %%mm0 \n\t"
01527 "packuswb %%mm1, %%mm1 \n\t"
01528 "movd %%mm0, (%3, %%"REG_a") \n\t"
01529 "movd %%mm1, (%2, %%"REG_a") \n\t"
01530 "add $4, %%"REG_a" \n\t"
01531 " js 1b \n\t"
01532 : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
01533 : "%"REG_a
01534 );
01535 assert(src1 == src2);
01536 }
01537
/*
 * De-interleaves a stream of 2-byte pairs from src into two byte planes.
 *
 * Per pass 16 input bytes are loaded. The bytes selected by bm01010101 are
 * packed into 8 bytes for dst1, and the high byte of every 16-bit word
 * (psrlw $8) is packed into 8 bytes for dst2.
 * NOTE(review): bm01010101 is presumably 0x00FF per 16-bit word, so dst1
 * would get the first byte of each pair -- confirm.
 *
 * dst1, dst2: receive width bytes each.
 * src:        2 * width input bytes are consumed.
 *
 * The loop index runs from -width up towards zero in steps of 8.
 * NOTE(review): the body runs at least once and handles 8 outputs per pass,
 * so widths that are not a multiple of 8 overrun width -- confirm padding.
 */
01538 static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
01539 const uint8_t *src, int width)
01540 {
01541 __asm__ volatile(
01542 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
01543 "mov %0, %%"REG_a" \n\t"
01544 "1: \n\t"
01545 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
01546 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
01547 "movq %%mm0, %%mm2 \n\t"
01548 "movq %%mm1, %%mm3 \n\t"
/* mm0/mm1: masked bytes for dst1; mm2/mm3: high bytes for dst2 */
01549 "pand %%mm4, %%mm0 \n\t"
01550 "pand %%mm4, %%mm1 \n\t"
01551 "psrlw $8, %%mm2 \n\t"
01552 "psrlw $8, %%mm3 \n\t"
01553 "packuswb %%mm1, %%mm0 \n\t"
01554 "packuswb %%mm3, %%mm2 \n\t"
01555 "movq %%mm0, (%2, %%"REG_a") \n\t"
01556 "movq %%mm2, (%3, %%"REG_a") \n\t"
01557 "add $8, %%"REG_a" \n\t"
01558 " js 1b \n\t"
01559 : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width)
01560 : "%"REG_a
01561 );
01562 }
01563
/*
 * Chroma reader wrapper: de-interleaves src1 with nvXXtoUV, sending the
 * first output plane to dstU and the second to dstV.
 * unused1, src2 and unused are ignored.
 */
01564 static void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
01565 const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
01566 int width, uint32_t *unused)
01567 {
01568 RENAME(nvXXtoUV)(dstU, dstV, src1, width);
01569 }
01570
/*
 * Chroma reader wrapper: de-interleaves src1 with nvXXtoUV with the two
 * destinations swapped, i.e. the first output plane goes to dstV and the
 * second to dstU. unused1, src2 and unused are ignored.
 */
01571 static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
01572 const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
01573 int width, uint32_t *unused)
01574 {
01575 RENAME(nvXXtoUV)(dstV, dstU, src1, width);
01576 }
01577 #endif
01578
/*
 * Converts packed 3-byte pixels from src into 16-bit samples in dst.
 *
 * dst:       receives width int16_t values.
 * src:       3 * width input bytes are consumed (12 bytes per pass).
 * srcFormat: PIX_FMT_BGR24 selects the ff_bgr24toY1Coeff / ff_bgr24toY2Coeff
 *            pair; any other value selects the ff_rgb24toY*Coeff pair.
 *
 * The first asm statement only loads the coefficients into mm5 / mm6; the
 * main loop in the second statement relies on those registers still holding
 * them (neither statement declares MMX outputs or clobbers).
 *
 * Per pass: four overlapping 4-byte loads at offsets 0, 2, 6 and 8 are
 * widened to words, multiplied-and-added (pmaddwd) with mm5 / mm6, summed,
 * offset by ff_bgr24toYOffset, shifted right arithmetically by 9 and packed
 * with signed saturation to four 16-bit results.
 *
 * The index starts at -2 * width (a byte offset into dst) and advances by 8
 * until it is no longer negative.
 * NOTE(review): the body runs at least once and produces 4 samples per pass,
 * so widths that are not a multiple of 4 overrun width -- confirm padding.
 */
01579 static av_always_inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src,
01580 int width, enum PixelFormat srcFormat)
01581 {
01582
01583 if(srcFormat == PIX_FMT_BGR24) {
01584 __asm__ volatile(
01585 "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t"
01586 "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t"
01587 :
01588 );
01589 } else {
01590 __asm__ volatile(
01591 "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t"
01592 "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t"
01593 :
01594 );
01595 }
01596
01597 __asm__ volatile(
01598 "movq "MANGLE(ff_bgr24toYOffset)", %%mm4 \n\t"
01599 "mov %2, %%"REG_a" \n\t"
/* mm7 = 0, used to zero-extend bytes to words */
01600 "pxor %%mm7, %%mm7 \n\t"
01601 "1: \n\t"
01602 PREFETCH" 64(%0) \n\t"
01603 "movd (%0), %%mm0 \n\t"
01604 "movd 2(%0), %%mm1 \n\t"
01605 "movd 6(%0), %%mm2 \n\t"
01606 "movd 8(%0), %%mm3 \n\t"
/* src itself is advanced; it is an in/out ("+r") operand */
01607 "add $12, %0 \n\t"
01608 "punpcklbw %%mm7, %%mm0 \n\t"
01609 "punpcklbw %%mm7, %%mm1 \n\t"
01610 "punpcklbw %%mm7, %%mm2 \n\t"
01611 "punpcklbw %%mm7, %%mm3 \n\t"
01612 "pmaddwd %%mm5, %%mm0 \n\t"
01613 "pmaddwd %%mm6, %%mm1 \n\t"
01614 "pmaddwd %%mm5, %%mm2 \n\t"
01615 "pmaddwd %%mm6, %%mm3 \n\t"
01616 "paddd %%mm1, %%mm0 \n\t"
01617 "paddd %%mm3, %%mm2 \n\t"
01618 "paddd %%mm4, %%mm0 \n\t"
01619 "paddd %%mm4, %%mm2 \n\t"
01620 "psrad $9, %%mm0 \n\t"
01621 "psrad $9, %%mm2 \n\t"
01622 "packssdw %%mm2, %%mm0 \n\t"
01623 "movq %%mm0, (%1, %%"REG_a") \n\t"
01624 "add $8, %%"REG_a" \n\t"
01625 " js 1b \n\t"
01626 : "+r" (src)
01627 : "r" (dst+width), "g" ((x86_reg)-2*width)
01628 : "%"REG_a
01629 );
01630 }
01631
/*
 * Luma reader wrapper: forwards to bgr24ToY_mmx with PIX_FMT_BGR24.
 * unused1, unused2 and unused are ignored.
 */
01632 static void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
01633 int width, uint32_t *unused)
01634 {
01635 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
01636 }
01637
/*
 * Luma reader wrapper: forwards to bgr24ToY_mmx with PIX_FMT_RGB24, which
 * makes that helper pick its non-BGR24 coefficient pair.
 * unused1, unused2 and unused are ignored.
 */
01638 static void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
01639 int width, uint32_t *unused)
01640 {
01641 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
01642 }
01643
/*
 * Converts packed 3-byte pixels from src into two planes of 16-bit samples.
 *
 * dstU, dstV: receive width int16_t values each.
 * src:        3 * width input bytes are consumed (12 bytes per pass).
 * srcFormat:  selects the coefficient row ff_bgr24toUV[0] or, for
 *             PIX_FMT_RGB24, ff_bgr24toUV[1] (operand %4).
 *
 * The quadwords at offsets 0 and 8 of the row produce the dstU values; the
 * ones at offsets 16 and 24 (the latter cached in mm6) produce the dstV
 * values. Each pass handles the input as two halves (loads at offsets 0 / 2
 * and 6 / 8), widens bytes to words, applies pmaddwd, adds
 * ff_bgr24toUVOffset, shifts right arithmetically by 9 and packs with
 * signed saturation to four 16-bit results per plane.
 *
 * The index starts at -2 * width (a byte offset into the outputs) and
 * advances by 8 until it is no longer negative.
 * NOTE(review): the body runs at least once and produces 4 samples per pass,
 * so widths that are not a multiple of 4 overrun width -- confirm padding.
 */
01644 static av_always_inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV,
01645 const uint8_t *src, int width,
01646 enum PixelFormat srcFormat)
01647 {
01648 __asm__ volatile(
01649 "movq 24(%4), %%mm6 \n\t"
01650 "mov %3, %%"REG_a" \n\t"
/* mm7 = 0, used to zero-extend bytes to words */
01651 "pxor %%mm7, %%mm7 \n\t"
01652 "1: \n\t"
01653 PREFETCH" 64(%0) \n\t"
/* first half of the 12-byte group */
01654 "movd (%0), %%mm0 \n\t"
01655 "movd 2(%0), %%mm1 \n\t"
01656 "punpcklbw %%mm7, %%mm0 \n\t"
01657 "punpcklbw %%mm7, %%mm1 \n\t"
01658 "movq %%mm0, %%mm2 \n\t"
01659 "movq %%mm1, %%mm3 \n\t"
01660 "pmaddwd (%4), %%mm0 \n\t"
01661 "pmaddwd 8(%4), %%mm1 \n\t"
01662 "pmaddwd 16(%4), %%mm2 \n\t"
01663 "pmaddwd %%mm6, %%mm3 \n\t"
01664 "paddd %%mm1, %%mm0 \n\t"
01665 "paddd %%mm3, %%mm2 \n\t"
01666
/* second half of the 12-byte group */
01667 "movd 6(%0), %%mm1 \n\t"
01668 "movd 8(%0), %%mm3 \n\t"
01669 "add $12, %0 \n\t"
01670 "punpcklbw %%mm7, %%mm1 \n\t"
01671 "punpcklbw %%mm7, %%mm3 \n\t"
01672 "movq %%mm1, %%mm4 \n\t"
01673 "movq %%mm3, %%mm5 \n\t"
01674 "pmaddwd (%4), %%mm1 \n\t"
01675 "pmaddwd 8(%4), %%mm3 \n\t"
01676 "pmaddwd 16(%4), %%mm4 \n\t"
01677 "pmaddwd %%mm6, %%mm5 \n\t"
01678 "paddd %%mm3, %%mm1 \n\t"
01679 "paddd %%mm5, %%mm4 \n\t"
01680
/* offset, scale, pack and store: mm0 -> dstU, mm2 -> dstV */
01681 "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3 \n\t"
01682 "paddd %%mm3, %%mm0 \n\t"
01683 "paddd %%mm3, %%mm2 \n\t"
01684 "paddd %%mm3, %%mm1 \n\t"
01685 "paddd %%mm3, %%mm4 \n\t"
01686 "psrad $9, %%mm0 \n\t"
01687 "psrad $9, %%mm2 \n\t"
01688 "psrad $9, %%mm1 \n\t"
01689 "psrad $9, %%mm4 \n\t"
01690 "packssdw %%mm1, %%mm0 \n\t"
01691 "packssdw %%mm4, %%mm2 \n\t"
01692 "movq %%mm0, (%1, %%"REG_a") \n\t"
01693 "movq %%mm2, (%2, %%"REG_a") \n\t"
01694 "add $8, %%"REG_a" \n\t"
01695 " js 1b \n\t"
01696 : "+r" (src)
01697 : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-2*width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
01698 : "%"REG_a
01699 );
01700 }
01701
/*
 * Chroma reader wrapper: forwards src1 to bgr24ToUV_mmx with PIX_FMT_BGR24.
 * unused1 and unused are ignored; src2 is only used by the assert, which
 * requires it to equal src1 (checked after the conversion has run).
 */
01702 static void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV,
01703 const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
01704 int width, uint32_t *unused)
01705 {
01706 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
01707 assert(src1 == src2);
01708 }
01709
/*
 * Chroma reader wrapper: forwards src1 to bgr24ToUV_mmx with PIX_FMT_RGB24.
 * unused1 and unused are ignored; src2 is only used by the assert, which
 * requires it to equal src1 (checked before the conversion runs).
 */
01710 static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV,
01711 const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
01712 int width, uint32_t *unused)
01713 {
01714 assert(src1==src2);
01715 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
01716 }
01717
01718 #if COMPILE_TEMPLATE_MMX2
/*
 * Horizontal luma scaling through the code block stored in
 * c->lumMmx2FilterCode, followed by a C fix-up of the right edge.
 *
 * c:        supplies hLumFilterPos, hLumFilter and lumMmx2FilterCode.
 * dst:      output samples (int16_t), dstWidth entries.
 * src:      input bytes, srcW entries.
 * xInc:     source step per output sample; the fix-up loop treats it as
 *           16.16 fixed point ((i * xInc) >> 16 is the source index).
 *
 * Register setup for the called code: REG_c = src, REG_D = dst,
 * REG_d = filter, REG_b = filterPos, REG_a = 0, mm7 = 0.
 * NOTE(review): what the called code does with these registers is not
 * visible here; it is presumably generated at init time -- confirm.
 *
 * When PIC is defined, REG_b is saved to / restored from the local ebxsave
 * (operand %5) instead of being listed as clobbered.
 *
 * This function also defines CALL_MMX2_FILTER_CODE, which is reused by the
 * chroma variant further down in the file.
 */
01719 static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
01720 int dstWidth, const uint8_t *src,
01721 int srcW, int xInc)
01722 {
01723 int16_t *filterPos = c->hLumFilterPos;
01724 int16_t *filter = c->hLumFilter;
01725 void *mmx2FilterCode= c->lumMmx2FilterCode;
01726 int i;
01727 #if defined(PIC)
01728 DECLARE_ALIGNED(8, uint64_t, ebxsave);
01729 #endif
01730
01731 __asm__ volatile(
01732 #if defined(PIC)
01733 "mov %%"REG_b", %5 \n\t"
01734 #endif
01735 "pxor %%mm7, %%mm7 \n\t"
01736 "mov %0, %%"REG_c" \n\t"
01737 "mov %1, %%"REG_D" \n\t"
01738 "mov %2, %%"REG_d" \n\t"
01739 "mov %3, %%"REG_b" \n\t"
01740 "xor %%"REG_a", %%"REG_a" \n\t"
01741 PREFETCH" (%%"REG_c") \n\t"
01742 PREFETCH" 32(%%"REG_c") \n\t"
01743 PREFETCH" 64(%%"REG_c") \n\t"
01744
/*
 * One invocation of the filter code: esi is loaded from (REG_b), the code
 * is called, then REG_c is advanced by the 32-bit value at (REG_b + REG_a),
 * REG_D by REG_a, and REG_a is reset to 0. The x86-64 form goes through
 * esi so that the 32-bit value can be added to the 64-bit pointer.
 */
01745 #if ARCH_X86_64
01746 #define CALL_MMX2_FILTER_CODE \
01747 "movl (%%"REG_b"), %%esi \n\t"\
01748 "call *%4 \n\t"\
01749 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
01750 "add %%"REG_S", %%"REG_c" \n\t"\
01751 "add %%"REG_a", %%"REG_D" \n\t"\
01752 "xor %%"REG_a", %%"REG_a" \n\t"\
01753
01754 #else
01755 #define CALL_MMX2_FILTER_CODE \
01756 "movl (%%"REG_b"), %%esi \n\t"\
01757 "call *%4 \n\t"\
01758 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
01759 "add %%"REG_a", %%"REG_D" \n\t"\
01760 "xor %%"REG_a", %%"REG_a" \n\t"\
01761
01762 #endif
01763
/* the filter code is invoked eight times in a row */
01764 CALL_MMX2_FILTER_CODE
01765 CALL_MMX2_FILTER_CODE
01766 CALL_MMX2_FILTER_CODE
01767 CALL_MMX2_FILTER_CODE
01768 CALL_MMX2_FILTER_CODE
01769 CALL_MMX2_FILTER_CODE
01770 CALL_MMX2_FILTER_CODE
01771 CALL_MMX2_FILTER_CODE
01772
01773 #if defined(PIC)
01774 "mov %5, %%"REG_b" \n\t"
01775 #endif
01776 :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
01777 "m" (mmx2FilterCode)
01778 #if defined(PIC)
01779 ,"m" (ebxsave)
01780 #endif
01781 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
01782 #if !defined(PIC)
01783 ,"%"REG_b
01784 #endif
01785 );
01786
/* Walking back from the last output, overwrite every sample whose source
 * index is at or beyond the last input pixel with that pixel times 128. */
01787 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
01788 dst[i] = src[srcW-1]*128;
01789 }
01790
/*
 * Horizontal chroma scaling of two planes through the code block stored in
 * c->chrMmx2FilterCode, followed by a C fix-up of the right edge.
 *
 * c:          supplies hChrFilterPos, hChrFilter and chrMmx2FilterCode.
 * dst1, dst2: output samples (int16_t), dstWidth entries each.
 * src1, src2: input bytes, srcW entries each.
 * xInc:       source step per output sample; the fix-up loop treats it as
 *             16.16 fixed point ((i * xInc) >> 16 is the source index).
 *
 * The first plane runs with REG_c = src1 and REG_D = dst1 (four
 * CALL_MMX2_FILTER_CODE invocations); then REG_a is cleared and only REG_c
 * and REG_D are reloaded with src2 / dst2 for four more invocations.
 * REG_d (filter) and REG_b (filterPos) keep the values from the initial
 * setup.
 * NOTE(review): CALL_MMX2_FILTER_CODE is #defined earlier in the file, inside
 * the luma variant; the called code itself is not visible here.
 *
 * When PIC is defined, REG_b is saved to / restored from the local ebxsave
 * (operand %7) instead of being listed as clobbered.
 */
01791 static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
01792 int dstWidth, const uint8_t *src1,
01793 const uint8_t *src2, int srcW, int xInc)
01794 {
01795 int16_t *filterPos = c->hChrFilterPos;
01796 int16_t *filter = c->hChrFilter;
01797 void *mmx2FilterCode= c->chrMmx2FilterCode;
01798 int i;
01799 #if defined(PIC)
01800 DECLARE_ALIGNED(8, uint64_t, ebxsave);
01801 #endif
01802
01803 __asm__ volatile(
01804 #if defined(PIC)
01805 "mov %%"REG_b", %7 \n\t"
01806 #endif
01807 "pxor %%mm7, %%mm7 \n\t"
01808 "mov %0, %%"REG_c" \n\t"
01809 "mov %1, %%"REG_D" \n\t"
01810 "mov %2, %%"REG_d" \n\t"
01811 "mov %3, %%"REG_b" \n\t"
01812 "xor %%"REG_a", %%"REG_a" \n\t"
01813 PREFETCH" (%%"REG_c") \n\t"
01814 PREFETCH" 32(%%"REG_c") \n\t"
01815 PREFETCH" 64(%%"REG_c") \n\t"
01816
/* first plane: src1 -> dst1 */
01817 CALL_MMX2_FILTER_CODE
01818 CALL_MMX2_FILTER_CODE
01819 CALL_MMX2_FILTER_CODE
01820 CALL_MMX2_FILTER_CODE
/* second plane: src2 -> dst2 */
01821 "xor %%"REG_a", %%"REG_a" \n\t"
01822 "mov %5, %%"REG_c" \n\t"
01823 "mov %6, %%"REG_D" \n\t"
01824 PREFETCH" (%%"REG_c") \n\t"
01825 PREFETCH" 32(%%"REG_c") \n\t"
01826 PREFETCH" 64(%%"REG_c") \n\t"
01827
01828 CALL_MMX2_FILTER_CODE
01829 CALL_MMX2_FILTER_CODE
01830 CALL_MMX2_FILTER_CODE
01831 CALL_MMX2_FILTER_CODE
01832
01833 #if defined(PIC)
01834 "mov %7, %%"REG_b" \n\t"
01835 #endif
01836 :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
01837 "m" (mmx2FilterCode), "m" (src2), "m"(dst2)
01838 #if defined(PIC)
01839 ,"m" (ebxsave)
01840 #endif
01841 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
01842 #if !defined(PIC)
01843 ,"%"REG_b
01844 #endif
01845 );
01846
/* Walking back from the last output, overwrite every sample whose source
 * index is at or beyond the last input pixel with that pixel times 128. */
01847 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
01848 dst1[i] = src1[srcW-1]*128;
01849 dst2[i] = src2[srcW-1]*128;
01850 }
01851 }
01852 #endif
01853
01854 static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
01855 {
01856 enum PixelFormat srcFormat = c->srcFormat,
01857 dstFormat = c->dstFormat;
01858 c->use_mmx_vfilter= 0;
01859 if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12
01860 && dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
01861 if (c->flags & SWS_ACCURATE_RND) {
01862 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
01863 switch (c->dstFormat) {
01864 case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
01865 case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
01866 case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
01867 case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
01868 case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
01869 default: break;
01870 }
01871 }
01872 } else {
01873 c->use_mmx_vfilter= 1;
01874 c->yuv2planeX = RENAME(yuv2yuvX );
01875 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
01876 switch (c->dstFormat) {
01877 case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
01878 case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
01879 case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
01880 case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
01881 case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
01882 default: break;
01883 }
01884 }
01885 }
01886 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
01887 switch (c->dstFormat) {
01888 case PIX_FMT_RGB32:
01889 c->yuv2packed1 = RENAME(yuv2rgb32_1);
01890 c->yuv2packed2 = RENAME(yuv2rgb32_2);
01891 break;
01892 case PIX_FMT_BGR24:
01893 c->yuv2packed1 = RENAME(yuv2bgr24_1);
01894 c->yuv2packed2 = RENAME(yuv2bgr24_2);
01895 break;
01896 case PIX_FMT_RGB555:
01897 c->yuv2packed1 = RENAME(yuv2rgb555_1);
01898 c->yuv2packed2 = RENAME(yuv2rgb555_2);
01899 break;
01900 case PIX_FMT_RGB565:
01901 c->yuv2packed1 = RENAME(yuv2rgb565_1);
01902 c->yuv2packed2 = RENAME(yuv2rgb565_2);
01903 break;
01904 case PIX_FMT_YUYV422:
01905 c->yuv2packed1 = RENAME(yuv2yuyv422_1);
01906 c->yuv2packed2 = RENAME(yuv2yuyv422_2);
01907 break;
01908 default:
01909 break;
01910 }
01911 }
01912 }
01913
01914 if (c->srcBpc == 8 && c->dstBpc <= 10) {
01915
01916 #if COMPILE_TEMPLATE_MMX2
01917 if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
01918 {
01919 c->hyscale_fast = RENAME(hyscale_fast);
01920 c->hcscale_fast = RENAME(hcscale_fast);
01921 } else {
01922 #endif
01923 c->hyscale_fast = NULL;
01924 c->hcscale_fast = NULL;
01925 #if COMPILE_TEMPLATE_MMX2
01926 }
01927 #endif
01928 }
01929
01930 #if !COMPILE_TEMPLATE_MMX2
01931 switch(srcFormat) {
01932 case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break;
01933 case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break;
01934 case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break;
01935 case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break;
01936 default: break;
01937 }
01938 #endif
01939 if (!c->chrSrcHSubSample) {
01940 switch(srcFormat) {
01941 case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break;
01942 case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV); break;
01943 default: break;
01944 }
01945 }
01946
01947 switch (srcFormat) {
01948 #if !COMPILE_TEMPLATE_MMX2
01949 case PIX_FMT_YUYV422 :
01950 case PIX_FMT_Y400A : c->lumToYV12 = RENAME(yuy2ToY); break;
01951 case PIX_FMT_UYVY422 : c->lumToYV12 = RENAME(uyvyToY); break;
01952 #endif
01953 case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break;
01954 case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break;
01955 default: break;
01956 }
01957 #if !COMPILE_TEMPLATE_MMX2
01958 if (c->alpPixBuf) {
01959 switch (srcFormat) {
01960 case PIX_FMT_Y400A : c->alpToYV12 = RENAME(yuy2ToY); break;
01961 default: break;
01962 }
01963 }
01964 #endif
01965 }