00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
/*
 * Instruction-mnemonic selection for this template instantiation.
 * The four macros are undefined first and then redefined according to
 * COMPILE_TEMPLATE_MMX2, so the definitions below replace any earlier ones.
 */
00021 #undef REAL_MOVNTQ
00022 #undef MOVNTQ
00023 #undef MOVNTQ2
00024 #undef PREFETCH
00025
/* PREFETCH: "prefetchnta" for MMX2 builds, otherwise the placeholder " # nop". */
00026 #if COMPILE_TEMPLATE_MMX2
00027 #define PREFETCH "prefetchnta"
00028 #else
00029 #define PREFETCH " # nop"
00030 #endif
00031
/*
 * REAL_MOVNTQ(a,b) stringifies its arguments into a complete store
 * instruction "<op> a, b"; MOVNTQ2 is only the mnemonic, for callers that
 * append the operands themselves. <op> is movntq for MMX2 builds and movq
 * otherwise.
 */
00032 #if COMPILE_TEMPLATE_MMX2
00033 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
00034 #define MOVNTQ2 "movntq "
00035 #else
00036 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
00037 #define MOVNTQ2 "movq "
00038 #endif
/*
 * MOVNTQ forwards to REAL_MOVNTQ. Because its arguments are not operands of
 * '#' here, they are macro-expanded before REAL_MOVNTQ stringifies them.
 */
00039 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
00040
/*
 * dither_8to16: load 8 dither bytes and widen them to 16-bit words in MMX
 * registers.
 *
 * srcDither  address the 8 bytes are read from (movq (%0)).
 * rot        when non-zero, the 64-bit value is first rotated right by
 *            24 bits (psrlq $24 OR-ed with psllq $40), i.e. by three bytes.
 *
 * There are no output operands: the result is left in mm3 (bytes 0-3 as
 * words) and mm4 (bytes 4-7 as words), and mm0 is left zeroed. The asm
 * statements declare no clobbers.
 * NOTE(review): callers therefore depend on the compiler not touching
 * mm0/mm3/mm4 between this asm and the next one - confirm this is acceptable
 * for the supported compilers.
 *
 * The definition is only compiled when COMPILE_TEMPLATE_MMX2 is 0.
 */
00041 #if !COMPILE_TEMPLATE_MMX2
00042 static av_always_inline void
00043 dither_8to16(const uint8_t *srcDither, int rot)
00044 {
00045 if (rot) {
00046 __asm__ volatile("pxor %%mm0, %%mm0\n\t"
00047 "movq (%0), %%mm3\n\t"
00048 "movq %%mm3, %%mm4\n\t"
00049 "psrlq $24, %%mm3\n\t"
00050 "psllq $40, %%mm4\n\t"
00051 "por %%mm4, %%mm3\n\t"
00052 "movq %%mm3, %%mm4\n\t"
00053 "punpcklbw %%mm0, %%mm3\n\t"
00054 "punpckhbw %%mm0, %%mm4\n\t"
00055 :: "r"(srcDither)
00056 );
00057 } else {
00058 __asm__ volatile("pxor %%mm0, %%mm0\n\t"
00059 "movq (%0), %%mm3\n\t"
00060 "movq %%mm3, %%mm4\n\t"
00061 "punpcklbw %%mm0, %%mm3\n\t"
00062 "punpckhbw %%mm0, %%mm4\n\t"
00063 :: "r"(srcDither)
00064 );
00065 }
00066 }
00067 #endif
00068
/*
 * yuv2yuvX: vertical multi-tap filter writing one row of 8-bit samples.
 *
 * dither_8to16(dither, offset) runs first; the asm below picks its result up
 * from mm3/mm4, shifts it right by 4 and keeps a copy in mm6/mm7 so the
 * accumulators can be re-seeded for every group of 8 outputs.
 *
 * `filter` is walked as a list of 16-byte entries: a source pointer at
 * offset 0 and a coefficient quadword at offset 8. The walk stops at the
 * first entry whose pointer is zero. For each entry, 8 16-bit samples at
 * (pointer + 2*REG_c) are multiplied with the coefficient (pmulhw) and added
 * to mm3/mm4.
 *
 * After the last tap the sums are shifted right by 3, packed to unsigned
 * bytes and stored with MOVNTQ2 at (dest - offset) + REG_c. REG_c starts at
 * `offset` and advances by 8 until it reaches dstW + offset. Label "1" is the
 * target of both the per-tap jump (jnz) and the per-group jump (jb); the
 * group is processed at least once.
 *
 * filterSize and src are not referenced in this body.
 * Clobbers declared: REG_d, REG_S, REG_c. The MMX registers written here are
 * not listed.
 * NOTE(review): "movl %3, %%ecx" writes only the 32-bit register while REG_c
 * is used for addressing; on x86-64 this relies on 32-bit writes
 * zero-extending - confirm offset is never negative.
 */
00069 static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
00070 const int16_t **src, uint8_t *dest, int dstW,
00071 const uint8_t *dither, int offset)
00072 {
00073 dither_8to16(dither, offset);
00074 __asm__ volatile(\
00075 "psraw $4, %%mm3\n\t"
00076 "psraw $4, %%mm4\n\t"
00077 "movq %%mm3, %%mm6\n\t"
00078 "movq %%mm4, %%mm7\n\t"
00079 "movl %3, %%ecx\n\t"
00080 "mov %0, %%"REG_d" \n\t"\
00081 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00082 ".p2align 4 \n\t" \
00083 "1: \n\t"\
00084 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00085 "movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" \
00086 "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" \
00087 "add $16, %%"REG_d" \n\t"\
00088 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00089 "test %%"REG_S", %%"REG_S" \n\t"\
00090 "pmulhw %%mm0, %%mm2 \n\t"\
00091 "pmulhw %%mm0, %%mm5 \n\t"\
00092 "paddw %%mm2, %%mm3 \n\t"\
00093 "paddw %%mm5, %%mm4 \n\t"\
00094 " jnz 1b \n\t"\
00095 "psraw $3, %%mm3 \n\t"\
00096 "psraw $3, %%mm4 \n\t"\
00097 "packuswb %%mm4, %%mm3 \n\t"
00098 MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t"
00099 "add $8, %%"REG_c" \n\t"\
00100 "cmp %2, %%"REG_c" \n\t"\
00101 "movq %%mm6, %%mm3\n\t"
00102 "movq %%mm7, %%mm4\n\t"
00103 "mov %0, %%"REG_d" \n\t"\
00104 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00105 "jb 1b \n\t"\
00106 :: "g" (filter),
00107 "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
00108 : "%"REG_d, "%"REG_S, "%"REG_c
00109 );
00110 }
00111
/*
 * YSCALEYUV2PACKEDX_UV: opens an "__asm__ volatile(" statement and emits the
 * chroma half of the multi-tap vertical filter. The statement is left open;
 * the expansion site must append further asm text and an operand list
 * (YSCALEYUV2PACKEDX_END in this file).
 *
 * Operands used: %0 is the base for CHR_MMX_FILTER_OFFSET and
 * VROUNDER_OFFSET, %6 is added to the source pointer to reach the second
 * chroma plane.
 *
 * Emitted code: REG_a = 0 (pixel index), then at label "1" both accumulators
 * mm3/mm4 are seeded from VROUNDER_OFFSET(%0). Loop "2" walks 16-byte filter
 * entries (pointer at +0, coefficient at +8, terminated by a zero pointer)
 * and accumulates pmulhw(coeff, plane[REG_a]) into mm3 (first plane) and
 * mm4 (plane at +%6).
 */
00112 #define YSCALEYUV2PACKEDX_UV \
00113 __asm__ volatile(\
00114 "xor %%"REG_a", %%"REG_a" \n\t"\
00115 ".p2align 4 \n\t"\
00116 "nop \n\t"\
00117 "1: \n\t"\
00118 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00119 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00120 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00121 "movq %%mm3, %%mm4 \n\t"\
00122 ".p2align 4 \n\t"\
00123 "2: \n\t"\
00124 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00125 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00126 "add %6, %%"REG_S" \n\t" \
00127 "movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t" \
00128 "add $16, %%"REG_d" \n\t"\
00129 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00130 "pmulhw %%mm0, %%mm2 \n\t"\
00131 "pmulhw %%mm0, %%mm5 \n\t"\
00132 "paddw %%mm2, %%mm3 \n\t"\
00133 "paddw %%mm5, %%mm4 \n\t"\
00134 "test %%"REG_S", %%"REG_S" \n\t"\
00135 " jnz 2b \n\t"\
00136
/*
 * YSCALEYUV2PACKEDX_YA(offset, coeff, src1, src2, dst1, dst2): asm text for
 * one multi-tap vertical filter pass over 8 consecutive 16-bit samples.
 *
 * offset      string giving the filter table's displacement from %0.
 * coeff       MMX register receiving each entry's coefficient.
 * src1, src2  MMX scratch registers for samples 0-3 and 4-7.
 * dst1, dst2  MMX accumulators, seeded from VROUNDER_OFFSET(%0).
 *
 * The filter table has the same 16-byte entry layout and zero-pointer
 * terminator as in YSCALEYUV2PACKEDX_UV. Samples are read at
 * (pointer + 2*REG_a) and 8 bytes further on. REG_d and REG_S are used as
 * scratch, and label "2" is reused.
 */
00137 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
00138 "lea "offset"(%0), %%"REG_d" \n\t"\
00139 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00140 "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
00141 "movq "#dst1", "#dst2" \n\t"\
00142 ".p2align 4 \n\t"\
00143 "2: \n\t"\
00144 "movq 8(%%"REG_d"), "#coeff" \n\t" \
00145 "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" \
00146 "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" \
00147 "add $16, %%"REG_d" \n\t"\
00148 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00149 "pmulhw "#coeff", "#src1" \n\t"\
00150 "pmulhw "#coeff", "#src2" \n\t"\
00151 "paddw "#src1", "#dst1" \n\t"\
00152 "paddw "#src2", "#dst2" \n\t"\
00153 "test %%"REG_S", %%"REG_S" \n\t"\
00154 " jnz 2b \n\t"\
00155
/*
 * YSCALEYUV2PACKEDX: chroma pass followed by the luma pass (filter table at
 * LUM_MMX_FILTER_OFFSET). After expansion the filtered values for 8 pixels
 * are in mm3/mm4 (the two chroma planes) and mm1/mm7 (luma 0-3 / 4-7).
 * mm0, mm2 and mm5 are scratch. The asm statement opened by the UV macro is
 * still open.
 */
00156 #define YSCALEYUV2PACKEDX \
00157 YSCALEYUV2PACKEDX_UV \
00158 YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
00159
/*
 * YSCALEYUV2PACKEDX_END: operand list, clobber list and closing ");" for an
 * asm statement opened by YSCALEYUV2PACKEDX_UV / _ACCURATE_UV.
 *
 * Input operands: %0 = &c->redDither (register), %1-%3 = dummy (memory),
 * %4 = dest (register), %5 = dstW_reg (memory), %6 = uv_off (memory).
 * Declared clobbers: REG_a, REG_d, REG_S. No MMX registers and no "memory"
 * clobber are listed.
 *
 * The expansion site must have c, dummy, dest, dstW_reg and uv_off in scope.
 */
00160 #define YSCALEYUV2PACKEDX_END \
00161 :: "r" (&c->redDither), \
00162 "m" (dummy), "m" (dummy), "m" (dummy),\
00163 "r" (dest), "m" (dstW_reg), "m"(uv_off) \
00164 : "%"REG_a, "%"REG_d, "%"REG_S \
00165 );
00166
/*
 * YSCALEYUV2PACKEDX_ACCURATE_UV: opens an "__asm__ volatile(" statement and
 * emits the chroma half of the multi-tap vertical filter using 32-bit
 * accumulation. The statement is left open for the expansion site.
 *
 * Each filter entry (stride APCK_SIZE, starting at CHR_MMX_FILTER_OFFSET(%0))
 * covers two source lines: a pointer at offset 0, a second pointer at
 * APCK_PTR2 and a coefficient quadword at APCK_COEF. Words from the two
 * lines are interleaved (punpcklwd/punpckhwd) and combined with pmaddwd.
 * The pointer read at APCK_SIZE is the next entry's first pointer, and a
 * zero value ends loop "2".
 *
 * Accumulators: mm4/mm5 for the first chroma plane, mm6/mm7 for the plane
 * reached by adding %6 to the source pointer. After the loop the sums are
 * shifted right by 16, packed to words, VROUNDER_OFFSET(%0) is added and the
 * results are stored to U_TEMP(%0) and V_TEMP(%0).
 *
 * REG_a is zeroed as the pixel index. REG_d and REG_S are scratch.
 */
00167 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
00168 __asm__ volatile(\
00169 "xor %%"REG_a", %%"REG_a" \n\t"\
00170 ".p2align 4 \n\t"\
00171 "nop \n\t"\
00172 "1: \n\t"\
00173 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00174 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00175 "pxor %%mm4, %%mm4 \n\t"\
00176 "pxor %%mm5, %%mm5 \n\t"\
00177 "pxor %%mm6, %%mm6 \n\t"\
00178 "pxor %%mm7, %%mm7 \n\t"\
00179 ".p2align 4 \n\t"\
00180 "2: \n\t"\
00181 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" \
00182 "add %6, %%"REG_S" \n\t" \
00183 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00184 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00185 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" \
00186 "movq %%mm0, %%mm3 \n\t"\
00187 "punpcklwd %%mm1, %%mm0 \n\t"\
00188 "punpckhwd %%mm1, %%mm3 \n\t"\
00189 "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" \
00190 "pmaddwd %%mm1, %%mm0 \n\t"\
00191 "pmaddwd %%mm1, %%mm3 \n\t"\
00192 "paddd %%mm0, %%mm4 \n\t"\
00193 "paddd %%mm3, %%mm5 \n\t"\
00194 "add %6, %%"REG_S" \n\t" \
00195 "movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t" \
00196 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00197 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00198 "test %%"REG_S", %%"REG_S" \n\t"\
00199 "movq %%mm2, %%mm0 \n\t"\
00200 "punpcklwd %%mm3, %%mm2 \n\t"\
00201 "punpckhwd %%mm3, %%mm0 \n\t"\
00202 "pmaddwd %%mm1, %%mm2 \n\t"\
00203 "pmaddwd %%mm1, %%mm0 \n\t"\
00204 "paddd %%mm2, %%mm6 \n\t"\
00205 "paddd %%mm0, %%mm7 \n\t"\
00206 " jnz 2b \n\t"\
00207 "psrad $16, %%mm4 \n\t"\
00208 "psrad $16, %%mm5 \n\t"\
00209 "psrad $16, %%mm6 \n\t"\
00210 "psrad $16, %%mm7 \n\t"\
00211 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00212 "packssdw %%mm5, %%mm4 \n\t"\
00213 "packssdw %%mm7, %%mm6 \n\t"\
00214 "paddw %%mm0, %%mm4 \n\t"\
00215 "paddw %%mm0, %%mm6 \n\t"\
00216 "movq %%mm4, "U_TEMP"(%0) \n\t"\
00217 "movq %%mm6, "V_TEMP"(%0) \n\t"\
00218
/*
 * YSCALEYUV2PACKEDX_ACCURATE_YA(offset): asm text for the 32-bit-accumulating
 * multi-tap vertical filter over 8 consecutive 16-bit samples, using the
 * filter table at offset(%0). The entry layout (APCK_PTR2, APCK_COEF,
 * APCK_SIZE) and the zero-pointer terminator are the same as in
 * YSCALEYUV2PACKEDX_ACCURATE_UV.
 *
 * Samples are read at (pointer + 2*REG_a) and 8 bytes further on.
 * Accumulators: mm1/mm5 for samples 0-3, mm7/mm6 for samples 4-7.
 *
 * On exit: mm1 and mm7 hold the packed 16-bit results (after >>16 and adding
 * VROUNDER_OFFSET(%0)), and mm3/mm4 are reloaded from U_TEMP(%0)/V_TEMP(%0).
 * mm0, mm2, mm5 and mm6 are overwritten. REG_d and REG_S are scratch.
 */
00219 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
00220 "lea "offset"(%0), %%"REG_d" \n\t"\
00221 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00222 "pxor %%mm1, %%mm1 \n\t"\
00223 "pxor %%mm5, %%mm5 \n\t"\
00224 "pxor %%mm7, %%mm7 \n\t"\
00225 "pxor %%mm6, %%mm6 \n\t"\
00226 ".p2align 4 \n\t"\
00227 "2: \n\t"\
00228 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" \
00229 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00230 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00231 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" \
00232 "movq %%mm0, %%mm3 \n\t"\
00233 "punpcklwd %%mm4, %%mm0 \n\t"\
00234 "punpckhwd %%mm4, %%mm3 \n\t"\
00235 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" \
00236 "pmaddwd %%mm4, %%mm0 \n\t"\
00237 "pmaddwd %%mm4, %%mm3 \n\t"\
00238 "paddd %%mm0, %%mm1 \n\t"\
00239 "paddd %%mm3, %%mm5 \n\t"\
00240 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" \
00241 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00242 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00243 "test %%"REG_S", %%"REG_S" \n\t"\
00244 "movq %%mm2, %%mm0 \n\t"\
00245 "punpcklwd %%mm3, %%mm2 \n\t"\
00246 "punpckhwd %%mm3, %%mm0 \n\t"\
00247 "pmaddwd %%mm4, %%mm2 \n\t"\
00248 "pmaddwd %%mm4, %%mm0 \n\t"\
00249 "paddd %%mm2, %%mm7 \n\t"\
00250 "paddd %%mm0, %%mm6 \n\t"\
00251 " jnz 2b \n\t"\
00252 "psrad $16, %%mm1 \n\t"\
00253 "psrad $16, %%mm5 \n\t"\
00254 "psrad $16, %%mm7 \n\t"\
00255 "psrad $16, %%mm6 \n\t"\
00256 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00257 "packssdw %%mm5, %%mm1 \n\t"\
00258 "packssdw %%mm6, %%mm7 \n\t"\
00259 "paddw %%mm0, %%mm1 \n\t"\
00260 "paddw %%mm0, %%mm7 \n\t"\
00261 "movq "U_TEMP"(%0), %%mm3 \n\t"\
00262 "movq "V_TEMP"(%0), %%mm4 \n\t"\
00263
/*
 * YSCALEYUV2PACKEDX_ACCURATE: 32-bit-accumulating counterpart of
 * YSCALEYUV2PACKEDX. It runs the chroma pass and then the luma pass (filter
 * table at LUM_MMX_FILTER_OFFSET), leaving chroma in mm3/mm4 and luma in
 * mm1/mm7. The asm statement remains open.
 */
00264 #define YSCALEYUV2PACKEDX_ACCURATE \
00265 YSCALEYUV2PACKEDX_ACCURATE_UV \
00266 YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
00267
/*
 * YSCALEYUV2RGBX: asm text converting 8 filtered pixels from YUV to three
 * packed 8-bit channel registers. All coefficients are read relative to %0.
 *
 * Input:  mm3/mm4 = the two chroma planes (4 words each),
 *         mm1/mm7 = luma for pixels 0-3 / 4-7.
 * Steps:  subtract U_OFFSET/V_OFFSET from chroma and Y_OFFSET from luma,
 *         multiply (pmulhw) by UG/VG/UB/VR/Y coefficients,
 *         sum the UG and VG products into mm4,
 *         duplicate every chroma term for two adjacent pixels
 *         (punpcklwd/punpckhwd of a register with itself), add luma,
 *         pack to bytes with unsigned saturation.
 * Output: mm2, mm4, mm5 = 8 bytes each. The callers in this file pass them
 *         to the writers as blue, green and red respectively.
 * mm0, mm3, mm6 are overwritten.
 */
00268 #define YSCALEYUV2RGBX \
00269 "psubw "U_OFFSET"(%0), %%mm3 \n\t" \
00270 "psubw "V_OFFSET"(%0), %%mm4 \n\t" \
00271 "movq %%mm3, %%mm2 \n\t" \
00272 "movq %%mm4, %%mm5 \n\t" \
00273 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
00274 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
00275 \
00276 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
00277 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
00278 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" \
00279 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" \
00280 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
00281 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
00282 \
00283 "paddw %%mm3, %%mm4 \n\t"\
00284 "movq %%mm2, %%mm0 \n\t"\
00285 "movq %%mm5, %%mm6 \n\t"\
00286 "movq %%mm4, %%mm3 \n\t"\
00287 "punpcklwd %%mm2, %%mm2 \n\t"\
00288 "punpcklwd %%mm5, %%mm5 \n\t"\
00289 "punpcklwd %%mm4, %%mm4 \n\t"\
00290 "paddw %%mm1, %%mm2 \n\t"\
00291 "paddw %%mm1, %%mm5 \n\t"\
00292 "paddw %%mm1, %%mm4 \n\t"\
00293 "punpckhwd %%mm0, %%mm0 \n\t"\
00294 "punpckhwd %%mm6, %%mm6 \n\t"\
00295 "punpckhwd %%mm3, %%mm3 \n\t"\
00296 "paddw %%mm7, %%mm0 \n\t"\
00297 "paddw %%mm7, %%mm6 \n\t"\
00298 "paddw %%mm7, %%mm3 \n\t"\
00299 \
00300 "packuswb %%mm0, %%mm2 \n\t"\
00301 "packuswb %%mm6, %%mm5 \n\t"\
00302 "packuswb %%mm3, %%mm4 \n\t"\
00303
/*
 * REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t): asm text that
 * interleaves four 8-byte channel registers into 8 four-byte pixels, stores
 * them and closes the per-8-pixel loop.
 *
 * dst, dstw, index  destination base, width operand and pixel index.
 * b, g, r, a        MMX registers holding 8 bytes per channel; b and r are
 *                   overwritten.
 * q0, q2, q3, t     MMX scratch registers.
 *
 * Bytes land in memory in the order b, g, r, a for each pixel. The 32 bytes
 * are written with MOVNTQ at dst + 4*index. Then index += 8 and execution
 * jumps back to label "1" while index is below dstw (unsigned compare).
 *
 * WRITEBGR32 forwards to REAL_WRITEBGR32 so that the arguments are
 * macro-expanded before being stringified (presumably for names such as
 * REGa - confirm against their definitions).
 */
00304 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
00305 "movq "#b", "#q2" \n\t" \
00306 "movq "#r", "#t" \n\t" \
00307 "punpcklbw "#g", "#b" \n\t" \
00308 "punpcklbw "#a", "#r" \n\t" \
00309 "punpckhbw "#g", "#q2" \n\t" \
00310 "punpckhbw "#a", "#t" \n\t" \
00311 "movq "#b", "#q0" \n\t" \
00312 "movq "#q2", "#q3" \n\t" \
00313 "punpcklwd "#r", "#q0" \n\t" \
00314 "punpckhwd "#r", "#b" \n\t" \
00315 "punpcklwd "#t", "#q2" \n\t" \
00316 "punpckhwd "#t", "#q3" \n\t" \
00317 \
00318 MOVNTQ( q0, (dst, index, 4))\
00319 MOVNTQ( b, 8(dst, index, 4))\
00320 MOVNTQ( q2, 16(dst, index, 4))\
00321 MOVNTQ( q3, 24(dst, index, 4))\
00322 \
00323 "add $8, "#index" \n\t"\
00324 "cmp "#dstw", "#index" \n\t"\
00325 " jb 1b \n\t"
00326 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
00327
/*
 * yuv2rgb32_X_ar: multi-tap vertical scale plus YUV to 32-bit packed RGB,
 * using the 32-bit-accumulating (_ACCURATE) filter macros.
 *
 * Only c, dest and dstW are referenced directly. The filter tables and
 * coefficients are read by the asm relative to &c->redDither, and the
 * chroma plane distance comes from c->uv_offx2. The lumFilter, lumSrc,
 * chrFilter, chrUSrc, chrVSrc, alpSrc, the two filter sizes and dstY
 * parameters are not used in this body (presumably the caller has already
 * stored the filter data in the context - confirm).
 *
 * When CONFIG_SWSCALE_ALPHA is enabled and c->alpPixBuf is set, alpha is
 * filtered too and written as the fourth byte. Otherwise the fourth byte is
 * 0xFF.
 */
00328 static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
00329 const int16_t **lumSrc, int lumFilterSize,
00330 const int16_t *chrFilter, const int16_t **chrUSrc,
00331 const int16_t **chrVSrc,
00332 int chrFilterSize, const int16_t **alpSrc,
00333 uint8_t *dest, int dstW, int dstY)
00334 {
00335 x86_reg dummy=0;
00336 x86_reg dstW_reg = dstW;
00337 x86_reg uv_off = c->uv_offx2;
00338
00339 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00340 YSCALEYUV2PACKEDX_ACCURATE
00341 YSCALEYUV2RGBX
/* Park the packed channels (mm2, mm4, mm5): the alpha pass overwrites them. */
00342 "movq %%mm2, "U_TEMP"(%0) \n\t"
00343 "movq %%mm4, "V_TEMP"(%0) \n\t"
00344 "movq %%mm5, "Y_TEMP"(%0) \n\t"
/* The alpha pass ends by reloading mm3/mm4 from U_TEMP/V_TEMP, i.e. the parked mm2/mm4. */
00345 YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
00346 "movq "Y_TEMP"(%0), %%mm5 \n\t"
/* Scale the filtered alpha down by 3 bits and pack it to 8 bytes in mm1. */
00347 "psraw $3, %%mm1 \n\t"
00348 "psraw $3, %%mm7 \n\t"
00349 "packuswb %%mm7, %%mm1 \n\t"
00350 WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
00351 YSCALEYUV2PACKEDX_END
00352 } else {
00353 YSCALEYUV2PACKEDX_ACCURATE
00354 YSCALEYUV2RGBX
/* No alpha plane: set every bit of mm7 so the fourth byte of each pixel is 0xFF. */
00355 "pcmpeqd %%mm7, %%mm7 \n\t"
00356 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00357 YSCALEYUV2PACKEDX_END
00358 }
00359 }
00360
/*
 * yuv2rgb32_X: multi-tap vertical scale plus YUV to 32-bit packed RGB, using
 * the 16-bit pmulhw filter macros (YSCALEYUV2PACKEDX).
 *
 * Only c, dest and dstW are referenced directly. The filter tables and
 * coefficients are read by the asm relative to &c->redDither, and the
 * chroma plane distance comes from c->uv_offx2. The remaining parameters
 * are not used in this body.
 *
 * When CONFIG_SWSCALE_ALPHA is enabled and c->alpPixBuf is set, alpha is
 * filtered with the table at ALP_MMX_FILTER_OFFSET and written as the fourth
 * byte. Otherwise the fourth byte is 0xFF.
 */
00361 static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
00362 const int16_t **lumSrc, int lumFilterSize,
00363 const int16_t *chrFilter, const int16_t **chrUSrc,
00364 const int16_t **chrVSrc,
00365 int chrFilterSize, const int16_t **alpSrc,
00366 uint8_t *dest, int dstW, int dstY)
00367 {
00368 x86_reg dummy=0;
00369 x86_reg dstW_reg = dstW;
00370 x86_reg uv_off = c->uv_offx2;
00371
00372 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00373 YSCALEYUV2PACKEDX
00374 YSCALEYUV2RGBX
/* Alpha pass: scratch is mm0/mm3/mm6, sums go to mm1/mm7. The packed channels in mm2/mm4/mm5 are untouched. */
00375 YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
00376 "psraw $3, %%mm1 \n\t"
00377 "psraw $3, %%mm7 \n\t"
00378 "packuswb %%mm7, %%mm1 \n\t"
00379 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00380 YSCALEYUV2PACKEDX_END
00381 } else {
00382 YSCALEYUV2PACKEDX
00383 YSCALEYUV2RGBX
/* No alpha plane: set every bit of mm7 so the fourth byte of each pixel is 0xFF. */
00384 "pcmpeqd %%mm7, %%mm7 \n\t"
00385 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00386 YSCALEYUV2PACKEDX_END
00387 }
00388 }
00389
/*
 * REAL_WRITERGB16(dst, dstw, index): asm text that packs 8 pixels from
 * mm2 (blue), mm4 (green), mm5 (red) into 16-bit words, stores them and
 * closes the per-8-pixel loop.
 *
 * Blue and red are masked with bF8 and green with bFC. Blue is shifted right
 * by 3 and green is widened to words and shifted left by 3, giving the word
 * layout red in bits 15-11, green in bits 10-5 and blue in bits 4-0.
 * mm7 supplies the zero bytes used to widen green; the callers in this file
 * clear it with pxor beforehand. mm1 and mm3 are scratch.
 *
 * 16 bytes are written with MOVNTQ at dst + 2*index. Then index += 8 and
 * execution jumps back to label "1" while index is below dstw.
 *
 * WRITERGB16 forwards to REAL_WRITERGB16 so that arguments are
 * macro-expanded before stringification.
 */
00390 #define REAL_WRITERGB16(dst, dstw, index) \
00391 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00392 "pand "MANGLE(bFC)", %%mm4 \n\t" \
00393 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00394 "psrlq $3, %%mm2 \n\t"\
00395 \
00396 "movq %%mm2, %%mm1 \n\t"\
00397 "movq %%mm4, %%mm3 \n\t"\
00398 \
00399 "punpcklbw %%mm7, %%mm3 \n\t"\
00400 "punpcklbw %%mm5, %%mm2 \n\t"\
00401 "punpckhbw %%mm7, %%mm4 \n\t"\
00402 "punpckhbw %%mm5, %%mm1 \n\t"\
00403 \
00404 "psllq $3, %%mm3 \n\t"\
00405 "psllq $3, %%mm4 \n\t"\
00406 \
00407 "por %%mm3, %%mm2 \n\t"\
00408 "por %%mm4, %%mm1 \n\t"\
00409 \
00410 MOVNTQ(%%mm2, (dst, index, 2))\
00411 MOVNTQ(%%mm1, 8(dst, index, 2))\
00412 \
00413 "add $8, "#index" \n\t"\
00414 "cmp "#dstw", "#index" \n\t"\
00415 " jb 1b \n\t"
00416 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
00417
/*
 * yuv2rgb565_X_ar: multi-tap vertical scale (32-bit-accumulating macros)
 * plus YUV to 16-bit 5-6-5 output via WRITERGB16.
 *
 * Only c, dest and dstW are referenced directly. Everything else the asm
 * needs is read relative to &c->redDither or from c->uv_offx2. The remaining
 * parameters are not used in this body.
 * When DITHER1XBPP is defined, the BLUE/GREEN/RED_DITHER quadwords are added
 * with unsigned saturation before packing.
 */
00418 static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
00419 const int16_t **lumSrc, int lumFilterSize,
00420 const int16_t *chrFilter, const int16_t **chrUSrc,
00421 const int16_t **chrVSrc,
00422 int chrFilterSize, const int16_t **alpSrc,
00423 uint8_t *dest, int dstW, int dstY)
00424 {
00425 x86_reg dummy=0;
00426 x86_reg dstW_reg = dstW;
00427 x86_reg uv_off = c->uv_offx2;
00428
00429 YSCALEYUV2PACKEDX_ACCURATE
00430 YSCALEYUV2RGBX
/* WRITERGB16 uses mm7 as the zero source when widening green. */
00431 "pxor %%mm7, %%mm7 \n\t"
00432
00433 #ifdef DITHER1XBPP
00434 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
00435 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
00436 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
00437 #endif
00438 WRITERGB16(%4, %5, %%REGa)
00439 YSCALEYUV2PACKEDX_END
00440 }
00441
/*
 * yuv2rgb565_X: multi-tap vertical scale (16-bit pmulhw macros) plus YUV to
 * 16-bit 5-6-5 output via WRITERGB16.
 *
 * Only c, dest and dstW are referenced directly. Everything else the asm
 * needs is read relative to &c->redDither or from c->uv_offx2. The remaining
 * parameters are not used in this body.
 * When DITHER1XBPP is defined, the BLUE/GREEN/RED_DITHER quadwords are added
 * with unsigned saturation before packing.
 */
00442 static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
00443 const int16_t **lumSrc, int lumFilterSize,
00444 const int16_t *chrFilter, const int16_t **chrUSrc,
00445 const int16_t **chrVSrc,
00446 int chrFilterSize, const int16_t **alpSrc,
00447 uint8_t *dest, int dstW, int dstY)
00448 {
00449 x86_reg dummy=0;
00450 x86_reg dstW_reg = dstW;
00451 x86_reg uv_off = c->uv_offx2;
00452
00453 YSCALEYUV2PACKEDX
00454 YSCALEYUV2RGBX
/* WRITERGB16 uses mm7 as the zero source when widening green. */
00455 "pxor %%mm7, %%mm7 \n\t"
00456
00457 #ifdef DITHER1XBPP
00458 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
00459 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
00460 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
00461 #endif
00462 WRITERGB16(%4, %5, %%REGa)
00463 YSCALEYUV2PACKEDX_END
00464 }
00465
/*
 * REAL_WRITERGB15(dst, dstw, index): asm text that packs 8 pixels from
 * mm2 (blue), mm4 (green), mm5 (red) into 16-bit words with 5 bits per
 * channel, stores them and closes the per-8-pixel loop.
 *
 * All three channels are masked with bF8. Blue is shifted right by 3, red
 * right by 1, and green is widened to words and shifted left by 2. This
 * gives red in bits 14-10, green in bits 9-5, blue in bits 4-0, with bit 15
 * clear. mm7 supplies the zero bytes used to widen green; the callers in
 * this file clear it with pxor beforehand. mm1 and mm3 are scratch.
 *
 * 16 bytes are written with MOVNTQ at dst + 2*index. Then index += 8 and
 * execution jumps back to label "1" while index is below dstw.
 *
 * WRITERGB15 forwards to REAL_WRITERGB15 so that arguments are
 * macro-expanded before stringification.
 */
00466 #define REAL_WRITERGB15(dst, dstw, index) \
00467 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00468 "pand "MANGLE(bF8)", %%mm4 \n\t" \
00469 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00470 "psrlq $3, %%mm2 \n\t"\
00471 "psrlq $1, %%mm5 \n\t"\
00472 \
00473 "movq %%mm2, %%mm1 \n\t"\
00474 "movq %%mm4, %%mm3 \n\t"\
00475 \
00476 "punpcklbw %%mm7, %%mm3 \n\t"\
00477 "punpcklbw %%mm5, %%mm2 \n\t"\
00478 "punpckhbw %%mm7, %%mm4 \n\t"\
00479 "punpckhbw %%mm5, %%mm1 \n\t"\
00480 \
00481 "psllq $2, %%mm3 \n\t"\
00482 "psllq $2, %%mm4 \n\t"\
00483 \
00484 "por %%mm3, %%mm2 \n\t"\
00485 "por %%mm4, %%mm1 \n\t"\
00486 \
00487 MOVNTQ(%%mm2, (dst, index, 2))\
00488 MOVNTQ(%%mm1, 8(dst, index, 2))\
00489 \
00490 "add $8, "#index" \n\t"\
00491 "cmp "#dstw", "#index" \n\t"\
00492 " jb 1b \n\t"
00493 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
00494
/*
 * yuv2rgb555_X_ar: multi-tap vertical scale (32-bit-accumulating macros)
 * plus YUV to 16-bit 5-5-5 output via WRITERGB15.
 *
 * Only c, dest and dstW are referenced directly. Everything else the asm
 * needs is read relative to &c->redDither or from c->uv_offx2. The remaining
 * parameters are not used in this body.
 * When DITHER1XBPP is defined, the BLUE/GREEN/RED_DITHER quadwords are added
 * with unsigned saturation before packing.
 */
00495 static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
00496 const int16_t **lumSrc, int lumFilterSize,
00497 const int16_t *chrFilter, const int16_t **chrUSrc,
00498 const int16_t **chrVSrc,
00499 int chrFilterSize, const int16_t **alpSrc,
00500 uint8_t *dest, int dstW, int dstY)
00501 {
00502 x86_reg dummy=0;
00503 x86_reg dstW_reg = dstW;
00504 x86_reg uv_off = c->uv_offx2;
00505
00506 YSCALEYUV2PACKEDX_ACCURATE
00507 YSCALEYUV2RGBX
/* WRITERGB15 uses mm7 as the zero source when widening green. */
00508 "pxor %%mm7, %%mm7 \n\t"
00509
00510 #ifdef DITHER1XBPP
00511 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
00512 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
00513 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
00514 #endif
00515 WRITERGB15(%4, %5, %%REGa)
00516 YSCALEYUV2PACKEDX_END
00517 }
00518
/*
 * yuv2rgb555_X: multi-tap vertical scale (16-bit pmulhw macros) plus YUV to
 * 16-bit 5-5-5 output via WRITERGB15.
 *
 * Only c, dest and dstW are referenced directly. Everything else the asm
 * needs is read relative to &c->redDither or from c->uv_offx2. The remaining
 * parameters are not used in this body.
 * When DITHER1XBPP is defined, the BLUE/GREEN/RED_DITHER quadwords are added
 * with unsigned saturation before packing.
 */
00519 static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
00520 const int16_t **lumSrc, int lumFilterSize,
00521 const int16_t *chrFilter, const int16_t **chrUSrc,
00522 const int16_t **chrVSrc,
00523 int chrFilterSize, const int16_t **alpSrc,
00524 uint8_t *dest, int dstW, int dstY)
00525 {
00526 x86_reg dummy=0;
00527 x86_reg dstW_reg = dstW;
00528 x86_reg uv_off = c->uv_offx2;
00529
00530 YSCALEYUV2PACKEDX
00531 YSCALEYUV2RGBX
/* WRITERGB15 uses mm7 as the zero source when widening green. */
00532 "pxor %%mm7, %%mm7 \n\t"
00533
00534 #ifdef DITHER1XBPP
00535 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
00536 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
00537 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
00538 #endif
00539 WRITERGB15(%4, %5, %%REGa)
00540 YSCALEYUV2PACKEDX_END
00541 }
00542
/*
 * WRITEBGR24MMX(dst, dstw, index): plain-MMX asm text that packs 8 pixels
 * from mm2 (blue), mm4 (green), mm5 (red) into 24 bytes of 3-byte pixels,
 * stores them and closes the per-8-pixel loop.
 *
 * The channels are first interleaved with mm7 into four quadwords of two
 * 4-byte pixels each. mm7 must be zero on entry; the callers in this file
 * clear it with pxor. The unused fourth byte of every pixel is then squeezed
 * out with shift / punpckhdq / por sequences, producing three quadwords that
 * are written with MOVNTQ at dst, dst+8 and dst+16.
 *
 * Unlike the other writers, dst is a running byte pointer: it is advanced by
 * 24 per iteration while index advances by 8. The loop jumps back to label
 * "1" while index is below dstw. All of mm0-mm7 are overwritten.
 */
00543 #define WRITEBGR24MMX(dst, dstw, index) \
00544 \
00545 "movq %%mm2, %%mm1 \n\t" \
00546 "movq %%mm5, %%mm6 \n\t" \
00547 "punpcklbw %%mm4, %%mm2 \n\t" \
00548 "punpcklbw %%mm7, %%mm5 \n\t" \
00549 "punpckhbw %%mm4, %%mm1 \n\t" \
00550 "punpckhbw %%mm7, %%mm6 \n\t" \
00551 "movq %%mm2, %%mm0 \n\t" \
00552 "movq %%mm1, %%mm3 \n\t" \
00553 "punpcklwd %%mm5, %%mm0 \n\t" \
00554 "punpckhwd %%mm5, %%mm2 \n\t" \
00555 "punpcklwd %%mm6, %%mm1 \n\t" \
00556 "punpckhwd %%mm6, %%mm3 \n\t" \
00557 \
00558 "movq %%mm0, %%mm4 \n\t" \
00559 "movq %%mm2, %%mm6 \n\t" \
00560 "movq %%mm1, %%mm5 \n\t" \
00561 "movq %%mm3, %%mm7 \n\t" \
00562 \
00563 "psllq $40, %%mm0 \n\t" \
00564 "psllq $40, %%mm2 \n\t" \
00565 "psllq $40, %%mm1 \n\t" \
00566 "psllq $40, %%mm3 \n\t" \
00567 \
00568 "punpckhdq %%mm4, %%mm0 \n\t" \
00569 "punpckhdq %%mm6, %%mm2 \n\t" \
00570 "punpckhdq %%mm5, %%mm1 \n\t" \
00571 "punpckhdq %%mm7, %%mm3 \n\t" \
00572 \
00573 "psrlq $8, %%mm0 \n\t" \
00574 "movq %%mm2, %%mm6 \n\t" \
00575 "psllq $40, %%mm2 \n\t" \
00576 "por %%mm2, %%mm0 \n\t" \
00577 MOVNTQ(%%mm0, (dst))\
00578 \
00579 "psrlq $24, %%mm6 \n\t" \
00580 "movq %%mm1, %%mm5 \n\t" \
00581 "psllq $24, %%mm1 \n\t" \
00582 "por %%mm1, %%mm6 \n\t" \
00583 MOVNTQ(%%mm6, 8(dst))\
00584 \
00585 "psrlq $40, %%mm5 \n\t" \
00586 "psllq $8, %%mm3 \n\t" \
00587 "por %%mm3, %%mm5 \n\t" \
00588 MOVNTQ(%%mm5, 16(dst))\
00589 \
00590 "add $24, "#dst" \n\t"\
00591 \
00592 "add $8, "#index" \n\t"\
00593 "cmp "#dstw", "#index" \n\t"\
00594 " jb 1b \n\t"
00595
/*
 * WRITEBGR24MMX2(dst, dstw, index): pshufw-based variant of WRITEBGR24MMX
 * with the same arguments, stores and loop tail.
 *
 * Each of the three output quadwords is assembled by shuffling the words of
 * mm2, mm4 and mm5 with pshufw, masking the pieces with the ff_M24A, ff_M24B
 * and ff_M24C constants and OR-ing them together in mm6. mm0 and mm7 are
 * loaded with ff_M24A and ff_M24C at the start, so any value the caller left
 * in mm7 is overwritten. mm4 is shifted right by 8 bits after the first
 * store. mm1 and mm3 are scratch.
 *
 * 24 bytes are written with MOVNTQ at dst, dst+8 and dst+16. Then dst += 24,
 * index += 8 and the loop jumps back to label "1" while index is below dstw.
 */
00596 #define WRITEBGR24MMX2(dst, dstw, index) \
00597 \
00598 "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
00599 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
00600 "pshufw $0x50, %%mm2, %%mm1 \n\t" \
00601 "pshufw $0x50, %%mm4, %%mm3 \n\t" \
00602 "pshufw $0x00, %%mm5, %%mm6 \n\t" \
00603 \
00604 "pand %%mm0, %%mm1 \n\t" \
00605 "pand %%mm0, %%mm3 \n\t" \
00606 "pand %%mm7, %%mm6 \n\t" \
00607 \
00608 "psllq $8, %%mm3 \n\t" \
00609 "por %%mm1, %%mm6 \n\t"\
00610 "por %%mm3, %%mm6 \n\t"\
00611 MOVNTQ(%%mm6, (dst))\
00612 \
00613 "psrlq $8, %%mm4 \n\t" \
00614 "pshufw $0xA5, %%mm2, %%mm1 \n\t" \
00615 "pshufw $0x55, %%mm4, %%mm3 \n\t" \
00616 "pshufw $0xA5, %%mm5, %%mm6 \n\t" \
00617 \
00618 "pand "MANGLE(ff_M24B)", %%mm1 \n\t" \
00619 "pand %%mm7, %%mm3 \n\t" \
00620 "pand %%mm0, %%mm6 \n\t" \
00621 \
00622 "por %%mm1, %%mm3 \n\t" \
00623 "por %%mm3, %%mm6 \n\t"\
00624 MOVNTQ(%%mm6, 8(dst))\
00625 \
00626 "pshufw $0xFF, %%mm2, %%mm1 \n\t" \
00627 "pshufw $0xFA, %%mm4, %%mm3 \n\t" \
00628 "pshufw $0xFA, %%mm5, %%mm6 \n\t" \
00629 \
00630 "pand %%mm7, %%mm1 \n\t" \
00631 "pand %%mm0, %%mm3 \n\t" \
00632 "pand "MANGLE(ff_M24B)", %%mm6 \n\t" \
00633 \
00634 "por %%mm1, %%mm3 \n\t"\
00635 "por %%mm3, %%mm6 \n\t"\
00636 MOVNTQ(%%mm6, 16(dst))\
00637 \
00638 "add $24, "#dst" \n\t"\
00639 \
00640 "add $8, "#index" \n\t"\
00641 "cmp "#dstw", "#index" \n\t"\
00642 " jb 1b \n\t"
00643
/*
 * WRITEBGR24 resolves to the pshufw implementation for MMX2 builds and to the
 * plain-MMX implementation otherwise. Any earlier definition is dropped
 * first.
 */
00644 #if COMPILE_TEMPLATE_MMX2
00645 #undef WRITEBGR24
00646 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
00647 #else
00648 #undef WRITEBGR24
00649 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
00650 #endif
00651
/*
 * yuv2bgr24_X_ar: multi-tap vertical scale (32-bit-accumulating macros) plus
 * YUV to 24-bit packed output via WRITEBGR24.
 *
 * Only c, dest and dstW are referenced directly. Everything else the asm
 * needs is read relative to &c->redDither or from c->uv_offx2. The remaining
 * parameters are not used in this body.
 *
 * The operand list is written out here instead of using
 * YSCALEYUV2PACKEDX_END because REG_c is used as the running output pointer
 * and must be added to the clobbers. The operand numbering is otherwise the
 * same (%0 = &c->redDither, %4 = dest, %5 = dstW_reg, %6 = uv_off).
 */
00652 static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
00653 const int16_t **lumSrc, int lumFilterSize,
00654 const int16_t *chrFilter, const int16_t **chrUSrc,
00655 const int16_t **chrVSrc,
00656 int chrFilterSize, const int16_t **alpSrc,
00657 uint8_t *dest, int dstW, int dstY)
00658 {
00659 x86_reg dummy=0;
00660 x86_reg dstW_reg = dstW;
00661 x86_reg uv_off = c->uv_offx2;
00662
00663 YSCALEYUV2PACKEDX_ACCURATE
00664 YSCALEYUV2RGBX
00665 "pxor %%mm7, %%mm7 \n\t"
/* REG_c = dest + 3*REG_a: byte address of the current pixel group. */
00666 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t"
00667 "add %4, %%"REG_c" \n\t"
00668 WRITEBGR24(%%REGc, %5, %%REGa)
00669 :: "r" (&c->redDither),
00670 "m" (dummy), "m" (dummy), "m" (dummy),
00671 "r" (dest), "m" (dstW_reg), "m"(uv_off)
00672 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
00673 );
00674 }
00675
/*
 * yuv2bgr24_X: multi-tap vertical scale (16-bit pmulhw macros) plus YUV to
 * 24-bit packed output via WRITEBGR24.
 *
 * Only c, dest and dstW are referenced directly. Everything else the asm
 * needs is read relative to &c->redDither or from c->uv_offx2. The remaining
 * parameters are not used in this body.
 *
 * The operand list is written out here instead of using
 * YSCALEYUV2PACKEDX_END because REG_c is used as the running output pointer
 * and must be added to the clobbers. The operand numbering is otherwise the
 * same (%0 = &c->redDither, %4 = dest, %5 = dstW_reg, %6 = uv_off).
 */
00676 static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
00677 const int16_t **lumSrc, int lumFilterSize,
00678 const int16_t *chrFilter, const int16_t **chrUSrc,
00679 const int16_t **chrVSrc,
00680 int chrFilterSize, const int16_t **alpSrc,
00681 uint8_t *dest, int dstW, int dstY)
00682 {
00683 x86_reg dummy=0;
00684 x86_reg dstW_reg = dstW;
00685 x86_reg uv_off = c->uv_offx2;
00686
00687 YSCALEYUV2PACKEDX
00688 YSCALEYUV2RGBX
00689 "pxor %%mm7, %%mm7 \n\t"
/* REG_c = dest + 3*REG_a: byte address of the current pixel group. */
00690 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t"
00691 "add %4, %%"REG_c" \n\t"
00692 WRITEBGR24(%%REGc, %5, %%REGa)
00693 :: "r" (&c->redDither),
00694 "m" (dummy), "m" (dummy), "m" (dummy),
00695 "r" (dest), "m" (dstW_reg), "m"(uv_off)
00696 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
00697 );
00698 }
00699
/*
 * REAL_WRITEYUY2(dst, dstw, index): asm text that packs 8 luma samples
 * (words in mm1/mm7) and 4 samples from each chroma register (words in
 * mm3/mm4) into 16 bytes, stores them and closes the per-8-pixel loop.
 *
 * All inputs are packed to bytes with unsigned saturation. The resulting
 * byte order is Y0 C3_0 Y1 C4_0 Y2 C3_1 Y3 C4_1 ..., where C3/C4 are the
 * values from mm3/mm4 (first and second chroma plane in the callers here).
 *
 * 16 bytes are written with MOVNTQ at dst + 2*index. Then index += 8 and
 * execution jumps back to label "1" while index is below dstw.
 *
 * WRITEYUY2 forwards to REAL_WRITEYUY2 so that arguments are macro-expanded
 * before stringification.
 */
00700 #define REAL_WRITEYUY2(dst, dstw, index) \
00701 "packuswb %%mm3, %%mm3 \n\t"\
00702 "packuswb %%mm4, %%mm4 \n\t"\
00703 "packuswb %%mm7, %%mm1 \n\t"\
00704 "punpcklbw %%mm4, %%mm3 \n\t"\
00705 "movq %%mm1, %%mm7 \n\t"\
00706 "punpcklbw %%mm3, %%mm1 \n\t"\
00707 "punpckhbw %%mm3, %%mm7 \n\t"\
00708 \
00709 MOVNTQ(%%mm1, (dst, index, 2))\
00710 MOVNTQ(%%mm7, 8(dst, index, 2))\
00711 \
00712 "add $8, "#index" \n\t"\
00713 "cmp "#dstw", "#index" \n\t"\
00714 " jb 1b \n\t"
00715 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
00716
/*
 * yuv2yuyv422_X_ar: multi-tap vertical scale (32-bit-accumulating macros)
 * followed by packed 4:2:2 output via WRITEYUY2. There is no colour
 * conversion: the filtered luma and chroma words are shifted right by 3 and
 * handed to the writer.
 *
 * Only c, dest and dstW are referenced directly. Everything else the asm
 * needs is read relative to &c->redDither or from c->uv_offx2. The remaining
 * parameters are not used in this body.
 */
00717 static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
00718 const int16_t **lumSrc, int lumFilterSize,
00719 const int16_t *chrFilter, const int16_t **chrUSrc,
00720 const int16_t **chrVSrc,
00721 int chrFilterSize, const int16_t **alpSrc,
00722 uint8_t *dest, int dstW, int dstY)
00723 {
00724 x86_reg dummy=0;
00725 x86_reg dstW_reg = dstW;
00726 x86_reg uv_off = c->uv_offx2;
00727
00728 YSCALEYUV2PACKEDX_ACCURATE
00729
00730 "psraw $3, %%mm3 \n\t"
00731 "psraw $3, %%mm4 \n\t"
00732 "psraw $3, %%mm1 \n\t"
00733 "psraw $3, %%mm7 \n\t"
00734 WRITEYUY2(%4, %5, %%REGa)
00735 YSCALEYUV2PACKEDX_END
00736 }
00737
/*
 * yuv2yuyv422_X: multi-tap vertical scale (16-bit pmulhw macros) followed by
 * packed 4:2:2 output via WRITEYUY2. There is no colour conversion: the
 * filtered luma and chroma words are shifted right by 3 and handed to the
 * writer.
 *
 * Only c, dest and dstW are referenced directly. Everything else the asm
 * needs is read relative to &c->redDither or from c->uv_offx2. The remaining
 * parameters are not used in this body.
 */
00738 static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
00739 const int16_t **lumSrc, int lumFilterSize,
00740 const int16_t *chrFilter, const int16_t **chrUSrc,
00741 const int16_t **chrVSrc,
00742 int chrFilterSize, const int16_t **alpSrc,
00743 uint8_t *dest, int dstW, int dstY)
00744 {
00745 x86_reg dummy=0;
00746 x86_reg dstW_reg = dstW;
00747 x86_reg uv_off = c->uv_offx2;
00748
00749 YSCALEYUV2PACKEDX
00750
00751 "psraw $3, %%mm3 \n\t"
00752 "psraw $3, %%mm4 \n\t"
00753 "psraw $3, %%mm1 \n\t"
00754 "psraw $3, %%mm7 \n\t"
00755 WRITEYUY2(%4, %5, %%REGa)
00756 YSCALEYUV2PACKEDX_END
00757 }
00758
/*
 * REAL_YSCALEYUV2RGB_UV(index, c): asm text for the chroma part of the
 * two-line blend. It also zeroes `index` and defines loop label "1".
 *
 * index  register used as the pixel/byte index.
 * c      base register for the context-relative offsets.
 * %2/%3  base pointers of the two chroma source lines.
 *
 * For the first plane it loads (%2+index) and (%3+index). For the second
 * plane the same addresses are used after temporarily adding
 * UV_OFF_BYTE(c) to index. Each pair is blended as
 *   (line3 >> 4) + pmulhw(line2 - line3, factor)
 * with the factor read from CHR_MMX_FILTER_OFFSET+8(c). U_OFFSET/V_OFFSET
 * are then subtracted.
 *
 * On exit: mm2/mm5 hold the offset-corrected chroma values (copies), and
 * mm3/mm4 hold those values multiplied by UG_COEFF/VG_COEFF. mm0 is scratch.
 */
00759 #define REAL_YSCALEYUV2RGB_UV(index, c) \
00760 "xor "#index", "#index" \n\t"\
00761 ".p2align 4 \n\t"\
00762 "1: \n\t"\
00763 "movq (%2, "#index"), %%mm2 \n\t" \
00764 "movq (%3, "#index"), %%mm3 \n\t" \
00765 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
00766 "movq (%2, "#index"), %%mm5 \n\t" \
00767 "movq (%3, "#index"), %%mm4 \n\t" \
00768 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
00769 "psubw %%mm3, %%mm2 \n\t" \
00770 "psubw %%mm4, %%mm5 \n\t" \
00771 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00772 "pmulhw %%mm0, %%mm2 \n\t" \
00773 "pmulhw %%mm0, %%mm5 \n\t" \
00774 "psraw $4, %%mm3 \n\t" \
00775 "psraw $4, %%mm4 \n\t" \
00776 "paddw %%mm2, %%mm3 \n\t" \
00777 "paddw %%mm5, %%mm4 \n\t" \
00778 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00779 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00780 "movq %%mm3, %%mm2 \n\t" \
00781 "movq %%mm4, %%mm5 \n\t" \
00782 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00783 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00784 \
00785
/*
 * REAL_YSCALEYUV2RGB_YA(index, c, b1, b2): asm text blending 8 16-bit samples
 * from two source lines.
 *
 * index   pixel index register; samples are read at base + 2*index and
 *         8 bytes further on.
 * c       base register for LUM_MMX_FILTER_OFFSET.
 * b1, b2  base pointers of the two lines.
 *
 * Result: mm1 (samples 0-3) and mm7 (samples 4-7) =
 *   (b2 >> 4) + pmulhw(b1 - b2, factor)
 * with the factor read from LUM_MMX_FILTER_OFFSET+8(c). mm0 and mm6 are
 * scratch. The same factor is used regardless of which b1/b2 are passed.
 */
00786 #define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
00787 "movq ("#b1", "#index", 2), %%mm0 \n\t" \
00788 "movq ("#b2", "#index", 2), %%mm1 \n\t" \
00789 "movq 8("#b1", "#index", 2), %%mm6 \n\t" \
00790 "movq 8("#b2", "#index", 2), %%mm7 \n\t" \
00791 "psubw %%mm1, %%mm0 \n\t" \
00792 "psubw %%mm7, %%mm6 \n\t" \
00793 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
00794 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
00795 "psraw $4, %%mm1 \n\t" \
00796 "psraw $4, %%mm7 \n\t" \
00797 "paddw %%mm0, %%mm1 \n\t" \
00798 "paddw %%mm6, %%mm7 \n\t" \
00799
/*
 * REAL_YSCALEYUV2RGB_COEFF(c): asm text finishing the YUV to RGB conversion
 * for 8 pixels, with all coefficients read relative to register c.
 *
 * Input:  mm2/mm5 = offset-corrected chroma, mm3/mm4 = chroma already
 *         multiplied by UG_COEFF/VG_COEFF, mm1/mm7 = luma 0-3 / 4-7.
 * Steps:  multiply mm2 by UB_COEFF and mm5 by VR_COEFF, subtract Y_OFFSET
 *         from luma and multiply by Y_COEFF, sum the two green terms into
 *         mm4, duplicate every chroma term for two adjacent pixels, add
 *         luma, pack to bytes with unsigned saturation.
 * Output: mm2, mm4, mm5 = 8 bytes each. The callers in this file pass them
 *         to the writers as blue, green and red respectively.
 * mm0, mm3, mm6 are overwritten.
 */
00800 #define REAL_YSCALEYUV2RGB_COEFF(c) \
00801 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00802 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00803 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00804 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00805 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00806 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00807 \
00808 "paddw %%mm3, %%mm4 \n\t"\
00809 "movq %%mm2, %%mm0 \n\t"\
00810 "movq %%mm5, %%mm6 \n\t"\
00811 "movq %%mm4, %%mm3 \n\t"\
00812 "punpcklwd %%mm2, %%mm2 \n\t"\
00813 "punpcklwd %%mm5, %%mm5 \n\t"\
00814 "punpcklwd %%mm4, %%mm4 \n\t"\
00815 "paddw %%mm1, %%mm2 \n\t"\
00816 "paddw %%mm1, %%mm5 \n\t"\
00817 "paddw %%mm1, %%mm4 \n\t"\
00818 "punpckhwd %%mm0, %%mm0 \n\t"\
00819 "punpckhwd %%mm6, %%mm6 \n\t"\
00820 "punpckhwd %%mm3, %%mm3 \n\t"\
00821 "paddw %%mm7, %%mm0 \n\t"\
00822 "paddw %%mm7, %%mm6 \n\t"\
00823 "paddw %%mm7, %%mm3 \n\t"\
00824 \
00825 "packuswb %%mm0, %%mm2 \n\t"\
00826 "packuswb %%mm6, %%mm5 \n\t"\
00827 "packuswb %%mm3, %%mm4 \n\t"\
00828
/*
 * YSCALEYUV2RGB_YA forwards to REAL_YSCALEYUV2RGB_YA so that its arguments
 * are macro-expanded before stringification.
 */
00829 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
00830
/*
 * YSCALEYUV2RGB(index, c): complete two-line blend plus colour conversion
 * for 8 pixels: chroma blend, luma blend from operands %0/%1, then the
 * coefficient stage. It leaves the packed channels in mm2, mm4, mm5 and
 * defines loop label "1" (from the UV part).
 */
00831 #define YSCALEYUV2RGB(index, c) \
00832 REAL_YSCALEYUV2RGB_UV(index, c) \
00833 REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
00834 REAL_YSCALEYUV2RGB_COEFF(c)
00835
/*
 * yuv2rgb32_2: blend two source lines and convert to 32-bit packed RGB.
 *
 * Operands shared by all three asm statements: %0/%1 = buf0/buf1 (luma
 * lines, "c"/"d" registers), %2/%3 = ubuf0/ubuf1 ("S"/"D" registers),
 * %4 = dest, %5 = &c->redDither ("a" register).
 * The width is not passed as an operand: the loop bound is read from the
 * hard-coded displacement 8280(%5).
 * NOTE(review): 8280 presumably addresses the dstW field of the context
 * relative to redDither - confirm against the SwsContext layout.
 *
 * vbuf, dstW, yalpha, uvalpha and y are not used in this body. The second
 * chroma plane is reached through UV_OFF_BYTE inside the macros and the
 * blend factors are read from the context.
 *
 * Alpha (CONFIG_SWSCALE_ALPHA and c->alpPixBuf set):
 *   - x86-64: r8 is the index, abuf0/abuf1 are extra register operands
 *     %6/%7, and r8 is the only declared clobber.
 *   - otherwise: abuf0/abuf1 are stashed in c->u_temp/c->v_temp, and
 *     %0/%1 are pushed, temporarily replaced with those pointers for the
 *     alpha blend, then popped again.
 * No alpha: the fourth byte of every pixel is 0xFF.
 *
 * In the non-x86-64 alpha path and the no-alpha path, REG_b is saved to
 * ESP_OFFSET(%5) and reused as the destination pointer, and REG_BP is
 * pushed and reused as the index. Both are restored before the asm ends.
 * These statements declare no clobbers.
 */
00839 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
00840 const int16_t *ubuf[2], const int16_t *vbuf[2],
00841 const int16_t *abuf[2], uint8_t *dest,
00842 int dstW, int yalpha, int uvalpha, int y)
00843 {
00844 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00845 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00846
00847 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00848 const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
00849 #if ARCH_X86_64
00850 __asm__ volatile(
00851 YSCALEYUV2RGB(%%r8, %5)
/* Blend the two alpha lines into mm1/mm7, then scale and pack to bytes. */
00852 YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
00853 "psraw $3, %%mm1 \n\t"
00854 "psraw $3, %%mm7 \n\t"
00855 "packuswb %%mm7, %%mm1 \n\t"
00856 WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00857 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
00858 "a" (&c->redDither),
00859 "r" (abuf0), "r" (abuf1)
00860 : "%r8"
00861 );
00862 #else
/* Stash the alpha pointers in the context for the asm to load. */
00863 c->u_temp=(intptr_t)abuf0;
00864 c->v_temp=(intptr_t)abuf1;
00865 __asm__ volatile(
00866 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00867 "mov %4, %%"REG_b" \n\t"
00868 "push %%"REG_BP" \n\t"
00869 YSCALEYUV2RGB(%%REGBP, %5)
/* Borrow %0/%1 for the alpha line pointers; the luma pointers are kept on the stack. */
00870 "push %0 \n\t"
00871 "push %1 \n\t"
00872 "mov "U_TEMP"(%5), %0 \n\t"
00873 "mov "V_TEMP"(%5), %1 \n\t"
00874 YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
00875 "psraw $3, %%mm1 \n\t"
00876 "psraw $3, %%mm7 \n\t"
00877 "packuswb %%mm7, %%mm1 \n\t"
00878 "pop %1 \n\t"
00879 "pop %0 \n\t"
00880 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00881 "pop %%"REG_BP" \n\t"
00882 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00883 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00884 "a" (&c->redDither)
00885 );
00886 #endif
00887 } else {
00888 __asm__ volatile(
00889 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00890 "mov %4, %%"REG_b" \n\t"
00891 "push %%"REG_BP" \n\t"
00892 YSCALEYUV2RGB(%%REGBP, %5)
/* No alpha plane: set every bit of mm7 so the fourth byte of each pixel is 0xFF. */
00893 "pcmpeqd %%mm7, %%mm7 \n\t"
00894 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00895 "pop %%"REG_BP" \n\t"
00896 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00897 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00898 "a" (&c->redDither)
00899 );
00900 }
00901 }
00902
/*
 * yuv2bgr24_2: blend two source lines and convert to 24-bit packed output
 * via WRITEBGR24.
 *
 * Operands: %0/%1 = buf0/buf1 ("c"/"d"), %2/%3 = ubuf0/ubuf1 ("S"/"D"),
 * %4 = dest (memory), %5 = &c->redDither ("a"). The loop bound is read from
 * the hard-coded displacement 8280(%5).
 * NOTE(review): 8280 presumably addresses the dstW field of the context
 * relative to redDither - confirm against the SwsContext layout.
 *
 * REG_b is saved to ESP_OFFSET(%5) and reused as the running destination
 * pointer, and REG_BP is pushed and reused as the index. Both are restored
 * before the asm ends. No clobbers are declared.
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not used in this body.
 */
00903 static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
00904 const int16_t *ubuf[2], const int16_t *vbuf[2],
00905 const int16_t *abuf[2], uint8_t *dest,
00906 int dstW, int yalpha, int uvalpha, int y)
00907 {
00908 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00909 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00910
00911
00912 __asm__ volatile(
00913 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00914 "mov %4, %%"REG_b" \n\t"
00915 "push %%"REG_BP" \n\t"
00916 YSCALEYUV2RGB(%%REGBP, %5)
00917 "pxor %%mm7, %%mm7 \n\t"
00918 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
00919 "pop %%"REG_BP" \n\t"
00920 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00921 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00922 "a" (&c->redDither)
00923 );
00924 }
00925
/*
 * yuv2rgb555_2: blend two source lines and convert to 16-bit 5-5-5 output
 * via WRITERGB15.
 *
 * Operands: %0/%1 = buf0/buf1 ("c"/"d"), %2/%3 = ubuf0/ubuf1 ("S"/"D"),
 * %4 = dest (memory), %5 = &c->redDither ("a"). The loop bound is read from
 * the hard-coded displacement 8280(%5).
 * NOTE(review): 8280 presumably addresses the dstW field of the context
 * relative to redDither - confirm against the SwsContext layout.
 *
 * REG_b is saved to ESP_OFFSET(%5) and reused as the destination pointer,
 * and REG_BP is pushed and reused as the index. Both are restored before
 * the asm ends. No clobbers are declared.
 * When DITHER1XBPP is defined, the BLUE/GREEN/RED_DITHER quadwords are added
 * with unsigned saturation before packing.
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not used in this body.
 */
00926 static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
00927 const int16_t *ubuf[2], const int16_t *vbuf[2],
00928 const int16_t *abuf[2], uint8_t *dest,
00929 int dstW, int yalpha, int uvalpha, int y)
00930 {
00931 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00932 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00933
00934
00935 __asm__ volatile(
00936 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00937 "mov %4, %%"REG_b" \n\t"
00938 "push %%"REG_BP" \n\t"
00939 YSCALEYUV2RGB(%%REGBP, %5)
/* WRITERGB15 uses mm7 as the zero source when widening green. */
00940 "pxor %%mm7, %%mm7 \n\t"
00941
00942 #ifdef DITHER1XBPP
00943 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
00944 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
00945 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
00946 #endif
00947 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
00948 "pop %%"REG_BP" \n\t"
00949 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00950 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00951 "a" (&c->redDither)
00952 );
00953 }
00954
/*
 * yuv2rgb565_2: blend two source lines and convert to 16-bit 5-6-5 output
 * via WRITERGB16.
 *
 * Operands: %0/%1 = buf0/buf1 ("c"/"d"), %2/%3 = ubuf0/ubuf1 ("S"/"D"),
 * %4 = dest (memory), %5 = &c->redDither ("a"). The loop bound is read from
 * the hard-coded displacement 8280(%5).
 * NOTE(review): 8280 presumably addresses the dstW field of the context
 * relative to redDither - confirm against the SwsContext layout.
 *
 * REG_b is saved to ESP_OFFSET(%5) and reused as the destination pointer,
 * and REG_BP is pushed and reused as the index. Both are restored before
 * the asm ends. No clobbers are declared.
 * When DITHER1XBPP is defined, the BLUE/GREEN/RED_DITHER quadwords are added
 * with unsigned saturation before packing.
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not used in this body.
 */
00955 static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
00956 const int16_t *ubuf[2], const int16_t *vbuf[2],
00957 const int16_t *abuf[2], uint8_t *dest,
00958 int dstW, int yalpha, int uvalpha, int y)
00959 {
00960 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00961 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00962
00963
00964 __asm__ volatile(
00965 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00966 "mov %4, %%"REG_b" \n\t"
00967 "push %%"REG_BP" \n\t"
00968 YSCALEYUV2RGB(%%REGBP, %5)
/* WRITERGB16 uses mm7 as the zero source when widening green. */
00969 "pxor %%mm7, %%mm7 \n\t"
00970
00971 #ifdef DITHER1XBPP
00972 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
00973 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
00974 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
00975 #endif
00976 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
00977 "pop %%"REG_BP" \n\t"
00978 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00979 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00980 "a" (&c->redDither)
00981 );
00982 }
00983
/*
 * REAL_YSCALEYUV2PACKED(index, c): asm text for the two-line blend without
 * colour conversion. It zeroes `index` and defines loop label "1".
 *
 * Before the loop, the blend factors stored at CHR_MMX_FILTER_OFFSET+8(c)
 * and LUM_MMX_FILTER_OFFSET+8(c) are shifted right by 3 and written back to
 * the same locations, so the expansion modifies the context memory.
 * NOTE(review): the asm statements using this macro in the visible code list
 * no "memory" clobber and no output operand for this store - confirm callers
 * rewrite the factors before any other code reads them.
 *
 * Inside the loop each pair of lines is blended as
 *   (line_b >> 7) + pmulhw(line_a - line_b, factor)
 * for the first chroma plane (%2/%3 at index), the second chroma plane
 * (same addresses with UV_OFF_BYTE(c) temporarily added to index) and luma
 * (%0/%1 at 2*index and 8 bytes further on).
 *
 * On exit: mm3/mm4 = chroma (first/second plane), mm1/mm7 = luma 0-3 / 4-7.
 * mm0, mm2, mm5, mm6 are scratch.
 *
 * YSCALEYUV2PACKED forwards to REAL_YSCALEYUV2PACKED so that arguments are
 * macro-expanded before stringification.
 */
00984 #define REAL_YSCALEYUV2PACKED(index, c) \
00985 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00986 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
00987 "psraw $3, %%mm0 \n\t"\
00988 "psraw $3, %%mm1 \n\t"\
00989 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00990 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00991 "xor "#index", "#index" \n\t"\
00992 ".p2align 4 \n\t"\
00993 "1: \n\t"\
00994 "movq (%2, "#index"), %%mm2 \n\t" \
00995 "movq (%3, "#index"), %%mm3 \n\t" \
00996 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
00997 "movq (%2, "#index"), %%mm5 \n\t" \
00998 "movq (%3, "#index"), %%mm4 \n\t" \
00999 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01000 "psubw %%mm3, %%mm2 \n\t" \
01001 "psubw %%mm4, %%mm5 \n\t" \
01002 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
01003 "pmulhw %%mm0, %%mm2 \n\t" \
01004 "pmulhw %%mm0, %%mm5 \n\t" \
01005 "psraw $7, %%mm3 \n\t" \
01006 "psraw $7, %%mm4 \n\t" \
01007 "paddw %%mm2, %%mm3 \n\t" \
01008 "paddw %%mm5, %%mm4 \n\t" \
01009 "movq (%0, "#index", 2), %%mm0 \n\t" \
01010 "movq (%1, "#index", 2), %%mm1 \n\t" \
01011 "movq 8(%0, "#index", 2), %%mm6 \n\t" \
01012 "movq 8(%1, "#index", 2), %%mm7 \n\t" \
01013 "psubw %%mm1, %%mm0 \n\t" \
01014 "psubw %%mm7, %%mm6 \n\t" \
01015 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
01016 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
01017 "psraw $7, %%mm1 \n\t" \
01018 "psraw $7, %%mm7 \n\t" \
01019 "paddw %%mm0, %%mm1 \n\t" \
01020 "paddw %%mm6, %%mm7 \n\t" \
01021
01022 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
01023
/**
 * Packed YUYV422 output from two luma lines and two chroma lines, implemented
 * as a single inline MMX assembly statement.
 *
 * The lines are blended by YSCALEYUV2PACKED (no colour conversion) and stored
 * by the WRITEYUY2 macro, which is defined outside this section. The C code
 * only reads buf[0], buf[1], ubuf[0] and ubuf[1]; vbuf, abuf, dstW, yalpha,
 * uvalpha and y are not referenced by name in this body.
 *
 * @param c    scaler context; &c->redDither is the base address (%5) for the
 *             context-relative offsets. YSCALEYUV2PACKED rewrites the filter
 *             coefficient words in the context, so the context is modified.
 * @param buf  two luma line pointers (asm operands %0 and %1)
 * @param ubuf two chroma line pointers (asm operands %2 and %3)
 * @param dest output pointer (memory operand %4, loaded into REG_b)
 */
01024 static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
01025 const int16_t *ubuf[2], const int16_t *vbuf[2],
01026 const int16_t *abuf[2], uint8_t *dest,
01027 int dstW, int yalpha, int uvalpha, int y)
01028 {
01029 const int16_t *buf0 = buf[0], *buf1 = buf[1],
01030 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
01031
01032
01033 __asm__ volatile(
/* Park REG_b in the context (it becomes the destination pointer) and keep
 * REG_BP, which serves as the loop index, on the stack. */
01034 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01035 "mov %4, %%"REG_b" \n\t"
01036 "push %%"REG_BP" \n\t"
01037 YSCALEYUV2PACKED(%%REGBP, %5)
/* NOTE(review): 8280(%5) is a hard-coded context offset, presumably the
 * destination width; confirm against the context layout. */
01038 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01039 "pop %%"REG_BP" \n\t"
01040 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01041 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01042 "a" (&c->redDither)
01043 );
01044 }
01045
/**
 * Asm fragment: YUV to RGB conversion of 8 pixels from a single luma line and
 * a single chroma line (no vertical blending).
 *
 * Arguments:
 *   index - register used as the loop index; cleared by this macro
 *   c     - base register for the context-relative offsets
 * Positional asm operands of the enclosing statement:
 *   %0 - luma line, addressed as (base, index, 2) and 8(base, index, 2)
 *   %2 - chroma line, read at index and again at index + UV_OFF_BYTE(c)
 *        (presumably the U and V planes respectively; confirm)
 *   %1 and %3 are not touched by this fragment.
 *
 * Steps:
 *   - Opens local label "1:"; the backward jump is not part of this macro.
 *   - Chroma words are shifted right by 4, U_OFFSET / V_OFFSET are subtracted,
 *     and the results are multiplied (pmulhw) by UG_COEFF / VG_COEFF and by
 *     UB_COEFF / VR_COEFF.
 *   - Luma words are shifted right by 4, Y_OFFSET is subtracted and the
 *     result is multiplied by Y_COEFF.
 *   - Each chroma term is duplicated word-wise (punpcklwd / punpckhwd of a
 *     register with itself) so that one chroma sample serves two pixels, then
 *     added to the luma and packed to bytes with unsigned saturation.
 *
 * Results (8 bytes each): mm2 = blue (UB term), mm4 = green (UG + VG terms),
 * mm5 = red (VR term). mm0, mm1, mm3, mm6 and mm7 are clobbered.
 *
 * YSCALEYUV2RGB1 is the extra expansion layer in front of the stringifying
 * REAL_ macro.
 */
01046 #define REAL_YSCALEYUV2RGB1(index, c) \
01047 "xor "#index", "#index" \n\t"\
01048 ".p2align 4 \n\t"\
01049 "1: \n\t"\
01050 "movq (%2, "#index"), %%mm3 \n\t" \
01051 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01052 "movq (%2, "#index"), %%mm4 \n\t" \
01053 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01054 "psraw $4, %%mm3 \n\t" \
01055 "psraw $4, %%mm4 \n\t" \
01056 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
01057 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
01058 "movq %%mm3, %%mm2 \n\t" \
01059 "movq %%mm4, %%mm5 \n\t" \
01060 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
01061 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
01062 \
01063 "movq (%0, "#index", 2), %%mm1 \n\t" \
01064 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01065 "psraw $4, %%mm1 \n\t" \
01066 "psraw $4, %%mm7 \n\t" \
01067 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
01068 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
01069 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
01070 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
01071 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
01072 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
01073 \
01074 "paddw %%mm3, %%mm4 \n\t"\
01075 "movq %%mm2, %%mm0 \n\t"\
01076 "movq %%mm5, %%mm6 \n\t"\
01077 "movq %%mm4, %%mm3 \n\t"\
01078 "punpcklwd %%mm2, %%mm2 \n\t"\
01079 "punpcklwd %%mm5, %%mm5 \n\t"\
01080 "punpcklwd %%mm4, %%mm4 \n\t"\
01081 "paddw %%mm1, %%mm2 \n\t"\
01082 "paddw %%mm1, %%mm5 \n\t"\
01083 "paddw %%mm1, %%mm4 \n\t"\
01084 "punpckhwd %%mm0, %%mm0 \n\t"\
01085 "punpckhwd %%mm6, %%mm6 \n\t"\
01086 "punpckhwd %%mm3, %%mm3 \n\t"\
01087 "paddw %%mm7, %%mm0 \n\t"\
01088 "paddw %%mm7, %%mm6 \n\t"\
01089 "paddw %%mm7, %%mm3 \n\t"\
01090 \
01091 "packuswb %%mm0, %%mm2 \n\t"\
01092 "packuswb %%mm6, %%mm5 \n\t"\
01093 "packuswb %%mm3, %%mm4 \n\t"\
01094
01095 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
01096
01097
/**
 * Asm fragment: YUV to RGB conversion of 8 pixels from a single luma line and
 * the sum of two chroma lines.
 *
 * Identical to REAL_YSCALEYUV2RGB1 except for the chroma input: the words
 * read from %2 and %3 (at index, and again at index + UV_OFF_BYTE(c)) are
 * added together and then logically shifted right by 5 (psrlw), instead of a
 * single line being arithmetically shifted right by 4.
 * NOTE(review): the add happens in 16-bit lanes before the shift, so the sum
 * must fit in an unsigned 16-bit word for the result to be meaningful;
 * confirm the input range guarantees this.
 *
 * Arguments:
 *   index - register used as the loop index; cleared by this macro
 *   c     - base register for the context-relative offsets
 * Positional asm operands of the enclosing statement:
 *   %0     - luma line
 *   %2, %3 - the two chroma lines
 *   %1 is not touched by this fragment.
 *
 * Opens local label "1:"; the backward jump is not part of this macro.
 *
 * Results (8 bytes each): mm2 = blue (UB term), mm4 = green (UG + VG terms),
 * mm5 = red (VR term). mm0, mm1, mm3, mm6 and mm7 are clobbered.
 *
 * YSCALEYUV2RGB1b is the extra expansion layer in front of the stringifying
 * REAL_ macro.
 */
01098 #define REAL_YSCALEYUV2RGB1b(index, c) \
01099 "xor "#index", "#index" \n\t"\
01100 ".p2align 4 \n\t"\
01101 "1: \n\t"\
01102 "movq (%2, "#index"), %%mm2 \n\t" \
01103 "movq (%3, "#index"), %%mm3 \n\t" \
01104 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01105 "movq (%2, "#index"), %%mm5 \n\t" \
01106 "movq (%3, "#index"), %%mm4 \n\t" \
01107 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01108 "paddw %%mm2, %%mm3 \n\t" \
01109 "paddw %%mm5, %%mm4 \n\t" \
01110 "psrlw $5, %%mm3 \n\t" \
01111 "psrlw $5, %%mm4 \n\t" \
01112 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
01113 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
01114 "movq %%mm3, %%mm2 \n\t" \
01115 "movq %%mm4, %%mm5 \n\t" \
01116 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
01117 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
01118 \
01119 "movq (%0, "#index", 2), %%mm1 \n\t" \
01120 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01121 "psraw $4, %%mm1 \n\t" \
01122 "psraw $4, %%mm7 \n\t" \
01123 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
01124 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
01125 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
01126 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
01127 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
01128 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
01129 \
01130 "paddw %%mm3, %%mm4 \n\t"\
01131 "movq %%mm2, %%mm0 \n\t"\
01132 "movq %%mm5, %%mm6 \n\t"\
01133 "movq %%mm4, %%mm3 \n\t"\
01134 "punpcklwd %%mm2, %%mm2 \n\t"\
01135 "punpcklwd %%mm5, %%mm5 \n\t"\
01136 "punpcklwd %%mm4, %%mm4 \n\t"\
01137 "paddw %%mm1, %%mm2 \n\t"\
01138 "paddw %%mm1, %%mm5 \n\t"\
01139 "paddw %%mm1, %%mm4 \n\t"\
01140 "punpckhwd %%mm0, %%mm0 \n\t"\
01141 "punpckhwd %%mm6, %%mm6 \n\t"\
01142 "punpckhwd %%mm3, %%mm3 \n\t"\
01143 "paddw %%mm7, %%mm0 \n\t"\
01144 "paddw %%mm7, %%mm6 \n\t"\
01145 "paddw %%mm7, %%mm3 \n\t"\
01146 \
01147 "packuswb %%mm0, %%mm2 \n\t"\
01148 "packuswb %%mm6, %%mm5 \n\t"\
01149 "packuswb %%mm3, %%mm4 \n\t"\
01150
01151 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
01152
/**
 * Asm fragment: load 8 sixteen-bit samples from asm operand %1 at
 * (base, index, 2), shift each right arithmetically by 7 and pack them to
 * 8 unsigned-saturated bytes in mm7. mm1 is used as scratch.
 *
 * In this file it is expanded right after YSCALEYUV2RGB1 / YSCALEYUV2RGB1b in
 * the branches where the alpha line is bound to operand %1, so mm7 then
 * carries the alpha bytes for the writer macro.
 *
 * YSCALEYUV2RGB1_ALPHA is the extra expansion layer in front of the
 * stringifying REAL_ macro.
 */
01153 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
01154 "movq (%1, "#index", 2), %%mm7 \n\t" \
01155 "movq 8(%1, "#index", 2), %%mm1 \n\t" \
01156 "psraw $7, %%mm7 \n\t" \
01157 "psraw $7, %%mm1 \n\t" \
01158 "packuswb %%mm1, %%mm7 \n\t"
01159 #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
01160
/**
 * Packed 32-bit RGB output from a single luma line, implemented with inline
 * MMX assembly.
 *
 * Two independent choices select one of four asm statements:
 *   - uvalpha < 2048: only ubuf[0] is used for chroma (YSCALEYUV2RGB1);
 *     otherwise ubuf[0] and ubuf[1] are summed (YSCALEYUV2RGB1b).
 *   - CONFIG_SWSCALE_ALPHA && c->alpPixBuf: the alpha line abuf0 is bound to
 *     operand %1 and converted by YSCALEYUV2RGB1_ALPHA; otherwise mm7 is set
 *     to all ones with pcmpeqd before the writer macro runs.
 *
 * vbuf, dstW and y are not referenced by name in this body. The pixel
 * packing/store is done by WRITEBGR32, defined outside this section.
 *
 * @param c       scaler context; &c->redDither is the base address (%5) for
 *                the context-relative offsets
 * @param buf0    luma line (asm operand %0)
 * @param ubuf    chroma line pointers; see the uvalpha description above
 * @param abuf0   alpha line, only read in the alpha branches
 * @param dest    output pointer (memory operand %4, loaded into REG_b)
 * @param uvalpha selects the single-line or the two-line chroma path
 */
01164 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
01165 const int16_t *ubuf[2], const int16_t *vbuf[2],
01166 const int16_t *abuf0, uint8_t *dest,
01167 int dstW, int uvalpha, int y)
01168 {
01169 const int16_t *ubuf0 = ubuf[0];
/* buf1 only exists to fill asm operand %1 in the non-alpha branches; the
 * fragments used there do not read %1. */
01170 const int16_t *buf1= buf0;
01171
01172 if (uvalpha < 2048) {
/* Single chroma line: ubuf1 aliases ubuf[0]; YSCALEYUV2RGB1 only reads %2. */
01173 const int16_t *ubuf1 = ubuf[0];
01174 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
01175 __asm__ volatile(
01176 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01177 "mov %4, %%"REG_b" \n\t"
01178 "push %%"REG_BP" \n\t"
01179 YSCALEYUV2RGB1(%%REGBP, %5)
01180 YSCALEYUV2RGB1_ALPHA(%%REGBP)
01181 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01182 "pop %%"REG_BP" \n\t"
01183 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01184 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01185 "a" (&c->redDither)
01186 );
01187 } else {
01188 __asm__ volatile(
01189 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01190 "mov %4, %%"REG_b" \n\t"
01191 "push %%"REG_BP" \n\t"
01192 YSCALEYUV2RGB1(%%REGBP, %5)
/* No alpha input: fill mm7 with all-one bits for the fourth channel. */
01193 "pcmpeqd %%mm7, %%mm7 \n\t"
01194 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01195 "pop %%"REG_BP" \n\t"
01196 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01197 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01198 "a" (&c->redDither)
01199 );
01200 }
01201 } else {
/* Two chroma lines: YSCALEYUV2RGB1b sums the lines bound to %2 and %3. */
01202 const int16_t *ubuf1 = ubuf[1];
01203 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
01204 __asm__ volatile(
01205 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01206 "mov %4, %%"REG_b" \n\t"
01207 "push %%"REG_BP" \n\t"
01208 YSCALEYUV2RGB1b(%%REGBP, %5)
01209 YSCALEYUV2RGB1_ALPHA(%%REGBP)
01210 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01211 "pop %%"REG_BP" \n\t"
01212 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01213 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01214 "a" (&c->redDither)
01215 );
01216 } else {
01217 __asm__ volatile(
01218 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01219 "mov %4, %%"REG_b" \n\t"
01220 "push %%"REG_BP" \n\t"
01221 YSCALEYUV2RGB1b(%%REGBP, %5)
/* No alpha input: fill mm7 with all-one bits for the fourth channel. */
01222 "pcmpeqd %%mm7, %%mm7 \n\t"
01223 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01224 "pop %%"REG_BP" \n\t"
01225 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01226 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01227 "a" (&c->redDither)
01228 );
01229 }
01230 }
01231 }
01232
/**
 * Packed 24-bit BGR output from a single luma line, implemented with inline
 * MMX assembly.
 *
 * When uvalpha < 2048 only ubuf[0] supplies chroma (YSCALEYUV2RGB1);
 * otherwise ubuf[0] and ubuf[1] are summed (YSCALEYUV2RGB1b). mm7 is cleared
 * before the WRITEBGR24 macro (defined outside this section) stores the
 * pixels.
 *
 * vbuf, abuf0, dstW and y are not referenced by name in this body.
 *
 * @param c       scaler context; &c->redDither is the base address (%5) for
 *                the context-relative offsets
 * @param buf0    luma line (asm operand %0)
 * @param ubuf    chroma line pointers; see the uvalpha description above
 * @param dest    output pointer (memory operand %4, loaded into REG_b)
 * @param uvalpha selects the single-line or the two-line chroma path
 */
01233 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
01234 const int16_t *ubuf[2], const int16_t *vbuf[2],
01235 const int16_t *abuf0, uint8_t *dest,
01236 int dstW, int uvalpha, int y)
01237 {
01238 const int16_t *ubuf0 = ubuf[0];
/* buf1 only fills asm operand %1; the fragments used here do not read it. */
01239 const int16_t *buf1= buf0;
01240
01241 if (uvalpha < 2048) {
01242 const int16_t *ubuf1 = ubuf[0];
01243 __asm__ volatile(
01244 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01245 "mov %4, %%"REG_b" \n\t"
01246 "push %%"REG_BP" \n\t"
01247 YSCALEYUV2RGB1(%%REGBP, %5)
01248 "pxor %%mm7, %%mm7 \n\t"
01249 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01250 "pop %%"REG_BP" \n\t"
01251 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01252 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01253 "a" (&c->redDither)
01254 );
01255 } else {
01256 const int16_t *ubuf1 = ubuf[1];
01257 __asm__ volatile(
01258 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01259 "mov %4, %%"REG_b" \n\t"
01260 "push %%"REG_BP" \n\t"
01261 YSCALEYUV2RGB1b(%%REGBP, %5)
01262 "pxor %%mm7, %%mm7 \n\t"
01263 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01264 "pop %%"REG_BP" \n\t"
01265 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01266 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01267 "a" (&c->redDither)
01268 );
01269 }
01270 }
01271
/**
 * Packed RGB555 output from a single luma line, implemented with inline MMX
 * assembly.
 *
 * When uvalpha < 2048 only ubuf[0] supplies chroma (YSCALEYUV2RGB1);
 * otherwise ubuf[0] and ubuf[1] are summed (YSCALEYUV2RGB1b). If
 * DITHER1XBPP is defined, the per-channel dither values stored in the
 * context are added with unsigned saturation before the WRITERGB15 macro
 * (defined outside this section) packs and stores the pixels.
 *
 * vbuf, abuf0, dstW and y are not referenced by name in this body.
 *
 * @param c       scaler context; &c->redDither is the base address (%5) for
 *                the context-relative offsets
 * @param buf0    luma line (asm operand %0)
 * @param ubuf    chroma line pointers; see the uvalpha description above
 * @param dest    output pointer (memory operand %4, loaded into REG_b)
 * @param uvalpha selects the single-line or the two-line chroma path
 */
01272 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
01273 const int16_t *ubuf[2], const int16_t *vbuf[2],
01274 const int16_t *abuf0, uint8_t *dest,
01275 int dstW, int uvalpha, int y)
01276 {
01277 const int16_t *ubuf0 = ubuf[0];
/* buf1 only fills asm operand %1; the fragments used here do not read it. */
01278 const int16_t *buf1= buf0;
01279
01280 if (uvalpha < 2048) {
01281 const int16_t *ubuf1 = ubuf[0];
01282 __asm__ volatile(
01283 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01284 "mov %4, %%"REG_b" \n\t"
01285 "push %%"REG_BP" \n\t"
01286 YSCALEYUV2RGB1(%%REGBP, %5)
01287 "pxor %%mm7, %%mm7 \n\t"
01288
/* mm2 = blue, mm4 = green, mm5 = red (bytes) at this point. */
01289 #ifdef DITHER1XBPP
01290 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01291 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01292 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01293 #endif
01294 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01295 "pop %%"REG_BP" \n\t"
01296 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01297 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01298 "a" (&c->redDither)
01299 );
01300 } else {
01301 const int16_t *ubuf1 = ubuf[1];
01302 __asm__ volatile(
01303 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01304 "mov %4, %%"REG_b" \n\t"
01305 "push %%"REG_BP" \n\t"
01306 YSCALEYUV2RGB1b(%%REGBP, %5)
01307 "pxor %%mm7, %%mm7 \n\t"
01308
/* mm2 = blue, mm4 = green, mm5 = red (bytes) at this point. */
01309 #ifdef DITHER1XBPP
01310 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01311 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01312 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01313 #endif
01314 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01315 "pop %%"REG_BP" \n\t"
01316 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01317 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01318 "a" (&c->redDither)
01319 );
01320 }
01321 }
01322
/**
 * Packed RGB565 output from a single luma line, implemented with inline MMX
 * assembly.
 *
 * When uvalpha < 2048 only ubuf[0] supplies chroma (YSCALEYUV2RGB1);
 * otherwise ubuf[0] and ubuf[1] are summed (YSCALEYUV2RGB1b). If
 * DITHER1XBPP is defined, the per-channel dither values stored in the
 * context are added with unsigned saturation before the WRITERGB16 macro
 * (defined outside this section) packs and stores the pixels.
 *
 * vbuf, abuf0, dstW and y are not referenced by name in this body.
 *
 * @param c       scaler context; &c->redDither is the base address (%5) for
 *                the context-relative offsets
 * @param buf0    luma line (asm operand %0)
 * @param ubuf    chroma line pointers; see the uvalpha description above
 * @param dest    output pointer (memory operand %4, loaded into REG_b)
 * @param uvalpha selects the single-line or the two-line chroma path
 */
01323 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
01324 const int16_t *ubuf[2], const int16_t *vbuf[2],
01325 const int16_t *abuf0, uint8_t *dest,
01326 int dstW, int uvalpha, int y)
01327 {
01328 const int16_t *ubuf0 = ubuf[0];
/* buf1 only fills asm operand %1; the fragments used here do not read it. */
01329 const int16_t *buf1= buf0;
01330
01331 if (uvalpha < 2048) {
01332 const int16_t *ubuf1 = ubuf[0];
01333 __asm__ volatile(
01334 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01335 "mov %4, %%"REG_b" \n\t"
01336 "push %%"REG_BP" \n\t"
01337 YSCALEYUV2RGB1(%%REGBP, %5)
01338 "pxor %%mm7, %%mm7 \n\t"
01339
/* mm2 = blue, mm4 = green, mm5 = red (bytes) at this point. */
01340 #ifdef DITHER1XBPP
01341 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01342 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01343 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01344 #endif
01345 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01346 "pop %%"REG_BP" \n\t"
01347 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01348 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01349 "a" (&c->redDither)
01350 );
01351 } else {
01352 const int16_t *ubuf1 = ubuf[1];
01353 __asm__ volatile(
01354 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01355 "mov %4, %%"REG_b" \n\t"
01356 "push %%"REG_BP" \n\t"
01357 YSCALEYUV2RGB1b(%%REGBP, %5)
01358 "pxor %%mm7, %%mm7 \n\t"
01359
/* mm2 = blue, mm4 = green, mm5 = red (bytes) at this point. */
01360 #ifdef DITHER1XBPP
01361 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01362 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01363 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01364 #endif
01365 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01366 "pop %%"REG_BP" \n\t"
01367 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01368 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01369 "a" (&c->redDither)
01370 );
01371 }
01372 }
01373
/**
 * Asm fragment: load one luma line and one chroma line and scale the samples
 * down by an arithmetic right shift of 7, with no blending and no colour
 * conversion.
 *
 * Arguments:
 *   index - register used as the loop index; cleared by this macro
 *   c     - base register, used only for the UV_OFF_BYTE(c) offset
 * Positional asm operands of the enclosing statement:
 *   %0 - luma line, addressed as (base, index, 2) and 8(base, index, 2)
 *   %2 - chroma line, read at index and again at index + UV_OFF_BYTE(c)
 *
 * Opens local label "1:"; the backward jump is not part of this macro.
 *
 * Results: mm3 / mm4 = chroma read at index / at index + UV_OFF_BYTE,
 * mm1 / mm7 = luma words 0-3 / 4-7.
 *
 * YSCALEYUV2PACKED1 is the extra expansion layer in front of the stringifying
 * REAL_ macro.
 */
01374 #define REAL_YSCALEYUV2PACKED1(index, c) \
01375 "xor "#index", "#index" \n\t"\
01376 ".p2align 4 \n\t"\
01377 "1: \n\t"\
01378 "movq (%2, "#index"), %%mm3 \n\t" \
01379 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01380 "movq (%2, "#index"), %%mm4 \n\t" \
01381 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01382 "psraw $7, %%mm3 \n\t" \
01383 "psraw $7, %%mm4 \n\t" \
01384 "movq (%0, "#index", 2), %%mm1 \n\t" \
01385 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01386 "psraw $7, %%mm1 \n\t" \
01387 "psraw $7, %%mm7 \n\t" \
01388
01389 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
01390
/**
 * Asm fragment: like REAL_YSCALEYUV2PACKED1, but the chroma comes from the
 * sum of two lines.
 *
 * The chroma words read from %2 and %3 (at index, and again at
 * index + UV_OFF_BYTE(c)) are added and then logically shifted right by 8
 * (psrlw). The luma words from %0 are arithmetically shifted right by 7.
 * NOTE(review): the chroma add happens in 16-bit lanes before the shift, so
 * the sum must fit in an unsigned 16-bit word; confirm the input range.
 *
 * Arguments:
 *   index - register used as the loop index; cleared by this macro
 *   c     - base register, used only for the UV_OFF_BYTE(c) offset
 *
 * Opens local label "1:"; the backward jump is not part of this macro.
 *
 * Results: mm3 / mm4 = chroma read at index / at index + UV_OFF_BYTE,
 * mm1 / mm7 = luma words 0-3 / 4-7. mm2 and mm5 are used as scratch.
 *
 * YSCALEYUV2PACKED1b is the extra expansion layer in front of the
 * stringifying REAL_ macro.
 */
01391 #define REAL_YSCALEYUV2PACKED1b(index, c) \
01392 "xor "#index", "#index" \n\t"\
01393 ".p2align 4 \n\t"\
01394 "1: \n\t"\
01395 "movq (%2, "#index"), %%mm2 \n\t" \
01396 "movq (%3, "#index"), %%mm3 \n\t" \
01397 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01398 "movq (%2, "#index"), %%mm5 \n\t" \
01399 "movq (%3, "#index"), %%mm4 \n\t" \
01400 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01401 "paddw %%mm2, %%mm3 \n\t" \
01402 "paddw %%mm5, %%mm4 \n\t" \
01403 "psrlw $8, %%mm3 \n\t" \
01404 "psrlw $8, %%mm4 \n\t" \
01405 "movq (%0, "#index", 2), %%mm1 \n\t" \
01406 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01407 "psraw $7, %%mm1 \n\t" \
01408 "psraw $7, %%mm7 \n\t"
01409 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
01410
/**
 * Packed YUYV422 output from a single luma line, implemented with inline MMX
 * assembly.
 *
 * When uvalpha < 2048 only ubuf[0] supplies chroma (YSCALEYUV2PACKED1);
 * otherwise ubuf[0] and ubuf[1] are summed (YSCALEYUV2PACKED1b). The
 * WRITEYUY2 macro (defined outside this section) stores the pixels.
 *
 * vbuf, abuf0, dstW and y are not referenced by name in this body.
 *
 * @param c       scaler context; &c->redDither is the base address (%5) for
 *                the context-relative offsets
 * @param buf0    luma line (asm operand %0)
 * @param ubuf    chroma line pointers; see the uvalpha description above
 * @param dest    output pointer (memory operand %4, loaded into REG_b)
 * @param uvalpha selects the single-line or the two-line chroma path
 */
01411 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
01412 const int16_t *ubuf[2], const int16_t *vbuf[2],
01413 const int16_t *abuf0, uint8_t *dest,
01414 int dstW, int uvalpha, int y)
01415 {
01416 const int16_t *ubuf0 = ubuf[0];
/* buf1 only fills asm operand %1; the fragments used here do not read it. */
01417 const int16_t *buf1= buf0;
01418
01419 if (uvalpha < 2048) {
01420 const int16_t *ubuf1 = ubuf[0];
01421 __asm__ volatile(
01422 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01423 "mov %4, %%"REG_b" \n\t"
01424 "push %%"REG_BP" \n\t"
01425 YSCALEYUV2PACKED1(%%REGBP, %5)
01426 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01427 "pop %%"REG_BP" \n\t"
01428 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01429 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01430 "a" (&c->redDither)
01431 );
01432 } else {
01433 const int16_t *ubuf1 = ubuf[1];
01434 __asm__ volatile(
01435 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01436 "mov %4, %%"REG_b" \n\t"
01437 "push %%"REG_BP" \n\t"
01438 YSCALEYUV2PACKED1b(%%REGBP, %5)
01439 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01440 "pop %%"REG_BP" \n\t"
01441 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01442 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01443 "a" (&c->redDither)
01444 );
01445 }
01446 }
01447
01448 #if COMPILE_TEMPLATE_MMX2
/**
 * Fast horizontal luma scaling that delegates the inner work to the code
 * block pointed to by c->lumMmx2FilterCode.
 *
 * The asm statement loads src, dst, c->hLumFilter and c->hLumFilterPos into
 * registers and then invokes the filter code eight times through
 * CALL_MMX2_FILTER_CODE (defined inside this function and reused by
 * hcscale_fast). Afterwards a C loop overwrites the right-most outputs whose
 * source position (i*xInc)>>16 is at or beyond srcW-1 with the last source
 * pixel multiplied by 128.
 *
 * @param c        scaler context supplying hLumFilterPos, hLumFilter and
 *                 lumMmx2FilterCode
 * @param dst      output samples (int16_t)
 * @param dstWidth number of output samples
 * @param src      input pixels (uint8_t)
 * @param srcW     number of input pixels
 * @param xInc     horizontal step; the tail loop treats it as a fixed-point
 *                 value with 16 fractional bits
 */
01449 static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
01450 int dstWidth, const uint8_t *src,
01451 int srcW, int xInc)
01452 {
01453 int32_t *filterPos = c->hLumFilterPos;
01454 int16_t *filter = c->hLumFilter;
01455 void *mmx2FilterCode= c->lumMmx2FilterCode;
01456 int i;
/* Save slots written and read back by the asm below.
 * NOTE(review): hcscale_fast declares the same two slots with
 * DECLARE_ALIGNED(8, uint64_t, ...); here they are plain uint64_t. Confirm
 * whether the explicit alignment matters. */
01457 #if defined(PIC)
01458 uint64_t ebxsave;
01459 #endif
01460 #if ARCH_X86_64
01461 uint64_t retsave;
01462 #endif
01463
01464 __asm__ volatile(
/* Prologue. With PIC, REG_b is not in the clobber list, so it is saved to
 * ebxsave (%5) by hand. On x86-64 the word at -8(%rsp) is saved too, because
 * each "call" below pushes its return address into that slot. Operand
 * numbering shifts with the configuration: retsave is %6 with PIC and %5
 * without. */
01465 #if defined(PIC)
01466 "mov %%"REG_b", %5 \n\t"
01467 #if ARCH_X86_64
01468 "mov -8(%%rsp), %%"REG_a" \n\t"
01469 "mov %%"REG_a", %6 \n\t"
01470 #endif
01471 #else
01472 #if ARCH_X86_64
01473 "mov -8(%%rsp), %%"REG_a" \n\t"
01474 "mov %%"REG_a", %5 \n\t"
01475 #endif
01476 #endif
/* Register setup: mm7 = 0, REG_c = src, REG_D = dst, REG_d = filter,
 * REG_b = filterPos, REG_a = 0. */
01477 "pxor %%mm7, %%mm7 \n\t"
01478 "mov %0, %%"REG_c" \n\t"
01479 "mov %1, %%"REG_D" \n\t"
01480 "mov %2, %%"REG_d" \n\t"
01481 "mov %3, %%"REG_b" \n\t"
01482 "xor %%"REG_a", %%"REG_a" \n\t"
01483 PREFETCH" (%%"REG_c") \n\t"
01484 PREFETCH" 32(%%"REG_c") \n\t"
01485 PREFETCH" 64(%%"REG_c") \n\t"
01486
/* CALL_MMX2_FILTER_CODE: load the first filterPos entry into esi, call the
 * filter code (%4), then advance the source pointer (REG_c) by the 32-bit
 * value found at (REG_b, REG_a), advance the destination pointer (REG_D) by
 * REG_a and reset REG_a to 0. The x86-64 variant goes through esi/REG_S so
 * that a 32-bit load can be added to the 64-bit pointer. */
01487 #if ARCH_X86_64
01488 #define CALL_MMX2_FILTER_CODE \
01489 "movl (%%"REG_b"), %%esi \n\t"\
01490 "call *%4 \n\t"\
01491 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
01492 "add %%"REG_S", %%"REG_c" \n\t"\
01493 "add %%"REG_a", %%"REG_D" \n\t"\
01494 "xor %%"REG_a", %%"REG_a" \n\t"\
01495
01496 #else
01497 #define CALL_MMX2_FILTER_CODE \
01498 "movl (%%"REG_b"), %%esi \n\t"\
01499 "call *%4 \n\t"\
01500 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
01501 "add %%"REG_a", %%"REG_D" \n\t"\
01502 "xor %%"REG_a", %%"REG_a" \n\t"\
01503
01504 #endif
01505
01506 CALL_MMX2_FILTER_CODE
01507 CALL_MMX2_FILTER_CODE
01508 CALL_MMX2_FILTER_CODE
01509 CALL_MMX2_FILTER_CODE
01510 CALL_MMX2_FILTER_CODE
01511 CALL_MMX2_FILTER_CODE
01512 CALL_MMX2_FILTER_CODE
01513 CALL_MMX2_FILTER_CODE
01514
/* Epilogue: restore what the prologue saved. */
01515 #if defined(PIC)
01516 "mov %5, %%"REG_b" \n\t"
01517 #if ARCH_X86_64
01518 "mov %6, %%"REG_a" \n\t"
01519 "mov %%"REG_a", -8(%%rsp) \n\t"
01520 #endif
01521 #else
01522 #if ARCH_X86_64
01523 "mov %5, %%"REG_a" \n\t"
01524 "mov %%"REG_a", -8(%%rsp) \n\t"
01525 #endif
01526 #endif
01527 :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
01528 "m" (mmx2FilterCode)
01529 #if defined(PIC)
01530 ,"m" (ebxsave)
01531 #endif
01532 #if ARCH_X86_64
01533 ,"m"(retsave)
01534 #endif
01535 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
01536 #if !defined(PIC)
01537 ,"%"REG_b
01538 #endif
01539 );
01540
/* Right-edge fix-up: walking down from the last output, every sample whose
 * source position is at or past the last input pixel is replaced by that
 * pixel scaled by 128. */
01541 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
01542 dst[i] = src[srcW-1]*128;
01543 }
01544
/**
 * Fast horizontal chroma scaling that delegates the inner work to the code
 * block pointed to by c->chrMmx2FilterCode.
 *
 * The asm statement processes the first plane (src1 -> dst1) with four
 * CALL_MMX2_FILTER_CODE invocations, reloads the source/destination
 * registers from src2/dst2 and runs four more invocations for the second
 * plane. Afterwards a C loop overwrites, in both outputs, the right-most
 * samples whose source position (i*xInc)>>16 is at or beyond srcW-1 with the
 * last source pixel of the respective plane multiplied by 128.
 *
 * CALL_MMX2_FILTER_CODE is the macro defined inside hyscale_fast above.
 *
 * @param c          scaler context supplying hChrFilterPos, hChrFilter and
 *                   chrMmx2FilterCode
 * @param dst1, dst2 output samples (int16_t) for the two planes
 * @param dstWidth   number of output samples per plane
 * @param src1, src2 input pixels (uint8_t) for the two planes
 * @param srcW       number of input pixels per plane
 * @param xInc       horizontal step; the tail loop treats it as a fixed-point
 *                   value with 16 fractional bits
 */
01545 static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
01546 int dstWidth, const uint8_t *src1,
01547 const uint8_t *src2, int srcW, int xInc)
01548 {
01549 int32_t *filterPos = c->hChrFilterPos;
01550 int16_t *filter = c->hChrFilter;
01551 void *mmx2FilterCode= c->chrMmx2FilterCode;
01552 int i;
/* 8-byte aligned save slots written and read back by the asm below. */
01553 #if defined(PIC)
01554 DECLARE_ALIGNED(8, uint64_t, ebxsave);
01555 #endif
01556 #if ARCH_X86_64
01557 DECLARE_ALIGNED(8, uint64_t, retsave);
01558 #endif
01559
01560 __asm__ volatile(
/* Prologue. With PIC, REG_b is not in the clobber list, so it is saved to
 * ebxsave (%7) by hand. On x86-64 the word at -8(%rsp) is saved too, because
 * each "call" below pushes its return address into that slot. Operand
 * numbering shifts with the configuration: retsave is %8 with PIC and %7
 * without. */
01561 #if defined(PIC)
01562 "mov %%"REG_b", %7 \n\t"
01563 #if ARCH_X86_64
01564 "mov -8(%%rsp), %%"REG_a" \n\t"
01565 "mov %%"REG_a", %8 \n\t"
01566 #endif
01567 #else
01568 #if ARCH_X86_64
01569 "mov -8(%%rsp), %%"REG_a" \n\t"
01570 "mov %%"REG_a", %7 \n\t"
01571 #endif
01572 #endif
/* First plane: mm7 = 0, REG_c = src1, REG_D = dst1, REG_d = filter,
 * REG_b = filterPos, REG_a = 0. */
01573 "pxor %%mm7, %%mm7 \n\t"
01574 "mov %0, %%"REG_c" \n\t"
01575 "mov %1, %%"REG_D" \n\t"
01576 "mov %2, %%"REG_d" \n\t"
01577 "mov %3, %%"REG_b" \n\t"
01578 "xor %%"REG_a", %%"REG_a" \n\t"
01579 PREFETCH" (%%"REG_c") \n\t"
01580 PREFETCH" 32(%%"REG_c") \n\t"
01581 PREFETCH" 64(%%"REG_c") \n\t"
01582
01583 CALL_MMX2_FILTER_CODE
01584 CALL_MMX2_FILTER_CODE
01585 CALL_MMX2_FILTER_CODE
01586 CALL_MMX2_FILTER_CODE
/* Second plane: restart with REG_c = src2 and REG_D = dst2. REG_d and REG_b
 * are not reloaded here. */
01587 "xor %%"REG_a", %%"REG_a" \n\t"
01588 "mov %5, %%"REG_c" \n\t"
01589 "mov %6, %%"REG_D" \n\t"
01590 PREFETCH" (%%"REG_c") \n\t"
01591 PREFETCH" 32(%%"REG_c") \n\t"
01592 PREFETCH" 64(%%"REG_c") \n\t"
01593
01594 CALL_MMX2_FILTER_CODE
01595 CALL_MMX2_FILTER_CODE
01596 CALL_MMX2_FILTER_CODE
01597 CALL_MMX2_FILTER_CODE
01598
/* Epilogue: restore what the prologue saved. */
01599 #if defined(PIC)
01600 "mov %7, %%"REG_b" \n\t"
01601 #if ARCH_X86_64
01602 "mov %8, %%"REG_a" \n\t"
01603 "mov %%"REG_a", -8(%%rsp) \n\t"
01604 #endif
01605 #else
01606 #if ARCH_X86_64
01607 "mov %7, %%"REG_a" \n\t"
01608 "mov %%"REG_a", -8(%%rsp) \n\t"
01609 #endif
01610 #endif
01611 :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
01612 "m" (mmx2FilterCode), "m" (src2), "m"(dst2)
01613 #if defined(PIC)
01614 ,"m" (ebxsave)
01615 #endif
01616 #if ARCH_X86_64
01617 ,"m"(retsave)
01618 #endif
01619 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
01620 #if !defined(PIC)
01621 ,"%"REG_b
01622 #endif
01623 );
01624
/* Right-edge fix-up for both planes: every sample whose source position is
 * at or past the last input pixel is replaced by that pixel scaled by 128. */
01625 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
01626 dst1[i] = src1[srcW-1]*128;
01627 dst2[i] = src2[srcW-1]*128;
01628 }
01629 }
01630 #endif
01631
01632 static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
01633 {
01634 enum PixelFormat dstFormat = c->dstFormat;
01635
01636 c->use_mmx_vfilter= 0;
01637 if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12
01638 && dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
01639 if (c->flags & SWS_ACCURATE_RND) {
01640 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
01641 switch (c->dstFormat) {
01642 case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
01643 case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
01644 case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
01645 case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
01646 case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
01647 default: break;
01648 }
01649 }
01650 } else {
01651 c->use_mmx_vfilter= 1;
01652 c->yuv2planeX = RENAME(yuv2yuvX );
01653 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
01654 switch (c->dstFormat) {
01655 case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
01656 case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
01657 case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
01658 case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
01659 case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
01660 default: break;
01661 }
01662 }
01663 }
01664 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
01665 switch (c->dstFormat) {
01666 case PIX_FMT_RGB32:
01667 c->yuv2packed1 = RENAME(yuv2rgb32_1);
01668 c->yuv2packed2 = RENAME(yuv2rgb32_2);
01669 break;
01670 case PIX_FMT_BGR24:
01671 c->yuv2packed1 = RENAME(yuv2bgr24_1);
01672 c->yuv2packed2 = RENAME(yuv2bgr24_2);
01673 break;
01674 case PIX_FMT_RGB555:
01675 c->yuv2packed1 = RENAME(yuv2rgb555_1);
01676 c->yuv2packed2 = RENAME(yuv2rgb555_2);
01677 break;
01678 case PIX_FMT_RGB565:
01679 c->yuv2packed1 = RENAME(yuv2rgb565_1);
01680 c->yuv2packed2 = RENAME(yuv2rgb565_2);
01681 break;
01682 case PIX_FMT_YUYV422:
01683 c->yuv2packed1 = RENAME(yuv2yuyv422_1);
01684 c->yuv2packed2 = RENAME(yuv2yuyv422_2);
01685 break;
01686 default:
01687 break;
01688 }
01689 }
01690 }
01691
01692 if (c->srcBpc == 8 && c->dstBpc <= 10) {
01693
01694 #if COMPILE_TEMPLATE_MMX2
01695 if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
01696 {
01697 c->hyscale_fast = RENAME(hyscale_fast);
01698 c->hcscale_fast = RENAME(hcscale_fast);
01699 } else {
01700 #endif
01701 c->hyscale_fast = NULL;
01702 c->hcscale_fast = NULL;
01703 #if COMPILE_TEMPLATE_MMX2
01704 }
01705 #endif
01706 }
01707 }