00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
/*
 * Helper macros that depend on the instruction set this template is being
 * compiled for. They are #undef'd first, presumably because the template is
 * included more than once with different COMPILE_TEMPLATE_* settings.
 *
 * PREFETCH: "prefetchnta" when COMPILE_TEMPLATE_MMX2 is set, otherwise a
 *           string starting with '#' (presumably an assembler comment, so
 *           nothing is emitted).
 * MOVNTQ:   emits "movntq" when COMPILE_TEMPLATE_MMX2 is set, otherwise a
 *           plain "movq".
 */
00021 #undef REAL_MOVNTQ
00022 #undef MOVNTQ
00023 #undef PREFETCH
00024
00025 #if COMPILE_TEMPLATE_MMX2
00026 #define PREFETCH "prefetchnta"
00027 #else
00028 #define PREFETCH " # nop"
00029 #endif
00030
00031 #if COMPILE_TEMPLATE_MMX2
00032 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
00033 #else
00034 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
00035 #endif
/* Extra level of indirection so that macro arguments (e.g. register-name
 * macros) are expanded before REAL_MOVNTQ stringizes them with '#'. */
00036 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
00037
/*
 * YSCALEYUV2YV12X(offset, dest, end, pos)
 *
 * Complete asm statement that filters several source lines vertically and
 * writes one plane of 8-bit output, 8 pixels per outer iteration.
 *
 * Operands: %0 = &c->redDither (base address for the context offsets),
 *           %1 = dest, %2 = end, %3 = pos.
 *
 * - mm3/mm4 are seeded from DITHER16+0 / DITHER16+8 (4 words each).
 * - REG_d walks a table located at offset(%0) with a 16-byte stride; each
 *   entry holds a source-line pointer at +0 and a coefficient qword at +8.
 *   A zero pointer terminates the table ("test" / "jnz 1b").
 * - For every entry, 8 source words at (line + 2*pos) are multiplied by the
 *   coefficient with pmulhw and added to mm3/mm4.
 * - The sums are shifted right by 3, packed to unsigned bytes and stored at
 *   dest + pos with MOVNTQ; pos is advanced by 8, the accumulators and table
 *   pointer are reset, and the loop repeats while pos < end (unsigned "jb").
 *
 * Label "1" serves both the inner (table) loop and the outer (pixel) loop.
 * The macro implicitly uses a variable named c from the calling scope.
 *
 * NOTE(review): %3 is declared as an input operand but is modified by
 * "add $8, %3"; the statement also stores to memory without a "memory"
 * clobber.
 */
00038 #define YSCALEYUV2YV12X(offset, dest, end, pos) \
00039 __asm__ volatile(\
00040 "movq "DITHER16"+0(%0), %%mm3 \n\t"\
00041 "movq "DITHER16"+8(%0), %%mm4 \n\t"\
00042 "lea " offset "(%0), %%"REG_d" \n\t"\
00043 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00044 ".p2align 4 \n\t" \
00045 "1: \n\t"\
00046 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00047 "movq (%%"REG_S", %3, 2), %%mm2 \n\t" \
00048 "movq 8(%%"REG_S", %3, 2), %%mm5 \n\t" \
00049 "add $16, %%"REG_d" \n\t"\
00050 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00051 "test %%"REG_S", %%"REG_S" \n\t"\
00052 "pmulhw %%mm0, %%mm2 \n\t"\
00053 "pmulhw %%mm0, %%mm5 \n\t"\
00054 "paddw %%mm2, %%mm3 \n\t"\
00055 "paddw %%mm5, %%mm4 \n\t"\
00056 " jnz 1b \n\t"\
00057 "psraw $3, %%mm3 \n\t"\
00058 "psraw $3, %%mm4 \n\t"\
00059 "packuswb %%mm4, %%mm3 \n\t"\
00060 MOVNTQ(%%mm3, (%1, %3))\
00061 "add $8, %3 \n\t"\
00062 "cmp %2, %3 \n\t"\
00063 "movq "DITHER16"+0(%0), %%mm3 \n\t"\
00064 "movq "DITHER16"+8(%0), %%mm4 \n\t"\
00065 "lea " offset "(%0), %%"REG_d" \n\t"\
00066 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00067 "jb 1b \n\t"\
00068 :: "r" (&c->redDither),\
00069 "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\
00070 : "%"REG_d, "%"REG_S\
00071 );
00072
00073 static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
00074 const int16_t **lumSrc, int lumFilterSize,
00075 const int16_t *chrFilter, const int16_t **chrUSrc,
00076 const int16_t **chrVSrc,
00077 int chrFilterSize, const int16_t **alpSrc,
00078 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
00079 uint8_t *aDest, int dstW, int chrDstW,
00080 const uint8_t *lumDither, const uint8_t *chrDither)
00081 {
00082 int i;
00083 if (uDest) {
00084 x86_reg uv_off = c->uv_off;
00085 for(i=0; i<8; i++) c->dither16[i] = chrDither[i]>>4;
00086 YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
00087 for(i=0; i<8; i++) c->dither16[i] = chrDither[(i+3)&7]>>4;
00088 YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
00089 }
00090 for(i=0; i<8; i++) c->dither16[i] = lumDither[i]>>4;
00091 if (CONFIG_SWSCALE_ALPHA && aDest) {
00092 YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
00093 }
00094
00095 YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
00096 }
00097
/**
 * YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos)
 *
 * Complete asm statement: vertical multi-tap filter with 32-bit
 * accumulation, producing 8 output bytes per outer iteration.
 *
 * Operands: %0 = &c->redDither (base address for the context offsets),
 *           %1 = dest, %2 = end, %3 = pos.
 *
 * - mm4..mm7 (eight 32-bit accumulators) are seeded from the 32 bytes at
 *   DITHER32(%0).
 * - REG_d walks a table at offset(%0) with stride APCK_SIZE. Each entry
 *   supplies two source-line pointers (at +0 and +APCK_PTR2) and a
 *   coefficient qword at +APCK_COEF. Words from the two lines are
 *   interleaved and multiplied/added pairwise with pmaddwd, so two taps
 *   are consumed per inner iteration. A zero pointer at the next entry
 *   ends the inner loop.
 * - The sums are shifted right by 19, packed to words and then to unsigned
 *   bytes, and stored at dest + pos with MOVNTQ. pos is advanced by 8, the
 *   accumulators are re-seeded from DITHER32, and the loop repeats while
 *   pos < end (unsigned "jb").
 *
 * Label "1" serves both the inner (table) loop and the outer (pixel) loop.
 * The macro implicitly uses a variable named c from the calling scope.
 *
 * Fix: the accumulators used to be cleared with pxor immediately after the
 * initial DITHER32 load, so the first group of 8 pixels was computed without
 * dither while all following groups (re-seeded at the bottom of the loop)
 * were dithered. The clearing instructions have been removed so every group
 * is treated the same way.
 *
 * NOTE(review): %3 is declared as an input operand but is modified by
 * "add $8, %3"; REG_a is listed as clobbered although this statement does
 * not reference it. Both are kept as they were.
 */
#define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
    __asm__ volatile(\
        "lea " offset "(%0), %%"REG_d" \n\t"\
        /* seed the accumulators with the dither pattern */ \
        "movq "DITHER32"+0(%0), %%mm4 \n\t"\
        "movq "DITHER32"+8(%0), %%mm5 \n\t"\
        "movq "DITHER32"+16(%0), %%mm6 \n\t"\
        "movq "DITHER32"+24(%0), %%mm7 \n\t"\
        "mov (%%"REG_d"), %%"REG_S" \n\t"\
        ".p2align 4 \n\t"\
        "1: \n\t"\
        "movq (%%"REG_S", %3, 2), %%mm0 \n\t" \
        "movq 8(%%"REG_S", %3, 2), %%mm2 \n\t" \
        "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
        "movq (%%"REG_S", %3, 2), %%mm1 \n\t" \
        "movq %%mm0, %%mm3 \n\t"\
        "punpcklwd %%mm1, %%mm0 \n\t"\
        "punpckhwd %%mm1, %%mm3 \n\t"\
        "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" \
        "pmaddwd %%mm1, %%mm0 \n\t"\
        "pmaddwd %%mm1, %%mm3 \n\t"\
        "paddd %%mm0, %%mm4 \n\t"\
        "paddd %%mm3, %%mm5 \n\t"\
        "movq 8(%%"REG_S", %3, 2), %%mm3 \n\t" \
        "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
        "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
        "test %%"REG_S", %%"REG_S" \n\t"\
        "movq %%mm2, %%mm0 \n\t"\
        "punpcklwd %%mm3, %%mm2 \n\t"\
        "punpckhwd %%mm3, %%mm0 \n\t"\
        "pmaddwd %%mm1, %%mm2 \n\t"\
        "pmaddwd %%mm1, %%mm0 \n\t"\
        "paddd %%mm2, %%mm6 \n\t"\
        "paddd %%mm0, %%mm7 \n\t"\
        " jnz 1b \n\t"\
        "psrad $19, %%mm4 \n\t"\
        "psrad $19, %%mm5 \n\t"\
        "psrad $19, %%mm6 \n\t"\
        "psrad $19, %%mm7 \n\t"\
        "packssdw %%mm5, %%mm4 \n\t"\
        "packssdw %%mm7, %%mm6 \n\t"\
        "packuswb %%mm6, %%mm4 \n\t"\
        MOVNTQ(%%mm4, (%1, %3))\
        "add $8, %3 \n\t"\
        "cmp %2, %3 \n\t"\
        /* re-seed for the next group of 8 pixels; none of these touch flags */ \
        "lea " offset "(%0), %%"REG_d" \n\t"\
        "movq "DITHER32"+0(%0), %%mm4 \n\t"\
        "movq "DITHER32"+8(%0), %%mm5 \n\t"\
        "movq "DITHER32"+16(%0), %%mm6 \n\t"\
        "movq "DITHER32"+24(%0), %%mm7 \n\t"\
        "mov (%%"REG_d"), %%"REG_S" \n\t"\
        "jb 1b \n\t"\
        :: "r" (&c->redDither),\
        "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\
        : "%"REG_a, "%"REG_d, "%"REG_S\
    );
00157
00158 static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
00159 const int16_t **lumSrc, int lumFilterSize,
00160 const int16_t *chrFilter, const int16_t **chrUSrc,
00161 const int16_t **chrVSrc,
00162 int chrFilterSize, const int16_t **alpSrc,
00163 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
00164 uint8_t *aDest, int dstW, int chrDstW,
00165 const uint8_t *lumDither, const uint8_t *chrDither)
00166 {
00167 int i;
00168 if (uDest) {
00169 x86_reg uv_off = c->uv_off;
00170 for(i=0; i<8; i++) c->dither32[i] = chrDither[i]<<12;
00171 YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
00172 for(i=0; i<8; i++) c->dither32[i] = chrDither[(i+3)&7]<<12;
00173 YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
00174 }
00175 for(i=0; i<8; i++) c->dither32[i] = lumDither[i]<<12;
00176 if (CONFIG_SWSCALE_ALPHA && aDest) {
00177 YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
00178 }
00179
00180 YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
00181 }
00182
00183 static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
00184 const int16_t *chrUSrc, const int16_t *chrVSrc,
00185 const int16_t *alpSrc,
00186 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
00187 uint8_t *aDest, int dstW, int chrDstW,
00188 const uint8_t *lumDither, const uint8_t *chrDither)
00189 {
00190 int p= 4;
00191 const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
00192 uint8_t *dst[4]= { aDest, dest, uDest, vDest };
00193 x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
00194
00195 while (p--) {
00196 if (dst[p]) {
00197 __asm__ volatile(
00198 "mov %2, %%"REG_a" \n\t"
00199 ".p2align 4 \n\t"
00200 "1: \n\t"
00201 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"
00202 "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"
00203 "psraw $7, %%mm0 \n\t"
00204 "psraw $7, %%mm1 \n\t"
00205 "packuswb %%mm1, %%mm0 \n\t"
00206 MOVNTQ(%%mm0, (%1, %%REGa))
00207 "add $8, %%"REG_a" \n\t"
00208 "jnc 1b \n\t"
00209 :: "r" (src[p]), "r" (dst[p] + counter[p]),
00210 "g" (-counter[p])
00211 : "%"REG_a
00212 );
00213 }
00214 }
00215 }
00216
00217 static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
00218 const int16_t *chrUSrc, const int16_t *chrVSrc,
00219 const int16_t *alpSrc,
00220 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
00221 uint8_t *aDest, int dstW, int chrDstW,
00222 const uint8_t *lumDither, const uint8_t *chrDither)
00223 {
00224 int p= 4;
00225 const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
00226 uint8_t *dst[4]= { aDest, dest, uDest, vDest };
00227 x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
00228
00229 while (p--) {
00230 if (dst[p]) {
00231 int i;
00232 for(i=0; i<8; i++) c->dither16[i] = i<2 ? lumDither[i] : chrDither[i];
00233 __asm__ volatile(
00234 "mov %2, %%"REG_a" \n\t"
00235 "movq 0(%3), %%mm6 \n\t"
00236 "movq 8(%3), %%mm7 \n\t"
00237 ".p2align 4 \n\t"
00238 "1: \n\t"
00239 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"
00240 "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"
00241 "paddsw %%mm6, %%mm0 \n\t"
00242 "paddsw %%mm7, %%mm1 \n\t"
00243 "psraw $7, %%mm0 \n\t"
00244 "psraw $7, %%mm1 \n\t"
00245 "packuswb %%mm1, %%mm0 \n\t"
00246 MOVNTQ(%%mm0, (%1, %%REGa))
00247 "add $8, %%"REG_a" \n\t"
00248 "jnc 1b \n\t"
00249 :: "r" (src[p]), "r" (dst[p] + counter[p]),
00250 "g" (-counter[p]), "r"(c->dither16)
00251 : "%"REG_a
00252 );
00253 }
00254 }
00255 }
00256
/*
 * YSCALEYUV2PACKEDX_UV
 *
 * Opens an asm statement (left unterminated; the operand list is supplied
 * later, e.g. by YSCALEYUV2PACKEDX_END) and emits the chroma part of the
 * vertical multi-tap filter for packed output.
 *
 * - REG_a is cleared and used as the pixel index; label "1" marks the start
 *   of the per-8-pixel outer loop that the WRITE* macros jump back to.
 * - REG_d walks the table at CHR_MMX_FILTER_OFFSET(%0) with a 16-byte
 *   stride (line pointer at +0, coefficient qword at +8); a zero pointer
 *   ends the inner loop (label "2").
 * - For every entry, four words at (line + REG_a) go to the mm3 sum and four
 *   words at (line + %6 + REG_a) go to the mm4 sum, where %6 is the uv_off
 *   memory operand. Both sums start from VROUNDER_OFFSET(%0).
 *
 * Result: mm3 and mm4 hold the filtered chroma words; the RGB conversion
 * macro subtracts U_OFFSET from mm3 and V_OFFSET from mm4.
 */
00257 #define YSCALEYUV2PACKEDX_UV \
00258 __asm__ volatile(\
00259 "xor %%"REG_a", %%"REG_a" \n\t"\
00260 ".p2align 4 \n\t"\
00261 "nop \n\t"\
00262 "1: \n\t"\
00263 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00264 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00265 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00266 "movq %%mm3, %%mm4 \n\t"\
00267 ".p2align 4 \n\t"\
00268 "2: \n\t"\
00269 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00270 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00271 "add %6, %%"REG_S" \n\t" \
00272 "movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t" \
00273 "add $16, %%"REG_d" \n\t"\
00274 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00275 "pmulhw %%mm0, %%mm2 \n\t"\
00276 "pmulhw %%mm0, %%mm5 \n\t"\
00277 "paddw %%mm2, %%mm3 \n\t"\
00278 "paddw %%mm5, %%mm4 \n\t"\
00279 "test %%"REG_S", %%"REG_S" \n\t"\
00280 " jnz 2b \n\t"\
00281
/*
 * YSCALEYUV2PACKEDX_YA(offset, coeff, src1, src2, dst1, dst2)
 *
 * Asm fragment (to be spliced into an already opened asm statement) that
 * runs the vertical multi-tap filter over one full-resolution plane for the
 * current group of 8 pixels.
 *
 * offset       - context offset of the filter table (16-byte entries: line
 *                pointer at +0, coefficient qword at +8, zero-pointer
 *                terminated)
 * coeff        - MMX register receiving the coefficient
 * src1, src2   - scratch MMX registers for pixels 0-3 and 4-7
 * dst1, dst2   - accumulators for pixels 0-3 and 4-7, both initialised from
 *                VROUNDER_OFFSET(%0)
 *
 * Source words are read at (line + 2*REG_a). Clobbers REG_d and REG_S and
 * defines local label "2".
 */
00282 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
00283 "lea "offset"(%0), %%"REG_d" \n\t"\
00284 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00285 "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
00286 "movq "#dst1", "#dst2" \n\t"\
00287 ".p2align 4 \n\t"\
00288 "2: \n\t"\
00289 "movq 8(%%"REG_d"), "#coeff" \n\t" \
00290 "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" \
00291 "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" \
00292 "add $16, %%"REG_d" \n\t"\
00293 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00294 "pmulhw "#coeff", "#src1" \n\t"\
00295 "pmulhw "#coeff", "#src2" \n\t"\
00296 "paddw "#src1", "#dst1" \n\t"\
00297 "paddw "#src2", "#dst2" \n\t"\
00298 "test %%"REG_S", %%"REG_S" \n\t"\
00299 " jnz 2b \n\t"\
00300
/*
 * YSCALEYUV2PACKEDX
 *
 * Chroma filter followed by the luma filter for the current 8 pixels.
 * On exit: mm3/mm4 = chroma sums, mm1 = luma pixels 0-3, mm7 = luma
 * pixels 4-7. The asm statement is still open afterwards.
 */
00301 #define YSCALEYUV2PACKEDX \
00302 YSCALEYUV2PACKEDX_UV \
00303 YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
00304
/*
 * YSCALEYUV2PACKEDX_END
 *
 * Operand/clobber list that terminates an asm statement opened by
 * YSCALEYUV2PACKEDX_UV or YSCALEYUV2PACKEDX_ACCURATE_UV.
 *
 *   %0      = &c->redDither (base address for the context offsets)
 *   %1..%3  = dummy (memory operands; not referenced by the fragments in
 *             view, presumably placeholders that keep the numbering fixed)
 *   %4      = dest
 *   %5      = dstW_reg (memory)
 *   %6      = uv_off (memory)
 *
 * Expects c, dummy, dest, dstW_reg and uv_off to exist in the calling scope.
 * REG_a, REG_d and REG_S are declared clobbered.
 */
00305 #define YSCALEYUV2PACKEDX_END \
00306 :: "r" (&c->redDither), \
00307 "m" (dummy), "m" (dummy), "m" (dummy),\
00308 "r" (dest), "m" (dstW_reg), "m"(uv_off) \
00309 : "%"REG_a, "%"REG_d, "%"REG_S \
00310 );
00311
/*
 * YSCALEYUV2PACKEDX_ACCURATE_UV
 *
 * Opens an asm statement (left unterminated) and emits the chroma part of
 * the vertical multi-tap filter with 32-bit accumulation.
 *
 * - REG_a is cleared and used as the pixel index; label "1" marks the start
 *   of the per-8-pixel outer loop that the WRITE* macros jump back to.
 * - REG_d walks the table at CHR_MMX_FILTER_OFFSET(%0) with stride
 *   APCK_SIZE. Each entry supplies two line pointers (+0 and +APCK_PTR2)
 *   and a coefficient qword (+APCK_COEF). Words from both lines are
 *   interleaved and combined with pmaddwd, i.e. two taps per iteration
 *   of the inner loop (label "2"). A zero next pointer ends the loop.
 * - Samples at (line + REG_a) accumulate in mm4/mm5, samples at
 *   (line + %6 + REG_a) in mm6/mm7 (%6 is the uv_off memory operand).
 * - The sums are shifted right by 16, packed to signed words, the
 *   VROUNDER_OFFSET(%0) constant is added, and the two results are stored
 *   to U_TEMP(%0) and V_TEMP(%0) for later reload.
 */
00312 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
00313 __asm__ volatile(\
00314 "xor %%"REG_a", %%"REG_a" \n\t"\
00315 ".p2align 4 \n\t"\
00316 "nop \n\t"\
00317 "1: \n\t"\
00318 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00319 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00320 "pxor %%mm4, %%mm4 \n\t"\
00321 "pxor %%mm5, %%mm5 \n\t"\
00322 "pxor %%mm6, %%mm6 \n\t"\
00323 "pxor %%mm7, %%mm7 \n\t"\
00324 ".p2align 4 \n\t"\
00325 "2: \n\t"\
00326 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" \
00327 "add %6, %%"REG_S" \n\t" \
00328 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00329 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00330 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" \
00331 "movq %%mm0, %%mm3 \n\t"\
00332 "punpcklwd %%mm1, %%mm0 \n\t"\
00333 "punpckhwd %%mm1, %%mm3 \n\t"\
00334 "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" \
00335 "pmaddwd %%mm1, %%mm0 \n\t"\
00336 "pmaddwd %%mm1, %%mm3 \n\t"\
00337 "paddd %%mm0, %%mm4 \n\t"\
00338 "paddd %%mm3, %%mm5 \n\t"\
00339 "add %6, %%"REG_S" \n\t" \
00340 "movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t" \
00341 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00342 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00343 "test %%"REG_S", %%"REG_S" \n\t"\
00344 "movq %%mm2, %%mm0 \n\t"\
00345 "punpcklwd %%mm3, %%mm2 \n\t"\
00346 "punpckhwd %%mm3, %%mm0 \n\t"\
00347 "pmaddwd %%mm1, %%mm2 \n\t"\
00348 "pmaddwd %%mm1, %%mm0 \n\t"\
00349 "paddd %%mm2, %%mm6 \n\t"\
00350 "paddd %%mm0, %%mm7 \n\t"\
00351 " jnz 2b \n\t"\
00352 "psrad $16, %%mm4 \n\t"\
00353 "psrad $16, %%mm5 \n\t"\
00354 "psrad $16, %%mm6 \n\t"\
00355 "psrad $16, %%mm7 \n\t"\
00356 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00357 "packssdw %%mm5, %%mm4 \n\t"\
00358 "packssdw %%mm7, %%mm6 \n\t"\
00359 "paddw %%mm0, %%mm4 \n\t"\
00360 "paddw %%mm0, %%mm6 \n\t"\
00361 "movq %%mm4, "U_TEMP"(%0) \n\t"\
00362 "movq %%mm6, "V_TEMP"(%0) \n\t"\
00363
/*
 * YSCALEYUV2PACKEDX_ACCURATE_YA(offset)
 *
 * Asm fragment (spliced into an already opened asm statement) that runs the
 * 32-bit-accumulating vertical filter over one full-resolution plane for
 * the current group of 8 pixels.
 *
 * - REG_d walks the table at offset(%0) with stride APCK_SIZE; two line
 *   pointers (+0, +APCK_PTR2) and one coefficient qword (+APCK_COEF) per
 *   entry, two taps per inner iteration (label "2"), zero-pointer
 *   terminated.
 * - Source words are read at (line + 2*REG_a). Accumulators: mm1/mm5 for
 *   pixels 0-3, mm7/mm6 for pixels 4-7.
 * - Sums are shifted right by 16, packed to signed words and the
 *   VROUNDER_OFFSET(%0) constant is added.
 *
 * On exit: mm1 = pixels 0-3, mm7 = pixels 4-7, and mm3/mm4 are reloaded
 * from U_TEMP(%0)/V_TEMP(%0). All eight MMX registers, REG_d and REG_S are
 * overwritten.
 */
00364 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
00365 "lea "offset"(%0), %%"REG_d" \n\t"\
00366 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00367 "pxor %%mm1, %%mm1 \n\t"\
00368 "pxor %%mm5, %%mm5 \n\t"\
00369 "pxor %%mm7, %%mm7 \n\t"\
00370 "pxor %%mm6, %%mm6 \n\t"\
00371 ".p2align 4 \n\t"\
00372 "2: \n\t"\
00373 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" \
00374 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00375 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00376 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" \
00377 "movq %%mm0, %%mm3 \n\t"\
00378 "punpcklwd %%mm4, %%mm0 \n\t"\
00379 "punpckhwd %%mm4, %%mm3 \n\t"\
00380 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" \
00381 "pmaddwd %%mm4, %%mm0 \n\t"\
00382 "pmaddwd %%mm4, %%mm3 \n\t"\
00383 "paddd %%mm0, %%mm1 \n\t"\
00384 "paddd %%mm3, %%mm5 \n\t"\
00385 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" \
00386 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00387 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00388 "test %%"REG_S", %%"REG_S" \n\t"\
00389 "movq %%mm2, %%mm0 \n\t"\
00390 "punpcklwd %%mm3, %%mm2 \n\t"\
00391 "punpckhwd %%mm3, %%mm0 \n\t"\
00392 "pmaddwd %%mm4, %%mm2 \n\t"\
00393 "pmaddwd %%mm4, %%mm0 \n\t"\
00394 "paddd %%mm2, %%mm7 \n\t"\
00395 "paddd %%mm0, %%mm6 \n\t"\
00396 " jnz 2b \n\t"\
00397 "psrad $16, %%mm1 \n\t"\
00398 "psrad $16, %%mm5 \n\t"\
00399 "psrad $16, %%mm7 \n\t"\
00400 "psrad $16, %%mm6 \n\t"\
00401 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00402 "packssdw %%mm5, %%mm1 \n\t"\
00403 "packssdw %%mm6, %%mm7 \n\t"\
00404 "paddw %%mm0, %%mm1 \n\t"\
00405 "paddw %%mm0, %%mm7 \n\t"\
00406 "movq "U_TEMP"(%0), %%mm3 \n\t"\
00407 "movq "V_TEMP"(%0), %%mm4 \n\t"\
00408
/*
 * YSCALEYUV2PACKEDX_ACCURATE
 *
 * Accurate chroma filter followed by the accurate luma filter for the
 * current 8 pixels. On exit: mm3/mm4 = chroma (reloaded from U_TEMP/V_TEMP),
 * mm1 = luma pixels 0-3, mm7 = luma pixels 4-7. The asm statement is still
 * open afterwards.
 */
00409 #define YSCALEYUV2PACKEDX_ACCURATE \
00410 YSCALEYUV2PACKEDX_ACCURATE_UV \
00411 YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
00412
/*
 * YSCALEYUV2RGBX
 *
 * Asm fragment converting the filtered YUV values of 8 pixels to 8-bit
 * colour components, using the offsets and coefficients stored in the
 * context relative to %0.
 *
 * In:  mm3, mm4 = chroma words (U_OFFSET is subtracted from mm3, V_OFFSET
 *      from mm4); mm1 = luma pixels 0-3, mm7 = luma pixels 4-7.
 * Out: mm2 = Y + UB_COEFF term, mm5 = Y + VR_COEFF term,
 *      mm4 = Y + (UG_COEFF term + VG_COEFF term), each packed to 8
 *      unsigned-saturated bytes. The callers in view pass these registers
 *      to the WRITE* macros as B, R and G respectively.
 *
 * Each chroma word is duplicated (punpck?wd reg,reg) so that one chroma
 * sample serves two adjacent luma samples. mm0, mm3 and mm6 are used as
 * scratch.
 */
00413 #define YSCALEYUV2RGBX \
00414 "psubw "U_OFFSET"(%0), %%mm3 \n\t" \
00415 "psubw "V_OFFSET"(%0), %%mm4 \n\t" \
00416 "movq %%mm3, %%mm2 \n\t" \
00417 "movq %%mm4, %%mm5 \n\t" \
00418 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
00419 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
00420 \
00421 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
00422 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
00423 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" \
00424 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" \
00425 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
00426 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
00427 \
00428 "paddw %%mm3, %%mm4 \n\t"\
00429 "movq %%mm2, %%mm0 \n\t"\
00430 "movq %%mm5, %%mm6 \n\t"\
00431 "movq %%mm4, %%mm3 \n\t"\
00432 "punpcklwd %%mm2, %%mm2 \n\t"\
00433 "punpcklwd %%mm5, %%mm5 \n\t"\
00434 "punpcklwd %%mm4, %%mm4 \n\t"\
00435 "paddw %%mm1, %%mm2 \n\t"\
00436 "paddw %%mm1, %%mm5 \n\t"\
00437 "paddw %%mm1, %%mm4 \n\t"\
00438 "punpckhwd %%mm0, %%mm0 \n\t"\
00439 "punpckhwd %%mm6, %%mm6 \n\t"\
00440 "punpckhwd %%mm3, %%mm3 \n\t"\
00441 "paddw %%mm7, %%mm0 \n\t"\
00442 "paddw %%mm7, %%mm6 \n\t"\
00443 "paddw %%mm7, %%mm3 \n\t"\
00444 \
00445 "packuswb %%mm0, %%mm2 \n\t"\
00446 "packuswb %%mm6, %%mm5 \n\t"\
00447 "packuswb %%mm3, %%mm4 \n\t"\
00448
/*
 * REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
 *
 * Asm fragment that interleaves four registers of 8 bytes each (b, g, r, a)
 * into eight 4-byte pixels laid out in memory as b, g, r, a, and stores the
 * 32 bytes at dst + 4*index with four MOVNTQ stores.
 *
 * q0, q2, q3 and t are scratch MMX registers; b and r are overwritten as
 * well (the four output qwords end up in q0, b, q2, q3).
 *
 * Afterwards index is advanced by 8 and control jumps back to label "1"
 * while index < dstw (unsigned compare).
 *
 * WRITEBGR32 is the wrapper to use: it lets macro arguments expand before
 * they are stringized.
 */
00449 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
00450 "movq "#b", "#q2" \n\t" \
00451 "movq "#r", "#t" \n\t" \
00452 "punpcklbw "#g", "#b" \n\t" \
00453 "punpcklbw "#a", "#r" \n\t" \
00454 "punpckhbw "#g", "#q2" \n\t" \
00455 "punpckhbw "#a", "#t" \n\t" \
00456 "movq "#b", "#q0" \n\t" \
00457 "movq "#q2", "#q3" \n\t" \
00458 "punpcklwd "#r", "#q0" \n\t" \
00459 "punpckhwd "#r", "#b" \n\t" \
00460 "punpcklwd "#t", "#q2" \n\t" \
00461 "punpckhwd "#t", "#q3" \n\t" \
00462 \
00463 MOVNTQ( q0, (dst, index, 4))\
00464 MOVNTQ( b, 8(dst, index, 4))\
00465 MOVNTQ( q2, 16(dst, index, 4))\
00466 MOVNTQ( q3, 24(dst, index, 4))\
00467 \
00468 "add $8, "#index" \n\t"\
00469 "cmp "#dstw", "#index" \n\t"\
00470 " jb 1b \n\t"
00471 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
00472
/*
 * Vertical multi-tap scaling plus conversion to 32-bit packed output, using
 * the 32-bit-accumulating ("accurate") filter macros.
 *
 * The whole body is a single asm statement assembled from the macros; the
 * filter tables are reached through c (operand %0), so the filter/source
 * arguments and dstY are not referenced here.
 *
 * When CONFIG_SWSCALE_ALPHA is enabled and c->alpPixBuf is set, the alpha
 * plane is filtered as well and written as the fourth byte of each pixel;
 * otherwise the fourth byte is 0xFF.
 */
00473 static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
00474 const int16_t **lumSrc, int lumFilterSize,
00475 const int16_t *chrFilter, const int16_t **chrUSrc,
00476 const int16_t **chrVSrc,
00477 int chrFilterSize, const int16_t **alpSrc,
00478 uint8_t *dest, int dstW, int dstY)
00479 {
00480 x86_reg dummy=0;
00481 x86_reg dstW_reg = dstW;
00482 x86_reg uv_off = c->uv_off << 1;
00483
00484 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00485 YSCALEYUV2PACKEDX_ACCURATE
00486 YSCALEYUV2RGBX
/* The alpha filter below overwrites every MMX register, so the three colour
 * results are parked in the context's temp slots first. */
00487 "movq %%mm2, "U_TEMP"(%0) \n\t"
00488 "movq %%mm4, "V_TEMP"(%0) \n\t"
00489 "movq %%mm5, "Y_TEMP"(%0) \n\t"
00490 YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
/* The macro above ends by reloading U_TEMP into mm3 and V_TEMP into mm4;
 * the third colour component is restored explicitly. */
00491 "movq "Y_TEMP"(%0), %%mm5 \n\t"
00492 "psraw $3, %%mm1 \n\t"
00493 "psraw $3, %%mm7 \n\t"
00494 "packuswb %%mm7, %%mm1 \n\t"
00495 WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
00496 YSCALEYUV2PACKEDX_END
00497 } else {
00498 YSCALEYUV2PACKEDX_ACCURATE
00499 YSCALEYUV2RGBX
/* all-ones register: every alpha byte becomes 0xFF */
00500 "pcmpeqd %%mm7, %%mm7 \n\t"
00501 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00502 YSCALEYUV2PACKEDX_END
00503 }
00504 }
00505
/*
 * Vertical multi-tap scaling plus conversion to 32-bit packed output, using
 * the 16-bit (pmulhw-based) filter macros.
 *
 * The whole body is a single asm statement assembled from the macros; the
 * filter tables are reached through c (operand %0), so the filter/source
 * arguments and dstY are not referenced here.
 *
 * When CONFIG_SWSCALE_ALPHA is enabled and c->alpPixBuf is set, the alpha
 * plane is filtered as well and written as the fourth byte of each pixel;
 * otherwise the fourth byte is 0xFF.
 */
00506 static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
00507 const int16_t **lumSrc, int lumFilterSize,
00508 const int16_t *chrFilter, const int16_t **chrUSrc,
00509 const int16_t **chrVSrc,
00510 int chrFilterSize, const int16_t **alpSrc,
00511 uint8_t *dest, int dstW, int dstY)
00512 {
00513 x86_reg dummy=0;
00514 x86_reg dstW_reg = dstW;
00515 x86_reg uv_off = c->uv_off << 1;
00516
00517 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00518 YSCALEYUV2PACKEDX
00519 YSCALEYUV2RGBX
/* Filter alpha using only registers that do not hold colour results
 * (mm2, mm4, mm5 stay intact); the alpha sums land in mm1/mm7. */
00520 YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
00521 "psraw $3, %%mm1 \n\t"
00522 "psraw $3, %%mm7 \n\t"
00523 "packuswb %%mm7, %%mm1 \n\t"
00524 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00525 YSCALEYUV2PACKEDX_END
00526 } else {
00527 YSCALEYUV2PACKEDX
00528 YSCALEYUV2RGBX
/* all-ones register: every alpha byte becomes 0xFF */
00529 "pcmpeqd %%mm7, %%mm7 \n\t"
00530 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00531 YSCALEYUV2PACKEDX_END
00532 }
00533 }
00534
/*
 * REAL_WRITERGB16(dst, dstw, index)
 *
 * Asm fragment that packs 8 pixels into 16-bit words and stores the 16 bytes
 * at dst + 2*index.
 *
 * In: mm2, mm4, mm5 = the three 8-byte colour components (passed as B, G, R
 *     by the callers in view); mm7 must be zero (the callers clear it).
 *
 * mm2 and mm5 are masked with bF8 (keep 5 bits), mm4 with bFC (keep 6 bits).
 * Each output word is (mm5 byte << 8) | (mm4 byte << 3) | (mm2 byte >> 3).
 * mm1 and mm3 are scratch.
 *
 * Afterwards index is advanced by 8 and control jumps back to label "1"
 * while index < dstw (unsigned compare).
 *
 * WRITERGB16 is the wrapper to use: it lets macro arguments expand before
 * they are stringized.
 */
00535 #define REAL_WRITERGB16(dst, dstw, index) \
00536 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00537 "pand "MANGLE(bFC)", %%mm4 \n\t" \
00538 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00539 "psrlq $3, %%mm2 \n\t"\
00540 \
00541 "movq %%mm2, %%mm1 \n\t"\
00542 "movq %%mm4, %%mm3 \n\t"\
00543 \
00544 "punpcklbw %%mm7, %%mm3 \n\t"\
00545 "punpcklbw %%mm5, %%mm2 \n\t"\
00546 "punpckhbw %%mm7, %%mm4 \n\t"\
00547 "punpckhbw %%mm5, %%mm1 \n\t"\
00548 \
00549 "psllq $3, %%mm3 \n\t"\
00550 "psllq $3, %%mm4 \n\t"\
00551 \
00552 "por %%mm3, %%mm2 \n\t"\
00553 "por %%mm4, %%mm1 \n\t"\
00554 \
00555 MOVNTQ(%%mm2, (dst, index, 2))\
00556 MOVNTQ(%%mm1, 8(dst, index, 2))\
00557 \
00558 "add $8, "#index" \n\t"\
00559 "cmp "#dstw", "#index" \n\t"\
00560 " jb 1b \n\t"
00561 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
00562
/*
 * Vertical multi-tap scaling plus conversion to 16-bit (5-6-5) packed
 * output, using the 32-bit-accumulating ("accurate") filter macros.
 *
 * The whole body is a single asm statement assembled from the macros; the
 * filter tables are reached through c (operand %0), so the filter/source
 * arguments and dstY are not referenced here.
 */
00563 static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
00564 const int16_t **lumSrc, int lumFilterSize,
00565 const int16_t *chrFilter, const int16_t **chrUSrc,
00566 const int16_t **chrVSrc,
00567 int chrFilterSize, const int16_t **alpSrc,
00568 uint8_t *dest, int dstW, int dstY)
00569 {
00570 x86_reg dummy=0;
00571 x86_reg dstW_reg = dstW;
00572 x86_reg uv_off = c->uv_off << 1;
00573
00574 YSCALEYUV2PACKEDX_ACCURATE
00575 YSCALEYUV2RGBX
/* WRITERGB16 needs mm7 == 0 for its byte-to-word unpacking */
00576 "pxor %%mm7, %%mm7 \n\t"
00577
/* optional: add the per-component dither bytes (unsigned saturation) before
 * the low bits are masked away */
00578 #ifdef DITHER1XBPP
00579 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
00580 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
00581 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
00582 #endif
00583 WRITERGB16(%4, %5, %%REGa)
00584 YSCALEYUV2PACKEDX_END
00585 }
00586
/*
 * Vertical multi-tap scaling plus conversion to 16-bit (5-6-5) packed
 * output, using the 16-bit (pmulhw-based) filter macros.
 *
 * The whole body is a single asm statement assembled from the macros; the
 * filter tables are reached through c (operand %0), so the filter/source
 * arguments and dstY are not referenced here.
 */
00587 static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
00588 const int16_t **lumSrc, int lumFilterSize,
00589 const int16_t *chrFilter, const int16_t **chrUSrc,
00590 const int16_t **chrVSrc,
00591 int chrFilterSize, const int16_t **alpSrc,
00592 uint8_t *dest, int dstW, int dstY)
00593 {
00594 x86_reg dummy=0;
00595 x86_reg dstW_reg = dstW;
00596 x86_reg uv_off = c->uv_off << 1;
00597
00598 YSCALEYUV2PACKEDX
00599 YSCALEYUV2RGBX
/* WRITERGB16 needs mm7 == 0 for its byte-to-word unpacking */
00600 "pxor %%mm7, %%mm7 \n\t"
00601
/* optional: add the per-component dither bytes (unsigned saturation) before
 * the low bits are masked away */
00602 #ifdef DITHER1XBPP
00603 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
00604 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
00605 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
00606 #endif
00607 WRITERGB16(%4, %5, %%REGa)
00608 YSCALEYUV2PACKEDX_END
00609 }
00610
/*
 * REAL_WRITERGB15(dst, dstw, index)
 *
 * Asm fragment that packs 8 pixels into 16-bit words with 5 bits per
 * component and stores the 16 bytes at dst + 2*index.
 *
 * In: mm2, mm4, mm5 = the three 8-byte colour components (passed as B, G, R
 *     by the callers in view); mm7 must be zero (the callers clear it).
 *
 * All three components are masked with bF8 (keep 5 bits). Each output word
 * is (mm5 byte << 7) | (mm4 byte << 2) | (mm2 byte >> 3); the top bit of
 * every word is zero. mm1 and mm3 are scratch.
 *
 * Afterwards index is advanced by 8 and control jumps back to label "1"
 * while index < dstw (unsigned compare).
 *
 * WRITERGB15 is the wrapper to use: it lets macro arguments expand before
 * they are stringized.
 */
00611 #define REAL_WRITERGB15(dst, dstw, index) \
00612 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00613 "pand "MANGLE(bF8)", %%mm4 \n\t" \
00614 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00615 "psrlq $3, %%mm2 \n\t"\
00616 "psrlq $1, %%mm5 \n\t"\
00617 \
00618 "movq %%mm2, %%mm1 \n\t"\
00619 "movq %%mm4, %%mm3 \n\t"\
00620 \
00621 "punpcklbw %%mm7, %%mm3 \n\t"\
00622 "punpcklbw %%mm5, %%mm2 \n\t"\
00623 "punpckhbw %%mm7, %%mm4 \n\t"\
00624 "punpckhbw %%mm5, %%mm1 \n\t"\
00625 \
00626 "psllq $2, %%mm3 \n\t"\
00627 "psllq $2, %%mm4 \n\t"\
00628 \
00629 "por %%mm3, %%mm2 \n\t"\
00630 "por %%mm4, %%mm1 \n\t"\
00631 \
00632 MOVNTQ(%%mm2, (dst, index, 2))\
00633 MOVNTQ(%%mm1, 8(dst, index, 2))\
00634 \
00635 "add $8, "#index" \n\t"\
00636 "cmp "#dstw", "#index" \n\t"\
00637 " jb 1b \n\t"
00638 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
00639
/*
 * Vertical multi-tap scaling plus conversion to 16-bit (5-5-5) packed
 * output, using the 32-bit-accumulating ("accurate") filter macros.
 *
 * The whole body is a single asm statement assembled from the macros; the
 * filter tables are reached through c (operand %0), so the filter/source
 * arguments and dstY are not referenced here.
 */
00640 static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
00641 const int16_t **lumSrc, int lumFilterSize,
00642 const int16_t *chrFilter, const int16_t **chrUSrc,
00643 const int16_t **chrVSrc,
00644 int chrFilterSize, const int16_t **alpSrc,
00645 uint8_t *dest, int dstW, int dstY)
00646 {
00647 x86_reg dummy=0;
00648 x86_reg dstW_reg = dstW;
00649 x86_reg uv_off = c->uv_off << 1;
00650
00651 YSCALEYUV2PACKEDX_ACCURATE
00652 YSCALEYUV2RGBX
/* WRITERGB15 needs mm7 == 0 for its byte-to-word unpacking */
00653 "pxor %%mm7, %%mm7 \n\t"
00654
/* optional: add the per-component dither bytes (unsigned saturation) before
 * the low bits are masked away */
00655 #ifdef DITHER1XBPP
00656 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
00657 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
00658 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
00659 #endif
00660 WRITERGB15(%4, %5, %%REGa)
00661 YSCALEYUV2PACKEDX_END
00662 }
00663
/*
 * Vertical multi-tap scaling plus conversion to 16-bit (5-5-5) packed
 * output, using the 16-bit (pmulhw-based) filter macros.
 *
 * The whole body is a single asm statement assembled from the macros; the
 * filter tables are reached through c (operand %0), so the filter/source
 * arguments and dstY are not referenced here.
 */
00664 static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
00665 const int16_t **lumSrc, int lumFilterSize,
00666 const int16_t *chrFilter, const int16_t **chrUSrc,
00667 const int16_t **chrVSrc,
00668 int chrFilterSize, const int16_t **alpSrc,
00669 uint8_t *dest, int dstW, int dstY)
00670 {
00671 x86_reg dummy=0;
00672 x86_reg dstW_reg = dstW;
00673 x86_reg uv_off = c->uv_off << 1;
00674
00675 YSCALEYUV2PACKEDX
00676 YSCALEYUV2RGBX
/* WRITERGB15 needs mm7 == 0 for its byte-to-word unpacking */
00677 "pxor %%mm7, %%mm7 \n\t"
00678
/* optional: add the per-component dither bytes (unsigned saturation) before
 * the low bits are masked away */
00679 #ifdef DITHER1XBPP
00680 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
00681 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
00682 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
00683 #endif
00684 WRITERGB15(%4, %5, %%REGa)
00685 YSCALEYUV2PACKEDX_END
00686 }
00687
/*
 * WRITEBGR24MMX(dst, dstw, index)
 *
 * Plain-MMX asm fragment that packs 8 pixels into 3 bytes each and stores
 * the resulting 24 bytes at dst, dst+8 and dst+16.
 *
 * In: mm2, mm4, mm5 = the three 8-byte colour components, written to memory
 *     in that order for each pixel (the callers in view pass B, G, R);
 *     mm7 supplies the fourth byte lane of the intermediate 4-byte pixels,
 *     which is squeezed out again (the callers clear mm7 beforehand).
 *
 * The components are first interleaved into four qwords of two 4-byte
 * pixels each (mm0, mm2, mm1, mm3); shifts and punpckhdq then close the
 * one-byte gaps, and the three output qwords are assembled with shift/or.
 * All eight MMX registers are overwritten.
 *
 * Afterwards dst is advanced by 24, index by 8, and control jumps back to
 * label "1" while index < dstw (unsigned compare). dst must therefore be a
 * register the caller may modify.
 */
00688 #define WRITEBGR24MMX(dst, dstw, index) \
00689 \
00690 "movq %%mm2, %%mm1 \n\t" \
00691 "movq %%mm5, %%mm6 \n\t" \
00692 "punpcklbw %%mm4, %%mm2 \n\t" \
00693 "punpcklbw %%mm7, %%mm5 \n\t" \
00694 "punpckhbw %%mm4, %%mm1 \n\t" \
00695 "punpckhbw %%mm7, %%mm6 \n\t" \
00696 "movq %%mm2, %%mm0 \n\t" \
00697 "movq %%mm1, %%mm3 \n\t" \
00698 "punpcklwd %%mm5, %%mm0 \n\t" \
00699 "punpckhwd %%mm5, %%mm2 \n\t" \
00700 "punpcklwd %%mm6, %%mm1 \n\t" \
00701 "punpckhwd %%mm6, %%mm3 \n\t" \
00702 \
00703 "movq %%mm0, %%mm4 \n\t" \
00704 "movq %%mm2, %%mm6 \n\t" \
00705 "movq %%mm1, %%mm5 \n\t" \
00706 "movq %%mm3, %%mm7 \n\t" \
00707 \
00708 "psllq $40, %%mm0 \n\t" \
00709 "psllq $40, %%mm2 \n\t" \
00710 "psllq $40, %%mm1 \n\t" \
00711 "psllq $40, %%mm3 \n\t" \
00712 \
00713 "punpckhdq %%mm4, %%mm0 \n\t" \
00714 "punpckhdq %%mm6, %%mm2 \n\t" \
00715 "punpckhdq %%mm5, %%mm1 \n\t" \
00716 "punpckhdq %%mm7, %%mm3 \n\t" \
00717 \
00718 "psrlq $8, %%mm0 \n\t" \
00719 "movq %%mm2, %%mm6 \n\t" \
00720 "psllq $40, %%mm2 \n\t" \
00721 "por %%mm2, %%mm0 \n\t" \
00722 MOVNTQ(%%mm0, (dst))\
00723 \
00724 "psrlq $24, %%mm6 \n\t" \
00725 "movq %%mm1, %%mm5 \n\t" \
00726 "psllq $24, %%mm1 \n\t" \
00727 "por %%mm1, %%mm6 \n\t" \
00728 MOVNTQ(%%mm6, 8(dst))\
00729 \
00730 "psrlq $40, %%mm5 \n\t" \
00731 "psllq $8, %%mm3 \n\t" \
00732 "por %%mm3, %%mm5 \n\t" \
00733 MOVNTQ(%%mm5, 16(dst))\
00734 \
00735 "add $24, "#dst" \n\t"\
00736 \
00737 "add $8, "#index" \n\t"\
00738 "cmp "#dstw", "#index" \n\t"\
00739 " jb 1b \n\t"
00740
/*
 * WRITEBGR24MMX2(dst, dstw, index)
 *
 * pshufw-based variant of the 24-bit writer: packs 8 pixels into 3 bytes
 * each and stores the resulting 24 bytes at dst, dst+8 and dst+16.
 *
 * In: mm2, mm4, mm5 = the three 8-byte colour components (the callers in
 *     view pass B, G, R).
 *
 * For each of the three output qwords the needed source words are
 * replicated with pshufw, the bytes belonging to that qword are selected
 * with the ff_M24A / ff_M24B / ff_M24C masks, and the three selections are
 * OR-ed together. mm4 is shifted right by 8 after the first store and is
 * therefore not preserved; mm0 and mm7 hold masks, mm1/mm3/mm6 are scratch.
 *
 * Afterwards dst is advanced by 24, index by 8, and control jumps back to
 * label "1" while index < dstw (unsigned compare). dst must therefore be a
 * register the caller may modify.
 */
00741 #define WRITEBGR24MMX2(dst, dstw, index) \
00742 \
00743 "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
00744 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
00745 "pshufw $0x50, %%mm2, %%mm1 \n\t" \
00746 "pshufw $0x50, %%mm4, %%mm3 \n\t" \
00747 "pshufw $0x00, %%mm5, %%mm6 \n\t" \
00748 \
00749 "pand %%mm0, %%mm1 \n\t" \
00750 "pand %%mm0, %%mm3 \n\t" \
00751 "pand %%mm7, %%mm6 \n\t" \
00752 \
00753 "psllq $8, %%mm3 \n\t" \
00754 "por %%mm1, %%mm6 \n\t"\
00755 "por %%mm3, %%mm6 \n\t"\
00756 MOVNTQ(%%mm6, (dst))\
00757 \
00758 "psrlq $8, %%mm4 \n\t" \
00759 "pshufw $0xA5, %%mm2, %%mm1 \n\t" \
00760 "pshufw $0x55, %%mm4, %%mm3 \n\t" \
00761 "pshufw $0xA5, %%mm5, %%mm6 \n\t" \
00762 \
00763 "pand "MANGLE(ff_M24B)", %%mm1 \n\t" \
00764 "pand %%mm7, %%mm3 \n\t" \
00765 "pand %%mm0, %%mm6 \n\t" \
00766 \
00767 "por %%mm1, %%mm3 \n\t" \
00768 "por %%mm3, %%mm6 \n\t"\
00769 MOVNTQ(%%mm6, 8(dst))\
00770 \
00771 "pshufw $0xFF, %%mm2, %%mm1 \n\t" \
00772 "pshufw $0xFA, %%mm4, %%mm3 \n\t" \
00773 "pshufw $0xFA, %%mm5, %%mm6 \n\t" \
00774 \
00775 "pand %%mm7, %%mm1 \n\t" \
00776 "pand %%mm0, %%mm3 \n\t" \
00777 "pand "MANGLE(ff_M24B)", %%mm6 \n\t" \
00778 \
00779 "por %%mm1, %%mm3 \n\t"\
00780 "por %%mm3, %%mm6 \n\t"\
00781 MOVNTQ(%%mm6, 16(dst))\
00782 \
00783 "add $24, "#dst" \n\t"\
00784 \
00785 "add $8, "#index" \n\t"\
00786 "cmp "#dstw", "#index" \n\t"\
00787 " jb 1b \n\t"
00788
/*
 * WRITEBGR24 resolves to the pshufw-based writer when this template is
 * compiled with COMPILE_TEMPLATE_MMX2, and to the plain-MMX writer
 * otherwise. Any previous definition is dropped first.
 */
00789 #if COMPILE_TEMPLATE_MMX2
00790 #undef WRITEBGR24
00791 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
00792 #else
00793 #undef WRITEBGR24
00794 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
00795 #endif
00796
/*
 * Vertical multi-tap scaling plus conversion to 24-bit packed output, using
 * the 32-bit-accumulating ("accurate") filter macros.
 *
 * The whole body is a single asm statement assembled from the macros; the
 * filter tables are reached through c (operand %0), so the filter/source
 * arguments and dstY are not referenced here.
 *
 * The operand list is spelled out here instead of using
 * YSCALEYUV2PACKEDX_END because REG_c is additionally clobbered.
 */
00797 static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
00798 const int16_t **lumSrc, int lumFilterSize,
00799 const int16_t *chrFilter, const int16_t **chrUSrc,
00800 const int16_t **chrVSrc,
00801 int chrFilterSize, const int16_t **alpSrc,
00802 uint8_t *dest, int dstW, int dstY)
00803 {
00804 x86_reg dummy=0;
00805 x86_reg dstW_reg = dstW;
00806 x86_reg uv_off = c->uv_off << 1;
00807
00808 YSCALEYUV2PACKEDX_ACCURATE
00809 YSCALEYUV2RGBX
00810 "pxor %%mm7, %%mm7 \n\t"
/* REG_c = dest + 3 * pixel index: the write pointer for 3-byte pixels */
00811 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t"
00812 "add %4, %%"REG_c" \n\t"
00813 WRITEBGR24(%%REGc, %5, %%REGa)
00814 :: "r" (&c->redDither),
00815 "m" (dummy), "m" (dummy), "m" (dummy),
00816 "r" (dest), "m" (dstW_reg), "m"(uv_off)
00817 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
00818 );
00819 }
00820
/*
 * Vertical multi-tap scaling plus conversion to 24-bit packed output, using
 * the 16-bit (pmulhw-based) filter macros.
 *
 * The whole body is a single asm statement assembled from the macros; the
 * filter tables are reached through c (operand %0), so the filter/source
 * arguments and dstY are not referenced here.
 *
 * The operand list is spelled out here instead of using
 * YSCALEYUV2PACKEDX_END because REG_c is additionally clobbered.
 */
00821 static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
00822 const int16_t **lumSrc, int lumFilterSize,
00823 const int16_t *chrFilter, const int16_t **chrUSrc,
00824 const int16_t **chrVSrc,
00825 int chrFilterSize, const int16_t **alpSrc,
00826 uint8_t *dest, int dstW, int dstY)
00827 {
00828 x86_reg dummy=0;
00829 x86_reg dstW_reg = dstW;
00830 x86_reg uv_off = c->uv_off << 1;
00831
00832 YSCALEYUV2PACKEDX
00833 YSCALEYUV2RGBX
00834 "pxor %%mm7, %%mm7 \n\t"
/* REG_c = dest + 3 * pixel index: the write pointer for 3-byte pixels */
00835 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t"
00836 "add %4, %%"REG_c" \n\t"
00837 WRITEBGR24(%%REGc, %5, %%REGa)
00838 :: "r" (&c->redDither),
00839 "m" (dummy), "m" (dummy), "m" (dummy),
00840 "r" (dest), "m" (dstW_reg), "m"(uv_off)
00841 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
00842 );
00843 }
00844
/*
 * REAL_WRITEYUY2(dst, dstw, index)
 *
 * Asm fragment that packs 8 luma and 4+4 chroma words into 16 bytes of
 * interleaved 4:2:2 data and stores them at dst + 2*index.
 *
 * In: mm1 = luma pixels 0-3, mm7 = luma pixels 4-7, mm3 and mm4 = the two
 *     chroma word vectors (all already scaled to byte range by the caller).
 *
 * Words are packed to unsigned-saturated bytes; the two chroma vectors are
 * byte-interleaved (mm3 byte first), and the result is interleaved with the
 * luma bytes, giving the memory order Y0, mm3[0], Y1, mm4[0], Y2, ...
 *
 * Afterwards index is advanced by 8 and control jumps back to label "1"
 * while index < dstw (unsigned compare).
 *
 * WRITEYUY2 is the wrapper to use: it lets macro arguments expand before
 * they are stringized.
 */
00845 #define REAL_WRITEYUY2(dst, dstw, index) \
00846 "packuswb %%mm3, %%mm3 \n\t"\
00847 "packuswb %%mm4, %%mm4 \n\t"\
00848 "packuswb %%mm7, %%mm1 \n\t"\
00849 "punpcklbw %%mm4, %%mm3 \n\t"\
00850 "movq %%mm1, %%mm7 \n\t"\
00851 "punpcklbw %%mm3, %%mm1 \n\t"\
00852 "punpckhbw %%mm3, %%mm7 \n\t"\
00853 \
00854 MOVNTQ(%%mm1, (dst, index, 2))\
00855 MOVNTQ(%%mm7, 8(dst, index, 2))\
00856 \
00857 "add $8, "#index" \n\t"\
00858 "cmp "#dstw", "#index" \n\t"\
00859 " jb 1b \n\t"
00860 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
00861
/*
 * Vertical multi-tap scaling to packed 4:2:2 output, using the
 * 32-bit-accumulating ("accurate") filter macros.
 *
 * The whole body is a single asm statement assembled from the macros; the
 * filter tables are reached through c (operand %0), so the filter/source
 * arguments and dstY are not referenced here.
 */
00862 static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
00863 const int16_t **lumSrc, int lumFilterSize,
00864 const int16_t *chrFilter, const int16_t **chrUSrc,
00865 const int16_t **chrVSrc,
00866 int chrFilterSize, const int16_t **alpSrc,
00867 uint8_t *dest, int dstW, int dstY)
00868 {
00869 x86_reg dummy=0;
00870 x86_reg dstW_reg = dstW;
00871 x86_reg uv_off = c->uv_off << 1;
00872
00873 YSCALEYUV2PACKEDX_ACCURATE
00874
/* drop the 3 extra fractional bits of the filtered chroma (mm3, mm4) and
 * luma (mm1, mm7) words before they are packed to bytes */
00875 "psraw $3, %%mm3 \n\t"
00876 "psraw $3, %%mm4 \n\t"
00877 "psraw $3, %%mm1 \n\t"
00878 "psraw $3, %%mm7 \n\t"
00879 WRITEYUY2(%4, %5, %%REGa)
00880 YSCALEYUV2PACKEDX_END
00881 }
00882
/*
 * Vertical multi-tap scaling to packed 4:2:2 output, using the 16-bit
 * (pmulhw-based) filter macros.
 *
 * The whole body is a single asm statement assembled from the macros; the
 * filter tables are reached through c (operand %0), so the filter/source
 * arguments and dstY are not referenced here.
 */
00883 static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
00884 const int16_t **lumSrc, int lumFilterSize,
00885 const int16_t *chrFilter, const int16_t **chrUSrc,
00886 const int16_t **chrVSrc,
00887 int chrFilterSize, const int16_t **alpSrc,
00888 uint8_t *dest, int dstW, int dstY)
00889 {
00890 x86_reg dummy=0;
00891 x86_reg dstW_reg = dstW;
00892 x86_reg uv_off = c->uv_off << 1;
00893
00894 YSCALEYUV2PACKEDX
00895
/* drop the 3 extra fractional bits of the filtered chroma (mm3, mm4) and
 * luma (mm1, mm7) words before they are packed to bytes */
00896 "psraw $3, %%mm3 \n\t"
00897 "psraw $3, %%mm4 \n\t"
00898 "psraw $3, %%mm1 \n\t"
00899 "psraw $3, %%mm7 \n\t"
00900 WRITEYUY2(%4, %5, %%REGa)
00901 YSCALEYUV2PACKEDX_END
00902 }
00903
/*
 * REAL_YSCALEYUV2RGB_UV(index, c)
 *
 * Asm fragment: chroma part of a two-line vertical blend followed by the
 * start of the colour conversion.
 *
 * index - register used as the pixel index; cleared here. Label "1" marks
 *         the start of the per-8-pixel loop that the WRITE* macros jump
 *         back to.
 * c     - register holding the context base address for the *_OFFSET /
 *         *_COEFF displacements.
 *
 * Operands %2 and %3 are the two chroma source lines. Four words are read
 * at (line + index) and four more at (line + index + UV_OFFx2(c)); index is
 * temporarily advanced by that amount and restored afterwards.
 *
 * Blend per word: (line3 >> 4) + pmulhw(line2 - line3, coefficient), with
 * the coefficient taken from CHR_MMX_FILTER_OFFSET+8(c).
 *
 * On exit, after subtracting U_OFFSET / V_OFFSET: mm2 and mm5 hold copies
 * of the offset-corrected values (not yet multiplied), while mm3 and mm4
 * hold them multiplied by UG_COEFF / VG_COEFF.
 */
00904 #define REAL_YSCALEYUV2RGB_UV(index, c) \
00905 "xor "#index", "#index" \n\t"\
00906 ".p2align 4 \n\t"\
00907 "1: \n\t"\
00908 "movq (%2, "#index"), %%mm2 \n\t" \
00909 "movq (%3, "#index"), %%mm3 \n\t" \
00910 "add "UV_OFFx2"("#c"), "#index" \n\t" \
00911 "movq (%2, "#index"), %%mm5 \n\t" \
00912 "movq (%3, "#index"), %%mm4 \n\t" \
00913 "sub "UV_OFFx2"("#c"), "#index" \n\t" \
00914 "psubw %%mm3, %%mm2 \n\t" \
00915 "psubw %%mm4, %%mm5 \n\t" \
00916 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00917 "pmulhw %%mm0, %%mm2 \n\t" \
00918 "pmulhw %%mm0, %%mm5 \n\t" \
00919 "psraw $4, %%mm3 \n\t" \
00920 "psraw $4, %%mm4 \n\t" \
00921 "paddw %%mm2, %%mm3 \n\t" \
00922 "paddw %%mm5, %%mm4 \n\t" \
00923 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00924 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00925 "movq %%mm3, %%mm2 \n\t" \
00926 "movq %%mm4, %%mm5 \n\t" \
00927 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00928 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00929 \
00930
/*
 * REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
 *
 * Asm fragment: two-line vertical blend of a full-resolution plane for the
 * current 8 pixels.
 *
 * index  - pixel index register (source words are read at line + 2*index)
 * c      - register holding the context base address
 * b1, b2 - registers holding the two source line pointers
 *
 * Blend per word: (b2 >> 4) + pmulhw(b1 - b2, coefficient), with the
 * coefficient taken from LUM_MMX_FILTER_OFFSET+8(c).
 *
 * On exit: mm1 = pixels 0-3, mm7 = pixels 4-7; mm0 and mm6 are scratch.
 */
00931 #define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
00932 "movq ("#b1", "#index", 2), %%mm0 \n\t" \
00933 "movq ("#b2", "#index", 2), %%mm1 \n\t" \
00934 "movq 8("#b1", "#index", 2), %%mm6 \n\t" \
00935 "movq 8("#b2", "#index", 2), %%mm7 \n\t" \
00936 "psubw %%mm1, %%mm0 \n\t" \
00937 "psubw %%mm7, %%mm6 \n\t" \
00938 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
00939 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
00940 "psraw $4, %%mm1 \n\t" \
00941 "psraw $4, %%mm7 \n\t" \
00942 "paddw %%mm0, %%mm1 \n\t" \
00943 "paddw %%mm6, %%mm7 \n\t" \
00944
/*
 * REAL_YSCALEYUV2RGB_COEFF(c)
 *
 * Asm fragment: second half of the colour conversion, with the context base
 * address in register c.
 *
 * In:  mm2, mm5 = offset-corrected chroma words; mm3, mm4 = the same values
 *      already multiplied by UG_COEFF / VG_COEFF; mm1 = luma pixels 0-3,
 *      mm7 = luma pixels 4-7.
 * Out: mm2 = Y + UB_COEFF term, mm5 = Y + VR_COEFF term,
 *      mm4 = Y + (UG term + VG term), each packed to 8 unsigned-saturated
 *      bytes.
 *
 * Each chroma word is duplicated (punpck?wd reg,reg) so that one chroma
 * sample serves two adjacent luma samples. mm0, mm3 and mm6 are scratch.
 */
00945 #define REAL_YSCALEYUV2RGB_COEFF(c) \
00946 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00947 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00948 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00949 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00950 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00951 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00952 \
00953 "paddw %%mm3, %%mm4 \n\t"\
00954 "movq %%mm2, %%mm0 \n\t"\
00955 "movq %%mm5, %%mm6 \n\t"\
00956 "movq %%mm4, %%mm3 \n\t"\
00957 "punpcklwd %%mm2, %%mm2 \n\t"\
00958 "punpcklwd %%mm5, %%mm5 \n\t"\
00959 "punpcklwd %%mm4, %%mm4 \n\t"\
00960 "paddw %%mm1, %%mm2 \n\t"\
00961 "paddw %%mm1, %%mm5 \n\t"\
00962 "paddw %%mm1, %%mm4 \n\t"\
00963 "punpckhwd %%mm0, %%mm0 \n\t"\
00964 "punpckhwd %%mm6, %%mm6 \n\t"\
00965 "punpckhwd %%mm3, %%mm3 \n\t"\
00966 "paddw %%mm7, %%mm0 \n\t"\
00967 "paddw %%mm7, %%mm6 \n\t"\
00968 "paddw %%mm7, %%mm3 \n\t"\
00969 \
00970 "packuswb %%mm0, %%mm2 \n\t"\
00971 "packuswb %%mm6, %%mm5 \n\t"\
00972 "packuswb %%mm3, %%mm4 \n\t"\
00973
/* Forwarding wrapper: the extra macro level lets arguments that are
 * themselves macros be expanded before REAL_YSCALEYUV2RGB_YA stringifies
 * them with '#'. */
00974 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
00975
/* Complete two-row blend + colour conversion: the chroma fragment, then the
 * luma blend reading its two rows from asm operands %0 and %1, then the
 * coefficient/packing fragment. */
00976 #define YSCALEYUV2RGB(index, c) \
00977 REAL_YSCALEYUV2RGB_UV(index, c) \
00978 REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
00979 REAL_YSCALEYUV2RGB_COEFF(c)
00980
/*
 * Blend two source rows and emit packed 32-bit pixels through WRITEBGR32.
 *
 * Operand mapping shared by the asm statements below:
 *   %0 = buf0, %1 = buf1, %2 = ubuf0, %3 = ubuf1, %4 = dest,
 *   %5 = &c->redDither (base address for the context offsets in the macros).
 *
 * vbuf0, vbuf1, dstW, yalpha, uvalpha and y are not referenced in this body.
 *
 * When CONFIG_SWSCALE_ALPHA is enabled and c->alpPixBuf is non-NULL, the
 * alpha rows abuf0/abuf1 are blended with YSCALEYUV2RGB_YA and handed to
 * WRITEBGR32; otherwise a register of all ones is handed over in that slot.
 */
00984 static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
00985 const uint16_t *buf1, const uint16_t *ubuf0,
00986 const uint16_t *ubuf1, const uint16_t *vbuf0,
00987 const uint16_t *vbuf1, const uint16_t *abuf0,
00988 const uint16_t *abuf1, uint8_t *dest,
00989 int dstW, int yalpha, int uvalpha, int y)
00990 {
00991 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00992 #if ARCH_X86_64
      /* 64-bit build: %r8 serves as the loop index (declared as clobbered)
       * and the alpha rows arrive as extra operands %6 / %7. */
00993 __asm__ volatile(
00994 YSCALEYUV2RGB(%%r8, %5)
00995 YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
      /* blended alpha: shift right by 3 and pack both halves into %mm1 */
00996 "psraw $3, %%mm1 \n\t"
00997 "psraw $3, %%mm7 \n\t"
00998 "packuswb %%mm7, %%mm1 \n\t"
00999 WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
01000 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
01001 "a" (&c->redDither),
01002 "r" (abuf0), "r" (abuf1)
01003 : "%r8"
01004 );
01005 #else
      /* Other builds: the alpha pointers are parked in the context and
       * loaded into %0/%1 inside the asm (presumably because no general
       * register is left for extra operands - TODO confirm). */
01006 c->u_temp=(intptr_t)abuf0;
01007 c->v_temp=(intptr_t)abuf1;
01008 __asm__ volatile(
      /* save REG_b in the context, then use it for dest; REG_BP is pushed
       * and used as the loop index */
01009 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01010 "mov %4, %%"REG_b" \n\t"
01011 "push %%"REG_BP" \n\t"
01012 YSCALEYUV2RGB(%%REGBP, %5)
      /* preserve the luma pointers while %0/%1 point at the alpha rows */
01013 "push %0 \n\t"
01014 "push %1 \n\t"
01015 "mov "U_TEMP"(%5), %0 \n\t"
01016 "mov "V_TEMP"(%5), %1 \n\t"
01017 YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
01018 "psraw $3, %%mm1 \n\t"
01019 "psraw $3, %%mm7 \n\t"
01020 "packuswb %%mm7, %%mm1 \n\t"
      /* restore in reverse push order */
01021 "pop %1 \n\t"
01022 "pop %0 \n\t"
01023 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
01024 "pop %%"REG_BP" \n\t"
01025 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01026 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01027 "a" (&c->redDither)
01028 );
01029 #endif
01030 } else {
01031 __asm__ volatile(
01032 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01033 "mov %4, %%"REG_b" \n\t"
01034 "push %%"REG_BP" \n\t"
01035 YSCALEYUV2RGB(%%REGBP, %5)
      /* no alpha source: fill %mm7 with all ones for the alpha slot */
01036 "pcmpeqd %%mm7, %%mm7 \n\t"
01037 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01038 "pop %%"REG_BP" \n\t"
01039 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01040 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01041 "a" (&c->redDither)
01042 );
01043 }
01044 }
01045
/*
 * Blend two source rows (YSCALEYUV2RGB) and emit pixels via WRITEBGR24.
 *
 * Operands: %0 = buf0, %1 = buf1, %2 = ubuf0, %3 = ubuf1, %4 = dest,
 * %5 = &c->redDither. vbuf0, vbuf1, abuf0, abuf1, dstW, yalpha, uvalpha and
 * y are not referenced in this body.
 */
01046 static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
01047 const uint16_t *buf1, const uint16_t *ubuf0,
01048 const uint16_t *ubuf1, const uint16_t *vbuf0,
01049 const uint16_t *vbuf1, const uint16_t *abuf0,
01050 const uint16_t *abuf1, uint8_t *dest,
01051 int dstW, int yalpha, int uvalpha, int y)
01052 {
01053
01054 __asm__ volatile(
      /* REG_b is saved in the context and reused for dest; REG_BP is pushed
       * and used as the loop index */
01055 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01056 "mov %4, %%"REG_b" \n\t"
01057 "push %%"REG_BP" \n\t"
01058 YSCALEYUV2RGB(%%REGBP, %5)
      /* %mm7 is zeroed before the writer macro runs */
01059 "pxor %%mm7, %%mm7 \n\t"
01060 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01061 "pop %%"REG_BP" \n\t"
01062 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01063 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01064 "a" (&c->redDither)
01065 );
01066 }
01067
/*
 * Blend two source rows (YSCALEYUV2RGB) and emit pixels via WRITERGB15.
 *
 * Operands: %0 = buf0, %1 = buf1, %2 = ubuf0, %3 = ubuf1, %4 = dest,
 * %5 = &c->redDither. vbuf0, vbuf1, abuf0, abuf1, dstW, yalpha, uvalpha and
 * y are not referenced in this body.
 */
01068 static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
01069 const uint16_t *buf1, const uint16_t *ubuf0,
01070 const uint16_t *ubuf1, const uint16_t *vbuf0,
01071 const uint16_t *vbuf1, const uint16_t *abuf0,
01072 const uint16_t *abuf1, uint8_t *dest,
01073 int dstW, int yalpha, int uvalpha, int y)
01074 {
01075
01076 __asm__ volatile(
      /* REG_b is saved in the context and reused for dest; REG_BP is pushed
       * and used as the loop index */
01077 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01078 "mov %4, %%"REG_b" \n\t"
01079 "push %%"REG_BP" \n\t"
01080 YSCALEYUV2RGB(%%REGBP, %5)
01081 "pxor %%mm7, %%mm7 \n\t"
01082
      /* optional dithering: saturating add of the per-channel dither values
       * stored in the context */
01083 #ifdef DITHER1XBPP
01084 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01085 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01086 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01087 #endif
01088 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01089 "pop %%"REG_BP" \n\t"
01090 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01091 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01092 "a" (&c->redDither)
01093 );
01094 }
01095
/*
 * Blend two source rows (YSCALEYUV2RGB) and emit pixels via WRITERGB16.
 *
 * Operands: %0 = buf0, %1 = buf1, %2 = ubuf0, %3 = ubuf1, %4 = dest,
 * %5 = &c->redDither. vbuf0, vbuf1, abuf0, abuf1, dstW, yalpha, uvalpha and
 * y are not referenced in this body.
 */
01096 static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
01097 const uint16_t *buf1, const uint16_t *ubuf0,
01098 const uint16_t *ubuf1, const uint16_t *vbuf0,
01099 const uint16_t *vbuf1, const uint16_t *abuf0,
01100 const uint16_t *abuf1, uint8_t *dest,
01101 int dstW, int yalpha, int uvalpha, int y)
01102 {
01103
01104 __asm__ volatile(
      /* REG_b is saved in the context and reused for dest; REG_BP is pushed
       * and used as the loop index */
01105 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01106 "mov %4, %%"REG_b" \n\t"
01107 "push %%"REG_BP" \n\t"
01108 YSCALEYUV2RGB(%%REGBP, %5)
01109 "pxor %%mm7, %%mm7 \n\t"
01110
      /* optional dithering: saturating add of the per-channel dither values
       * stored in the context */
01111 #ifdef DITHER1XBPP
01112 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01113 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01114 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01115 #endif
01116 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01117 "pop %%"REG_BP" \n\t"
01118 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01119 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01120 "a" (&c->redDither)
01121 );
01122 }
01123
/*
 * Asm fragment: two-row blend for packed-YUV output (no colour conversion).
 *
 * Prologue (executed once, before label 1):
 *   the qwords at CHR_MMX_FILTER_OFFSET+8(c) and LUM_MMX_FILTER_OFFSET+8(c)
 *   are shifted right by 3 and written back to the same locations, i.e. the
 *   coefficients stored in memory are modified in place; index is zeroed.
 *
 * Loop body (starting at label 1):
 *   chroma: %mm3 = (mem[%3 + index] >> 7)
 *                  + pmulhw(mem[%2 + index] - mem[%3 + index], chroma coeff)
 *           %mm4 = the same expression with index advanced by UV_OFFx2(c)
 *                  (presumably the second chroma plane stored at that byte
 *                  offset - TODO confirm); index is restored afterwards.
 *   luma:   %mm1 / %mm7 = (row %1 >> 7) + pmulhw(row %0 - row %1, luma coeff)
 *           for the four words at (index, 2) and the four words after them.
 *
 * %mm0, %mm2, %mm5 and %mm6 are overwritten as scratch. The fragment does
 * not contain the jump back to label 1.
 */
01124 #define REAL_YSCALEYUV2PACKED(index, c) \
01125 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
01126 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
01127 "psraw $3, %%mm0 \n\t"\
01128 "psraw $3, %%mm1 \n\t"\
01129 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
01130 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
01131 "xor "#index", "#index" \n\t"\
01132 ".p2align 4 \n\t"\
01133 "1: \n\t"\
01134 "movq (%2, "#index"), %%mm2 \n\t" \
01135 "movq (%3, "#index"), %%mm3 \n\t" \
01136 "add "UV_OFFx2"("#c"), "#index" \n\t" \
01137 "movq (%2, "#index"), %%mm5 \n\t" \
01138 "movq (%3, "#index"), %%mm4 \n\t" \
01139 "sub "UV_OFFx2"("#c"), "#index" \n\t" \
01140 "psubw %%mm3, %%mm2 \n\t" \
01141 "psubw %%mm4, %%mm5 \n\t" \
01142 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
01143 "pmulhw %%mm0, %%mm2 \n\t" \
01144 "pmulhw %%mm0, %%mm5 \n\t" \
01145 "psraw $7, %%mm3 \n\t" \
01146 "psraw $7, %%mm4 \n\t" \
01147 "paddw %%mm2, %%mm3 \n\t" \
01148 "paddw %%mm5, %%mm4 \n\t" \
01149 "movq (%0, "#index", 2), %%mm0 \n\t" \
01150 "movq (%1, "#index", 2), %%mm1 \n\t" \
01151 "movq 8(%0, "#index", 2), %%mm6 \n\t" \
01152 "movq 8(%1, "#index", 2), %%mm7 \n\t" \
01153 "psubw %%mm1, %%mm0 \n\t" \
01154 "psubw %%mm7, %%mm6 \n\t" \
01155 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
01156 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
01157 "psraw $7, %%mm1 \n\t" \
01158 "psraw $7, %%mm7 \n\t" \
01159 "paddw %%mm0, %%mm1 \n\t" \
01160 "paddw %%mm6, %%mm7 \n\t" \
01161
/* Forwarding wrapper so that macro arguments are expanded before the REAL_
 * variant stringifies them. */
01162 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
01163
/*
 * Blend two source rows (YSCALEYUV2PACKED) and emit them via WRITEYUY2.
 *
 * Operands: %0 = buf0, %1 = buf1, %2 = ubuf0, %3 = ubuf1, %4 = dest,
 * %5 = &c->redDither. vbuf0, vbuf1, abuf0, abuf1, dstW, yalpha, uvalpha and
 * y are not referenced in this body.
 *
 * Note: YSCALEYUV2PACKED rewrites the filter coefficients stored at
 * CHR_MMX_FILTER_OFFSET+8 / LUM_MMX_FILTER_OFFSET+8 in the context on every
 * call.
 */
01164 static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
01165 const uint16_t *buf1, const uint16_t *ubuf0,
01166 const uint16_t *ubuf1, const uint16_t *vbuf0,
01167 const uint16_t *vbuf1, const uint16_t *abuf0,
01168 const uint16_t *abuf1, uint8_t *dest,
01169 int dstW, int yalpha, int uvalpha, int y)
01170 {
01171
01172 __asm__ volatile(
      /* REG_b is saved in the context and reused for dest; REG_BP is pushed
       * and used as the loop index */
01173 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01174 "mov %4, %%"REG_b" \n\t"
01175 "push %%"REG_BP" \n\t"
01176 YSCALEYUV2PACKED(%%REGBP, %5)
01177 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01178 "pop %%"REG_BP" \n\t"
01179 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01180 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01181 "a" (&c->redDither)
01182 );
01183 }
01184
/*
 * Asm fragment: single-row YUV->RGB conversion, chroma taken from one row.
 *
 * index is zeroed, then label 1 starts the loop body:
 *   - %mm3 is loaded from (%2, index) and %mm4 from the same base with index
 *     advanced by UV_OFFx2(c); index is restored afterwards.
 *   - both are shifted right by 4, U_OFFSET(c) / V_OFFSET(c) are subtracted,
 *     copies are kept in %mm2 / %mm5 and %mm3 / %mm4 are multiplied by
 *     UG_COEFF(c) / VG_COEFF(c).
 *   - eight luma words are loaded from (%0, index, 2) into %mm1 / %mm7 and
 *     shifted right by 4.
 *   - the rest (UB/VR/Y coefficients, word duplication, saturating pack)
 *     is the same instruction sequence as REAL_YSCALEYUV2RGB_COEFF.
 *
 * Operands %1 and %3 are not read. On exit %mm2, %mm4 and %mm5 each hold
 * eight packed bytes. The fragment does not contain the jump back to
 * label 1.
 */
01185 #define REAL_YSCALEYUV2RGB1(index, c) \
01186 "xor "#index", "#index" \n\t"\
01187 ".p2align 4 \n\t"\
01188 "1: \n\t"\
01189 "movq (%2, "#index"), %%mm3 \n\t" \
01190 "add "UV_OFFx2"("#c"), "#index" \n\t" \
01191 "movq (%2, "#index"), %%mm4 \n\t" \
01192 "sub "UV_OFFx2"("#c"), "#index" \n\t" \
01193 "psraw $4, %%mm3 \n\t" \
01194 "psraw $4, %%mm4 \n\t" \
01195 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
01196 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
01197 "movq %%mm3, %%mm2 \n\t" \
01198 "movq %%mm4, %%mm5 \n\t" \
01199 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
01200 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
01201 \
01202 "movq (%0, "#index", 2), %%mm1 \n\t" \
01203 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01204 "psraw $4, %%mm1 \n\t" \
01205 "psraw $4, %%mm7 \n\t" \
01206 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
01207 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
01208 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
01209 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
01210 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
01211 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
01212 \
01213 "paddw %%mm3, %%mm4 \n\t"\
01214 "movq %%mm2, %%mm0 \n\t"\
01215 "movq %%mm5, %%mm6 \n\t"\
01216 "movq %%mm4, %%mm3 \n\t"\
01217 "punpcklwd %%mm2, %%mm2 \n\t"\
01218 "punpcklwd %%mm5, %%mm5 \n\t"\
01219 "punpcklwd %%mm4, %%mm4 \n\t"\
01220 "paddw %%mm1, %%mm2 \n\t"\
01221 "paddw %%mm1, %%mm5 \n\t"\
01222 "paddw %%mm1, %%mm4 \n\t"\
01223 "punpckhwd %%mm0, %%mm0 \n\t"\
01224 "punpckhwd %%mm6, %%mm6 \n\t"\
01225 "punpckhwd %%mm3, %%mm3 \n\t"\
01226 "paddw %%mm7, %%mm0 \n\t"\
01227 "paddw %%mm7, %%mm6 \n\t"\
01228 "paddw %%mm7, %%mm3 \n\t"\
01229 \
01230 "packuswb %%mm0, %%mm2 \n\t"\
01231 "packuswb %%mm6, %%mm5 \n\t"\
01232 "packuswb %%mm3, %%mm4 \n\t"\
01233
/* Forwarding wrapper so that macro arguments are expanded before the REAL_
 * variant stringifies them. */
01234 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
01235
01236
/*
 * Asm fragment: single-row YUV->RGB conversion, chroma summed from two rows.
 *
 * Differs from REAL_YSCALEYUV2RGB1 only in the chroma load: the words at
 * (%2, index) and (%3, index) are added and the sum is shifted right
 * logically by 5 (instead of one row shifted arithmetically by 4); the same
 * is done with index advanced by UV_OFFx2(c). Luma still comes from operand
 * %0 alone; operand %1 is not read.
 *
 * On exit %mm2, %mm4 and %mm5 each hold eight packed bytes. The fragment
 * does not contain the jump back to label 1.
 */
01237 #define REAL_YSCALEYUV2RGB1b(index, c) \
01238 "xor "#index", "#index" \n\t"\
01239 ".p2align 4 \n\t"\
01240 "1: \n\t"\
01241 "movq (%2, "#index"), %%mm2 \n\t" \
01242 "movq (%3, "#index"), %%mm3 \n\t" \
01243 "add "UV_OFFx2"("#c"), "#index" \n\t" \
01244 "movq (%2, "#index"), %%mm5 \n\t" \
01245 "movq (%3, "#index"), %%mm4 \n\t" \
01246 "sub "UV_OFFx2"("#c"), "#index" \n\t" \
01247 "paddw %%mm2, %%mm3 \n\t" \
01248 "paddw %%mm5, %%mm4 \n\t" \
01249 "psrlw $5, %%mm3 \n\t" \
01250 "psrlw $5, %%mm4 \n\t" \
01251 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
01252 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
01253 "movq %%mm3, %%mm2 \n\t" \
01254 "movq %%mm4, %%mm5 \n\t" \
01255 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
01256 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
01257 \
01258 "movq (%0, "#index", 2), %%mm1 \n\t" \
01259 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01260 "psraw $4, %%mm1 \n\t" \
01261 "psraw $4, %%mm7 \n\t" \
01262 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
01263 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
01264 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
01265 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
01266 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
01267 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
01268 \
01269 "paddw %%mm3, %%mm4 \n\t"\
01270 "movq %%mm2, %%mm0 \n\t"\
01271 "movq %%mm5, %%mm6 \n\t"\
01272 "movq %%mm4, %%mm3 \n\t"\
01273 "punpcklwd %%mm2, %%mm2 \n\t"\
01274 "punpcklwd %%mm5, %%mm5 \n\t"\
01275 "punpcklwd %%mm4, %%mm4 \n\t"\
01276 "paddw %%mm1, %%mm2 \n\t"\
01277 "paddw %%mm1, %%mm5 \n\t"\
01278 "paddw %%mm1, %%mm4 \n\t"\
01279 "punpckhwd %%mm0, %%mm0 \n\t"\
01280 "punpckhwd %%mm6, %%mm6 \n\t"\
01281 "punpckhwd %%mm3, %%mm3 \n\t"\
01282 "paddw %%mm7, %%mm0 \n\t"\
01283 "paddw %%mm7, %%mm6 \n\t"\
01284 "paddw %%mm7, %%mm3 \n\t"\
01285 \
01286 "packuswb %%mm0, %%mm2 \n\t"\
01287 "packuswb %%mm6, %%mm5 \n\t"\
01288 "packuswb %%mm3, %%mm4 \n\t"\
01289
/* Forwarding wrapper so that macro arguments are expanded before the REAL_
 * variant stringifies them. */
01290 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
01291
/*
 * Asm fragment: load eight 16-bit words from (%1, index, 2), shift each
 * right arithmetically by 7 and pack them with unsigned saturation into the
 * eight bytes of %mm7. %mm1 is overwritten as scratch. The callers in this
 * file bind operand %1 to abuf0 when they use this fragment.
 */
01292 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
01293 "movq (%1, "#index", 2), %%mm7 \n\t" \
01294 "movq 8(%1, "#index", 2), %%mm1 \n\t" \
01295 "psraw $7, %%mm7 \n\t" \
01296 "psraw $7, %%mm1 \n\t" \
01297 "packuswb %%mm1, %%mm7 \n\t"
/* Forwarding wrapper so that the macro argument is expanded before the REAL_
 * variant stringifies it. */
01298 #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
01299
/*
 * Convert a single luma row to packed 32-bit pixels through WRITEBGR32.
 *
 * Two independent choices select one of four asm statements:
 *   - uvalpha < 2048: YSCALEYUV2RGB1 (chroma read from ubuf0 only);
 *     otherwise YSCALEYUV2RGB1b (chroma is the sum of ubuf0 and ubuf1).
 *   - CONFIG_SWSCALE_ALPHA && c->alpPixBuf: alpha comes from abuf0 via
 *     YSCALEYUV2RGB1_ALPHA; otherwise %mm7 is filled with all ones.
 *
 * Operands: %0 = buf0, %1 = abuf0 (alpha variants) or buf1 (an alias of
 * buf0), %2 = ubuf0, %3 = ubuf1, %4 = dest, %5 = &c->redDither.
 * vbuf0, vbuf1, dstW, dstFormat, flags and y are not referenced here.
 */
01303 static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
01304 const uint16_t *ubuf0, const uint16_t *ubuf1,
01305 const uint16_t *vbuf0, const uint16_t *vbuf1,
01306 const uint16_t *abuf0, uint8_t *dest,
01307 int dstW, int uvalpha, enum PixelFormat dstFormat,
01308 int flags, int y)
01309 {
      /* only used to fill operand %1 in the non-alpha variants */
01310 const uint16_t *buf1= buf0;
01311
01312 if (uvalpha < 2048) {
01313 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
01314 __asm__ volatile(
      /* REG_b is saved in the context and reused for dest; REG_BP is pushed
       * and used as the loop index (same pattern in all four variants) */
01315 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01316 "mov %4, %%"REG_b" \n\t"
01317 "push %%"REG_BP" \n\t"
01318 YSCALEYUV2RGB1(%%REGBP, %5)
01319 YSCALEYUV2RGB1_ALPHA(%%REGBP)
01320 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01321 "pop %%"REG_BP" \n\t"
01322 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01323 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01324 "a" (&c->redDither)
01325 );
01326 } else {
01327 __asm__ volatile(
01328 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01329 "mov %4, %%"REG_b" \n\t"
01330 "push %%"REG_BP" \n\t"
01331 YSCALEYUV2RGB1(%%REGBP, %5)
      /* no alpha source: all ones in the alpha register */
01332 "pcmpeqd %%mm7, %%mm7 \n\t"
01333 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01334 "pop %%"REG_BP" \n\t"
01335 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01336 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01337 "a" (&c->redDither)
01338 );
01339 }
01340 } else {
01341 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
01342 __asm__ volatile(
01343 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01344 "mov %4, %%"REG_b" \n\t"
01345 "push %%"REG_BP" \n\t"
01346 YSCALEYUV2RGB1b(%%REGBP, %5)
01347 YSCALEYUV2RGB1_ALPHA(%%REGBP)
01348 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01349 "pop %%"REG_BP" \n\t"
01350 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01351 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01352 "a" (&c->redDither)
01353 );
01354 } else {
01355 __asm__ volatile(
01356 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01357 "mov %4, %%"REG_b" \n\t"
01358 "push %%"REG_BP" \n\t"
01359 YSCALEYUV2RGB1b(%%REGBP, %5)
      /* no alpha source: all ones in the alpha register */
01360 "pcmpeqd %%mm7, %%mm7 \n\t"
01361 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01362 "pop %%"REG_BP" \n\t"
01363 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01364 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01365 "a" (&c->redDither)
01366 );
01367 }
01368 }
01369 }
01370
/*
 * Convert a single luma row and emit pixels via WRITEBGR24.
 *
 * uvalpha < 2048 selects YSCALEYUV2RGB1 (chroma from ubuf0 only); otherwise
 * YSCALEYUV2RGB1b (chroma summed from ubuf0 and ubuf1) is used.
 *
 * Operands: %0 = buf0, %1 = buf1 (alias of buf0), %2 = ubuf0, %3 = ubuf1,
 * %4 = dest, %5 = &c->redDither. vbuf0, vbuf1, abuf0, dstW, dstFormat,
 * flags and y are not referenced in this body.
 */
01371 static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
01372 const uint16_t *ubuf0, const uint16_t *ubuf1,
01373 const uint16_t *vbuf0, const uint16_t *vbuf1,
01374 const uint16_t *abuf0, uint8_t *dest,
01375 int dstW, int uvalpha, enum PixelFormat dstFormat,
01376 int flags, int y)
01377 {
      /* only used to fill operand %1 */
01378 const uint16_t *buf1= buf0;
01379
01380 if (uvalpha < 2048) {
01381 __asm__ volatile(
      /* REG_b is saved in the context and reused for dest; REG_BP is pushed
       * and used as the loop index */
01382 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01383 "mov %4, %%"REG_b" \n\t"
01384 "push %%"REG_BP" \n\t"
01385 YSCALEYUV2RGB1(%%REGBP, %5)
01386 "pxor %%mm7, %%mm7 \n\t"
01387 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01388 "pop %%"REG_BP" \n\t"
01389 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01390 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01391 "a" (&c->redDither)
01392 );
01393 } else {
01394 __asm__ volatile(
01395 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01396 "mov %4, %%"REG_b" \n\t"
01397 "push %%"REG_BP" \n\t"
01398 YSCALEYUV2RGB1b(%%REGBP, %5)
01399 "pxor %%mm7, %%mm7 \n\t"
01400 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01401 "pop %%"REG_BP" \n\t"
01402 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01403 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01404 "a" (&c->redDither)
01405 );
01406 }
01407 }
01408
/*
 * Convert a single luma row and emit pixels via WRITERGB15.
 *
 * uvalpha < 2048 selects YSCALEYUV2RGB1 (chroma from ubuf0 only); otherwise
 * YSCALEYUV2RGB1b (chroma summed from ubuf0 and ubuf1) is used. When
 * DITHER1XBPP is defined, the per-channel dither values stored in the
 * context are added (saturating) before the write.
 *
 * Operands: %0 = buf0, %1 = buf1 (alias of buf0), %2 = ubuf0, %3 = ubuf1,
 * %4 = dest, %5 = &c->redDither. vbuf0, vbuf1, abuf0, dstW, dstFormat,
 * flags and y are not referenced in this body.
 */
01409 static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
01410 const uint16_t *ubuf0, const uint16_t *ubuf1,
01411 const uint16_t *vbuf0, const uint16_t *vbuf1,
01412 const uint16_t *abuf0, uint8_t *dest,
01413 int dstW, int uvalpha, enum PixelFormat dstFormat,
01414 int flags, int y)
01415 {
      /* only used to fill operand %1 */
01416 const uint16_t *buf1= buf0;
01417
01418 if (uvalpha < 2048) {
01419 __asm__ volatile(
      /* REG_b is saved in the context and reused for dest; REG_BP is pushed
       * and used as the loop index */
01420 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01421 "mov %4, %%"REG_b" \n\t"
01422 "push %%"REG_BP" \n\t"
01423 YSCALEYUV2RGB1(%%REGBP, %5)
01424 "pxor %%mm7, %%mm7 \n\t"
01425
01426 #ifdef DITHER1XBPP
01427 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01428 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01429 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01430 #endif
01431 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01432 "pop %%"REG_BP" \n\t"
01433 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01434 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01435 "a" (&c->redDither)
01436 );
01437 } else {
01438 __asm__ volatile(
01439 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01440 "mov %4, %%"REG_b" \n\t"
01441 "push %%"REG_BP" \n\t"
01442 YSCALEYUV2RGB1b(%%REGBP, %5)
01443 "pxor %%mm7, %%mm7 \n\t"
01444
01445 #ifdef DITHER1XBPP
01446 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01447 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01448 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01449 #endif
01450 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01451 "pop %%"REG_BP" \n\t"
01452 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01453 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01454 "a" (&c->redDither)
01455 );
01456 }
01457 }
01458
/*
 * Convert a single luma row and emit pixels via WRITERGB16.
 *
 * uvalpha < 2048 selects YSCALEYUV2RGB1 (chroma from ubuf0 only); otherwise
 * YSCALEYUV2RGB1b (chroma summed from ubuf0 and ubuf1) is used. When
 * DITHER1XBPP is defined, the per-channel dither values stored in the
 * context are added (saturating) before the write.
 *
 * Operands: %0 = buf0, %1 = buf1 (alias of buf0), %2 = ubuf0, %3 = ubuf1,
 * %4 = dest, %5 = &c->redDither. vbuf0, vbuf1, abuf0, dstW, dstFormat,
 * flags and y are not referenced in this body.
 */
01459 static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
01460 const uint16_t *ubuf0, const uint16_t *ubuf1,
01461 const uint16_t *vbuf0, const uint16_t *vbuf1,
01462 const uint16_t *abuf0, uint8_t *dest,
01463 int dstW, int uvalpha, enum PixelFormat dstFormat,
01464 int flags, int y)
01465 {
      /* only used to fill operand %1 */
01466 const uint16_t *buf1= buf0;
01467
01468 if (uvalpha < 2048) {
01469 __asm__ volatile(
      /* REG_b is saved in the context and reused for dest; REG_BP is pushed
       * and used as the loop index */
01470 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01471 "mov %4, %%"REG_b" \n\t"
01472 "push %%"REG_BP" \n\t"
01473 YSCALEYUV2RGB1(%%REGBP, %5)
01474 "pxor %%mm7, %%mm7 \n\t"
01475
01476 #ifdef DITHER1XBPP
01477 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01478 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01479 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01480 #endif
01481 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01482 "pop %%"REG_BP" \n\t"
01483 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01484 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01485 "a" (&c->redDither)
01486 );
01487 } else {
01488 __asm__ volatile(
01489 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01490 "mov %4, %%"REG_b" \n\t"
01491 "push %%"REG_BP" \n\t"
01492 YSCALEYUV2RGB1b(%%REGBP, %5)
01493 "pxor %%mm7, %%mm7 \n\t"
01494
01495 #ifdef DITHER1XBPP
01496 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01497 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01498 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01499 #endif
01500 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01501 "pop %%"REG_BP" \n\t"
01502 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01503 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01504 "a" (&c->redDither)
01505 );
01506 }
01507 }
01508
/*
 * Asm fragment: single-row load for packed-YUV output, chroma from one row.
 *
 * index is zeroed, then label 1 starts the loop body:
 *   %mm3 = words at (%2, index) >> 7
 *   %mm4 = words at (%2, index + UV_OFFx2(c)) >> 7  (index is restored)
 *   %mm1 / %mm7 = the eight words at (%0, index, 2) >> 7
 * All shifts are arithmetic. Operands %1 and %3 are not read. The fragment
 * does not contain the jump back to label 1.
 */
01509 #define REAL_YSCALEYUV2PACKED1(index, c) \
01510 "xor "#index", "#index" \n\t"\
01511 ".p2align 4 \n\t"\
01512 "1: \n\t"\
01513 "movq (%2, "#index"), %%mm3 \n\t" \
01514 "add "UV_OFFx2"("#c"), "#index" \n\t" \
01515 "movq (%2, "#index"), %%mm4 \n\t" \
01516 "sub "UV_OFFx2"("#c"), "#index" \n\t" \
01517 "psraw $7, %%mm3 \n\t" \
01518 "psraw $7, %%mm4 \n\t" \
01519 "movq (%0, "#index", 2), %%mm1 \n\t" \
01520 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01521 "psraw $7, %%mm1 \n\t" \
01522 "psraw $7, %%mm7 \n\t" \
01523
/* Forwarding wrapper so that macro arguments are expanded before the REAL_
 * variant stringifies them. */
01524 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
01525
/*
 * Asm fragment: single-row load for packed-YUV output, chroma summed from
 * two rows.
 *
 * index is zeroed, then label 1 starts the loop body:
 *   %mm3 = (words at (%2, index) + words at (%3, index)) >> 8   (logical)
 *   %mm4 = the same with index advanced by UV_OFFx2(c); index is restored
 *   %mm1 / %mm7 = the eight words at (%0, index, 2) >> 7        (arithmetic)
 * %mm2 and %mm5 are overwritten as scratch; operand %1 is not read. The
 * fragment does not contain the jump back to label 1.
 */
01526 #define REAL_YSCALEYUV2PACKED1b(index, c) \
01527 "xor "#index", "#index" \n\t"\
01528 ".p2align 4 \n\t"\
01529 "1: \n\t"\
01530 "movq (%2, "#index"), %%mm2 \n\t" \
01531 "movq (%3, "#index"), %%mm3 \n\t" \
01532 "add "UV_OFFx2"("#c"), "#index" \n\t" \
01533 "movq (%2, "#index"), %%mm5 \n\t" \
01534 "movq (%3, "#index"), %%mm4 \n\t" \
01535 "sub "UV_OFFx2"("#c"), "#index" \n\t" \
01536 "paddw %%mm2, %%mm3 \n\t" \
01537 "paddw %%mm5, %%mm4 \n\t" \
01538 "psrlw $8, %%mm3 \n\t" \
01539 "psrlw $8, %%mm4 \n\t" \
01540 "movq (%0, "#index", 2), %%mm1 \n\t" \
01541 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01542 "psraw $7, %%mm1 \n\t" \
01543 "psraw $7, %%mm7 \n\t"
/* Forwarding wrapper so that macro arguments are expanded before the REAL_
 * variant stringifies them. */
01544 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
01545
/*
 * Convert a single luma row and emit it via WRITEYUY2.
 *
 * uvalpha < 2048 selects YSCALEYUV2PACKED1 (chroma from ubuf0 only);
 * otherwise YSCALEYUV2PACKED1b (chroma summed from ubuf0 and ubuf1).
 *
 * Operands: %0 = buf0, %1 = buf1 (alias of buf0), %2 = ubuf0, %3 = ubuf1,
 * %4 = dest, %5 = &c->redDither. vbuf0, vbuf1, abuf0, dstW, dstFormat,
 * flags and y are not referenced in this body.
 */
01546 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
01547 const uint16_t *ubuf0, const uint16_t *ubuf1,
01548 const uint16_t *vbuf0, const uint16_t *vbuf1,
01549 const uint16_t *abuf0, uint8_t *dest,
01550 int dstW, int uvalpha, enum PixelFormat dstFormat,
01551 int flags, int y)
01552 {
      /* only used to fill operand %1 */
01553 const uint16_t *buf1= buf0;
01554
01555 if (uvalpha < 2048) {
01556 __asm__ volatile(
      /* REG_b is saved in the context and reused for dest; REG_BP is pushed
       * and used as the loop index */
01557 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01558 "mov %4, %%"REG_b" \n\t"
01559 "push %%"REG_BP" \n\t"
01560 YSCALEYUV2PACKED1(%%REGBP, %5)
01561 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01562 "pop %%"REG_BP" \n\t"
01563 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01564 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01565 "a" (&c->redDither)
01566 );
01567 } else {
01568 __asm__ volatile(
01569 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01570 "mov %4, %%"REG_b" \n\t"
01571 "push %%"REG_BP" \n\t"
01572 YSCALEYUV2PACKED1b(%%REGBP, %5)
01573 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01574 "pop %%"REG_BP" \n\t"
01575 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01576 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01577 "a" (&c->redDither)
01578 );
01579 }
01580 }
01581
01582 #if !COMPILE_TEMPLATE_MMX2
01583
01584
/*
 * Copy one byte out of every byte pair of src into dst.
 *
 * Each 16-bit word of src is ANDed with bm01010101 (presumably 0x00FF in
 * every word, i.e. the lower-addressed byte of each pair is kept - confirm
 * against the constant's definition) and the words are packed to bytes.
 *
 * Loop layout: the pointers are pre-advanced to the end of the data
 * (src + width*2, dst + width) and REG_a counts from -width up in steps of
 * 8; the loop repeats while REG_a is negative. One iteration therefore
 * reads 16 source bytes and writes 8 destination bytes, and the body always
 * runs at least once.
 * NOTE(review): when width is not a multiple of 8 the last iteration
 * accesses bytes past src + width*2 / dst + width - confirm the callers
 * provide padded buffers.
 *
 * The parameter `unused` is not referenced.
 */
01585 static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src,
01586 int width, uint32_t *unused)
01587 {
01588 __asm__ volatile(
01589 "movq "MANGLE(bm01010101)", %%mm2 \n\t"
01590 "mov %0, %%"REG_a" \n\t"
01591 "1: \n\t"
01592 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
01593 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
01594 "pand %%mm2, %%mm0 \n\t"
01595 "pand %%mm2, %%mm1 \n\t"
01596 "packuswb %%mm1, %%mm0 \n\t"
01597 "movq %%mm0, (%2, %%"REG_a") \n\t"
01598 "add $8, %%"REG_a" \n\t"
01599 " js 1b \n\t"
01600 : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
01601 : "%"REG_a
01602 );
01603 }
01604
/*
 * Split the higher-addressed byte of every byte pair of src1 into two
 * planes.
 *
 * Per iteration 16 source bytes are read (index scaled by 4). psrlw $8
 * keeps the high byte of each 16-bit word and packuswb gathers those eight
 * bytes. Of these, the bytes selected by bm01010101 (presumably the
 * even-positioned ones - confirm against the constant) go to dstU and the
 * ones selected by a further psrlw $8 go to dstV, four bytes each.
 *
 * REG_a counts from -width up in steps of 4 and the loop repeats while it
 * is negative; the pointers are pre-advanced by width (dst) and width*4
 * (src). The body always runs at least once.
 *
 * src2 is only used by the assert; `unused` is not referenced.
 * NOTE(review): the assert is evaluated after the asm has already run.
 */
01605 static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV,
01606 const uint8_t *src1, const uint8_t *src2,
01607 int width, uint32_t *unused)
01608 {
01609 __asm__ volatile(
01610 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
01611 "mov %0, %%"REG_a" \n\t"
01612 "1: \n\t"
01613 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
01614 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
01615 "psrlw $8, %%mm0 \n\t"
01616 "psrlw $8, %%mm1 \n\t"
01617 "packuswb %%mm1, %%mm0 \n\t"
01618 "movq %%mm0, %%mm1 \n\t"
01619 "psrlw $8, %%mm0 \n\t"
01620 "pand %%mm4, %%mm1 \n\t"
01621 "packuswb %%mm0, %%mm0 \n\t"
01622 "packuswb %%mm1, %%mm1 \n\t"
      /* %3 is dstV + width, %2 is dstU + width */
01623 "movd %%mm0, (%3, %%"REG_a") \n\t"
01624 "movd %%mm1, (%2, %%"REG_a") \n\t"
01625 "add $4, %%"REG_a" \n\t"
01626 " js 1b \n\t"
01627 : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
01628 : "%"REG_a
01629 );
01630 assert(src1 == src2);
01631 }
01632
/*
 * Reduce two planes of 16-bit samples to 8 bits by keeping the high byte of
 * every 16-bit word (psrlw $8 followed by packuswb): src1 -> dstU and
 * src2 -> dstV.
 *
 * REG_a counts from -width up in steps of 8 and the loop repeats while it
 * is negative; the pointers are pre-advanced by width (dst) and width*2
 * (src). One iteration reads 16 bytes from each source and writes 8 bytes
 * to each destination; the body always runs at least once.
 *
 * The parameter `unused` is not referenced.
 */
01633 static void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV,
01634 const uint8_t *src1, const uint8_t *src2,
01635 int width, uint32_t *unused)
01636 {
01637 __asm__ volatile(
01638 "mov %0, %%"REG_a" \n\t"
01639 "1: \n\t"
01640 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
01641 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
01642 "movq (%2, %%"REG_a",2), %%mm2 \n\t"
01643 "movq 8(%2, %%"REG_a",2), %%mm3 \n\t"
01644 "psrlw $8, %%mm0 \n\t"
01645 "psrlw $8, %%mm1 \n\t"
01646 "psrlw $8, %%mm2 \n\t"
01647 "psrlw $8, %%mm3 \n\t"
01648 "packuswb %%mm1, %%mm0 \n\t"
01649 "packuswb %%mm3, %%mm2 \n\t"
01650 "movq %%mm0, (%3, %%"REG_a") \n\t"
01651 "movq %%mm2, (%4, %%"REG_a") \n\t"
01652 "add $8, %%"REG_a" \n\t"
01653 " js 1b \n\t"
01654 : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
01655 : "%"REG_a
01656 );
01657 }
01658
01659
01660
/*
 * Copy the higher-addressed byte of every byte pair of src into dst
 * (psrlw $8 keeps the high byte of each 16-bit word, packuswb gathers them).
 *
 * REG_a counts from -width up in steps of 8 and the loop repeats while it
 * is negative; the pointers are pre-advanced to src + width*2 and
 * dst + width. One iteration reads 16 bytes and writes 8; the body always
 * runs at least once.
 *
 * The parameter `unused` is not referenced.
 */
01661 static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src,
01662 int width, uint32_t *unused)
01663 {
01664 __asm__ volatile(
01665 "mov %0, %%"REG_a" \n\t"
01666 "1: \n\t"
01667 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
01668 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
01669 "psrlw $8, %%mm0 \n\t"
01670 "psrlw $8, %%mm1 \n\t"
01671 "packuswb %%mm1, %%mm0 \n\t"
01672 "movq %%mm0, (%2, %%"REG_a") \n\t"
01673 "add $8, %%"REG_a" \n\t"
01674 " js 1b \n\t"
01675 : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
01676 : "%"REG_a
01677 );
01678 }
01679
/*
 * Split one byte of every byte pair of src1 into two planes.
 *
 * Same structure as yuy2ToUV, except that the first step masks each 16-bit
 * word with bm01010101 (presumably keeping the lower-addressed byte of each
 * pair - confirm against the constant) instead of shifting right by 8.
 * The eight gathered bytes are then split alternately: the masked ones are
 * written to dstU and the shifted ones to dstV, four bytes each per
 * iteration.
 *
 * REG_a counts from -width up in steps of 4 and the loop repeats while it
 * is negative; the body always runs at least once.
 *
 * src2 is only used by the assert; `unused` is not referenced.
 * NOTE(review): the assert is evaluated after the asm has already run.
 */
01680 static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV,
01681 const uint8_t *src1, const uint8_t *src2,
01682 int width, uint32_t *unused)
01683 {
01684 __asm__ volatile(
01685 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
01686 "mov %0, %%"REG_a" \n\t"
01687 "1: \n\t"
01688 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
01689 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
01690 "pand %%mm4, %%mm0 \n\t"
01691 "pand %%mm4, %%mm1 \n\t"
01692 "packuswb %%mm1, %%mm0 \n\t"
01693 "movq %%mm0, %%mm1 \n\t"
01694 "psrlw $8, %%mm0 \n\t"
01695 "pand %%mm4, %%mm1 \n\t"
01696 "packuswb %%mm0, %%mm0 \n\t"
01697 "packuswb %%mm1, %%mm1 \n\t"
      /* %3 is dstV + width, %2 is dstU + width */
01698 "movd %%mm0, (%3, %%"REG_a") \n\t"
01699 "movd %%mm1, (%2, %%"REG_a") \n\t"
01700 "add $4, %%"REG_a" \n\t"
01701 " js 1b \n\t"
01702 : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
01703 : "%"REG_a
01704 );
01705 assert(src1 == src2);
01706 }
01707
/*
 * Reduce two planes of 16-bit samples to 8 bits by masking every 16-bit
 * word with bm01010101 and packing to bytes: src1 -> dstU, src2 -> dstV.
 * With a mask of 0x00FF per word (presumed - confirm against the constant's
 * definition) this keeps the lower-addressed byte of each pair, i.e. the
 * counterpart of LEToUV, which keeps the other byte.
 *
 * REG_a counts from -width up in steps of 8 and the loop repeats while it
 * is negative; the pointers are pre-advanced by width (dst) and width*2
 * (src). The body always runs at least once.
 *
 * The parameter `unused` is not referenced.
 */
01708 static void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV,
01709 const uint8_t *src1, const uint8_t *src2,
01710 int width, uint32_t *unused)
01711 {
01712 __asm__ volatile(
01713 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
01714 "mov %0, %%"REG_a" \n\t"
01715 "1: \n\t"
01716 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
01717 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
01718 "movq (%2, %%"REG_a",2), %%mm2 \n\t"
01719 "movq 8(%2, %%"REG_a",2), %%mm3 \n\t"
01720 "pand %%mm4, %%mm0 \n\t"
01721 "pand %%mm4, %%mm1 \n\t"
01722 "pand %%mm4, %%mm2 \n\t"
01723 "pand %%mm4, %%mm3 \n\t"
01724 "packuswb %%mm1, %%mm0 \n\t"
01725 "packuswb %%mm3, %%mm2 \n\t"
01726 "movq %%mm0, (%3, %%"REG_a") \n\t"
01727 "movq %%mm2, (%4, %%"REG_a") \n\t"
01728 "add $8, %%"REG_a" \n\t"
01729 " js 1b \n\t"
01730 : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
01731 : "%"REG_a
01732 );
01733 }
01734
/*
 * De-interleave a byte-pair stream into two planes.
 *
 * For every 16-bit word of src, the byte selected by bm01010101 (presumably
 * the lower-addressed one - confirm against the constant) is written to
 * dst1 and the high byte (psrlw $8) is written to dst2.
 *
 * REG_a counts from -width up in steps of 8 and the loop repeats while it
 * is negative; the pointers are pre-advanced to src + width*2 and
 * dst + width. One iteration reads 16 source bytes and writes 8 bytes to
 * each destination; the body always runs at least once.
 */
01735 static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
01736 const uint8_t *src, int width)
01737 {
01738 __asm__ volatile(
01739 "movq "MANGLE(bm01010101)", %%mm4 \n\t"
01740 "mov %0, %%"REG_a" \n\t"
01741 "1: \n\t"
01742 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
01743 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
01744 "movq %%mm0, %%mm2 \n\t"
01745 "movq %%mm1, %%mm3 \n\t"
01746 "pand %%mm4, %%mm0 \n\t"
01747 "pand %%mm4, %%mm1 \n\t"
01748 "psrlw $8, %%mm2 \n\t"
01749 "psrlw $8, %%mm3 \n\t"
01750 "packuswb %%mm1, %%mm0 \n\t"
01751 "packuswb %%mm3, %%mm2 \n\t"
01752 "movq %%mm0, (%2, %%"REG_a") \n\t"
01753 "movq %%mm2, (%3, %%"REG_a") \n\t"
01754 "add $8, %%"REG_a" \n\t"
01755 " js 1b \n\t"
01756 : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width)
01757 : "%"REG_a
01758 );
01759 }
01760
/*
 * De-interleave src1 with nvXXtoUV, passing dstU as the first destination
 * and dstV as the second. src2 and unused are not referenced.
 */
01761 static void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
01762 const uint8_t *src1, const uint8_t *src2,
01763 int width, uint32_t *unused)
01764 {
01765 RENAME(nvXXtoUV)(dstU, dstV, src1, width);
01766 }
01767
/*
 * De-interleave src1 with nvXXtoUV with the destinations swapped relative
 * to nv12ToUV: dstV is the first destination and dstU the second.
 * src2 and unused are not referenced.
 */
01768 static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
01769 const uint8_t *src1, const uint8_t *src2,
01770 int width, uint32_t *unused)
01771 {
01772 RENAME(nvXXtoUV)(dstV, dstU, src1, width);
01773 }
01774 #endif
01775
/*
 * Convert packed 3-byte pixels to 16-bit luma samples.
 *
 * srcFormat == PIX_FMT_BGR24 selects the ff_bgr24toY1Coeff/Y2Coeff pair;
 * any other value selects ff_rgb24toY1Coeff/Y2Coeff.
 *
 * The coefficients are loaded into %mm5/%mm6 by a separate asm statement
 * that declares no outputs or clobbers; the main loop relies on those
 * registers still holding the values when it starts.
 *
 * Main loop: each iteration consumes 12 source bytes (src is advanced by
 * 12) and stores four int16_t results (8 bytes). Four overlapping dword
 * loads at offsets 0, 2, 6 and 8 are widened to words against the zeroed
 * %mm7, multiplied and pair-wise added with pmaddwd, summed, offset by
 * ff_bgr24toYOffset, shifted right by 9 and packed with signed saturation.
 * REG_a is a byte offset into dst that runs from -2*width up in steps of 8
 * while negative (dst is pre-advanced by width elements). The body always
 * runs at least once.
 */
01776 static av_always_inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src,
01777 int width, enum PixelFormat srcFormat)
01778 {
01779
01780 if(srcFormat == PIX_FMT_BGR24) {
01781 __asm__ volatile(
01782 "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t"
01783 "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t"
01784 :
01785 );
01786 } else {
01787 __asm__ volatile(
01788 "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t"
01789 "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t"
01790 :
01791 );
01792 }
01793
01794 __asm__ volatile(
01795 "movq "MANGLE(ff_bgr24toYOffset)", %%mm4 \n\t"
01796 "mov %2, %%"REG_a" \n\t"
01797 "pxor %%mm7, %%mm7 \n\t"
01798 "1: \n\t"
      /* PREFETCH expands to a prefetch instruction or to an assembler
       * comment depending on COMPILE_TEMPLATE_MMX2 */
01799 PREFETCH" 64(%0) \n\t"
01800 "movd (%0), %%mm0 \n\t"
01801 "movd 2(%0), %%mm1 \n\t"
01802 "movd 6(%0), %%mm2 \n\t"
01803 "movd 8(%0), %%mm3 \n\t"
01804 "add $12, %0 \n\t"
01805 "punpcklbw %%mm7, %%mm0 \n\t"
01806 "punpcklbw %%mm7, %%mm1 \n\t"
01807 "punpcklbw %%mm7, %%mm2 \n\t"
01808 "punpcklbw %%mm7, %%mm3 \n\t"
01809 "pmaddwd %%mm5, %%mm0 \n\t"
01810 "pmaddwd %%mm6, %%mm1 \n\t"
01811 "pmaddwd %%mm5, %%mm2 \n\t"
01812 "pmaddwd %%mm6, %%mm3 \n\t"
01813 "paddd %%mm1, %%mm0 \n\t"
01814 "paddd %%mm3, %%mm2 \n\t"
01815 "paddd %%mm4, %%mm0 \n\t"
01816 "paddd %%mm4, %%mm2 \n\t"
01817 "psrad $9, %%mm0 \n\t"
01818 "psrad $9, %%mm2 \n\t"
01819 "packssdw %%mm2, %%mm0 \n\t"
01820 "movq %%mm0, (%1, %%"REG_a") \n\t"
01821 "add $8, %%"REG_a" \n\t"
01822 " js 1b \n\t"
01823 : "+r" (src)
01824 : "r" (dst+width), "g" ((x86_reg)-2*width)
01825 : "%"REG_a
01826 );
01827 }
01828
/* Luma conversion entry point: forwards to bgr24ToY_mmx with
 * PIX_FMT_BGR24. The parameter `unused` is not referenced. */
01829 static void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src,
01830 int width, uint32_t *unused)
01831 {
01832 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
01833 }
01834
/* Luma conversion entry point: forwards to bgr24ToY_mmx with
 * PIX_FMT_RGB24, which makes it use the rgb24 coefficient pair.
 * The parameter `unused` is not referenced. */
01835 static void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src,
01836 int width, uint32_t *unused)
01837 {
01838 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
01839 }
01840
/*
 * Convert packed 3-byte pixels to 16-bit U and V samples.
 *
 * Operand %4 points at ff_bgr24toUV[srcFormat == PIX_FMT_RGB24], i.e. table
 * entry 1 for PIX_FMT_RGB24 and entry 0 otherwise. Four qwords are read
 * from it: offsets 0 and 8 feed the values stored to dstU, offsets 16 and
 * 24 (the latter cached in %mm6) feed the values stored to dstV.
 *
 * Each iteration consumes 12 source bytes (src is advanced by 12) and
 * stores four int16_t values to each destination. The pmaddwd sums are
 * offset by ff_bgr24toUVOffset, shifted right by 9 and packed with signed
 * saturation. REG_a is a byte offset that runs from -2*width up in steps
 * of 8 while negative (dstU/dstV are pre-advanced by width elements). The
 * body always runs at least once.
 */
01841 static av_always_inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV,
01842 const uint8_t *src, int width,
01843 enum PixelFormat srcFormat)
01844 {
01845 __asm__ volatile(
01846 "movq 24(%4), %%mm6 \n\t"
01847 "mov %3, %%"REG_a" \n\t"
01848 "pxor %%mm7, %%mm7 \n\t"
01849 "1: \n\t"
01850 PREFETCH" 64(%0) \n\t"
      /* first half: bytes loaded at offsets 0 and 2 */
01851 "movd (%0), %%mm0 \n\t"
01852 "movd 2(%0), %%mm1 \n\t"
01853 "punpcklbw %%mm7, %%mm0 \n\t"
01854 "punpcklbw %%mm7, %%mm1 \n\t"
01855 "movq %%mm0, %%mm2 \n\t"
01856 "movq %%mm1, %%mm3 \n\t"
01857 "pmaddwd (%4), %%mm0 \n\t"
01858 "pmaddwd 8(%4), %%mm1 \n\t"
01859 "pmaddwd 16(%4), %%mm2 \n\t"
01860 "pmaddwd %%mm6, %%mm3 \n\t"
01861 "paddd %%mm1, %%mm0 \n\t"
01862 "paddd %%mm3, %%mm2 \n\t"
01863
      /* second half: bytes loaded at offsets 6 and 8 */
01864 "movd 6(%0), %%mm1 \n\t"
01865 "movd 8(%0), %%mm3 \n\t"
01866 "add $12, %0 \n\t"
01867 "punpcklbw %%mm7, %%mm1 \n\t"
01868 "punpcklbw %%mm7, %%mm3 \n\t"
01869 "movq %%mm1, %%mm4 \n\t"
01870 "movq %%mm3, %%mm5 \n\t"
01871 "pmaddwd (%4), %%mm1 \n\t"
01872 "pmaddwd 8(%4), %%mm3 \n\t"
01873 "pmaddwd 16(%4), %%mm4 \n\t"
01874 "pmaddwd %%mm6, %%mm5 \n\t"
01875 "paddd %%mm3, %%mm1 \n\t"
01876 "paddd %%mm5, %%mm4 \n\t"
01877
      /* add offset, scale down, pack and store: %mm0 -> dstU, %mm2 -> dstV */
01878 "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3 \n\t"
01879 "paddd %%mm3, %%mm0 \n\t"
01880 "paddd %%mm3, %%mm2 \n\t"
01881 "paddd %%mm3, %%mm1 \n\t"
01882 "paddd %%mm3, %%mm4 \n\t"
01883 "psrad $9, %%mm0 \n\t"
01884 "psrad $9, %%mm2 \n\t"
01885 "psrad $9, %%mm1 \n\t"
01886 "psrad $9, %%mm4 \n\t"
01887 "packssdw %%mm1, %%mm0 \n\t"
01888 "packssdw %%mm4, %%mm2 \n\t"
01889 "movq %%mm0, (%1, %%"REG_a") \n\t"
01890 "movq %%mm2, (%2, %%"REG_a") \n\t"
01891 "add $8, %%"REG_a" \n\t"
01892 " js 1b \n\t"
01893 : "+r" (src)
01894 : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-2*width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
01895 : "%"REG_a
01896 );
01897 }
01898
/*
 * Chroma conversion entry point: forwards src1 to bgr24ToUV_mmx with
 * PIX_FMT_BGR24. src2 is only used by the assert; `unused` is not
 * referenced.
 * NOTE(review): the assert is evaluated after the conversion has run,
 * whereas rgb24ToUV checks it first.
 */
01899 static void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV,
01900 const uint8_t *src1, const uint8_t *src2,
01901 int width, uint32_t *unused)
01902 {
01903 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
01904 assert(src1 == src2);
01905 }
01906
/*
 * Chroma conversion entry point: asserts src1 == src2, then forwards src1
 * to bgr24ToUV_mmx with PIX_FMT_RGB24 (selecting the second coefficient
 * table entry). `unused` is not referenced.
 */
01907 static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV,
01908 const uint8_t *src1, const uint8_t *src2,
01909 int width, uint32_t *unused)
01910 {
01911 assert(src1==src2);
01912 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
01913 }
01914
01915 #if !COMPILE_TEMPLATE_MMX2
01916
/**
 * Horizontally filter one line of 8-bit samples into 16-bit output (MMX).
 *
 * For every output index i the asm accumulates
 *     sum over j of src[filterPos[i] + j] * filter[filterSize * i + j]
 * as a 32-bit value, shifts it right arithmetically by 7 and stores it with
 * signed saturation to 16 bits (packssdw). All three code paths produce two
 * output samples per loop iteration.
 *
 * @param dst        output line
 * @param dstW       number of output samples
 * @param src        input line of bytes
 * @param srcW       not referenced by this implementation
 * @param xInc       not referenced by this implementation
 * @param filter     coefficients, filterSize consecutive entries per output
 * @param filterPos  per-output start offset into src (read as unsigned 16-bit)
 * @param filterSize taps per output; asserted to be a positive multiple of 4
 *
 * NOTE(review): because outputs are written in pairs, an odd dstW appears to
 * make the last iteration read/write one element past dstW -- presumably the
 * buffers are padded; confirm against the callers.
 * NOTE(review): the asm stores through dst but declares no "memory" clobber.
 */
01917 static void RENAME(hScale)(int16_t *dst, int dstW,
01918 const uint8_t *src, int srcW,
01919 int xInc, const int16_t *filter,
01920 const int16_t *filterPos, int filterSize)
01921 {
01922 assert(filterSize % 4 == 0 && filterSize>0);
/* Specialised path: exactly 4 taps per output sample. */
01923 if (filterSize==4) {
/* counter is a negative byte offset that runs up to zero; filter, filterPos
 * and dst are advanced to their ends so that (pointer + counter) addresses
 * the current element. */
01924 x86_reg counter= -2*dstW;
01925 filter-= counter*2;
01926 filterPos-= counter/2;
01927 dst-= counter/2;
01928 __asm__ volatile(
/* Under PIC, REG_b is saved/restored by hand instead of being listed as a clobber. */
01929 #if defined(PIC)
01930 "push %%"REG_b" \n\t"
01931 #endif
/* mm7 = 0, used to zero-extend source bytes to words. */
01932 "pxor %%mm7, %%mm7 \n\t"
/* REG_BP takes over the loop index so REG_a is free for a source offset. */
01933 "push %%"REG_BP" \n\t"
01934 "mov %%"REG_a", %%"REG_BP" \n\t"
01935 ".p2align 4 \n\t"
01936 "1: \n\t"
/* eax/ebx = source offsets of the two output samples handled this iteration. */
01937 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
01938 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
/* mm1/mm3 = the two 4-coefficient filter rows. */
01939 "movq (%1, %%"REG_BP", 4), %%mm1 \n\t"
01940 "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t"
/* mm0/mm2 = 4 source bytes each, widened to words below. */
01941 "movd (%3, %%"REG_a"), %%mm0 \n\t"
01942 "movd (%3, %%"REG_b"), %%mm2 \n\t"
01943 "punpcklbw %%mm7, %%mm0 \n\t"
01944 "punpcklbw %%mm7, %%mm2 \n\t"
01945 "pmaddwd %%mm1, %%mm0 \n\t"
01946 "pmaddwd %%mm2, %%mm3 \n\t"
/* Horizontal add: mm0 ends up holding the two 32-bit sums. */
01947 "movq %%mm0, %%mm4 \n\t"
01948 "punpckldq %%mm3, %%mm0 \n\t"
01949 "punpckhdq %%mm3, %%mm4 \n\t"
01950 "paddd %%mm4, %%mm0 \n\t"
01951 "psrad $7, %%mm0 \n\t"
01952 "packssdw %%mm0, %%mm0 \n\t"
/* Store two 16-bit results. */
01953 "movd %%mm0, (%4, %%"REG_BP") \n\t"
/* The add sets carry once the negative index wraps past zero, ending the loop. */
01954 "add $4, %%"REG_BP" \n\t"
01955 " jnc 1b \n\t"
01956
01957 "pop %%"REG_BP" \n\t"
01958 #if defined(PIC)
01959 "pop %%"REG_b" \n\t"
01960 #endif
01961 : "+a" (counter)
01962 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
01963 #if !defined(PIC)
01964 : "%"REG_b
01965 #endif
01966 );
/* Specialised path: exactly 8 taps per output sample (two 4-tap halves per row). */
01967 } else if (filterSize==8) {
01968 x86_reg counter= -2*dstW;
01969 filter-= counter*4;
01970 filterPos-= counter/2;
01971 dst-= counter/2;
01972 __asm__ volatile(
01973 #if defined(PIC)
01974 "push %%"REG_b" \n\t"
01975 #endif
01976 "pxor %%mm7, %%mm7 \n\t"
01977 "push %%"REG_BP" \n\t"
01978 "mov %%"REG_a", %%"REG_BP" \n\t"
01979 ".p2align 4 \n\t"
01980 "1: \n\t"
01981 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
01982 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
/* First half: taps 0..3 of both rows (rows are 16 bytes apart). */
01983 "movq (%1, %%"REG_BP", 8), %%mm1 \n\t"
01984 "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t"
01985 "movd (%3, %%"REG_a"), %%mm0 \n\t"
01986 "movd (%3, %%"REG_b"), %%mm2 \n\t"
01987 "punpcklbw %%mm7, %%mm0 \n\t"
01988 "punpcklbw %%mm7, %%mm2 \n\t"
01989 "pmaddwd %%mm1, %%mm0 \n\t"
01990 "pmaddwd %%mm2, %%mm3 \n\t"
01991
/* Second half: taps 4..7 of both rows, accumulated onto the first half. */
01992 "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t"
01993 "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t"
01994 "movd 4(%3, %%"REG_a"), %%mm4 \n\t"
01995 "movd 4(%3, %%"REG_b"), %%mm2 \n\t"
01996 "punpcklbw %%mm7, %%mm4 \n\t"
01997 "punpcklbw %%mm7, %%mm2 \n\t"
01998 "pmaddwd %%mm1, %%mm4 \n\t"
01999 "pmaddwd %%mm2, %%mm5 \n\t"
02000 "paddd %%mm4, %%mm0 \n\t"
02001 "paddd %%mm5, %%mm3 \n\t"
02002 "movq %%mm0, %%mm4 \n\t"
02003 "punpckldq %%mm3, %%mm0 \n\t"
02004 "punpckhdq %%mm3, %%mm4 \n\t"
02005 "paddd %%mm4, %%mm0 \n\t"
02006 "psrad $7, %%mm0 \n\t"
02007 "packssdw %%mm0, %%mm0 \n\t"
02008 "movd %%mm0, (%4, %%"REG_BP") \n\t"
02009 "add $4, %%"REG_BP" \n\t"
02010 " jnc 1b \n\t"
02011
02012 "pop %%"REG_BP" \n\t"
02013 #if defined(PIC)
02014 "pop %%"REG_b" \n\t"
02015 #endif
02016 : "+a" (counter)
02017 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
02018 #if !defined(PIC)
02019 : "%"REG_b
02020 #endif
02021 );
/* Generic path: any other multiple of 4 taps, handled 4 taps at a time. */
02022 } else {
/* offset marks the end of the inner loop: REG_c walks from src to src+filterSize. */
02023 const uint8_t *offset = src+filterSize;
02024 x86_reg counter= -2*dstW;
02025
/* Only filterPos and dst are biased here; filter is walked as a plain pointer (%1). */
02026 filterPos-= counter/2;
02027 dst-= counter/2;
02028 __asm__ volatile(
02029 "pxor %%mm7, %%mm7 \n\t"
02030 ".p2align 4 \n\t"
/* Outer loop: one pair of output samples per iteration. */
02031 "1: \n\t"
02032 "mov %2, %%"REG_c" \n\t"
02033 "movzwl (%%"REG_c", %0), %%eax \n\t"
02034 "movzwl 2(%%"REG_c", %0), %%edx \n\t"
02035 "mov %5, %%"REG_c" \n\t"
/* mm4/mm5 accumulate the partial sums of the first/second output sample. */
02036 "pxor %%mm4, %%mm4 \n\t"
02037 "pxor %%mm5, %%mm5 \n\t"
/* Inner loop: 4 taps per iteration; (%1, %6) is the same tap group in the next filter row. */
02038 "2: \n\t"
02039 "movq (%1), %%mm1 \n\t"
02040 "movq (%1, %6), %%mm3 \n\t"
02041 "movd (%%"REG_c", %%"REG_a"), %%mm0 \n\t"
02042 "movd (%%"REG_c", %%"REG_d"), %%mm2 \n\t"
02043 "punpcklbw %%mm7, %%mm0 \n\t"
02044 "punpcklbw %%mm7, %%mm2 \n\t"
02045 "pmaddwd %%mm1, %%mm0 \n\t"
02046 "pmaddwd %%mm2, %%mm3 \n\t"
02047 "paddd %%mm3, %%mm5 \n\t"
02048 "paddd %%mm0, %%mm4 \n\t"
02049 "add $8, %1 \n\t"
02050 "add $4, %%"REG_c" \n\t"
02051 "cmp %4, %%"REG_c" \n\t"
02052 " jb 2b \n\t"
/* Skip the second filter row, which was consumed through the (%1, %6) loads. */
02053 "add %6, %1 \n\t"
02054 "movq %%mm4, %%mm0 \n\t"
02055 "punpckldq %%mm5, %%mm4 \n\t"
02056 "punpckhdq %%mm5, %%mm0 \n\t"
02057 "paddd %%mm0, %%mm4 \n\t"
02058 "psrad $7, %%mm4 \n\t"
02059 "packssdw %%mm4, %%mm4 \n\t"
02060 "mov %3, %%"REG_a" \n\t"
02061 "movd %%mm4, (%%"REG_a", %0) \n\t"
02062 "add $4, %0 \n\t"
02063 " jnc 1b \n\t"
02064
02065 : "+r" (counter), "+r" (filter)
02066 : "m" (filterPos), "m" (dst), "m"(offset),
02067 "m" (src), "r" ((x86_reg)filterSize*2)
02068 : "%"REG_a, "%"REG_c, "%"REG_d
02069 );
02070 }
02071 }
02072 #endif
02073
/**
 * Horizontally filter one line of 16-bit samples into 16-bit output.
 *
 * Same structure as the 8-bit hScale routine, with two differences visible
 * here: source samples are loaded as 16-bit words (no byte unpacking) and
 * the right shift applied to each 32-bit sum is the run-time value shift
 * instead of a constant. The MMX paths are used only when shift < 15; they
 * saturate the result to signed 16 bits with packssdw. Otherwise a plain C
 * loop is used that clamps the shifted sum to at most (1<<15)-1.
 *
 * @param dst        output line
 * @param dstW       number of output samples
 * @param src        input line of 16-bit samples
 * @param srcW       not referenced by this implementation
 * @param xInc       not referenced by this implementation
 * @param filter     coefficients, filterSize consecutive entries per output
 * @param filterPos  per-output start index into src
 * @param filterSize taps per output; asserted to be a positive multiple of 4
 * @param shift      right shift applied to each accumulated sum
 *
 * NOTE(review): pmaddwd multiplies signed words, so the asm paths treat
 * samples >= 0x8000 as negative -- presumably shift < 15 is only passed for
 * sources whose samples fit in 15 bits; confirm against the callers.
 * NOTE(review): the asm paths write output samples in pairs and declare no
 * "memory" clobber although they store through dst.
 */
02074 static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
02075 const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
02076 {
/* i and j are only used by the C fallback at the end. */
02077 int i, j;
02078
02079 assert(filterSize % 4 == 0 && filterSize>0);
/* 4-tap MMX path. */
02080 if (filterSize==4 && shift<15) {
/* Negative byte counter running up to zero; pointers are biased to their ends. */
02081 x86_reg counter= -2*dstW;
02082 filter-= counter*2;
02083 filterPos-= counter/2;
02084 dst-= counter/2;
02085 __asm__ volatile(
/* mm7 = shift count for psrad. */
02086 "movd %5, %%mm7 \n\t"
02087 #if defined(PIC)
02088 "push %%"REG_b" \n\t"
02089 #endif
02090 "push %%"REG_BP" \n\t"
02091 "mov %%"REG_a", %%"REG_BP" \n\t"
02092 ".p2align 4 \n\t"
02093 "1: \n\t"
02094 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
02095 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
02096 "movq (%1, %%"REG_BP", 4), %%mm1 \n\t"
02097 "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t"
/* Four 16-bit source samples per load; the index is scaled by 2 bytes. */
02098 "movq (%3, %%"REG_a", 2), %%mm0 \n\t"
02099 "movq (%3, %%"REG_b", 2), %%mm2 \n\t"
02100 "pmaddwd %%mm1, %%mm0 \n\t"
02101 "pmaddwd %%mm2, %%mm3 \n\t"
02102 "movq %%mm0, %%mm4 \n\t"
02103 "punpckldq %%mm3, %%mm0 \n\t"
02104 "punpckhdq %%mm3, %%mm4 \n\t"
02105 "paddd %%mm4, %%mm0 \n\t"
02106 "psrad %%mm7, %%mm0 \n\t"
02107 "packssdw %%mm0, %%mm0 \n\t"
02108 "movd %%mm0, (%4, %%"REG_BP") \n\t"
02109 "add $4, %%"REG_BP" \n\t"
02110 " jnc 1b \n\t"
02111
02112 "pop %%"REG_BP" \n\t"
02113 #if defined(PIC)
02114 "pop %%"REG_b" \n\t"
02115 #endif
02116 : "+a" (counter)
02117 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift)
02118 #if !defined(PIC)
02119 : "%"REG_b
02120 #endif
02121 );
/* 8-tap MMX path: two 4-tap halves per filter row. */
02122 } else if (filterSize==8 && shift<15) {
02123 x86_reg counter= -2*dstW;
02124 filter-= counter*4;
02125 filterPos-= counter/2;
02126 dst-= counter/2;
02127 __asm__ volatile(
02128 "movd %5, %%mm7 \n\t"
02129 #if defined(PIC)
02130 "push %%"REG_b" \n\t"
02131 #endif
02132 "push %%"REG_BP" \n\t"
02133 "mov %%"REG_a", %%"REG_BP" \n\t"
02134 ".p2align 4 \n\t"
02135 "1: \n\t"
02136 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
02137 "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
02138 "movq (%1, %%"REG_BP", 8), %%mm1 \n\t"
02139 "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t"
02140 "movq (%3, %%"REG_a", 2), %%mm0 \n\t"
02141 "movq (%3, %%"REG_b", 2), %%mm2 \n\t"
02142 "pmaddwd %%mm1, %%mm0 \n\t"
02143 "pmaddwd %%mm2, %%mm3 \n\t"
02144
02145 "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t"
02146 "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t"
02147 "movq 8(%3, %%"REG_a", 2), %%mm4 \n\t"
02148 "movq 8(%3, %%"REG_b", 2), %%mm2 \n\t"
02149 "pmaddwd %%mm1, %%mm4 \n\t"
02150 "pmaddwd %%mm2, %%mm5 \n\t"
02151 "paddd %%mm4, %%mm0 \n\t"
02152 "paddd %%mm5, %%mm3 \n\t"
02153 "movq %%mm0, %%mm4 \n\t"
02154 "punpckldq %%mm3, %%mm0 \n\t"
02155 "punpckhdq %%mm3, %%mm4 \n\t"
02156 "paddd %%mm4, %%mm0 \n\t"
02157 "psrad %%mm7, %%mm0 \n\t"
02158 "packssdw %%mm0, %%mm0 \n\t"
02159 "movd %%mm0, (%4, %%"REG_BP") \n\t"
02160 "add $4, %%"REG_BP" \n\t"
02161 " jnc 1b \n\t"
02162
02163 "pop %%"REG_BP" \n\t"
02164 #if defined(PIC)
02165 "pop %%"REG_b" \n\t"
02166 #endif
02167 : "+a" (counter)
02168 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift)
02169 #if !defined(PIC)
02170 : "%"REG_b
02171 #endif
02172 );
/* Generic MMX path for any other tap count, 4 taps per inner iteration. */
02173 } else if (shift<15){
/* End marker for the inner loop: REG_c walks from src up to src+filterSize samples. */
02174 const uint16_t *offset = src+filterSize;
02175 x86_reg counter= -2*dstW;
02176
02177 filterPos-= counter/2;
02178 dst-= counter/2;
02179 __asm__ volatile(
02180 "movd %7, %%mm7 \n\t"
02181 ".p2align 4 \n\t"
02182 "1: \n\t"
02183 "mov %2, %%"REG_c" \n\t"
02184 "movzwl (%%"REG_c", %0), %%eax \n\t"
02185 "movzwl 2(%%"REG_c", %0), %%edx \n\t"
02186 "mov %5, %%"REG_c" \n\t"
02187 "pxor %%mm4, %%mm4 \n\t"
02188 "pxor %%mm5, %%mm5 \n\t"
02189 "2: \n\t"
02190 "movq (%1), %%mm1 \n\t"
02191 "movq (%1, %6), %%mm3 \n\t"
02192 "movq (%%"REG_c", %%"REG_a", 2), %%mm0 \n\t"
02193 "movq (%%"REG_c", %%"REG_d", 2), %%mm2 \n\t"
02194 "pmaddwd %%mm1, %%mm0 \n\t"
02195 "pmaddwd %%mm2, %%mm3 \n\t"
02196 "paddd %%mm3, %%mm5 \n\t"
02197 "paddd %%mm0, %%mm4 \n\t"
/* Advance by 4 coefficients and 4 source samples (8 bytes each). */
02198 "add $8, %1 \n\t"
02199 "add $8, %%"REG_c" \n\t"
02200 "cmp %4, %%"REG_c" \n\t"
02201 " jb 2b \n\t"
/* Skip the second filter row, already consumed via (%1, %6). */
02202 "add %6, %1 \n\t"
02203 "movq %%mm4, %%mm0 \n\t"
02204 "punpckldq %%mm5, %%mm4 \n\t"
02205 "punpckhdq %%mm5, %%mm0 \n\t"
02206 "paddd %%mm0, %%mm4 \n\t"
02207 "psrad %%mm7, %%mm4 \n\t"
02208 "packssdw %%mm4, %%mm4 \n\t"
02209 "mov %3, %%"REG_a" \n\t"
02210 "movd %%mm4, (%%"REG_a", %0) \n\t"
02211 "add $4, %0 \n\t"
02212 " jnc 1b \n\t"
02213
02214 : "+r" (counter), "+r" (filter)
02215 : "m" (filterPos), "m" (dst), "m"(offset),
02216 "m" (src), "r" ((x86_reg)filterSize*2), "m"(shift)
02217 : "%"REG_a, "%"REG_c, "%"REG_d
02218 );
/* C fallback for shift >= 15: plain dot product, clamped from above only. */
02219 } else
02220 for (i=0; i<dstW; i++) {
02221 int srcPos= filterPos[i];
02222 int val=0;
02223 for (j=0; j<filterSize; j++) {
02224 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
02225 }
02226 dst[i] = FFMIN(val>>shift, (1<<15)-1);
02227 }
02228 }
02229
02230
02231 #if COMPILE_TEMPLATE_MMX2
/**
 * Fast horizontal luma scaling that runs code stored in the context (MMX2).
 *
 * The asm loads src, dst, c->hLumFilter and c->hLumFilterPos into registers
 * and then calls the code pointed to by c->lumMmx2FilterCode eight times via
 * CALL_MMX2_FILTER_CODE, advancing the source and destination registers
 * after every call. Afterwards a C loop overwrites the trailing output
 * samples whose source position (i*xInc)>>16 is at or beyond srcW-1 with
 * the last source sample multiplied by 128.
 *
 * @param c        scaler context supplying filter tables and the code pointer
 * @param dst      output line
 * @param dstWidth number of output samples
 * @param src      input line of bytes
 * @param srcW     number of input samples
 * @param xInc     horizontal step; used here as 16.16 fixed point in the
 *                 trailing fix-up loop
 *
 * NOTE(review): ebxsave/retsave are passed as input ("m") operands although
 * the asm stores to them, and no "memory" clobber is declared even though
 * the called code presumably writes dst -- confirm this is intentional.
 */
02232 static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
02233 int dstWidth, const uint8_t *src,
02234 int srcW, int xInc)
02235 {
02236 int16_t *filterPos = c->hLumFilterPos;
02237 int16_t *filter = c->hLumFilter;
02238 void *mmx2FilterCode= c->lumMmx2FilterCode;
02239 int i;
/* Scratch slot for REG_b, which cannot be listed as a clobber under PIC. */
02240 #if defined(PIC)
02241 uint64_t ebxsave;
02242 #endif
/* Scratch slot for the 8 bytes at -8(%rsp), which each `call` overwrites
 * with its return address. */
02243 #if ARCH_X86_64
02244 uint64_t retsave;
02245 #endif
02246
02247 __asm__ volatile(
/* Prologue: save REG_b (PIC) and the stack slot below rsp (x86-64). */
02248 #if defined(PIC)
02249 "mov %%"REG_b", %5 \n\t"
02250 #if ARCH_X86_64
02251 "mov -8(%%rsp), %%"REG_a" \n\t"
02252 "mov %%"REG_a", %6 \n\t"
02253 #endif
02254 #else
02255 #if ARCH_X86_64
02256 "mov -8(%%rsp), %%"REG_a" \n\t"
02257 "mov %%"REG_a", %5 \n\t"
02258 #endif
02259 #endif
/* Register set-up: REG_c = src, REG_D = dst, REG_d = filter,
 * REG_b = filterPos, REG_a = 0, mm7 = 0. */
02260 "pxor %%mm7, %%mm7 \n\t"
02261 "mov %0, %%"REG_c" \n\t"
02262 "mov %1, %%"REG_D" \n\t"
02263 "mov %2, %%"REG_d" \n\t"
02264 "mov %3, %%"REG_b" \n\t"
02265 "xor %%"REG_a", %%"REG_a" \n\t"
02266 PREFETCH" (%%"REG_c") \n\t"
02267 PREFETCH" 32(%%"REG_c") \n\t"
02268 PREFETCH" 64(%%"REG_c") \n\t"
02269
/* CALL_MMX2_FILTER_CODE: load esi from (REG_b), call the code at %4, then
 * add the 32-bit value at (REG_b + REG_a) to REG_c, add REG_a to REG_D and
 * clear REG_a. The macro stays defined after this function. */
02270 #if ARCH_X86_64
02271 #define CALL_MMX2_FILTER_CODE \
02272 "movl (%%"REG_b"), %%esi \n\t"\
02273 "call *%4 \n\t"\
02274 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
02275 "add %%"REG_S", %%"REG_c" \n\t"\
02276 "add %%"REG_a", %%"REG_D" \n\t"\
02277 "xor %%"REG_a", %%"REG_a" \n\t"\
02278
02279 #else
02280 #define CALL_MMX2_FILTER_CODE \
02281 "movl (%%"REG_b"), %%esi \n\t"\
02282 "call *%4 \n\t"\
02283 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
02284 "add %%"REG_a", %%"REG_D" \n\t"\
02285 "xor %%"REG_a", %%"REG_a" \n\t"\
02286
02287 #endif
02288
02289 CALL_MMX2_FILTER_CODE
02290 CALL_MMX2_FILTER_CODE
02291 CALL_MMX2_FILTER_CODE
02292 CALL_MMX2_FILTER_CODE
02293 CALL_MMX2_FILTER_CODE
02294 CALL_MMX2_FILTER_CODE
02295 CALL_MMX2_FILTER_CODE
02296 CALL_MMX2_FILTER_CODE
02297
/* Epilogue: restore what the prologue saved. */
02298 #if defined(PIC)
02299 "mov %5, %%"REG_b" \n\t"
02300 #if ARCH_X86_64
02301 "mov %6, %%"REG_a" \n\t"
02302 "mov %%"REG_a", -8(%%rsp) \n\t"
02303 #endif
02304 #else
02305 #if ARCH_X86_64
02306 "mov %5, %%"REG_a" \n\t"
02307 "mov %%"REG_a", -8(%%rsp) \n\t"
02308 #endif
02309 #endif
02310 :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
02311 "m" (mmx2FilterCode)
02312 #if defined(PIC)
02313 ,"m" (ebxsave)
02314 #endif
02315 #if ARCH_X86_64
02316 ,"m"(retsave)
02317 #endif
02318 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
02319 #if !defined(PIC)
02320 ,"%"REG_b
02321 #endif
02322 );
02323
/* Right-edge fix-up: outputs mapping at or past the last source sample get
 * that sample scaled by 128. */
02324 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
02325 dst[i] = src[srcW-1]*128;
02326 }
02327
/**
 * Fast horizontal chroma scaling that runs code stored in the context (MMX2).
 *
 * Works like the luma variant but handles two planes in one asm statement:
 * the code pointed to by c->chrMmx2FilterCode is called four times for
 * src1 -> dst1, then REG_a is cleared, REG_c/REG_D are reloaded with
 * src2/dst2 and the code is called four more times. REG_d (filter) and
 * REG_b (filterPos) are loaded once and not reloaded between the planes.
 * CALL_MMX2_FILTER_CODE is the macro defined inside RENAME(hyscale_fast)
 * earlier in this file. A trailing C loop overwrites the output samples
 * whose source position (i*xInc)>>16 is at or beyond srcW-1 with the last
 * source sample of the respective plane multiplied by 128.
 *
 * @param c        scaler context supplying filter tables and the code pointer
 * @param dst1     output line for the first plane
 * @param dst2     output line for the second plane
 * @param dstWidth number of output samples per plane
 * @param src1     input line of the first plane
 * @param src2     input line of the second plane
 * @param srcW     number of input samples per plane
 * @param xInc     horizontal step; used here as 16.16 fixed point in the
 *                 trailing fix-up loop
 *
 * NOTE(review): ebxsave/retsave are passed as input ("m") operands although
 * the asm stores to them, and no "memory" clobber is declared -- confirm.
 */
02328 static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
02329 int dstWidth, const uint8_t *src1,
02330 const uint8_t *src2, int srcW, int xInc)
02331 {
02332 int16_t *filterPos = c->hChrFilterPos;
02333 int16_t *filter = c->hChrFilter;
02334 void *mmx2FilterCode= c->chrMmx2FilterCode;
02335 int i;
/* Scratch slot for REG_b, which cannot be listed as a clobber under PIC. */
02336 #if defined(PIC)
02337 DECLARE_ALIGNED(8, uint64_t, ebxsave);
02338 #endif
/* Scratch slot for the 8 bytes at -8(%rsp), overwritten by each `call`. */
02339 #if ARCH_X86_64
02340 DECLARE_ALIGNED(8, uint64_t, retsave);
02341 #endif
02342
02343 __asm__ volatile(
/* Prologue: save REG_b (PIC) and the stack slot below rsp (x86-64). */
02344 #if defined(PIC)
02345 "mov %%"REG_b", %7 \n\t"
02346 #if ARCH_X86_64
02347 "mov -8(%%rsp), %%"REG_a" \n\t"
02348 "mov %%"REG_a", %8 \n\t"
02349 #endif
02350 #else
02351 #if ARCH_X86_64
02352 "mov -8(%%rsp), %%"REG_a" \n\t"
02353 "mov %%"REG_a", %7 \n\t"
02354 #endif
02355 #endif
/* First plane: REG_c = src1, REG_D = dst1, REG_d = filter, REG_b = filterPos. */
02356 "pxor %%mm7, %%mm7 \n\t"
02357 "mov %0, %%"REG_c" \n\t"
02358 "mov %1, %%"REG_D" \n\t"
02359 "mov %2, %%"REG_d" \n\t"
02360 "mov %3, %%"REG_b" \n\t"
02361 "xor %%"REG_a", %%"REG_a" \n\t"
02362 PREFETCH" (%%"REG_c") \n\t"
02363 PREFETCH" 32(%%"REG_c") \n\t"
02364 PREFETCH" 64(%%"REG_c") \n\t"
02365
02366 CALL_MMX2_FILTER_CODE
02367 CALL_MMX2_FILTER_CODE
02368 CALL_MMX2_FILTER_CODE
02369 CALL_MMX2_FILTER_CODE
/* Second plane: restart with REG_c = src2 and REG_D = dst2. */
02370 "xor %%"REG_a", %%"REG_a" \n\t"
02371 "mov %5, %%"REG_c" \n\t"
02372 "mov %6, %%"REG_D" \n\t"
02373 PREFETCH" (%%"REG_c") \n\t"
02374 PREFETCH" 32(%%"REG_c") \n\t"
02375 PREFETCH" 64(%%"REG_c") \n\t"
02376
02377 CALL_MMX2_FILTER_CODE
02378 CALL_MMX2_FILTER_CODE
02379 CALL_MMX2_FILTER_CODE
02380 CALL_MMX2_FILTER_CODE
02381
/* Epilogue: restore what the prologue saved. */
02382 #if defined(PIC)
02383 "mov %7, %%"REG_b" \n\t"
02384 #if ARCH_X86_64
02385 "mov %8, %%"REG_a" \n\t"
02386 "mov %%"REG_a", -8(%%rsp) \n\t"
02387 #endif
02388 #else
02389 #if ARCH_X86_64
02390 "mov %7, %%"REG_a" \n\t"
02391 "mov %%"REG_a", -8(%%rsp) \n\t"
02392 #endif
02393 #endif
02394 :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
02395 "m" (mmx2FilterCode), "m" (src2), "m"(dst2)
02396 #if defined(PIC)
02397 ,"m" (ebxsave)
02398 #endif
02399 #if ARCH_X86_64
02400 ,"m"(retsave)
02401 #endif
02402 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
02403 #if !defined(PIC)
02404 ,"%"REG_b
02405 #endif
02406 );
02407
/* Right-edge fix-up for both planes. */
02408 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
02409 dst1[i] = src1[srcW-1]*128;
02410 dst2[i] = src2[srcW-1]*128;
02411 }
02412 }
02413 #endif
02414
02415 static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
02416 {
02417 enum PixelFormat srcFormat = c->srcFormat,
02418 dstFormat = c->dstFormat;
02419
02420 if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12
02421 && dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
02422 if (c->flags & SWS_ACCURATE_RND) {
02423 c->yuv2yuv1 = RENAME(yuv2yuv1_ar );
02424 c->yuv2yuvX = RENAME(yuv2yuvX_ar );
02425 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
02426 switch (c->dstFormat) {
02427 case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
02428 case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
02429 case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
02430 case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
02431 case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
02432 default: break;
02433 }
02434 }
02435 } else {
02436 int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
02437 c->yuv2yuv1 = should_dither ? RENAME(yuv2yuv1_ar ) : RENAME(yuv2yuv1 );
02438 c->yuv2yuvX = RENAME(yuv2yuvX );
02439 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
02440 switch (c->dstFormat) {
02441 case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
02442 case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
02443 case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
02444 case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
02445 case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
02446 default: break;
02447 }
02448 }
02449 }
02450 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
02451 switch (c->dstFormat) {
02452 case PIX_FMT_RGB32:
02453 c->yuv2packed1 = RENAME(yuv2rgb32_1);
02454 c->yuv2packed2 = RENAME(yuv2rgb32_2);
02455 break;
02456 case PIX_FMT_BGR24:
02457 c->yuv2packed1 = RENAME(yuv2bgr24_1);
02458 c->yuv2packed2 = RENAME(yuv2bgr24_2);
02459 break;
02460 case PIX_FMT_RGB555:
02461 c->yuv2packed1 = RENAME(yuv2rgb555_1);
02462 c->yuv2packed2 = RENAME(yuv2rgb555_2);
02463 break;
02464 case PIX_FMT_RGB565:
02465 c->yuv2packed1 = RENAME(yuv2rgb565_1);
02466 c->yuv2packed2 = RENAME(yuv2rgb565_2);
02467 break;
02468 case PIX_FMT_YUYV422:
02469 c->yuv2packed1 = RENAME(yuv2yuyv422_1);
02470 c->yuv2packed2 = RENAME(yuv2yuyv422_2);
02471 break;
02472 default:
02473 break;
02474 }
02475 }
02476 }
02477
02478 #if !COMPILE_TEMPLATE_MMX2
02479 c->hScale = RENAME(hScale );
02480 #endif
02481
02482
02483 #if COMPILE_TEMPLATE_MMX2
02484 if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
02485 {
02486 c->hyscale_fast = RENAME(hyscale_fast);
02487 c->hcscale_fast = RENAME(hcscale_fast);
02488 } else {
02489 #endif
02490 c->hyscale_fast = NULL;
02491 c->hcscale_fast = NULL;
02492 #if COMPILE_TEMPLATE_MMX2
02493 }
02494 #endif
02495
02496 #if !COMPILE_TEMPLATE_MMX2
02497 switch(srcFormat) {
02498 case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break;
02499 case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break;
02500 case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break;
02501 case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break;
02502 case PIX_FMT_GRAY16LE :
02503 case PIX_FMT_YUV420P9LE:
02504 case PIX_FMT_YUV422P10LE:
02505 case PIX_FMT_YUV420P10LE:
02506 case PIX_FMT_YUV420P16LE:
02507 case PIX_FMT_YUV422P16LE:
02508 case PIX_FMT_YUV444P16LE: c->hScale16= RENAME(hScale16); break;
02509 }
02510 #endif
02511 if (!c->chrSrcHSubSample) {
02512 switch(srcFormat) {
02513 case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break;
02514 case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV); break;
02515 default: break;
02516 }
02517 }
02518
02519 switch (srcFormat) {
02520 #if !COMPILE_TEMPLATE_MMX2
02521 case PIX_FMT_YUYV422 :
02522 case PIX_FMT_Y400A :
02523 c->lumToYV12 = RENAME(yuy2ToY); break;
02524 case PIX_FMT_UYVY422 :
02525 c->lumToYV12 = RENAME(uyvyToY); break;
02526 #endif
02527 case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break;
02528 case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break;
02529 default: break;
02530 }
02531 #if !COMPILE_TEMPLATE_MMX2
02532 if (c->alpPixBuf) {
02533 switch (srcFormat) {
02534 case PIX_FMT_Y400A : c->alpToYV12 = RENAME(yuy2ToY); break;
02535 default: break;
02536 }
02537 }
02538 #endif
02539 if(isAnyRGB(c->srcFormat))
02540 c->hScale16= RENAME(hScale16);
02541 }