00027 #include <stddef.h>
00028
00029 #undef PREFETCH
00030 #undef MOVNTQ
00031 #undef EMMS
00032 #undef SFENCE
00033 #undef PAVGB
00034
00035 #if COMPILE_TEMPLATE_AMD3DNOW
00036 #define PREFETCH "prefetch"
00037 #define PAVGB "pavgusb"
00038 #elif COMPILE_TEMPLATE_MMX2
00039 #define PREFETCH "prefetchnta"
00040 #define PAVGB "pavgb"
00041 #else
00042 #define PREFETCH " # nop"
00043 #endif
00044
00045 #if COMPILE_TEMPLATE_AMD3DNOW
00046
00047 #define EMMS "femms"
00048 #else
00049 #define EMMS "emms"
00050 #endif
00051
00052 #if COMPILE_TEMPLATE_MMX2
00053 #define MOVNTQ "movntq"
00054 #define SFENCE "sfence"
00055 #else
00056 #define MOVNTQ "movq"
00057 #define SFENCE " # nop"
00058 #endif
00059
00060 #if !COMPILE_TEMPLATE_SSE2
00061
00062 #if !COMPILE_TEMPLATE_AMD3DNOW
00063
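// Expand packed 24-bit pixels to 32-bit: the MMX loop converts 8 pixels
// (24 source bytes) per iteration and ORs in mask32a to fill the fourth
// byte; the scalar tail copies 3 bytes and stores 255 as the fourth byte.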
00064 static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size)
00065 {
00066 uint8_t *dest = dst;
00067 const uint8_t *s = src;
00068 const uint8_t *end;
00069 const uint8_t *mm_end;
00070 end = s + src_size;
00071 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
00072 mm_end = end - 23;
00073 __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
00074 while (s < mm_end) {
00075 __asm__ volatile(
00076 PREFETCH" 32%1 \n\t"
00077 "movd %1, %%mm0 \n\t"
00078 "punpckldq 3%1, %%mm0 \n\t"
00079 "movd 6%1, %%mm1 \n\t"
00080 "punpckldq 9%1, %%mm1 \n\t"
00081 "movd 12%1, %%mm2 \n\t"
00082 "punpckldq 15%1, %%mm2 \n\t"
00083 "movd 18%1, %%mm3 \n\t"
00084 "punpckldq 21%1, %%mm3 \n\t"
00085 "por %%mm7, %%mm0 \n\t"
00086 "por %%mm7, %%mm1 \n\t"
00087 "por %%mm7, %%mm2 \n\t"
00088 "por %%mm7, %%mm3 \n\t"
00089 MOVNTQ" %%mm0, %0 \n\t"
00090 MOVNTQ" %%mm1, 8%0 \n\t"
00091 MOVNTQ" %%mm2, 16%0 \n\t"
00092 MOVNTQ" %%mm3, 24%0"
00093 :"=m"(*dest)
00094 :"m"(*s)
00095 :"memory");
00096 dest += 32;
00097 s += 24;
00098 }
00099 __asm__ volatile(SFENCE:::"memory");
00100 __asm__ volatile(EMMS:::"memory");
00101 while (s < end) {
00102 *dest++ = *s++;
00103 *dest++ = *s++;
00104 *dest++ = *s++;
00105 *dest++ = 255;
00106 }
00107 }
00108
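// Repack eight 32-bit pixels (held duplicated in %%mm0..%%mm7) into 24
// output bytes, dropping every fourth byte, and store three quadwords
// with MOVNTQ.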
00109 #define STORE_BGR24_MMX \
00110 "psrlq $8, %%mm2 \n\t" \
00111 "psrlq $8, %%mm3 \n\t" \
00112 "psrlq $8, %%mm6 \n\t" \
00113 "psrlq $8, %%mm7 \n\t" \
00114 "pand "MANGLE(mask24l)", %%mm0\n\t" \
00115 "pand "MANGLE(mask24l)", %%mm1\n\t" \
00116 "pand "MANGLE(mask24l)", %%mm4\n\t" \
00117 "pand "MANGLE(mask24l)", %%mm5\n\t" \
00118 "pand "MANGLE(mask24h)", %%mm2\n\t" \
00119 "pand "MANGLE(mask24h)", %%mm3\n\t" \
00120 "pand "MANGLE(mask24h)", %%mm6\n\t" \
00121 "pand "MANGLE(mask24h)", %%mm7\n\t" \
00122 "por %%mm2, %%mm0 \n\t" \
00123 "por %%mm3, %%mm1 \n\t" \
00124 "por %%mm6, %%mm4 \n\t" \
00125 "por %%mm7, %%mm5 \n\t" \
00126 \
00127 "movq %%mm1, %%mm2 \n\t" \
00128 "movq %%mm4, %%mm3 \n\t" \
00129 "psllq $48, %%mm2 \n\t" \
00130 "psllq $32, %%mm3 \n\t" \
00131 "pand "MANGLE(mask24hh)", %%mm2\n\t" \
00132 "pand "MANGLE(mask24hhh)", %%mm3\n\t" \
00133 "por %%mm2, %%mm0 \n\t" \
00134 "psrlq $16, %%mm1 \n\t" \
00135 "psrlq $32, %%mm4 \n\t" \
00136 "psllq $16, %%mm5 \n\t" \
00137 "por %%mm3, %%mm1 \n\t" \
00138 "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
00139 "por %%mm5, %%mm4 \n\t" \
00140 \
00141 MOVNTQ" %%mm0, %0 \n\t" \
00142 MOVNTQ" %%mm1, 8%0 \n\t" \
00143 MOVNTQ" %%mm4, 16%0"
00144
00145
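// Convert packed 32-bit pixels to 24-bit by dropping every fourth byte;
// 8 pixels per MMX iteration, scalar tail for the remainder.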
00146 static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
00147 {
00148 uint8_t *dest = dst;
00149 const uint8_t *s = src;
00150 const uint8_t *end;
00151 const uint8_t *mm_end;
00152 end = s + src_size;
00153 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
00154 mm_end = end - 31;
00155 while (s < mm_end) {
00156 __asm__ volatile(
00157 PREFETCH" 32%1 \n\t"
00158 "movq %1, %%mm0 \n\t"
00159 "movq 8%1, %%mm1 \n\t"
00160 "movq 16%1, %%mm4 \n\t"
00161 "movq 24%1, %%mm5 \n\t"
00162 "movq %%mm0, %%mm2 \n\t"
00163 "movq %%mm1, %%mm3 \n\t"
00164 "movq %%mm4, %%mm6 \n\t"
00165 "movq %%mm5, %%mm7 \n\t"
00166 STORE_BGR24_MMX
00167 :"=m"(*dest)
00168 :"m"(*s)
00169 :"memory");
00170 dest += 24;
00171 s += 32;
00172 }
00173 __asm__ volatile(SFENCE:::"memory");
00174 __asm__ volatile(EMMS:::"memory");
00175 while (s < end) {
00176 *dest++ = *s++;
00177 *dest++ = *s++;
00178 *dest++ = *s++;
00179 s++;
00180 }
00181 }
00182
00188
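// 15-bit (1:5:5:5) -> 16-bit (5:6:5): the two upper fields (bits 5-14) are
// shifted up one bit via mask-and-add, the low 5-bit field stays in place.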
00189 static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size)
00190 {
00191 register const uint8_t* s=src;
00192 register uint8_t* d=dst;
00193 register const uint8_t *end;
00194 const uint8_t *mm_end;
00195 end = s + src_size;
00196 __asm__ volatile(PREFETCH" %0"::"m"(*s));
00197 __asm__ volatile("movq %0, %%mm4"::"m"(mask15s));
00198 mm_end = end - 15;
00199 while (s<mm_end) {
00200 __asm__ volatile(
00201 PREFETCH" 32%1 \n\t"
00202 "movq %1, %%mm0 \n\t"
00203 "movq 8%1, %%mm2 \n\t"
00204 "movq %%mm0, %%mm1 \n\t"
00205 "movq %%mm2, %%mm3 \n\t"
00206 "pand %%mm4, %%mm0 \n\t"
00207 "pand %%mm4, %%mm2 \n\t"
00208 "paddw %%mm1, %%mm0 \n\t"
00209 "paddw %%mm3, %%mm2 \n\t"
00210 MOVNTQ" %%mm0, %0 \n\t"
00211 MOVNTQ" %%mm2, 8%0"
00212 :"=m"(*d)
00213 :"m"(*s)
00214 );
00215 d+=16;
00216 s+=16;
00217 }
00218 __asm__ volatile(SFENCE:::"memory");
00219 __asm__ volatile(EMMS:::"memory");
00220 mm_end = end - 3;
00221 while (s < mm_end) {
00222 register unsigned x= *((const uint32_t *)s);
00223 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
00224 d+=4;
00225 s+=4;
00226 }
00227 if (s < end) {
00228 register unsigned short x= *((const uint16_t *)s);
00229 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
00230 }
00231 }
00232
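// 16-bit (5:6:5) -> 15-bit (1:5:5:5): shift the two upper fields down one
// bit (dropping the extra green bit) and keep the low 5-bit field.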
00233 static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size)
00234 {
00235 register const uint8_t* s=src;
00236 register uint8_t* d=dst;
00237 register const uint8_t *end;
00238 const uint8_t *mm_end;
00239 end = s + src_size;
00240 __asm__ volatile(PREFETCH" %0"::"m"(*s));
00241 __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg));
00242 __asm__ volatile("movq %0, %%mm6"::"m"(mask15b));
00243 mm_end = end - 15;
00244 while (s<mm_end) {
00245 __asm__ volatile(
00246 PREFETCH" 32%1 \n\t"
00247 "movq %1, %%mm0 \n\t"
00248 "movq 8%1, %%mm2 \n\t"
00249 "movq %%mm0, %%mm1 \n\t"
00250 "movq %%mm2, %%mm3 \n\t"
00251 "psrlq $1, %%mm0 \n\t"
00252 "psrlq $1, %%mm2 \n\t"
00253 "pand %%mm7, %%mm0 \n\t"
00254 "pand %%mm7, %%mm2 \n\t"
00255 "pand %%mm6, %%mm1 \n\t"
00256 "pand %%mm6, %%mm3 \n\t"
00257 "por %%mm1, %%mm0 \n\t"
00258 "por %%mm3, %%mm2 \n\t"
00259 MOVNTQ" %%mm0, %0 \n\t"
00260 MOVNTQ" %%mm2, 8%0"
00261 :"=m"(*d)
00262 :"m"(*s)
00263 );
00264 d+=16;
00265 s+=16;
00266 }
00267 __asm__ volatile(SFENCE:::"memory");
00268 __asm__ volatile(EMMS:::"memory");
00269 mm_end = end - 3;
00270 while (s < mm_end) {
00271 register uint32_t x= *((const uint32_t*)s);
00272 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
00273 s+=4;
00274 d+=4;
00275 }
00276 if (s < end) {
00277 register uint16_t x= *((const uint16_t*)s);
00278 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
00279 }
00280 }
00281
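// Pack 32-bit pixels to 16-bit 5:6:5. The enabled (#if 1) path positions
// the fields with pmaddwd (mul3216); the disabled path uses shifts and
// masks. A scalar loop packs the remaining pixels.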
00282 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size)
00283 {
00284 const uint8_t *s = src;
00285 const uint8_t *end;
00286 const uint8_t *mm_end;
00287 uint16_t *d = (uint16_t *)dst;
00288 end = s + src_size;
00289 mm_end = end - 15;
00290 #if 1 // faster only if multiplies are reasonably fast (FIXME: figure out on which CPUs this is faster; on Athlon it is slightly faster)
00291 __asm__ volatile(
00292 "movq %3, %%mm5 \n\t"
00293 "movq %4, %%mm6 \n\t"
00294 "movq %5, %%mm7 \n\t"
00295 "jmp 2f \n\t"
00296 ".p2align 4 \n\t"
00297 "1: \n\t"
00298 PREFETCH" 32(%1) \n\t"
00299 "movd (%1), %%mm0 \n\t"
00300 "movd 4(%1), %%mm3 \n\t"
00301 "punpckldq 8(%1), %%mm0 \n\t"
00302 "punpckldq 12(%1), %%mm3 \n\t"
00303 "movq %%mm0, %%mm1 \n\t"
00304 "movq %%mm3, %%mm4 \n\t"
00305 "pand %%mm6, %%mm0 \n\t"
00306 "pand %%mm6, %%mm3 \n\t"
00307 "pmaddwd %%mm7, %%mm0 \n\t"
00308 "pmaddwd %%mm7, %%mm3 \n\t"
00309 "pand %%mm5, %%mm1 \n\t"
00310 "pand %%mm5, %%mm4 \n\t"
00311 "por %%mm1, %%mm0 \n\t"
00312 "por %%mm4, %%mm3 \n\t"
00313 "psrld $5, %%mm0 \n\t"
00314 "pslld $11, %%mm3 \n\t"
00315 "por %%mm3, %%mm0 \n\t"
00316 MOVNTQ" %%mm0, (%0) \n\t"
00317 "add $16, %1 \n\t"
00318 "add $8, %0 \n\t"
00319 "2: \n\t"
00320 "cmp %2, %1 \n\t"
00321 " jb 1b \n\t"
00322 : "+r" (d), "+r"(s)
00323 : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
00324 );
00325 #else
00326 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00327 __asm__ volatile(
00328 "movq %0, %%mm7 \n\t"
00329 "movq %1, %%mm6 \n\t"
00330 ::"m"(red_16mask),"m"(green_16mask));
00331 while (s < mm_end) {
00332 __asm__ volatile(
00333 PREFETCH" 32%1 \n\t"
00334 "movd %1, %%mm0 \n\t"
00335 "movd 4%1, %%mm3 \n\t"
00336 "punpckldq 8%1, %%mm0 \n\t"
00337 "punpckldq 12%1, %%mm3 \n\t"
00338 "movq %%mm0, %%mm1 \n\t"
00339 "movq %%mm0, %%mm2 \n\t"
00340 "movq %%mm3, %%mm4 \n\t"
00341 "movq %%mm3, %%mm5 \n\t"
00342 "psrlq $3, %%mm0 \n\t"
00343 "psrlq $3, %%mm3 \n\t"
00344 "pand %2, %%mm0 \n\t"
00345 "pand %2, %%mm3 \n\t"
00346 "psrlq $5, %%mm1 \n\t"
00347 "psrlq $5, %%mm4 \n\t"
00348 "pand %%mm6, %%mm1 \n\t"
00349 "pand %%mm6, %%mm4 \n\t"
00350 "psrlq $8, %%mm2 \n\t"
00351 "psrlq $8, %%mm5 \n\t"
00352 "pand %%mm7, %%mm2 \n\t"
00353 "pand %%mm7, %%mm5 \n\t"
00354 "por %%mm1, %%mm0 \n\t"
00355 "por %%mm4, %%mm3 \n\t"
00356 "por %%mm2, %%mm0 \n\t"
00357 "por %%mm5, %%mm3 \n\t"
00358 "psllq $16, %%mm3 \n\t"
00359 "por %%mm3, %%mm0 \n\t"
00360 MOVNTQ" %%mm0, %0 \n\t"
00361 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00362 d += 4;
00363 s += 16;
00364 }
00365 #endif
00366 __asm__ volatile(SFENCE:::"memory");
00367 __asm__ volatile(EMMS:::"memory");
00368 while (s < end) {
00369 register int rgb = *(const uint32_t*)s; s += 4;
00370 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
00371 }
00372 }
00373
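// Like rgb32to16, but with the low and high 5-bit fields swapped (R/B
// exchanged) in the 5:6:5 output.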
00374 static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
00375 {
00376 const uint8_t *s = src;
00377 const uint8_t *end;
00378 const uint8_t *mm_end;
00379 uint16_t *d = (uint16_t *)dst;
00380 end = s + src_size;
00381 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00382 __asm__ volatile(
00383 "movq %0, %%mm7 \n\t"
00384 "movq %1, %%mm6 \n\t"
00385 ::"m"(red_16mask),"m"(green_16mask));
00386 mm_end = end - 15;
00387 while (s < mm_end) {
00388 __asm__ volatile(
00389 PREFETCH" 32%1 \n\t"
00390 "movd %1, %%mm0 \n\t"
00391 "movd 4%1, %%mm3 \n\t"
00392 "punpckldq 8%1, %%mm0 \n\t"
00393 "punpckldq 12%1, %%mm3 \n\t"
00394 "movq %%mm0, %%mm1 \n\t"
00395 "movq %%mm0, %%mm2 \n\t"
00396 "movq %%mm3, %%mm4 \n\t"
00397 "movq %%mm3, %%mm5 \n\t"
00398 "psllq $8, %%mm0 \n\t"
00399 "psllq $8, %%mm3 \n\t"
00400 "pand %%mm7, %%mm0 \n\t"
00401 "pand %%mm7, %%mm3 \n\t"
00402 "psrlq $5, %%mm1 \n\t"
00403 "psrlq $5, %%mm4 \n\t"
00404 "pand %%mm6, %%mm1 \n\t"
00405 "pand %%mm6, %%mm4 \n\t"
00406 "psrlq $19, %%mm2 \n\t"
00407 "psrlq $19, %%mm5 \n\t"
00408 "pand %2, %%mm2 \n\t"
00409 "pand %2, %%mm5 \n\t"
00410 "por %%mm1, %%mm0 \n\t"
00411 "por %%mm4, %%mm3 \n\t"
00412 "por %%mm2, %%mm0 \n\t"
00413 "por %%mm5, %%mm3 \n\t"
00414 "psllq $16, %%mm3 \n\t"
00415 "por %%mm3, %%mm0 \n\t"
00416 MOVNTQ" %%mm0, %0 \n\t"
00417 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00418 d += 4;
00419 s += 16;
00420 }
00421 __asm__ volatile(SFENCE:::"memory");
00422 __asm__ volatile(EMMS:::"memory");
00423 while (s < end) {
00424 register int rgb = *(const uint32_t*)s; s += 4;
00425 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
00426 }
00427 }
00428
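// Pack 32-bit pixels to 15-bit 1:5:5:5; same structure as rgb32to16, using
// mask3215g/mul3215 and adjusted shift counts.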
00429 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size)
00430 {
00431 const uint8_t *s = src;
00432 const uint8_t *end;
00433 const uint8_t *mm_end;
00434 uint16_t *d = (uint16_t *)dst;
00435 end = s + src_size;
00436 mm_end = end - 15;
00437 #if 1 // faster only if multiplies are reasonably fast (FIXME: figure out on which CPUs this is faster; on Athlon it is slightly faster)
00438 __asm__ volatile(
00439 "movq %3, %%mm5 \n\t"
00440 "movq %4, %%mm6 \n\t"
00441 "movq %5, %%mm7 \n\t"
00442 "jmp 2f \n\t"
00443 ".p2align 4 \n\t"
00444 "1: \n\t"
00445 PREFETCH" 32(%1) \n\t"
00446 "movd (%1), %%mm0 \n\t"
00447 "movd 4(%1), %%mm3 \n\t"
00448 "punpckldq 8(%1), %%mm0 \n\t"
00449 "punpckldq 12(%1), %%mm3 \n\t"
00450 "movq %%mm0, %%mm1 \n\t"
00451 "movq %%mm3, %%mm4 \n\t"
00452 "pand %%mm6, %%mm0 \n\t"
00453 "pand %%mm6, %%mm3 \n\t"
00454 "pmaddwd %%mm7, %%mm0 \n\t"
00455 "pmaddwd %%mm7, %%mm3 \n\t"
00456 "pand %%mm5, %%mm1 \n\t"
00457 "pand %%mm5, %%mm4 \n\t"
00458 "por %%mm1, %%mm0 \n\t"
00459 "por %%mm4, %%mm3 \n\t"
00460 "psrld $6, %%mm0 \n\t"
00461 "pslld $10, %%mm3 \n\t"
00462 "por %%mm3, %%mm0 \n\t"
00463 MOVNTQ" %%mm0, (%0) \n\t"
00464 "add $16, %1 \n\t"
00465 "add $8, %0 \n\t"
00466 "2: \n\t"
00467 "cmp %2, %1 \n\t"
00468 " jb 1b \n\t"
00469 : "+r" (d), "+r"(s)
00470 : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
00471 );
00472 #else
00473 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00474 __asm__ volatile(
00475 "movq %0, %%mm7 \n\t"
00476 "movq %1, %%mm6 \n\t"
00477 ::"m"(red_15mask),"m"(green_15mask));
00478 while (s < mm_end) {
00479 __asm__ volatile(
00480 PREFETCH" 32%1 \n\t"
00481 "movd %1, %%mm0 \n\t"
00482 "movd 4%1, %%mm3 \n\t"
00483 "punpckldq 8%1, %%mm0 \n\t"
00484 "punpckldq 12%1, %%mm3 \n\t"
00485 "movq %%mm0, %%mm1 \n\t"
00486 "movq %%mm0, %%mm2 \n\t"
00487 "movq %%mm3, %%mm4 \n\t"
00488 "movq %%mm3, %%mm5 \n\t"
00489 "psrlq $3, %%mm0 \n\t"
00490 "psrlq $3, %%mm3 \n\t"
00491 "pand %2, %%mm0 \n\t"
00492 "pand %2, %%mm3 \n\t"
00493 "psrlq $6, %%mm1 \n\t"
00494 "psrlq $6, %%mm4 \n\t"
00495 "pand %%mm6, %%mm1 \n\t"
00496 "pand %%mm6, %%mm4 \n\t"
00497 "psrlq $9, %%mm2 \n\t"
00498 "psrlq $9, %%mm5 \n\t"
00499 "pand %%mm7, %%mm2 \n\t"
00500 "pand %%mm7, %%mm5 \n\t"
00501 "por %%mm1, %%mm0 \n\t"
00502 "por %%mm4, %%mm3 \n\t"
00503 "por %%mm2, %%mm0 \n\t"
00504 "por %%mm5, %%mm3 \n\t"
00505 "psllq $16, %%mm3 \n\t"
00506 "por %%mm3, %%mm0 \n\t"
00507 MOVNTQ" %%mm0, %0 \n\t"
00508 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00509 d += 4;
00510 s += 16;
00511 }
00512 #endif
00513 __asm__ volatile(SFENCE:::"memory");
00514 __asm__ volatile(EMMS:::"memory");
00515 while (s < end) {
00516 register int rgb = *(const uint32_t*)s; s += 4;
00517 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
00518 }
00519 }
00520
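// Like rgb32to15, but with the low and high 5-bit fields swapped in the
// 1:5:5:5 output.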
00521 static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
00522 {
00523 const uint8_t *s = src;
00524 const uint8_t *end;
00525 const uint8_t *mm_end;
00526 uint16_t *d = (uint16_t *)dst;
00527 end = s + src_size;
00528 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00529 __asm__ volatile(
00530 "movq %0, %%mm7 \n\t"
00531 "movq %1, %%mm6 \n\t"
00532 ::"m"(red_15mask),"m"(green_15mask));
00533 mm_end = end - 15;
00534 while (s < mm_end) {
00535 __asm__ volatile(
00536 PREFETCH" 32%1 \n\t"
00537 "movd %1, %%mm0 \n\t"
00538 "movd 4%1, %%mm3 \n\t"
00539 "punpckldq 8%1, %%mm0 \n\t"
00540 "punpckldq 12%1, %%mm3 \n\t"
00541 "movq %%mm0, %%mm1 \n\t"
00542 "movq %%mm0, %%mm2 \n\t"
00543 "movq %%mm3, %%mm4 \n\t"
00544 "movq %%mm3, %%mm5 \n\t"
00545 "psllq $7, %%mm0 \n\t"
00546 "psllq $7, %%mm3 \n\t"
00547 "pand %%mm7, %%mm0 \n\t"
00548 "pand %%mm7, %%mm3 \n\t"
00549 "psrlq $6, %%mm1 \n\t"
00550 "psrlq $6, %%mm4 \n\t"
00551 "pand %%mm6, %%mm1 \n\t"
00552 "pand %%mm6, %%mm4 \n\t"
00553 "psrlq $19, %%mm2 \n\t"
00554 "psrlq $19, %%mm5 \n\t"
00555 "pand %2, %%mm2 \n\t"
00556 "pand %2, %%mm5 \n\t"
00557 "por %%mm1, %%mm0 \n\t"
00558 "por %%mm4, %%mm3 \n\t"
00559 "por %%mm2, %%mm0 \n\t"
00560 "por %%mm5, %%mm3 \n\t"
00561 "psllq $16, %%mm3 \n\t"
00562 "por %%mm3, %%mm0 \n\t"
00563 MOVNTQ" %%mm0, %0 \n\t"
00564 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00565 d += 4;
00566 s += 16;
00567 }
00568 __asm__ volatile(SFENCE:::"memory");
00569 __asm__ volatile(EMMS:::"memory");
00570 while (s < end) {
00571 register int rgb = *(const uint32_t*)s; s += 4;
00572 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
00573 }
00574 }
00575
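// Pack 24-bit pixels to 16-bit 5:6:5; 4 pixels (12 source bytes) per MMX
// iteration. The first source byte of each pixel lands in the low 5-bit
// field.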
00576 static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
00577 {
00578 const uint8_t *s = src;
00579 const uint8_t *end;
00580 const uint8_t *mm_end;
00581 uint16_t *d = (uint16_t *)dst;
00582 end = s + src_size;
00583 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00584 __asm__ volatile(
00585 "movq %0, %%mm7 \n\t"
00586 "movq %1, %%mm6 \n\t"
00587 ::"m"(red_16mask),"m"(green_16mask));
00588 mm_end = end - 11;
00589 while (s < mm_end) {
00590 __asm__ volatile(
00591 PREFETCH" 32%1 \n\t"
00592 "movd %1, %%mm0 \n\t"
00593 "movd 3%1, %%mm3 \n\t"
00594 "punpckldq 6%1, %%mm0 \n\t"
00595 "punpckldq 9%1, %%mm3 \n\t"
00596 "movq %%mm0, %%mm1 \n\t"
00597 "movq %%mm0, %%mm2 \n\t"
00598 "movq %%mm3, %%mm4 \n\t"
00599 "movq %%mm3, %%mm5 \n\t"
00600 "psrlq $3, %%mm0 \n\t"
00601 "psrlq $3, %%mm3 \n\t"
00602 "pand %2, %%mm0 \n\t"
00603 "pand %2, %%mm3 \n\t"
00604 "psrlq $5, %%mm1 \n\t"
00605 "psrlq $5, %%mm4 \n\t"
00606 "pand %%mm6, %%mm1 \n\t"
00607 "pand %%mm6, %%mm4 \n\t"
00608 "psrlq $8, %%mm2 \n\t"
00609 "psrlq $8, %%mm5 \n\t"
00610 "pand %%mm7, %%mm2 \n\t"
00611 "pand %%mm7, %%mm5 \n\t"
00612 "por %%mm1, %%mm0 \n\t"
00613 "por %%mm4, %%mm3 \n\t"
00614 "por %%mm2, %%mm0 \n\t"
00615 "por %%mm5, %%mm3 \n\t"
00616 "psllq $16, %%mm3 \n\t"
00617 "por %%mm3, %%mm0 \n\t"
00618 MOVNTQ" %%mm0, %0 \n\t"
00619 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00620 d += 4;
00621 s += 12;
00622 }
00623 __asm__ volatile(SFENCE:::"memory");
00624 __asm__ volatile(EMMS:::"memory");
00625 while (s < end) {
00626 const int b = *s++;
00627 const int g = *s++;
00628 const int r = *s++;
00629 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
00630 }
00631 }
00632
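// Pack 24-bit pixels to 16-bit 5:6:5; like rgb24tobgr16 but with the first
// and third source bytes swapped (the first byte lands in the high 5-bit
// field).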
00633 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size)
00634 {
00635 const uint8_t *s = src;
00636 const uint8_t *end;
00637 const uint8_t *mm_end;
00638 uint16_t *d = (uint16_t *)dst;
00639 end = s + src_size;
00640 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00641 __asm__ volatile(
00642 "movq %0, %%mm7 \n\t"
00643 "movq %1, %%mm6 \n\t"
00644 ::"m"(red_16mask),"m"(green_16mask));
00645 mm_end = end - 15;
00646 while (s < mm_end) {
00647 __asm__ volatile(
00648 PREFETCH" 32%1 \n\t"
00649 "movd %1, %%mm0 \n\t"
00650 "movd 3%1, %%mm3 \n\t"
00651 "punpckldq 6%1, %%mm0 \n\t"
00652 "punpckldq 9%1, %%mm3 \n\t"
00653 "movq %%mm0, %%mm1 \n\t"
00654 "movq %%mm0, %%mm2 \n\t"
00655 "movq %%mm3, %%mm4 \n\t"
00656 "movq %%mm3, %%mm5 \n\t"
00657 "psllq $8, %%mm0 \n\t"
00658 "psllq $8, %%mm3 \n\t"
00659 "pand %%mm7, %%mm0 \n\t"
00660 "pand %%mm7, %%mm3 \n\t"
00661 "psrlq $5, %%mm1 \n\t"
00662 "psrlq $5, %%mm4 \n\t"
00663 "pand %%mm6, %%mm1 \n\t"
00664 "pand %%mm6, %%mm4 \n\t"
00665 "psrlq $19, %%mm2 \n\t"
00666 "psrlq $19, %%mm5 \n\t"
00667 "pand %2, %%mm2 \n\t"
00668 "pand %2, %%mm5 \n\t"
00669 "por %%mm1, %%mm0 \n\t"
00670 "por %%mm4, %%mm3 \n\t"
00671 "por %%mm2, %%mm0 \n\t"
00672 "por %%mm5, %%mm3 \n\t"
00673 "psllq $16, %%mm3 \n\t"
00674 "por %%mm3, %%mm0 \n\t"
00675 MOVNTQ" %%mm0, %0 \n\t"
00676 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00677 d += 4;
00678 s += 12;
00679 }
00680 __asm__ volatile(SFENCE:::"memory");
00681 __asm__ volatile(EMMS:::"memory");
00682 while (s < end) {
00683 const int r = *s++;
00684 const int g = *s++;
00685 const int b = *s++;
00686 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
00687 }
00688 }
00689
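// Pack 24-bit pixels to 15-bit 1:5:5:5; 4 pixels per MMX iteration, first
// source byte in the low 5-bit field.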
00690 static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
00691 {
00692 const uint8_t *s = src;
00693 const uint8_t *end;
00694 const uint8_t *mm_end;
00695 uint16_t *d = (uint16_t *)dst;
00696 end = s + src_size;
00697 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00698 __asm__ volatile(
00699 "movq %0, %%mm7 \n\t"
00700 "movq %1, %%mm6 \n\t"
00701 ::"m"(red_15mask),"m"(green_15mask));
00702 mm_end = end - 11;
00703 while (s < mm_end) {
00704 __asm__ volatile(
00705 PREFETCH" 32%1 \n\t"
00706 "movd %1, %%mm0 \n\t"
00707 "movd 3%1, %%mm3 \n\t"
00708 "punpckldq 6%1, %%mm0 \n\t"
00709 "punpckldq 9%1, %%mm3 \n\t"
00710 "movq %%mm0, %%mm1 \n\t"
00711 "movq %%mm0, %%mm2 \n\t"
00712 "movq %%mm3, %%mm4 \n\t"
00713 "movq %%mm3, %%mm5 \n\t"
00714 "psrlq $3, %%mm0 \n\t"
00715 "psrlq $3, %%mm3 \n\t"
00716 "pand %2, %%mm0 \n\t"
00717 "pand %2, %%mm3 \n\t"
00718 "psrlq $6, %%mm1 \n\t"
00719 "psrlq $6, %%mm4 \n\t"
00720 "pand %%mm6, %%mm1 \n\t"
00721 "pand %%mm6, %%mm4 \n\t"
00722 "psrlq $9, %%mm2 \n\t"
00723 "psrlq $9, %%mm5 \n\t"
00724 "pand %%mm7, %%mm2 \n\t"
00725 "pand %%mm7, %%mm5 \n\t"
00726 "por %%mm1, %%mm0 \n\t"
00727 "por %%mm4, %%mm3 \n\t"
00728 "por %%mm2, %%mm0 \n\t"
00729 "por %%mm5, %%mm3 \n\t"
00730 "psllq $16, %%mm3 \n\t"
00731 "por %%mm3, %%mm0 \n\t"
00732 MOVNTQ" %%mm0, %0 \n\t"
00733 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00734 d += 4;
00735 s += 12;
00736 }
00737 __asm__ volatile(SFENCE:::"memory");
00738 __asm__ volatile(EMMS:::"memory");
00739 while (s < end) {
00740 const int b = *s++;
00741 const int g = *s++;
00742 const int r = *s++;
00743 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
00744 }
00745 }
00746
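// Pack 24-bit pixels to 15-bit 1:5:5:5; first source byte in the high
// 5-bit field.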
00747 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size)
00748 {
00749 const uint8_t *s = src;
00750 const uint8_t *end;
00751 const uint8_t *mm_end;
00752 uint16_t *d = (uint16_t *)dst;
00753 end = s + src_size;
00754 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
00755 __asm__ volatile(
00756 "movq %0, %%mm7 \n\t"
00757 "movq %1, %%mm6 \n\t"
00758 ::"m"(red_15mask),"m"(green_15mask));
00759 mm_end = end - 15;
00760 while (s < mm_end) {
00761 __asm__ volatile(
00762 PREFETCH" 32%1 \n\t"
00763 "movd %1, %%mm0 \n\t"
00764 "movd 3%1, %%mm3 \n\t"
00765 "punpckldq 6%1, %%mm0 \n\t"
00766 "punpckldq 9%1, %%mm3 \n\t"
00767 "movq %%mm0, %%mm1 \n\t"
00768 "movq %%mm0, %%mm2 \n\t"
00769 "movq %%mm3, %%mm4 \n\t"
00770 "movq %%mm3, %%mm5 \n\t"
00771 "psllq $7, %%mm0 \n\t"
00772 "psllq $7, %%mm3 \n\t"
00773 "pand %%mm7, %%mm0 \n\t"
00774 "pand %%mm7, %%mm3 \n\t"
00775 "psrlq $6, %%mm1 \n\t"
00776 "psrlq $6, %%mm4 \n\t"
00777 "pand %%mm6, %%mm1 \n\t"
00778 "pand %%mm6, %%mm4 \n\t"
00779 "psrlq $19, %%mm2 \n\t"
00780 "psrlq $19, %%mm5 \n\t"
00781 "pand %2, %%mm2 \n\t"
00782 "pand %2, %%mm5 \n\t"
00783 "por %%mm1, %%mm0 \n\t"
00784 "por %%mm4, %%mm3 \n\t"
00785 "por %%mm2, %%mm0 \n\t"
00786 "por %%mm5, %%mm3 \n\t"
00787 "psllq $16, %%mm3 \n\t"
00788 "por %%mm3, %%mm0 \n\t"
00789 MOVNTQ" %%mm0, %0 \n\t"
00790 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00791 d += 4;
00792 s += 12;
00793 }
00794 __asm__ volatile(SFENCE:::"memory");
00795 __asm__ volatile(EMMS:::"memory");
00796 while (s < end) {
00797 const int r = *s++;
00798 const int g = *s++;
00799 const int b = *s++;
00800 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
00801 }
00802 }
00803
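// Expand 15-bit 1:5:5:5 pixels to 24-bit: each 5-bit field is scaled to
// 8 bits with pmulhw (mul15_mid/mul15_hi); the scalar tail replicates the
// top bits of each field instead.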
00804 static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
00805 {
00806 const uint16_t *end;
00807 const uint16_t *mm_end;
00808 uint8_t *d = dst;
00809 const uint16_t *s = (const uint16_t*)src;
00810 end = s + src_size/2;
00811 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
00812 mm_end = end - 7;
00813 while (s < mm_end) {
00814 __asm__ volatile(
00815 PREFETCH" 32%1 \n\t"
00816 "movq %1, %%mm0 \n\t"
00817 "movq %1, %%mm1 \n\t"
00818 "movq %1, %%mm2 \n\t"
00819 "pand %2, %%mm0 \n\t"
00820 "pand %3, %%mm1 \n\t"
00821 "pand %4, %%mm2 \n\t"
00822 "psllq $5, %%mm0 \n\t"
00823 "pmulhw %6, %%mm0 \n\t"
00824 "pmulhw %6, %%mm1 \n\t"
00825 "pmulhw %7, %%mm2 \n\t"
00826 "movq %%mm0, %%mm3 \n\t"
00827 "movq %%mm1, %%mm4 \n\t"
00828 "movq %%mm2, %%mm5 \n\t"
00829 "punpcklwd %5, %%mm0 \n\t"
00830 "punpcklwd %5, %%mm1 \n\t"
00831 "punpcklwd %5, %%mm2 \n\t"
00832 "punpckhwd %5, %%mm3 \n\t"
00833 "punpckhwd %5, %%mm4 \n\t"
00834 "punpckhwd %5, %%mm5 \n\t"
00835 "psllq $8, %%mm1 \n\t"
00836 "psllq $16, %%mm2 \n\t"
00837 "por %%mm1, %%mm0 \n\t"
00838 "por %%mm2, %%mm0 \n\t"
00839 "psllq $8, %%mm4 \n\t"
00840 "psllq $16, %%mm5 \n\t"
00841 "por %%mm4, %%mm3 \n\t"
00842 "por %%mm5, %%mm3 \n\t"
00843
00844 "movq %%mm0, %%mm6 \n\t"
00845 "movq %%mm3, %%mm7 \n\t"
00846
00847 "movq 8%1, %%mm0 \n\t"
00848 "movq 8%1, %%mm1 \n\t"
00849 "movq 8%1, %%mm2 \n\t"
00850 "pand %2, %%mm0 \n\t"
00851 "pand %3, %%mm1 \n\t"
00852 "pand %4, %%mm2 \n\t"
00853 "psllq $5, %%mm0 \n\t"
00854 "pmulhw %6, %%mm0 \n\t"
00855 "pmulhw %6, %%mm1 \n\t"
00856 "pmulhw %7, %%mm2 \n\t"
00857 "movq %%mm0, %%mm3 \n\t"
00858 "movq %%mm1, %%mm4 \n\t"
00859 "movq %%mm2, %%mm5 \n\t"
00860 "punpcklwd %5, %%mm0 \n\t"
00861 "punpcklwd %5, %%mm1 \n\t"
00862 "punpcklwd %5, %%mm2 \n\t"
00863 "punpckhwd %5, %%mm3 \n\t"
00864 "punpckhwd %5, %%mm4 \n\t"
00865 "punpckhwd %5, %%mm5 \n\t"
00866 "psllq $8, %%mm1 \n\t"
00867 "psllq $16, %%mm2 \n\t"
00868 "por %%mm1, %%mm0 \n\t"
00869 "por %%mm2, %%mm0 \n\t"
00870 "psllq $8, %%mm4 \n\t"
00871 "psllq $16, %%mm5 \n\t"
00872 "por %%mm4, %%mm3 \n\t"
00873 "por %%mm5, %%mm3 \n\t"
00874
00875 :"=m"(*d)
00876 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mmx_null),"m"(mul15_mid),"m"(mul15_hi)
00877 :"memory");
00878
00879 __asm__ volatile(
00880 "movq %%mm0, %%mm4 \n\t"
00881 "movq %%mm3, %%mm5 \n\t"
00882 "movq %%mm6, %%mm0 \n\t"
00883 "movq %%mm7, %%mm1 \n\t"
00884
00885 "movq %%mm4, %%mm6 \n\t"
00886 "movq %%mm5, %%mm7 \n\t"
00887 "movq %%mm0, %%mm2 \n\t"
00888 "movq %%mm1, %%mm3 \n\t"
00889
00890 STORE_BGR24_MMX
00891
00892 :"=m"(*d)
00893 :"m"(*s)
00894 :"memory");
00895 d += 24;
00896 s += 8;
00897 }
00898 __asm__ volatile(SFENCE:::"memory");
00899 __asm__ volatile(EMMS:::"memory");
00900 while (s < end) {
00901 register uint16_t bgr;
00902 bgr = *s++;
00903 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
00904 *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
00905 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
00906 }
00907 }
00908
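// Expand 16-bit 5:6:5 pixels to 24-bit; like rgb15tobgr24, but the 6-bit
// middle field is scaled with mul16_mid.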
00909 static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
00910 {
00911 const uint16_t *end;
00912 const uint16_t *mm_end;
00913 uint8_t *d = (uint8_t *)dst;
00914 const uint16_t *s = (const uint16_t *)src;
00915 end = s + src_size/2;
00916 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
00917 mm_end = end - 7;
00918 while (s < mm_end) {
00919 __asm__ volatile(
00920 PREFETCH" 32%1 \n\t"
00921 "movq %1, %%mm0 \n\t"
00922 "movq %1, %%mm1 \n\t"
00923 "movq %1, %%mm2 \n\t"
00924 "pand %2, %%mm0 \n\t"
00925 "pand %3, %%mm1 \n\t"
00926 "pand %4, %%mm2 \n\t"
00927 "psllq $5, %%mm0 \n\t"
00928 "psrlq $1, %%mm2 \n\t"
00929 "pmulhw %6, %%mm0 \n\t"
00930 "pmulhw %8, %%mm1 \n\t"
00931 "pmulhw %7, %%mm2 \n\t"
00932 "movq %%mm0, %%mm3 \n\t"
00933 "movq %%mm1, %%mm4 \n\t"
00934 "movq %%mm2, %%mm5 \n\t"
00935 "punpcklwd %5, %%mm0 \n\t"
00936 "punpcklwd %5, %%mm1 \n\t"
00937 "punpcklwd %5, %%mm2 \n\t"
00938 "punpckhwd %5, %%mm3 \n\t"
00939 "punpckhwd %5, %%mm4 \n\t"
00940 "punpckhwd %5, %%mm5 \n\t"
00941 "psllq $8, %%mm1 \n\t"
00942 "psllq $16, %%mm2 \n\t"
00943 "por %%mm1, %%mm0 \n\t"
00944 "por %%mm2, %%mm0 \n\t"
00945 "psllq $8, %%mm4 \n\t"
00946 "psllq $16, %%mm5 \n\t"
00947 "por %%mm4, %%mm3 \n\t"
00948 "por %%mm5, %%mm3 \n\t"
00949
00950 "movq %%mm0, %%mm6 \n\t"
00951 "movq %%mm3, %%mm7 \n\t"
00952
00953 "movq 8%1, %%mm0 \n\t"
00954 "movq 8%1, %%mm1 \n\t"
00955 "movq 8%1, %%mm2 \n\t"
00956 "pand %2, %%mm0 \n\t"
00957 "pand %3, %%mm1 \n\t"
00958 "pand %4, %%mm2 \n\t"
00959 "psllq $5, %%mm0 \n\t"
00960 "psrlq $1, %%mm2 \n\t"
00961 "pmulhw %6, %%mm0 \n\t"
00962 "pmulhw %8, %%mm1 \n\t"
00963 "pmulhw %7, %%mm2 \n\t"
00964 "movq %%mm0, %%mm3 \n\t"
00965 "movq %%mm1, %%mm4 \n\t"
00966 "movq %%mm2, %%mm5 \n\t"
00967 "punpcklwd %5, %%mm0 \n\t"
00968 "punpcklwd %5, %%mm1 \n\t"
00969 "punpcklwd %5, %%mm2 \n\t"
00970 "punpckhwd %5, %%mm3 \n\t"
00971 "punpckhwd %5, %%mm4 \n\t"
00972 "punpckhwd %5, %%mm5 \n\t"
00973 "psllq $8, %%mm1 \n\t"
00974 "psllq $16, %%mm2 \n\t"
00975 "por %%mm1, %%mm0 \n\t"
00976 "por %%mm2, %%mm0 \n\t"
00977 "psllq $8, %%mm4 \n\t"
00978 "psllq $16, %%mm5 \n\t"
00979 "por %%mm4, %%mm3 \n\t"
00980 "por %%mm5, %%mm3 \n\t"
00981 :"=m"(*d)
00982 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null),"m"(mul15_mid),"m"(mul15_hi),"m"(mul16_mid)
00983 :"memory");
00984
00985 __asm__ volatile(
00986 "movq %%mm0, %%mm4 \n\t"
00987 "movq %%mm3, %%mm5 \n\t"
00988 "movq %%mm6, %%mm0 \n\t"
00989 "movq %%mm7, %%mm1 \n\t"
00990
00991 "movq %%mm4, %%mm6 \n\t"
00992 "movq %%mm5, %%mm7 \n\t"
00993 "movq %%mm0, %%mm2 \n\t"
00994 "movq %%mm1, %%mm3 \n\t"
00995
00996 STORE_BGR24_MMX
00997
00998 :"=m"(*d)
00999 :"m"(*s)
01000 :"memory");
01001 d += 24;
01002 s += 8;
01003 }
01004 __asm__ volatile(SFENCE:::"memory");
01005 __asm__ volatile(EMMS:::"memory");
01006 while (s < end) {
01007 register uint16_t bgr;
01008 bgr = *s++;
01009 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
01010 *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
01011 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
01012 }
01013 }
01014
01021
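// Interleave the component words in %%mm0/%%mm1/%%mm2 with an all-ones
// fourth byte (from %%mm6) into four 32-bit pixels and store 16 bytes.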
01022 #define PACK_RGB32 \
01023 "packuswb %%mm7, %%mm0 \n\t" \
01024 "packuswb %%mm7, %%mm1 \n\t" \
01025 "packuswb %%mm7, %%mm2 \n\t" \
01026 "punpcklbw %%mm1, %%mm0 \n\t" \
01027 "punpcklbw %%mm6, %%mm2 \n\t" \
01028 "movq %%mm0, %%mm3 \n\t" \
01029 "punpcklwd %%mm2, %%mm0 \n\t" \
01030 "punpckhwd %%mm2, %%mm3 \n\t" \
01031 MOVNTQ" %%mm0, %0 \n\t" \
01032 MOVNTQ" %%mm3, 8%0 \n\t" \
01033
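// Expand 15-bit 1:5:5:5 pixels to 32-bit with an opaque fourth byte;
// 4 pixels per MMX iteration, scalar tail for the remainder.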
01034 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
01035 {
01036 const uint16_t *end;
01037 const uint16_t *mm_end;
01038 uint8_t *d = dst;
01039 const uint16_t *s = (const uint16_t *)src;
01040 end = s + src_size/2;
01041 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
01042 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
01043 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
01044 mm_end = end - 3;
01045 while (s < mm_end) {
01046 __asm__ volatile(
01047 PREFETCH" 32%1 \n\t"
01048 "movq %1, %%mm0 \n\t"
01049 "movq %1, %%mm1 \n\t"
01050 "movq %1, %%mm2 \n\t"
01051 "pand %2, %%mm0 \n\t"
01052 "pand %3, %%mm1 \n\t"
01053 "pand %4, %%mm2 \n\t"
01054 "psllq $5, %%mm0 \n\t"
01055 "pmulhw %5, %%mm0 \n\t"
01056 "pmulhw %5, %%mm1 \n\t"
01057 "pmulhw %6, %%mm2 \n\t"
01058 PACK_RGB32
01059 :"=m"(*d)
01060 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mul15_mid),"m"(mul15_hi)
01061 :"memory");
01062 d += 16;
01063 s += 4;
01064 }
01065 __asm__ volatile(SFENCE:::"memory");
01066 __asm__ volatile(EMMS:::"memory");
01067 while (s < end) {
01068 register uint16_t bgr;
01069 bgr = *s++;
01070 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
01071 *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
01072 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
01073 *d++ = 255;
01074 }
01075 }
01076
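// Expand 16-bit 5:6:5 pixels to 32-bit with an opaque fourth byte.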
01077 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size)
01078 {
01079 const uint16_t *end;
01080 const uint16_t *mm_end;
01081 uint8_t *d = dst;
01082 const uint16_t *s = (const uint16_t*)src;
01083 end = s + src_size/2;
01084 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
01085 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
01086 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
01087 mm_end = end - 3;
01088 while (s < mm_end) {
01089 __asm__ volatile(
01090 PREFETCH" 32%1 \n\t"
01091 "movq %1, %%mm0 \n\t"
01092 "movq %1, %%mm1 \n\t"
01093 "movq %1, %%mm2 \n\t"
01094 "pand %2, %%mm0 \n\t"
01095 "pand %3, %%mm1 \n\t"
01096 "pand %4, %%mm2 \n\t"
01097 "psllq $5, %%mm0 \n\t"
01098 "psrlq $1, %%mm2 \n\t"
01099 "pmulhw %5, %%mm0 \n\t"
01100 "pmulhw %7, %%mm1 \n\t"
01101 "pmulhw %6, %%mm2 \n\t"
01102 PACK_RGB32
01103 :"=m"(*d)
01104 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid),"m"(mul15_hi),"m"(mul16_mid)
01105 :"memory");
01106 d += 16;
01107 s += 4;
01108 }
01109 __asm__ volatile(SFENCE:::"memory");
01110 __asm__ volatile(EMMS:::"memory");
01111 while (s < end) {
01112 register uint16_t bgr;
01113 bgr = *s++;
01114 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
01115 *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
01116 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
01117 *d++ = 255;
01118 }
01119 }
01120
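// Swap bytes 0 and 2 of every 32-bit pixel (e.g. RGBA <-> BGRA). The MMX
// loop walks a negative index up to zero; buffers shorter than 16 bytes,
// and the final partial block, are handled by the C loop.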
01121 static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size)
01122 {
01123 x86_reg idx = 15 - src_size;
01124 const uint8_t *s = src-idx;
01125 uint8_t *d = dst-idx;
01126 __asm__ volatile(
01127 "test %0, %0 \n\t"
01128 "jns 2f \n\t"
01129 PREFETCH" (%1, %0) \n\t"
01130 "movq %3, %%mm7 \n\t"
01131 "pxor %4, %%mm7 \n\t"
01132 "movq %%mm7, %%mm6 \n\t"
01133 "pxor %5, %%mm7 \n\t"
01134 ".p2align 4 \n\t"
01135 "1: \n\t"
01136 PREFETCH" 32(%1, %0) \n\t"
01137 "movq (%1, %0), %%mm0 \n\t"
01138 "movq 8(%1, %0), %%mm1 \n\t"
01139 # if COMPILE_TEMPLATE_MMX2
01140 "pshufw $177, %%mm0, %%mm3 \n\t"
01141 "pshufw $177, %%mm1, %%mm5 \n\t"
01142 "pand %%mm7, %%mm0 \n\t"
01143 "pand %%mm6, %%mm3 \n\t"
01144 "pand %%mm7, %%mm1 \n\t"
01145 "pand %%mm6, %%mm5 \n\t"
01146 "por %%mm3, %%mm0 \n\t"
01147 "por %%mm5, %%mm1 \n\t"
01148 # else
01149 "movq %%mm0, %%mm2 \n\t"
01150 "movq %%mm1, %%mm4 \n\t"
01151 "pand %%mm7, %%mm0 \n\t"
01152 "pand %%mm6, %%mm2 \n\t"
01153 "pand %%mm7, %%mm1 \n\t"
01154 "pand %%mm6, %%mm4 \n\t"
01155 "movq %%mm2, %%mm3 \n\t"
01156 "movq %%mm4, %%mm5 \n\t"
01157 "pslld $16, %%mm2 \n\t"
01158 "psrld $16, %%mm3 \n\t"
01159 "pslld $16, %%mm4 \n\t"
01160 "psrld $16, %%mm5 \n\t"
01161 "por %%mm2, %%mm0 \n\t"
01162 "por %%mm4, %%mm1 \n\t"
01163 "por %%mm3, %%mm0 \n\t"
01164 "por %%mm5, %%mm1 \n\t"
01165 # endif
01166 MOVNTQ" %%mm0, (%2, %0) \n\t"
01167 MOVNTQ" %%mm1, 8(%2, %0) \n\t"
01168 "add $16, %0 \n\t"
01169 "js 1b \n\t"
01170 SFENCE" \n\t"
01171 EMMS" \n\t"
01172 "2: \n\t"
01173 : "+&r"(idx)
01174 : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
01175 : "memory");
01176 for (; idx<15; idx+=4) {
01177 register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
01178 v &= 0xff00ff;
01179 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
01180 }
01181 }
01182
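// Swap the first and third byte of each packed 24-bit pixel; 8 pixels
// (24 bytes) per MMX iteration using the mask24r/g/b tables, C loop for
// the remainder.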
01183 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
01184 {
01185 unsigned i;
01186 x86_reg mmx_size= 23 - src_size;
01187 __asm__ volatile (
01188 "test %%"REG_a", %%"REG_a" \n\t"
01189 "jns 2f \n\t"
01190 "movq "MANGLE(mask24r)", %%mm5 \n\t"
01191 "movq "MANGLE(mask24g)", %%mm6 \n\t"
01192 "movq "MANGLE(mask24b)", %%mm7 \n\t"
01193 ".p2align 4 \n\t"
01194 "1: \n\t"
01195 PREFETCH" 32(%1, %%"REG_a") \n\t"
01196 "movq (%1, %%"REG_a"), %%mm0 \n\t"
01197 "movq (%1, %%"REG_a"), %%mm1 \n\t"
01198 "movq 2(%1, %%"REG_a"), %%mm2 \n\t"
01199 "psllq $16, %%mm0 \n\t"
01200 "pand %%mm5, %%mm0 \n\t"
01201 "pand %%mm6, %%mm1 \n\t"
01202 "pand %%mm7, %%mm2 \n\t"
01203 "por %%mm0, %%mm1 \n\t"
01204 "por %%mm2, %%mm1 \n\t"
01205 "movq 6(%1, %%"REG_a"), %%mm0 \n\t"
01206 MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t"
01207 "movq 8(%1, %%"REG_a"), %%mm1 \n\t"
01208 "movq 10(%1, %%"REG_a"), %%mm2 \n\t"
01209 "pand %%mm7, %%mm0 \n\t"
01210 "pand %%mm5, %%mm1 \n\t"
01211 "pand %%mm6, %%mm2 \n\t"
01212 "por %%mm0, %%mm1 \n\t"
01213 "por %%mm2, %%mm1 \n\t"
01214 "movq 14(%1, %%"REG_a"), %%mm0 \n\t"
01215 MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t"
01216 "movq 16(%1, %%"REG_a"), %%mm1 \n\t"
01217 "movq 18(%1, %%"REG_a"), %%mm2 \n\t"
01218 "pand %%mm6, %%mm0 \n\t"
01219 "pand %%mm7, %%mm1 \n\t"
01220 "pand %%mm5, %%mm2 \n\t"
01221 "por %%mm0, %%mm1 \n\t"
01222 "por %%mm2, %%mm1 \n\t"
01223 MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t"
01224 "add $24, %%"REG_a" \n\t"
01225 " js 1b \n\t"
01226 "2: \n\t"
01227 : "+a" (mmx_size)
01228 : "r" (src-mmx_size), "r"(dst-mmx_size)
01229 );
01230
01231 __asm__ volatile(SFENCE:::"memory");
01232 __asm__ volatile(EMMS:::"memory");
01233
01234 if (mmx_size==23) return;
01235
01236 src+= src_size;
01237 dst+= src_size;
01238 src_size= 23-mmx_size;
01239 src-= src_size;
01240 dst-= src_size;
01241 for (i=0; i<src_size; i+=3) {
01242 register uint8_t x;
01243 x = src[i + 2];
01244 dst[i + 1] = src[i + 1];
01245 dst[i + 2] = src[i + 0];
01246 dst[i + 0] = x;
01247 }
01248 }
01249
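// Interleave planar Y/U/V into packed YUY2 (Y0 U Y1 V). vertLumPerChroma
// is the number of luma lines sharing one chroma line (2 for 4:2:0, 1 for
// 4:2:2). The inner loop emits 16 pixels per iteration, so the width is
// expected to be a multiple of 16.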
01250 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01251 int width, int height,
01252 int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
01253 {
01254 int y;
01255 const x86_reg chromWidth= width>>1;
01256 for (y=0; y<height; y++) {
01257
01258 __asm__ volatile(
01259 "xor %%"REG_a", %%"REG_a" \n\t"
01260 ".p2align 4 \n\t"
01261 "1: \n\t"
01262 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
01263 PREFETCH" 32(%2, %%"REG_a") \n\t"
01264 PREFETCH" 32(%3, %%"REG_a") \n\t"
01265 "movq (%2, %%"REG_a"), %%mm0 \n\t"
01266 "movq %%mm0, %%mm2 \n\t"
01267 "movq (%3, %%"REG_a"), %%mm1 \n\t"
01268 "punpcklbw %%mm1, %%mm0 \n\t"
01269 "punpckhbw %%mm1, %%mm2 \n\t"
01270
01271 "movq (%1, %%"REG_a",2), %%mm3 \n\t"
01272 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t"
01273 "movq %%mm3, %%mm4 \n\t"
01274 "movq %%mm5, %%mm6 \n\t"
01275 "punpcklbw %%mm0, %%mm3 \n\t"
01276 "punpckhbw %%mm0, %%mm4 \n\t"
01277 "punpcklbw %%mm2, %%mm5 \n\t"
01278 "punpckhbw %%mm2, %%mm6 \n\t"
01279
01280 MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t"
01281 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
01282 MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t"
01283 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
01284
01285 "add $8, %%"REG_a" \n\t"
01286 "cmp %4, %%"REG_a" \n\t"
01287 " jb 1b \n\t"
01288 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
01289 : "%"REG_a
01290 );
01291 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
01292 usrc += chromStride;
01293 vsrc += chromStride;
01294 }
01295 ysrc += lumStride;
01296 dst += dstStride;
01297 }
01298 __asm__(EMMS" \n\t"
01299 SFENCE" \n\t"
01300 :::"memory");
01301 }
01302
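// YV12 (4:2:0 planar) -> packed YUY2; each chroma line is reused for two
// luma lines without interpolation.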
01307 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01308 int width, int height,
01309 int lumStride, int chromStride, int dstStride)
01310 {
01311
01312 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
01313 }
01314
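// Same as yuvPlanartoyuy2, but emitting UYVY (U Y0 V Y1) byte order.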
01315 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01316 int width, int height,
01317 int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
01318 {
01319 int y;
01320 const x86_reg chromWidth= width>>1;
01321 for (y=0; y<height; y++) {
01322
01323 __asm__ volatile(
01324 "xor %%"REG_a", %%"REG_a" \n\t"
01325 ".p2align 4 \n\t"
01326 "1: \n\t"
01327 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
01328 PREFETCH" 32(%2, %%"REG_a") \n\t"
01329 PREFETCH" 32(%3, %%"REG_a") \n\t"
01330 "movq (%2, %%"REG_a"), %%mm0 \n\t"
01331 "movq %%mm0, %%mm2 \n\t"
01332 "movq (%3, %%"REG_a"), %%mm1 \n\t"
01333 "punpcklbw %%mm1, %%mm0 \n\t"
01334 "punpckhbw %%mm1, %%mm2 \n\t"
01335
01336 "movq (%1, %%"REG_a",2), %%mm3 \n\t"
01337 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t"
01338 "movq %%mm0, %%mm4 \n\t"
01339 "movq %%mm2, %%mm6 \n\t"
01340 "punpcklbw %%mm3, %%mm0 \n\t"
01341 "punpckhbw %%mm3, %%mm4 \n\t"
01342 "punpcklbw %%mm5, %%mm2 \n\t"
01343 "punpckhbw %%mm5, %%mm6 \n\t"
01344
01345 MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t"
01346 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
01347 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t"
01348 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
01349
01350 "add $8, %%"REG_a" \n\t"
01351 "cmp %4, %%"REG_a" \n\t"
01352 " jb 1b \n\t"
01353 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
01354 : "%"REG_a
01355 );
01356 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
01357 usrc += chromStride;
01358 vsrc += chromStride;
01359 }
01360 ysrc += lumStride;
01361 dst += dstStride;
01362 }
01363 __asm__(EMMS" \n\t"
01364 SFENCE" \n\t"
01365 :::"memory");
01366 }
01367
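// YV12 (4:2:0 planar) -> packed UYVY.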
01372 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01373 int width, int height,
01374 int lumStride, int chromStride, int dstStride)
01375 {
01376
01377 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
01378 }
01379
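// YUV 4:2:2 planar -> packed UYVY (one chroma line per luma line).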
01383 static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01384 int width, int height,
01385 int lumStride, int chromStride, int dstStride)
01386 {
01387 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
01388 }
01389
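// YUV 4:2:2 planar -> packed YUY2.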
01393 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01394 int width, int height,
01395 int lumStride, int chromStride, int dstStride)
01396 {
01397 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
01398 }
01399
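// Packed YUY2 -> planar YV12. The first line of each pair supplies both
// luma and the shared U/V samples; the second line supplies only luma.
// Width is expected to be a multiple of 16 and height a multiple of 2.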
01404 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01405 int width, int height,
01406 int lumStride, int chromStride, int srcStride)
01407 {
01408 int y;
01409 const x86_reg chromWidth= width>>1;
01410 for (y=0; y<height; y+=2) {
01411 __asm__ volatile(
01412 "xor %%"REG_a", %%"REG_a" \n\t"
01413 "pcmpeqw %%mm7, %%mm7 \n\t"
01414 "psrlw $8, %%mm7 \n\t"
01415 ".p2align 4 \n\t"
01416 "1: \n\t"
01417 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
01418 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01419 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01420 "movq %%mm0, %%mm2 \n\t"
01421 "movq %%mm1, %%mm3 \n\t"
01422 "psrlw $8, %%mm0 \n\t"
01423 "psrlw $8, %%mm1 \n\t"
01424 "pand %%mm7, %%mm2 \n\t"
01425 "pand %%mm7, %%mm3 \n\t"
01426 "packuswb %%mm1, %%mm0 \n\t"
01427 "packuswb %%mm3, %%mm2 \n\t"
01428
01429 MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"
01430
01431 "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t"
01432 "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t"
01433 "movq %%mm1, %%mm3 \n\t"
01434 "movq %%mm2, %%mm4 \n\t"
01435 "psrlw $8, %%mm1 \n\t"
01436 "psrlw $8, %%mm2 \n\t"
01437 "pand %%mm7, %%mm3 \n\t"
01438 "pand %%mm7, %%mm4 \n\t"
01439 "packuswb %%mm2, %%mm1 \n\t"
01440 "packuswb %%mm4, %%mm3 \n\t"
01441
01442 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"
01443
01444 "movq %%mm0, %%mm2 \n\t"
01445 "movq %%mm1, %%mm3 \n\t"
01446 "psrlw $8, %%mm0 \n\t"
01447 "psrlw $8, %%mm1 \n\t"
01448 "pand %%mm7, %%mm2 \n\t"
01449 "pand %%mm7, %%mm3 \n\t"
01450 "packuswb %%mm1, %%mm0 \n\t"
01451 "packuswb %%mm3, %%mm2 \n\t"
01452
01453 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
01454 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
01455
01456 "add $8, %%"REG_a" \n\t"
01457 "cmp %4, %%"REG_a" \n\t"
01458 " jb 1b \n\t"
01459 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01460 : "memory", "%"REG_a
01461 );
01462
01463 ydst += lumStride;
01464 src += srcStride;
01465
01466 __asm__ volatile(
01467 "xor %%"REG_a", %%"REG_a" \n\t"
01468 ".p2align 4 \n\t"
01469 "1: \n\t"
01470 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
01471 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01472 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01473 "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t"
01474 "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t"
01475 "pand %%mm7, %%mm0 \n\t"
01476 "pand %%mm7, %%mm1 \n\t"
01477 "pand %%mm7, %%mm2 \n\t"
01478 "pand %%mm7, %%mm3 \n\t"
01479 "packuswb %%mm1, %%mm0 \n\t"
01480 "packuswb %%mm3, %%mm2 \n\t"
01481
01482 MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
01483 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"
01484
01485 "add $8, %%"REG_a" \n\t"
01486 "cmp %4, %%"REG_a" \n\t"
01487 " jb 1b \n\t"
01488
01489 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01490 : "memory", "%"REG_a
01491 );
01492 udst += chromStride;
01493 vdst += chromStride;
01494 ydst += lumStride;
01495 src += srcStride;
01496 }
01497 __asm__ volatile(EMMS" \n\t"
01498 SFENCE" \n\t"
01499 :::"memory");
01500 }
01501 #endif
01502
01503 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
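// Upscale one plane to twice the width and height using linear
// interpolation: the PAVGB chains give 3:1/1:3 weights, the first and last
// output rows are interpolated horizontally only, and the C loop covers the
// rightmost pixels not handled by the MMX loop.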
01504 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
01505 {
01506 int x,y;
01507
01508 dst[0]= src[0];
01509
01510
01511 for (x=0; x<srcWidth-1; x++) {
01512 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
01513 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
01514 }
01515 dst[2*srcWidth-1]= src[srcWidth-1];
01516
01517 dst+= dstStride;
01518
01519 for (y=1; y<srcHeight; y++) {
01520 const x86_reg mmxSize= srcWidth&~15;
01521 __asm__ volatile(
01522 "mov %4, %%"REG_a" \n\t"
01523 "movq "MANGLE(mmx_ff)", %%mm0 \n\t"
01524 "movq (%0, %%"REG_a"), %%mm4 \n\t"
01525 "movq %%mm4, %%mm2 \n\t"
01526 "psllq $8, %%mm4 \n\t"
01527 "pand %%mm0, %%mm2 \n\t"
01528 "por %%mm2, %%mm4 \n\t"
01529 "movq (%1, %%"REG_a"), %%mm5 \n\t"
01530 "movq %%mm5, %%mm3 \n\t"
01531 "psllq $8, %%mm5 \n\t"
01532 "pand %%mm0, %%mm3 \n\t"
01533 "por %%mm3, %%mm5 \n\t"
01534 "1: \n\t"
01535 "movq (%0, %%"REG_a"), %%mm0 \n\t"
01536 "movq (%1, %%"REG_a"), %%mm1 \n\t"
01537 "movq 1(%0, %%"REG_a"), %%mm2 \n\t"
01538 "movq 1(%1, %%"REG_a"), %%mm3 \n\t"
01539 PAVGB" %%mm0, %%mm5 \n\t"
01540 PAVGB" %%mm0, %%mm3 \n\t"
01541 PAVGB" %%mm0, %%mm5 \n\t"
01542 PAVGB" %%mm0, %%mm3 \n\t"
01543 PAVGB" %%mm1, %%mm4 \n\t"
01544 PAVGB" %%mm1, %%mm2 \n\t"
01545 PAVGB" %%mm1, %%mm4 \n\t"
01546 PAVGB" %%mm1, %%mm2 \n\t"
01547 "movq %%mm5, %%mm7 \n\t"
01548 "movq %%mm4, %%mm6 \n\t"
01549 "punpcklbw %%mm3, %%mm5 \n\t"
01550 "punpckhbw %%mm3, %%mm7 \n\t"
01551 "punpcklbw %%mm2, %%mm4 \n\t"
01552 "punpckhbw %%mm2, %%mm6 \n\t"
01553 MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t"
01554 MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t"
01555 MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t"
01556 MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t"
01557 "add $8, %%"REG_a" \n\t"
01558 "movq -1(%0, %%"REG_a"), %%mm4 \n\t"
01559 "movq -1(%1, %%"REG_a"), %%mm5 \n\t"
01560 " js 1b \n\t"
01561 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
01562 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
01563 "g" (-mmxSize)
01564 : "%"REG_a
01565 );
01566
01567 for (x=mmxSize-1; x<srcWidth-1; x++) {
01568 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
01569 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
01570 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
01571 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
01572 }
01573 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2;
01574 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
01575
01576 dst+=dstStride*2;
01577 src+=srcStride;
01578 }
01579
01580
01581 dst[0]= src[0];
01582
01583 for (x=0; x<srcWidth-1; x++) {
01584 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
01585 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
01586 }
01587 dst[2*srcWidth-1]= src[srcWidth-1];
01588
01589 __asm__ volatile(EMMS" \n\t"
01590 SFENCE" \n\t"
01591 :::"memory");
01592 }
01593 #endif
01594
01595 #if !COMPILE_TEMPLATE_AMD3DNOW
01596
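// Packed UYVY -> planar YV12; same structure as yuy2toyv12, but luma sits
// in the high byte of each 16-bit unit. Chroma is taken only from the
// first line of each pair.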
01602 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01603 int width, int height,
01604 int lumStride, int chromStride, int srcStride)
01605 {
01606 int y;
01607 const x86_reg chromWidth= width>>1;
01608 for (y=0; y<height; y+=2) {
01609 __asm__ volatile(
01610 "xor %%"REG_a", %%"REG_a" \n\t"
01611 "pcmpeqw %%mm7, %%mm7 \n\t"
01612 "psrlw $8, %%mm7 \n\t"
01613 ".p2align 4 \n\t"
01614 "1: \n\t"
01615 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
01616 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01617 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01618 "movq %%mm0, %%mm2 \n\t"
01619 "movq %%mm1, %%mm3 \n\t"
01620 "pand %%mm7, %%mm0 \n\t"
01621 "pand %%mm7, %%mm1 \n\t"
01622 "psrlw $8, %%mm2 \n\t"
01623 "psrlw $8, %%mm3 \n\t"
01624 "packuswb %%mm1, %%mm0 \n\t"
01625 "packuswb %%mm3, %%mm2 \n\t"
01626
01627 MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"
01628
01629 "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t"
01630 "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t"
01631 "movq %%mm1, %%mm3 \n\t"
01632 "movq %%mm2, %%mm4 \n\t"
01633 "pand %%mm7, %%mm1 \n\t"
01634 "pand %%mm7, %%mm2 \n\t"
01635 "psrlw $8, %%mm3 \n\t"
01636 "psrlw $8, %%mm4 \n\t"
01637 "packuswb %%mm2, %%mm1 \n\t"
01638 "packuswb %%mm4, %%mm3 \n\t"
01639
01640 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"
01641
01642 "movq %%mm0, %%mm2 \n\t"
01643 "movq %%mm1, %%mm3 \n\t"
01644 "psrlw $8, %%mm0 \n\t"
01645 "psrlw $8, %%mm1 \n\t"
01646 "pand %%mm7, %%mm2 \n\t"
01647 "pand %%mm7, %%mm3 \n\t"
01648 "packuswb %%mm1, %%mm0 \n\t"
01649 "packuswb %%mm3, %%mm2 \n\t"
01650
01651 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
01652 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
01653
01654 "add $8, %%"REG_a" \n\t"
01655 "cmp %4, %%"REG_a" \n\t"
01656 " jb 1b \n\t"
01657 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01658 : "memory", "%"REG_a
01659 );
01660
01661 ydst += lumStride;
01662 src += srcStride;
01663
01664 __asm__ volatile(
01665 "xor %%"REG_a", %%"REG_a" \n\t"
01666 ".p2align 4 \n\t"
01667 "1: \n\t"
01668 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
01669 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01670 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01671 "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t"
01672 "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t"
01673 "psrlw $8, %%mm0 \n\t"
01674 "psrlw $8, %%mm1 \n\t"
01675 "psrlw $8, %%mm2 \n\t"
01676 "psrlw $8, %%mm3 \n\t"
01677 "packuswb %%mm1, %%mm0 \n\t"
01678 "packuswb %%mm3, %%mm2 \n\t"
01679
01680 MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
01681 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"
01682
01683 "add $8, %%"REG_a" \n\t"
01684 "cmp %4, %%"REG_a" \n\t"
01685 " jb 1b \n\t"
01686
01687 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01688 : "memory", "%"REG_a
01689 );
01690 udst += chromStride;
01691 vdst += chromStride;
01692 ydst += lumStride;
01693 src += srcStride;
01694 }
01695 __asm__ volatile(EMMS" \n\t"
01696 SFENCE" \n\t"
01697 :::"memory");
01698 }
01699 #endif
01700
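// Packed 24-bit RGB -> planar YV12. Luma is computed per pixel with
// pmaddwd against ff_bgr2YCoeff; chroma is computed from 2x2 averages with
// ff_bgr2UCoeff/ff_bgr2VCoeff. The rows that do not fit the two-line loop
// are finished by rgb24toyv12_c().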
01708 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01709 int width, int height,
01710 int lumStride, int chromStride, int srcStride)
01711 {
01712 int y;
01713 const x86_reg chromWidth= width>>1;
01714 for (y=0; y<height-2; y+=2) {
01715 int i;
01716 for (i=0; i<2; i++) {
01717 __asm__ volatile(
01718 "mov %2, %%"REG_a" \n\t"
01719 "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
01720 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
01721 "pxor %%mm7, %%mm7 \n\t"
01722 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
01723 ".p2align 4 \n\t"
01724 "1: \n\t"
01725 PREFETCH" 64(%0, %%"REG_d") \n\t"
01726 "movd (%0, %%"REG_d"), %%mm0 \n\t"
01727 "movd 3(%0, %%"REG_d"), %%mm1 \n\t"
01728 "punpcklbw %%mm7, %%mm0 \n\t"
01729 "punpcklbw %%mm7, %%mm1 \n\t"
01730 "movd 6(%0, %%"REG_d"), %%mm2 \n\t"
01731 "movd 9(%0, %%"REG_d"), %%mm3 \n\t"
01732 "punpcklbw %%mm7, %%mm2 \n\t"
01733 "punpcklbw %%mm7, %%mm3 \n\t"
01734 "pmaddwd %%mm6, %%mm0 \n\t"
01735 "pmaddwd %%mm6, %%mm1 \n\t"
01736 "pmaddwd %%mm6, %%mm2 \n\t"
01737 "pmaddwd %%mm6, %%mm3 \n\t"
01738 #ifndef FAST_BGR2YV12
01739 "psrad $8, %%mm0 \n\t"
01740 "psrad $8, %%mm1 \n\t"
01741 "psrad $8, %%mm2 \n\t"
01742 "psrad $8, %%mm3 \n\t"
01743 #endif
01744 "packssdw %%mm1, %%mm0 \n\t"
01745 "packssdw %%mm3, %%mm2 \n\t"
01746 "pmaddwd %%mm5, %%mm0 \n\t"
01747 "pmaddwd %%mm5, %%mm2 \n\t"
01748 "packssdw %%mm2, %%mm0 \n\t"
01749 "psraw $7, %%mm0 \n\t"
01750
01751 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
01752 "movd 15(%0, %%"REG_d"), %%mm1 \n\t"
01753 "punpcklbw %%mm7, %%mm4 \n\t"
01754 "punpcklbw %%mm7, %%mm1 \n\t"
01755 "movd 18(%0, %%"REG_d"), %%mm2 \n\t"
01756 "movd 21(%0, %%"REG_d"), %%mm3 \n\t"
01757 "punpcklbw %%mm7, %%mm2 \n\t"
01758 "punpcklbw %%mm7, %%mm3 \n\t"
01759 "pmaddwd %%mm6, %%mm4 \n\t"
01760 "pmaddwd %%mm6, %%mm1 \n\t"
01761 "pmaddwd %%mm6, %%mm2 \n\t"
01762 "pmaddwd %%mm6, %%mm3 \n\t"
01763 #ifndef FAST_BGR2YV12
01764 "psrad $8, %%mm4 \n\t"
01765 "psrad $8, %%mm1 \n\t"
01766 "psrad $8, %%mm2 \n\t"
01767 "psrad $8, %%mm3 \n\t"
01768 #endif
01769 "packssdw %%mm1, %%mm4 \n\t"
01770 "packssdw %%mm3, %%mm2 \n\t"
01771 "pmaddwd %%mm5, %%mm4 \n\t"
01772 "pmaddwd %%mm5, %%mm2 \n\t"
01773 "add $24, %%"REG_d" \n\t"
01774 "packssdw %%mm2, %%mm4 \n\t"
01775 "psraw $7, %%mm4 \n\t"
01776
01777 "packuswb %%mm4, %%mm0 \n\t"
01778 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t"
01779
01780 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
01781 "add $8, %%"REG_a" \n\t"
01782 " js 1b \n\t"
01783 : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
01784 : "%"REG_a, "%"REG_d
01785 );
01786 ydst += lumStride;
01787 src += srcStride;
01788 }
01789 src -= srcStride*2;
01790 __asm__ volatile(
01791 "mov %4, %%"REG_a" \n\t"
01792 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
01793 "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
01794 "pxor %%mm7, %%mm7 \n\t"
01795 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
01796 "add %%"REG_d", %%"REG_d" \n\t"
01797 ".p2align 4 \n\t"
01798 "1: \n\t"
01799 PREFETCH" 64(%0, %%"REG_d") \n\t"
01800 PREFETCH" 64(%1, %%"REG_d") \n\t"
01801 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
01802 "movq (%0, %%"REG_d"), %%mm0 \n\t"
01803 "movq (%1, %%"REG_d"), %%mm1 \n\t"
01804 "movq 6(%0, %%"REG_d"), %%mm2 \n\t"
01805 "movq 6(%1, %%"REG_d"), %%mm3 \n\t"
01806 PAVGB" %%mm1, %%mm0 \n\t"
01807 PAVGB" %%mm3, %%mm2 \n\t"
01808 "movq %%mm0, %%mm1 \n\t"
01809 "movq %%mm2, %%mm3 \n\t"
01810 "psrlq $24, %%mm0 \n\t"
01811 "psrlq $24, %%mm2 \n\t"
01812 PAVGB" %%mm1, %%mm0 \n\t"
01813 PAVGB" %%mm3, %%mm2 \n\t"
01814 "punpcklbw %%mm7, %%mm0 \n\t"
01815 "punpcklbw %%mm7, %%mm2 \n\t"
01816 #else
01817 "movd (%0, %%"REG_d"), %%mm0 \n\t"
01818 "movd (%1, %%"REG_d"), %%mm1 \n\t"
01819 "movd 3(%0, %%"REG_d"), %%mm2 \n\t"
01820 "movd 3(%1, %%"REG_d"), %%mm3 \n\t"
01821 "punpcklbw %%mm7, %%mm0 \n\t"
01822 "punpcklbw %%mm7, %%mm1 \n\t"
01823 "punpcklbw %%mm7, %%mm2 \n\t"
01824 "punpcklbw %%mm7, %%mm3 \n\t"
01825 "paddw %%mm1, %%mm0 \n\t"
01826 "paddw %%mm3, %%mm2 \n\t"
01827 "paddw %%mm2, %%mm0 \n\t"
01828 "movd 6(%0, %%"REG_d"), %%mm4 \n\t"
01829 "movd 6(%1, %%"REG_d"), %%mm1 \n\t"
01830 "movd 9(%0, %%"REG_d"), %%mm2 \n\t"
01831 "movd 9(%1, %%"REG_d"), %%mm3 \n\t"
01832 "punpcklbw %%mm7, %%mm4 \n\t"
01833 "punpcklbw %%mm7, %%mm1 \n\t"
01834 "punpcklbw %%mm7, %%mm2 \n\t"
01835 "punpcklbw %%mm7, %%mm3 \n\t"
01836 "paddw %%mm1, %%mm4 \n\t"
01837 "paddw %%mm3, %%mm2 \n\t"
01838 "paddw %%mm4, %%mm2 \n\t"
01839 "psrlw $2, %%mm0 \n\t"
01840 "psrlw $2, %%mm2 \n\t"
01841 #endif
01842 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
01843 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
01844
01845 "pmaddwd %%mm0, %%mm1 \n\t"
01846 "pmaddwd %%mm2, %%mm3 \n\t"
01847 "pmaddwd %%mm6, %%mm0 \n\t"
01848 "pmaddwd %%mm6, %%mm2 \n\t"
01849 #ifndef FAST_BGR2YV12
01850 "psrad $8, %%mm0 \n\t"
01851 "psrad $8, %%mm1 \n\t"
01852 "psrad $8, %%mm2 \n\t"
01853 "psrad $8, %%mm3 \n\t"
01854 #endif
01855 "packssdw %%mm2, %%mm0 \n\t"
01856 "packssdw %%mm3, %%mm1 \n\t"
01857 "pmaddwd %%mm5, %%mm0 \n\t"
01858 "pmaddwd %%mm5, %%mm1 \n\t"
01859 "packssdw %%mm1, %%mm0 \n\t"
01860 "psraw $7, %%mm0 \n\t"
01861
01862 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
01863 "movq 12(%0, %%"REG_d"), %%mm4 \n\t"
01864 "movq 12(%1, %%"REG_d"), %%mm1 \n\t"
01865 "movq 18(%0, %%"REG_d"), %%mm2 \n\t"
01866 "movq 18(%1, %%"REG_d"), %%mm3 \n\t"
01867 PAVGB" %%mm1, %%mm4 \n\t"
01868 PAVGB" %%mm3, %%mm2 \n\t"
01869 "movq %%mm4, %%mm1 \n\t"
01870 "movq %%mm2, %%mm3 \n\t"
01871 "psrlq $24, %%mm4 \n\t"
01872 "psrlq $24, %%mm2 \n\t"
01873 PAVGB" %%mm1, %%mm4 \n\t"
01874 PAVGB" %%mm3, %%mm2 \n\t"
01875 "punpcklbw %%mm7, %%mm4 \n\t"
01876 "punpcklbw %%mm7, %%mm2 \n\t"
01877 #else
01878 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
01879 "movd 12(%1, %%"REG_d"), %%mm1 \n\t"
01880 "movd 15(%0, %%"REG_d"), %%mm2 \n\t"
01881 "movd 15(%1, %%"REG_d"), %%mm3 \n\t"
01882 "punpcklbw %%mm7, %%mm4 \n\t"
01883 "punpcklbw %%mm7, %%mm1 \n\t"
01884 "punpcklbw %%mm7, %%mm2 \n\t"
01885 "punpcklbw %%mm7, %%mm3 \n\t"
01886 "paddw %%mm1, %%mm4 \n\t"
01887 "paddw %%mm3, %%mm2 \n\t"
01888 "paddw %%mm2, %%mm4 \n\t"
01889 "movd 18(%0, %%"REG_d"), %%mm5 \n\t"
01890 "movd 18(%1, %%"REG_d"), %%mm1 \n\t"
01891 "movd 21(%0, %%"REG_d"), %%mm2 \n\t"
01892 "movd 21(%1, %%"REG_d"), %%mm3 \n\t"
01893 "punpcklbw %%mm7, %%mm5 \n\t"
01894 "punpcklbw %%mm7, %%mm1 \n\t"
01895 "punpcklbw %%mm7, %%mm2 \n\t"
01896 "punpcklbw %%mm7, %%mm3 \n\t"
01897 "paddw %%mm1, %%mm5 \n\t"
01898 "paddw %%mm3, %%mm2 \n\t"
01899 "paddw %%mm5, %%mm2 \n\t"
01900 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
01901 "psrlw $2, %%mm4 \n\t"
01902 "psrlw $2, %%mm2 \n\t"
01903 #endif
01904 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
01905 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
01906
01907 "pmaddwd %%mm4, %%mm1 \n\t"
01908 "pmaddwd %%mm2, %%mm3 \n\t"
01909 "pmaddwd %%mm6, %%mm4 \n\t"
01910 "pmaddwd %%mm6, %%mm2 \n\t"
01911 #ifndef FAST_BGR2YV12
01912 "psrad $8, %%mm4 \n\t"
01913 "psrad $8, %%mm1 \n\t"
01914 "psrad $8, %%mm2 \n\t"
01915 "psrad $8, %%mm3 \n\t"
01916 #endif
01917 "packssdw %%mm2, %%mm4 \n\t"
01918 "packssdw %%mm3, %%mm1 \n\t"
01919 "pmaddwd %%mm5, %%mm4 \n\t"
01920 "pmaddwd %%mm5, %%mm1 \n\t"
01921 "add $24, %%"REG_d" \n\t"
01922 "packssdw %%mm1, %%mm4 \n\t"
01923 "psraw $7, %%mm4 \n\t"
01924
01925 "movq %%mm0, %%mm1 \n\t"
01926 "punpckldq %%mm4, %%mm0 \n\t"
01927 "punpckhdq %%mm4, %%mm1 \n\t"
01928 "packsswb %%mm1, %%mm0 \n\t"
01929 "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t"
01930 "movd %%mm0, (%2, %%"REG_a") \n\t"
01931 "punpckhdq %%mm0, %%mm0 \n\t"
01932 "movd %%mm0, (%3, %%"REG_a") \n\t"
01933 "add $4, %%"REG_a" \n\t"
01934 " js 1b \n\t"
01935 : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
01936 : "%"REG_a, "%"REG_d
01937 );
01938
01939 udst += chromStride;
01940 vdst += chromStride;
01941 src += srcStride*2;
01942 }
01943
01944 __asm__ volatile(EMMS" \n\t"
01945 SFENCE" \n\t"
01946 :::"memory");
01947
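/* Rows at the bottom of the image that the two-line SIMD loop above did not
   cover (height - y of them) are converted by the plain C fallback. */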
01948 rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride);
01949 }
01950 #endif
01951
01952 #if !COMPILE_TEMPLATE_AMD3DNOW
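/*
 * interleaveBytes(): byte-wise interleave of two planes, so that
 * dest[2*w] = src1[w] and dest[2*w+1] = src2[w] on every row (e.g. merging
 * separate U and V planes into one packed UV plane such as NV12's).
 *
 * In the SSE2 path the same 16 bytes of src1 are loaded into both xmm0 and
 * xmm1 on purpose: punpcklbw with src2 yields the low half of the interleaved
 * result and punpckhbw the high half.  Note the alignment and size
 * assumptions (not checked here): movdqa/movntdq require 16-byte-aligned
 * source and destination rows, and the "width-15" loop bound assumes
 * width >= 16; callers are expected to guarantee both.
 *
 * Hypothetical usage through the function pointer installed by
 * RENAME(rgb2rgb_init)() below (plane pointers and strides are illustrative):
 *     interleaveBytes(uPlane, vPlane, nv12uv, chromaW, chromaH,
 *                     uStride, vStride, nv12Stride);
 */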
01953 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
01954 int width, int height, int src1Stride,
01955 int src2Stride, int dstStride)
01956 {
01957 int h;
01958
01959 for (h=0; h < height; h++) {
01960 int w;
01961
01962 #if COMPILE_TEMPLATE_SSE2
01963 __asm__(
01964 "xor %%"REG_a", %%"REG_a" \n\t"
01965 "1: \n\t"
01966 PREFETCH" 64(%1, %%"REG_a") \n\t"
01967 PREFETCH" 64(%2, %%"REG_a") \n\t"
01968 "movdqa (%1, %%"REG_a"), %%xmm0 \n\t"
01969 "movdqa (%1, %%"REG_a"), %%xmm1 \n\t"
01970 "movdqa (%2, %%"REG_a"), %%xmm2 \n\t"
01971 "punpcklbw %%xmm2, %%xmm0 \n\t"
01972 "punpckhbw %%xmm2, %%xmm1 \n\t"
01973 "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t"
01974 "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t"
01975 "add $16, %%"REG_a" \n\t"
01976 "cmp %3, %%"REG_a" \n\t"
01977 " jb 1b \n\t"
01978 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
01979 : "memory", "%"REG_a""
01980 );
01981 #else
01982 __asm__(
01983 "xor %%"REG_a", %%"REG_a" \n\t"
01984 "1: \n\t"
01985 PREFETCH" 64(%1, %%"REG_a") \n\t"
01986 PREFETCH" 64(%2, %%"REG_a") \n\t"
01987 "movq (%1, %%"REG_a"), %%mm0 \n\t"
01988 "movq 8(%1, %%"REG_a"), %%mm2 \n\t"
01989 "movq %%mm0, %%mm1 \n\t"
01990 "movq %%mm2, %%mm3 \n\t"
01991 "movq (%2, %%"REG_a"), %%mm4 \n\t"
01992 "movq 8(%2, %%"REG_a"), %%mm5 \n\t"
01993 "punpcklbw %%mm4, %%mm0 \n\t"
01994 "punpckhbw %%mm4, %%mm1 \n\t"
01995 "punpcklbw %%mm5, %%mm2 \n\t"
01996 "punpckhbw %%mm5, %%mm3 \n\t"
01997 MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t"
01998 MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t"
01999 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t"
02000 MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t"
02001 "add $16, %%"REG_a" \n\t"
02002 "cmp %3, %%"REG_a" \n\t"
02003 " jb 1b \n\t"
02004 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
02005 : "memory", "%"REG_a
02006 );
02007 #endif
02008 for (w= (width&(~15)); w < width; w++) {
02009 dest[2*w+0] = src1[w];
02010 dest[2*w+1] = src2[w];
02011 }
02012 dest += dstStride;
02013 src1 += src1Stride;
02014 src2 += src2Stride;
02015 }
02016 __asm__(
02017 EMMS" \n\t"
02018 SFENCE" \n\t"
02019 ::: "memory"
02020 );
02021 }
02022 #endif
02023
02024 #if !COMPILE_TEMPLATE_SSE2
02025 #if !COMPILE_TEMPLATE_AMD3DNOW
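/*
 * vu9_to_vu12(): nearest-neighbour 2x upscale of two chroma planes, as needed
 * when going from quarter-resolution chroma (YVU9) to half-resolution chroma
 * (YV12).  Each source byte is duplicated horizontally (d[2x] = d[2x+1]) and
 * each source row is reused for two destination rows (srcStride*(y>>1)).
 */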
02026 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
02027 uint8_t *dst1, uint8_t *dst2,
02028 int width, int height,
02029 int srcStride1, int srcStride2,
02030 int dstStride1, int dstStride2)
02031 {
02032 x86_reg y;
02033 int x,w,h;
02034 w=width/2; h=height/2;
02035 __asm__ volatile(
02036 PREFETCH" %0 \n\t"
02037 PREFETCH" %1 \n\t"
02038 ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
02039 for (y=0;y<h;y++) {
02040 const uint8_t* s1=src1+srcStride1*(y>>1);
02041 uint8_t* d=dst1+dstStride1*y;
02042 x=0;
02043 for (;x<w-31;x+=32) {
02044 __asm__ volatile(
02045 PREFETCH" 32%1 \n\t"
02046 "movq %1, %%mm0 \n\t"
02047 "movq 8%1, %%mm2 \n\t"
02048 "movq 16%1, %%mm4 \n\t"
02049 "movq 24%1, %%mm6 \n\t"
02050 "movq %%mm0, %%mm1 \n\t"
02051 "movq %%mm2, %%mm3 \n\t"
02052 "movq %%mm4, %%mm5 \n\t"
02053 "movq %%mm6, %%mm7 \n\t"
02054 "punpcklbw %%mm0, %%mm0 \n\t"
02055 "punpckhbw %%mm1, %%mm1 \n\t"
02056 "punpcklbw %%mm2, %%mm2 \n\t"
02057 "punpckhbw %%mm3, %%mm3 \n\t"
02058 "punpcklbw %%mm4, %%mm4 \n\t"
02059 "punpckhbw %%mm5, %%mm5 \n\t"
02060 "punpcklbw %%mm6, %%mm6 \n\t"
02061 "punpckhbw %%mm7, %%mm7 \n\t"
02062 MOVNTQ" %%mm0, %0 \n\t"
02063 MOVNTQ" %%mm1, 8%0 \n\t"
02064 MOVNTQ" %%mm2, 16%0 \n\t"
02065 MOVNTQ" %%mm3, 24%0 \n\t"
02066 MOVNTQ" %%mm4, 32%0 \n\t"
02067 MOVNTQ" %%mm5, 40%0 \n\t"
02068 MOVNTQ" %%mm6, 48%0 \n\t"
02069 MOVNTQ" %%mm7, 56%0"
02070 :"=m"(d[2*x])
02071 :"m"(s1[x])
02072 :"memory");
02073 }
02074 for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
02075 }
02076 for (y=0;y<h;y++) {
02077 const uint8_t* s2=src2+srcStride2*(y>>1);
02078 uint8_t* d=dst2+dstStride2*y;
02079 x=0;
02080 for (;x<w-31;x+=32) {
02081 __asm__ volatile(
02082 PREFETCH" 32%1 \n\t"
02083 "movq %1, %%mm0 \n\t"
02084 "movq 8%1, %%mm2 \n\t"
02085 "movq 16%1, %%mm4 \n\t"
02086 "movq 24%1, %%mm6 \n\t"
02087 "movq %%mm0, %%mm1 \n\t"
02088 "movq %%mm2, %%mm3 \n\t"
02089 "movq %%mm4, %%mm5 \n\t"
02090 "movq %%mm6, %%mm7 \n\t"
02091 "punpcklbw %%mm0, %%mm0 \n\t"
02092 "punpckhbw %%mm1, %%mm1 \n\t"
02093 "punpcklbw %%mm2, %%mm2 \n\t"
02094 "punpckhbw %%mm3, %%mm3 \n\t"
02095 "punpcklbw %%mm4, %%mm4 \n\t"
02096 "punpckhbw %%mm5, %%mm5 \n\t"
02097 "punpcklbw %%mm6, %%mm6 \n\t"
02098 "punpckhbw %%mm7, %%mm7 \n\t"
02099 MOVNTQ" %%mm0, %0 \n\t"
02100 MOVNTQ" %%mm1, 8%0 \n\t"
02101 MOVNTQ" %%mm2, 16%0 \n\t"
02102 MOVNTQ" %%mm3, 24%0 \n\t"
02103 MOVNTQ" %%mm4, 32%0 \n\t"
02104 MOVNTQ" %%mm5, 40%0 \n\t"
02105 MOVNTQ" %%mm6, 48%0 \n\t"
02106 MOVNTQ" %%mm7, 56%0"
02107 :"=m"(d[2*x])
02108 :"m"(s2[x])
02109 :"memory");
02110 }
02111 for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
02112 }
02113 __asm__(
02114 EMMS" \n\t"
02115 SFENCE" \n\t"
02116 ::: "memory"
02117 );
02118 }
02119
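/*
 * yvu9_to_yuy2(): pack planar YVU9 (chroma subsampled 4:1 both horizontally
 * and vertically) into packed YUY2 (Y U Y V).  Each chroma sample is repeated
 * across four luma samples within a row, and the same chroma row is reused
 * for four output rows (srcStride*(y>>2)).
 */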
02120 static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
02121 uint8_t *dst,
02122 int width, int height,
02123 int srcStride1, int srcStride2,
02124 int srcStride3, int dstStride)
02125 {
02126 x86_reg x;
02127 int y,w,h;
02128 w=width/2; h=height;
02129 for (y=0;y<h;y++) {
02130 const uint8_t* yp=src1+srcStride1*y;
02131 const uint8_t* up=src2+srcStride2*(y>>2);
02132 const uint8_t* vp=src3+srcStride3*(y>>2);
02133 uint8_t* d=dst+dstStride*y;
02134 x=0;
02135 for (;x<w-7;x+=8) {
02136 __asm__ volatile(
02137 PREFETCH" 32(%1, %0) \n\t"
02138 PREFETCH" 32(%2, %0) \n\t"
02139 PREFETCH" 32(%3, %0) \n\t"
02140 "movq (%1, %0, 4), %%mm0 \n\t"
02141 "movq (%2, %0), %%mm1 \n\t"
02142 "movq (%3, %0), %%mm2 \n\t"
02143 "movq %%mm0, %%mm3 \n\t"
02144 "movq %%mm1, %%mm4 \n\t"
02145 "movq %%mm2, %%mm5 \n\t"
02146 "punpcklbw %%mm1, %%mm1 \n\t"
02147 "punpcklbw %%mm2, %%mm2 \n\t"
02148 "punpckhbw %%mm4, %%mm4 \n\t"
02149 "punpckhbw %%mm5, %%mm5 \n\t"
02150
02151 "movq %%mm1, %%mm6 \n\t"
02152 "punpcklbw %%mm2, %%mm1 \n\t"
02153 "punpcklbw %%mm1, %%mm0 \n\t"
02154 "punpckhbw %%mm1, %%mm3 \n\t"
02155 MOVNTQ" %%mm0, (%4, %0, 8) \n\t"
02156 MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t"
02157
02158 "punpckhbw %%mm2, %%mm6 \n\t"
02159 "movq 8(%1, %0, 4), %%mm0 \n\t"
02160 "movq %%mm0, %%mm3 \n\t"
02161 "punpcklbw %%mm6, %%mm0 \n\t"
02162 "punpckhbw %%mm6, %%mm3 \n\t"
02163 MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t"
02164 MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t"
02165
02166 "movq %%mm4, %%mm6 \n\t"
02167 "movq 16(%1, %0, 4), %%mm0 \n\t"
02168 "movq %%mm0, %%mm3 \n\t"
02169 "punpcklbw %%mm5, %%mm4 \n\t"
02170 "punpcklbw %%mm4, %%mm0 \n\t"
02171 "punpckhbw %%mm4, %%mm3 \n\t"
02172 MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t"
02173 MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t"
02174
02175 "punpckhbw %%mm5, %%mm6 \n\t"
02176 "movq 24(%1, %0, 4), %%mm0 \n\t"
02177 "movq %%mm0, %%mm3 \n\t"
02178 "punpcklbw %%mm6, %%mm0 \n\t"
02179 "punpckhbw %%mm6, %%mm3 \n\t"
02180 MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t"
02181 MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t"
02182
02183 : "+r" (x)
02184 : "r"(yp), "r" (up), "r"(vp), "r"(d)
02185 :"memory");
02186 }
02187 for (; x<w; x++) {
02188 const int x2 = x<<2;
02189 d[8*x+0] = yp[x2];
02190 d[8*x+1] = up[x];
02191 d[8*x+2] = yp[x2+1];
02192 d[8*x+3] = vp[x];
02193 d[8*x+4] = yp[x2+2];
02194 d[8*x+5] = up[x];
02195 d[8*x+6] = yp[x2+3];
02196 d[8*x+7] = vp[x];
02197 }
02198 }
02199 __asm__(
02200 EMMS" \n\t"
02201 SFENCE" \n\t"
02202 ::: "memory"
02203 );
02204 }
02205 #endif
02206
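/*
 * extract_even(): copy every second byte of src (offsets 0, 2, 4, ...) to dst,
 * i.e. pull one component out of a 2-bytes-per-pixel packed format (the luma
 * of YUYV, or of UYVY when called with src+1).  The MMX loop masks each word
 * with 0x00FF and packs; the scalar tail handles any remainder.
 */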
02207 static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
02208 {
02209 dst += count;
02210 src += 2*count;
02211 count= - count;
02212
02213 if(count <= -16) {
02214 count += 15;
02215 __asm__ volatile(
02216 "pcmpeqw %%mm7, %%mm7 \n\t"
02217 "psrlw $8, %%mm7 \n\t"
02218 "1: \n\t"
02219 "movq -30(%1, %0, 2), %%mm0 \n\t"
02220 "movq -22(%1, %0, 2), %%mm1 \n\t"
02221 "movq -14(%1, %0, 2), %%mm2 \n\t"
02222 "movq -6(%1, %0, 2), %%mm3 \n\t"
02223 "pand %%mm7, %%mm0 \n\t"
02224 "pand %%mm7, %%mm1 \n\t"
02225 "pand %%mm7, %%mm2 \n\t"
02226 "pand %%mm7, %%mm3 \n\t"
02227 "packuswb %%mm1, %%mm0 \n\t"
02228 "packuswb %%mm3, %%mm2 \n\t"
02229 MOVNTQ" %%mm0,-15(%2, %0) \n\t"
02230 MOVNTQ" %%mm2,- 7(%2, %0) \n\t"
02231 "add $16, %0 \n\t"
02232 " js 1b \n\t"
02233 : "+r"(count)
02234 : "r"(src), "r"(dst)
02235 );
02236 count -= 15;
02237 }
02238 while(count<0) {
02239 dst[count]= src[2*count];
02240 count++;
02241 }
02242 }
02243
02244 #if !COMPILE_TEMPLATE_AMD3DNOW
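/*
 * extract_even2(): de-interleave bytes 0 and 2 of every 4-byte group into
 * dst0 and dst1 respectively (e.g. the U and V samples of a UYVY line).
 */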
02245 static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
02246 {
02247 dst0+= count;
02248 dst1+= count;
02249 src += 4*count;
02250 count= - count;
02251 if(count <= -8) {
02252 count += 7;
02253 __asm__ volatile(
02254 "pcmpeqw %%mm7, %%mm7 \n\t"
02255 "psrlw $8, %%mm7 \n\t"
02256 "1: \n\t"
02257 "movq -28(%1, %0, 4), %%mm0 \n\t"
02258 "movq -20(%1, %0, 4), %%mm1 \n\t"
02259 "movq -12(%1, %0, 4), %%mm2 \n\t"
02260 "movq -4(%1, %0, 4), %%mm3 \n\t"
02261 "pand %%mm7, %%mm0 \n\t"
02262 "pand %%mm7, %%mm1 \n\t"
02263 "pand %%mm7, %%mm2 \n\t"
02264 "pand %%mm7, %%mm3 \n\t"
02265 "packuswb %%mm1, %%mm0 \n\t"
02266 "packuswb %%mm3, %%mm2 \n\t"
02267 "movq %%mm0, %%mm1 \n\t"
02268 "movq %%mm2, %%mm3 \n\t"
02269 "psrlw $8, %%mm0 \n\t"
02270 "psrlw $8, %%mm2 \n\t"
02271 "pand %%mm7, %%mm1 \n\t"
02272 "pand %%mm7, %%mm3 \n\t"
02273 "packuswb %%mm2, %%mm0 \n\t"
02274 "packuswb %%mm3, %%mm1 \n\t"
02275 MOVNTQ" %%mm0,- 7(%3, %0) \n\t"
02276 MOVNTQ" %%mm1,- 7(%2, %0) \n\t"
02277 "add $8, %0 \n\t"
02278 " js 1b \n\t"
02279 : "+r"(count)
02280 : "r"(src), "r"(dst0), "r"(dst1)
02281 );
02282 count -= 7;
02283 }
02284 while(count<0) {
02285 dst0[count]= src[4*count+0];
02286 dst1[count]= src[4*count+2];
02287 count++;
02288 }
02289 }
02290 #endif
02291
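/*
 * extract_even2avg(): like extract_even2(), but averages the corresponding
 * bytes of two source lines first (vertical chroma averaging for 4:2:0
 * output).  Note that PAVGB rounds up ((a+b+1)>>1) while the scalar tail
 * truncates ((a+b)>>1), so the two paths can differ by one LSB.
 */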
02292 static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
02293 {
02294 dst0 += count;
02295 dst1 += count;
02296 src0 += 4*count;
02297 src1 += 4*count;
02298 count= - count;
02299 #ifdef PAVGB
02300 if(count <= -8) {
02301 count += 7;
02302 __asm__ volatile(
02303 "pcmpeqw %%mm7, %%mm7 \n\t"
02304 "psrlw $8, %%mm7 \n\t"
02305 "1: \n\t"
02306 "movq -28(%1, %0, 4), %%mm0 \n\t"
02307 "movq -20(%1, %0, 4), %%mm1 \n\t"
02308 "movq -12(%1, %0, 4), %%mm2 \n\t"
02309 "movq -4(%1, %0, 4), %%mm3 \n\t"
02310 PAVGB" -28(%2, %0, 4), %%mm0 \n\t"
02311 PAVGB" -20(%2, %0, 4), %%mm1 \n\t"
02312 PAVGB" -12(%2, %0, 4), %%mm2 \n\t"
02313 PAVGB" - 4(%2, %0, 4), %%mm3 \n\t"
02314 "pand %%mm7, %%mm0 \n\t"
02315 "pand %%mm7, %%mm1 \n\t"
02316 "pand %%mm7, %%mm2 \n\t"
02317 "pand %%mm7, %%mm3 \n\t"
02318 "packuswb %%mm1, %%mm0 \n\t"
02319 "packuswb %%mm3, %%mm2 \n\t"
02320 "movq %%mm0, %%mm1 \n\t"
02321 "movq %%mm2, %%mm3 \n\t"
02322 "psrlw $8, %%mm0 \n\t"
02323 "psrlw $8, %%mm2 \n\t"
02324 "pand %%mm7, %%mm1 \n\t"
02325 "pand %%mm7, %%mm3 \n\t"
02326 "packuswb %%mm2, %%mm0 \n\t"
02327 "packuswb %%mm3, %%mm1 \n\t"
02328 MOVNTQ" %%mm0,- 7(%4, %0) \n\t"
02329 MOVNTQ" %%mm1,- 7(%3, %0) \n\t"
02330 "add $8, %0 \n\t"
02331 " js 1b \n\t"
02332 : "+r"(count)
02333 : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
02334 );
02335 count -= 7;
02336 }
02337 #endif
02338 while(count<0) {
02339 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
02340 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
02341 count++;
02342 }
02343 }
02344
02345 #if !COMPILE_TEMPLATE_AMD3DNOW
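/*
 * extract_odd2(): de-interleave bytes 1 and 3 of every 4-byte group into dst0
 * and dst1 (e.g. the U and V samples of a YUYV line).  The scalar tail works
 * on src+1, so its even offsets match the odd offsets used by the MMX loop.
 */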
02346 static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
02347 {
02348 dst0+= count;
02349 dst1+= count;
02350 src += 4*count;
02351 count= - count;
02352 if(count <= -8) {
02353 count += 7;
02354 __asm__ volatile(
02355 "pcmpeqw %%mm7, %%mm7 \n\t"
02356 "psrlw $8, %%mm7 \n\t"
02357 "1: \n\t"
02358 "movq -28(%1, %0, 4), %%mm0 \n\t"
02359 "movq -20(%1, %0, 4), %%mm1 \n\t"
02360 "movq -12(%1, %0, 4), %%mm2 \n\t"
02361 "movq -4(%1, %0, 4), %%mm3 \n\t"
02362 "psrlw $8, %%mm0 \n\t"
02363 "psrlw $8, %%mm1 \n\t"
02364 "psrlw $8, %%mm2 \n\t"
02365 "psrlw $8, %%mm3 \n\t"
02366 "packuswb %%mm1, %%mm0 \n\t"
02367 "packuswb %%mm3, %%mm2 \n\t"
02368 "movq %%mm0, %%mm1 \n\t"
02369 "movq %%mm2, %%mm3 \n\t"
02370 "psrlw $8, %%mm0 \n\t"
02371 "psrlw $8, %%mm2 \n\t"
02372 "pand %%mm7, %%mm1 \n\t"
02373 "pand %%mm7, %%mm3 \n\t"
02374 "packuswb %%mm2, %%mm0 \n\t"
02375 "packuswb %%mm3, %%mm1 \n\t"
02376 MOVNTQ" %%mm0,- 7(%3, %0) \n\t"
02377 MOVNTQ" %%mm1,- 7(%2, %0) \n\t"
02378 "add $8, %0 \n\t"
02379 " js 1b \n\t"
02380 : "+r"(count)
02381 : "r"(src), "r"(dst0), "r"(dst1)
02382 );
02383 count -= 7;
02384 }
02385 src++;
02386 while(count<0) {
02387 dst0[count]= src[4*count+0];
02388 dst1[count]= src[4*count+2];
02389 count++;
02390 }
02391 }
02392 #endif
02393
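/*
 * extract_odd2avg(): like extract_odd2(), but averages the corresponding
 * bytes of two source lines first; same PAVGB-vs-scalar rounding caveat as
 * extract_even2avg() above.
 */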
02394 static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
02395 {
02396 dst0 += count;
02397 dst1 += count;
02398 src0 += 4*count;
02399 src1 += 4*count;
02400 count= - count;
02401 #ifdef PAVGB
02402 if(count <= -8) {
02403 count += 7;
02404 __asm__ volatile(
02405 "pcmpeqw %%mm7, %%mm7 \n\t"
02406 "psrlw $8, %%mm7 \n\t"
02407 "1: \n\t"
02408 "movq -28(%1, %0, 4), %%mm0 \n\t"
02409 "movq -20(%1, %0, 4), %%mm1 \n\t"
02410 "movq -12(%1, %0, 4), %%mm2 \n\t"
02411 "movq -4(%1, %0, 4), %%mm3 \n\t"
02412 PAVGB" -28(%2, %0, 4), %%mm0 \n\t"
02413 PAVGB" -20(%2, %0, 4), %%mm1 \n\t"
02414 PAVGB" -12(%2, %0, 4), %%mm2 \n\t"
02415 PAVGB" - 4(%2, %0, 4), %%mm3 \n\t"
02416 "psrlw $8, %%mm0 \n\t"
02417 "psrlw $8, %%mm1 \n\t"
02418 "psrlw $8, %%mm2 \n\t"
02419 "psrlw $8, %%mm3 \n\t"
02420 "packuswb %%mm1, %%mm0 \n\t"
02421 "packuswb %%mm3, %%mm2 \n\t"
02422 "movq %%mm0, %%mm1 \n\t"
02423 "movq %%mm2, %%mm3 \n\t"
02424 "psrlw $8, %%mm0 \n\t"
02425 "psrlw $8, %%mm2 \n\t"
02426 "pand %%mm7, %%mm1 \n\t"
02427 "pand %%mm7, %%mm3 \n\t"
02428 "packuswb %%mm2, %%mm0 \n\t"
02429 "packuswb %%mm3, %%mm1 \n\t"
02430 MOVNTQ" %%mm0,- 7(%4, %0) \n\t"
02431 MOVNTQ" %%mm1,- 7(%3, %0) \n\t"
02432 "add $8, %0 \n\t"
02433 " js 1b \n\t"
02434 : "+r"(count)
02435 : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
02436 );
02437 count -= 7;
02438 }
02439 #endif
02440 src0++;
02441 src1++;
02442 while(count<0) {
02443 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
02444 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
02445 count++;
02446 }
02447 }
02448
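/*
 * yuyvtoyuv420(): packed YUYV (YUY2) to planar YUV 4:2:0.  Luma is extracted
 * on every line; chroma is taken from the odd byte positions and averaged
 * vertically over each pair of lines (the y&1 test), so each output chroma
 * sample covers a 2x2 luma block.
 */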
02449 static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
02450 int width, int height,
02451 int lumStride, int chromStride, int srcStride)
02452 {
02453 int y;
02454 const int chromWidth= -((-width)>>1);
02455
02456 for (y=0; y<height; y++) {
02457 RENAME(extract_even)(src, ydst, width);
02458 if(y&1) {
02459 RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth);
02460 udst+= chromStride;
02461 vdst+= chromStride;
02462 }
02463
02464 src += srcStride;
02465 ydst+= lumStride;
02466 }
02467 __asm__(
02468 EMMS" \n\t"
02469 SFENCE" \n\t"
02470 ::: "memory"
02471 );
02472 }
02473
02474 #if !COMPILE_TEMPLATE_AMD3DNOW
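/*
 * yuyvtoyuv422(): packed YUYV to planar YUV 4:2:2; chroma is split out on
 * every line, with no vertical averaging.
 */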
02475 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
02476 int width, int height,
02477 int lumStride, int chromStride, int srcStride)
02478 {
02479 int y;
02480 const int chromWidth= -((-width)>>1);
02481
02482 for (y=0; y<height; y++) {
02483 RENAME(extract_even)(src, ydst, width);
02484 RENAME(extract_odd2)(src, udst, vdst, chromWidth);
02485
02486 src += srcStride;
02487 ydst+= lumStride;
02488 udst+= chromStride;
02489 vdst+= chromStride;
02490 }
02491 __asm__(
02492 EMMS" \n\t"
02493 SFENCE" \n\t"
02494 ::: "memory"
02495 );
02496 }
02497 #endif
02498
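/*
 * uyvytoyuv420(): packed UYVY to planar YUV 4:2:0.  Same as yuyvtoyuv420()
 * except that luma sits at the odd byte positions (src+1) and chroma at the
 * even ones.
 */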
02499 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
02500 int width, int height,
02501 int lumStride, int chromStride, int srcStride)
02502 {
02503 int y;
02504 const int chromWidth= -((-width)>>1);
02505
02506 for (y=0; y<height; y++) {
02507 RENAME(extract_even)(src+1, ydst, width);
02508 if(y&1) {
02509 RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth);
02510 udst+= chromStride;
02511 vdst+= chromStride;
02512 }
02513
02514 src += srcStride;
02515 ydst+= lumStride;
02516 }
02517 __asm__(
02518 EMMS" \n\t"
02519 SFENCE" \n\t"
02520 ::: "memory"
02521 );
02522 }
02523
02524 #if !COMPILE_TEMPLATE_AMD3DNOW
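/*
 * uyvytoyuv422(): packed UYVY to planar YUV 4:2:2; per-line chroma split,
 * no vertical averaging.
 */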
02525 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
02526 int width, int height,
02527 int lumStride, int chromStride, int srcStride)
02528 {
02529 int y;
02530 const int chromWidth= -((-width)>>1);
02531
02532 for (y=0; y<height; y++) {
02533 RENAME(extract_even)(src+1, ydst, width);
02534 RENAME(extract_even2)(src, udst, vdst, chromWidth);
02535
02536 src += srcStride;
02537 ydst+= lumStride;
02538 udst+= chromStride;
02539 vdst+= chromStride;
02540 }
02541 __asm__(
02542 EMMS" \n\t"
02543 SFENCE" \n\t"
02544 ::: "memory"
02545 );
02546 }
02547 #endif
02548 #endif
02549
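/*
 * rgb2rgb_init(): install this template's optimized routines by assigning the
 * global function pointers (declared in rgb2rgb.h) that the generic code
 * calls.  Which pointers are set depends on the instruction set the template
 * was compiled for (MMX, MMX2, 3DNow!, SSE2).
 */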
02550 static inline void RENAME(rgb2rgb_init)(void)
02551 {
02552 #if !COMPILE_TEMPLATE_SSE2
02553 #if !COMPILE_TEMPLATE_AMD3DNOW
02554 rgb15to16 = RENAME(rgb15to16);
02555 rgb15tobgr24 = RENAME(rgb15tobgr24);
02556 rgb15to32 = RENAME(rgb15to32);
02557 rgb16tobgr24 = RENAME(rgb16tobgr24);
02558 rgb16to32 = RENAME(rgb16to32);
02559 rgb16to15 = RENAME(rgb16to15);
02560 rgb24tobgr16 = RENAME(rgb24tobgr16);
02561 rgb24tobgr15 = RENAME(rgb24tobgr15);
02562 rgb24tobgr32 = RENAME(rgb24tobgr32);
02563 rgb32to16 = RENAME(rgb32to16);
02564 rgb32to15 = RENAME(rgb32to15);
02565 rgb32tobgr24 = RENAME(rgb32tobgr24);
02566 rgb24to15 = RENAME(rgb24to15);
02567 rgb24to16 = RENAME(rgb24to16);
02568 rgb24tobgr24 = RENAME(rgb24tobgr24);
02569 shuffle_bytes_2103 = RENAME(shuffle_bytes_2103);
02570 rgb32tobgr16 = RENAME(rgb32tobgr16);
02571 rgb32tobgr15 = RENAME(rgb32tobgr15);
02572 yv12toyuy2 = RENAME(yv12toyuy2);
02573 yv12touyvy = RENAME(yv12touyvy);
02574 yuv422ptoyuy2 = RENAME(yuv422ptoyuy2);
02575 yuv422ptouyvy = RENAME(yuv422ptouyvy);
02576 yuy2toyv12 = RENAME(yuy2toyv12);
02577 vu9_to_vu12 = RENAME(vu9_to_vu12);
02578 yvu9_to_yuy2 = RENAME(yvu9_to_yuy2);
02579 uyvytoyuv422 = RENAME(uyvytoyuv422);
02580 yuyvtoyuv422 = RENAME(yuyvtoyuv422);
02581 #endif
02582
02583 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
02584 planar2x = RENAME(planar2x);
02585 #endif
02586 rgb24toyv12 = RENAME(rgb24toyv12);
02587
02588 yuyvtoyuv420 = RENAME(yuyvtoyuv420);
02589 uyvytoyuv420 = RENAME(uyvytoyuv420);
02590 #endif
02591
02592 #if !COMPILE_TEMPLATE_AMD3DNOW
02593 interleaveBytes = RENAME(interleaveBytes);
02594 #endif
02595 }