00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "libavutil/common.h"
00026 #include "libavutil/cpu.h"
00027 #include "libavutil/x86_cpu.h"
00028 #include "libavcodec/dsputil.h"
00029 #include "libavcodec/cavsdsp.h"
00030 #include "dsputil_mmx.h"
00031
00032
00033
00034
00035
00036
00037
00038 static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
00039 {
00040 __asm__ volatile(
00041 "movq 112(%0), %%mm4 \n\t"
00042 "movq 16(%0), %%mm5 \n\t"
00043 "movq 80(%0), %%mm2 \n\t"
00044 "movq 48(%0), %%mm7 \n\t"
00045 "movq %%mm4, %%mm0 \n\t"
00046 "movq %%mm5, %%mm3 \n\t"
00047 "movq %%mm2, %%mm6 \n\t"
00048 "movq %%mm7, %%mm1 \n\t"
00049
00050 "paddw %%mm4, %%mm4 \n\t"
00051 "paddw %%mm3, %%mm3 \n\t"
00052 "paddw %%mm6, %%mm6 \n\t"
00053 "paddw %%mm1, %%mm1 \n\t"
00054 "paddw %%mm4, %%mm0 \n\t"
00055 "paddw %%mm3, %%mm5 \n\t"
00056 "paddw %%mm6, %%mm2 \n\t"
00057 "paddw %%mm1, %%mm7 \n\t"
00058 "psubw %%mm4, %%mm5 \n\t"
00059 "paddw %%mm6, %%mm7 \n\t"
00060 "psubw %%mm2, %%mm1 \n\t"
00061 "paddw %%mm0, %%mm3 \n\t"
00062
00063 "movq %%mm5, %%mm4 \n\t"
00064 "movq %%mm7, %%mm6 \n\t"
00065 "movq %%mm3, %%mm0 \n\t"
00066 "movq %%mm1, %%mm2 \n\t"
00067 SUMSUB_BA( %%mm7, %%mm5 )
00068 "paddw %%mm3, %%mm7 \n\t"
00069 "paddw %%mm1, %%mm5 \n\t"
00070 "paddw %%mm7, %%mm7 \n\t"
00071 "paddw %%mm5, %%mm5 \n\t"
00072 "paddw %%mm6, %%mm7 \n\t"
00073 "paddw %%mm4, %%mm5 \n\t"
00074
00075 SUMSUB_BA( %%mm1, %%mm3 )
00076 "psubw %%mm1, %%mm4 \n\t"
00077 "movq %%mm4, %%mm1 \n\t"
00078 "psubw %%mm6, %%mm3 \n\t"
00079 "paddw %%mm1, %%mm1 \n\t"
00080 "paddw %%mm3, %%mm3 \n\t"
00081 "psubw %%mm2, %%mm1 \n\t"
00082 "paddw %%mm0, %%mm3 \n\t"
00083
00084 "movq 32(%0), %%mm2 \n\t"
00085 "movq 96(%0), %%mm6 \n\t"
00086 "movq %%mm2, %%mm4 \n\t"
00087 "movq %%mm6, %%mm0 \n\t"
00088 "psllw $2, %%mm4 \n\t"
00089 "psllw $2, %%mm6 \n\t"
00090 "paddw %%mm4, %%mm2 \n\t"
00091 "paddw %%mm6, %%mm0 \n\t"
00092 "paddw %%mm2, %%mm2 \n\t"
00093 "paddw %%mm0, %%mm0 \n\t"
00094 "psubw %%mm0, %%mm4 \n\t"
00095 "paddw %%mm2, %%mm6 \n\t"
00096
00097 "movq (%0), %%mm2 \n\t"
00098 "movq 64(%0), %%mm0 \n\t"
00099 SUMSUB_BA( %%mm0, %%mm2 )
00100 "psllw $3, %%mm0 \n\t"
00101 "psllw $3, %%mm2 \n\t"
00102 "paddw %1, %%mm0 \n\t"
00103 "paddw %1, %%mm2 \n\t"
00104
00105 SUMSUB_BA( %%mm6, %%mm0 )
00106 SUMSUB_BA( %%mm4, %%mm2 )
00107 SUMSUB_BA( %%mm7, %%mm6 )
00108 SUMSUB_BA( %%mm5, %%mm4 )
00109 SUMSUB_BA( %%mm3, %%mm2 )
00110 SUMSUB_BA( %%mm1, %%mm0 )
00111 :: "r"(block), "m"(bias)
00112 );
00113 }
00114
00115 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
00116 {
00117 int i;
00118 DECLARE_ALIGNED(8, int16_t, b2)[64];
00119
00120 for(i=0; i<2; i++){
00121 DECLARE_ALIGNED(8, uint64_t, tmp);
00122
00123 cavs_idct8_1d(block+4*i, ff_pw_4.a);
00124
00125 __asm__ volatile(
00126 "psraw $3, %%mm7 \n\t"
00127 "psraw $3, %%mm6 \n\t"
00128 "psraw $3, %%mm5 \n\t"
00129 "psraw $3, %%mm4 \n\t"
00130 "psraw $3, %%mm3 \n\t"
00131 "psraw $3, %%mm2 \n\t"
00132 "psraw $3, %%mm1 \n\t"
00133 "psraw $3, %%mm0 \n\t"
00134 "movq %%mm7, %0 \n\t"
00135 TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
00136 "movq %%mm0, 8(%1) \n\t"
00137 "movq %%mm6, 24(%1) \n\t"
00138 "movq %%mm7, 40(%1) \n\t"
00139 "movq %%mm4, 56(%1) \n\t"
00140 "movq %0, %%mm7 \n\t"
00141 TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
00142 "movq %%mm7, (%1) \n\t"
00143 "movq %%mm1, 16(%1) \n\t"
00144 "movq %%mm0, 32(%1) \n\t"
00145 "movq %%mm3, 48(%1) \n\t"
00146 : "=m"(tmp)
00147 : "r"(b2+32*i)
00148 : "memory"
00149 );
00150 }
00151
00152 for(i=0; i<2; i++){
00153 cavs_idct8_1d(b2+4*i, ff_pw_64.a);
00154
00155 __asm__ volatile(
00156 "psraw $7, %%mm7 \n\t"
00157 "psraw $7, %%mm6 \n\t"
00158 "psraw $7, %%mm5 \n\t"
00159 "psraw $7, %%mm4 \n\t"
00160 "psraw $7, %%mm3 \n\t"
00161 "psraw $7, %%mm2 \n\t"
00162 "psraw $7, %%mm1 \n\t"
00163 "psraw $7, %%mm0 \n\t"
00164 "movq %%mm7, (%0) \n\t"
00165 "movq %%mm5, 16(%0) \n\t"
00166 "movq %%mm3, 32(%0) \n\t"
00167 "movq %%mm1, 48(%0) \n\t"
00168 "movq %%mm0, 64(%0) \n\t"
00169 "movq %%mm2, 80(%0) \n\t"
00170 "movq %%mm4, 96(%0) \n\t"
00171 "movq %%mm6, 112(%0) \n\t"
00172 :: "r"(b2+4*i)
00173 : "memory"
00174 );
00175 }
00176
00177 ff_add_pixels_clamped_mmx(b2, dst, stride);
00178 }
00179
00180
00181
00182
00183
00184
00185
00186
/**
 * Asm fragment producing one 4-pixel output row of the vertical filter,
 * variant 1. Meant to be expanded inside the asm statement of
 * QPEL_CAVSVNUM, which supplies the operands:
 *   %0 source pointer, %1 destination pointer, %2 source stride,
 *   %3 destination stride, %4 rounding constant, %5 first multiplier.
 *
 * A..E name the MMX registers holding five consecutive source rows as
 * 16-bit words; F receives the next source row (4 bytes, zero-extended).
 * The value stored is
 *   (-A - 2*B + [%5]*C + MUL2*D - 7*E + [%4]) >> 7
 * packed to bytes with unsigned saturation.
 *
 * B and E are scaled in place and shifted back afterwards, so they keep
 * their value for the next row. mm6 is the accumulator; mm7 is used for the
 * D product and then cleared again. A is handed to OP as scratch register,
 * which is safe because A is not needed for later rows.
 *
 * @param OP   store macro taking (value, destination, scratch, size suffix)
 * @param MUL2 symbol of the second multiplier, referenced through MANGLE()
 */
#define QPEL_CAVSV1(A,B,C,D,E,F,OP,MUL2) \
        "movd (%0), "#F" \n\t"\
        "movq "#C", %%mm6 \n\t"\
        "pmullw %5, %%mm6 \n\t"\
        "movq "#D", %%mm7 \n\t"\
        "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
        "psllw $3, "#E" \n\t"\
        "psubw "#E", %%mm6 \n\t"\
        "psraw $3, "#E" \n\t"\
        "paddw %%mm7, %%mm6 \n\t"\
        "paddw "#E", %%mm6 \n\t"\
        "paddw "#B", "#B" \n\t"\
        "pxor %%mm7, %%mm7 \n\t"\
        "add %2, %0 \n\t"\
        "punpcklbw %%mm7, "#F" \n\t"\
        "psubw "#B", %%mm6 \n\t"\
        "psraw $1, "#B" \n\t"\
        "psubw "#A", %%mm6 \n\t"\
        "paddw %4, %%mm6 \n\t"\
        "psraw $7, %%mm6 \n\t"\
        "packuswb %%mm6, %%mm6 \n\t"\
        OP(%%mm6, (%1), A, d) \
        "add %3, %1 \n\t"
00210
00211
/**
 * Asm fragment producing one 4-pixel output row of the vertical filter,
 * variant 2. Operand numbering is the one set up by QPEL_CAVSVNUM:
 *   %0 source pointer, %1 destination pointer, %2 source stride,
 *   %3 destination stride, %4 rounding constant, %5 multiplier.
 *
 * The next source row is loaded into F and zero-extended, then
 *   ([%5]*(C + D) - B - E + [%4]) >> 3
 * is packed with unsigned saturation and stored through OP.
 *
 * Unlike the other two variants this one never clears mm7; it relies on
 * mm7 still being zero from the pxor at the start of the enclosing asm
 * statement. A is only used as the scratch register of OP. MUL2 is accepted
 * for signature compatibility and is not used.
 */
#define QPEL_CAVSV2(A,B,C,D,E,F,OP,MUL2) \
        "movd (%0), "#F" \n\t"\
        "movq "#C", %%mm6 \n\t"\
        "paddw "#D", %%mm6 \n\t"\
        "pmullw %5, %%mm6 \n\t"\
        "add %2, %0 \n\t"\
        "punpcklbw %%mm7, "#F" \n\t"\
        "psubw "#B", %%mm6 \n\t"\
        "psubw "#E", %%mm6 \n\t"\
        "paddw %4, %%mm6 \n\t"\
        "psraw $3, %%mm6 \n\t"\
        "packuswb %%mm6, %%mm6 \n\t"\
        OP(%%mm6, (%1), A, d) \
        "add %3, %1 \n\t"
00226
00227
/**
 * Asm fragment producing one 4-pixel output row of the vertical filter,
 * variant 3 -- the mirror image of QPEL_CAVSV1. Operand numbering is the
 * one set up by QPEL_CAVSVNUM:
 *   %0 source pointer, %1 destination pointer, %2 source stride,
 *   %3 destination stride, %4 rounding constant, %5 first multiplier.
 *
 * The next source row is loaded into F and zero-extended, then
 *   (-7*B + MUL2*C + [%5]*D - 2*E - F + [%4]) >> 7
 * is packed with unsigned saturation and stored through OP.
 *
 * B and E are scaled in place and shifted back afterwards. mm6 is the
 * accumulator; mm7 holds the D product and is cleared again before F is
 * unpacked. Row A does not take part in the sum and is only used as the
 * scratch register of OP.
 *
 * @param OP   store macro taking (value, destination, scratch, size suffix)
 * @param MUL2 symbol of the second multiplier, referenced through MANGLE()
 */
#define QPEL_CAVSV3(A,B,C,D,E,F,OP,MUL2) \
        "movd (%0), "#F" \n\t"\
        "movq "#C", %%mm6 \n\t"\
        "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
        "movq "#D", %%mm7 \n\t"\
        "pmullw %5, %%mm7 \n\t"\
        "psllw $3, "#B" \n\t"\
        "psubw "#B", %%mm6 \n\t"\
        "psraw $3, "#B" \n\t"\
        "paddw %%mm7, %%mm6 \n\t"\
        "paddw "#B", %%mm6 \n\t"\
        "paddw "#E", "#E" \n\t"\
        "pxor %%mm7, %%mm7 \n\t"\
        "add %2, %0 \n\t"\
        "punpcklbw %%mm7, "#F" \n\t"\
        "psubw "#E", %%mm6 \n\t"\
        "psraw $1, "#E" \n\t"\
        "psubw "#F", %%mm6 \n\t"\
        "paddw %4, %%mm6 \n\t"\
        "psraw $7, %%mm6 \n\t"\
        "packuswb %%mm6, %%mm6 \n\t"\
        OP(%%mm6, (%1), A, d) \
        "add %3, %1 \n\t"
00251
00252
/**
 * Function body shared by the vertical filters. Expects dst, src,
 * dstStride, srcStride and h to be in scope (they are the parameters of the
 * cavs_qpel8or16_v*_ functions generated by QPEL_CAVS).
 *
 * An 8-pixel wide column is processed as two strips of 4 pixels (w = 2).
 * For each strip, starting two rows above src:
 *  - five source rows are loaded into mm0..mm4 and zero-extended to words;
 *  - VOP is expanded eight times with the register names rotated by one on
 *    every step, each expansion loading one more source row and storing
 *    one output row;
 *  - if h == 16 a second asm statement expands VOP eight more times. That
 *    statement continues with the register contents left behind by the
 *    first one, so it depends on the compiler not touching the MMX
 *    registers in between.
 * Afterwards src and dst are rewound to the top of the strip and advanced
 * by 4 pixels: src has moved h+5 rows, dst h rows.
 *
 * @param VOP  per-row fragment (QPEL_CAVSV1, QPEL_CAVSV2 or QPEL_CAVSV3)
 * @param OP   store macro forwarded to VOP
 * @param ADD  rounding constant, passed as memory operand %4
 * @param MUL1 first multiplier, passed as memory operand %5
 * @param MUL2 second multiplier, forwarded to VOP by name
 */
#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
    int w= 2;\
    src -= 2*srcStride;\
    \
    while(w--){\
      __asm__ volatile(\
        "pxor %%mm7, %%mm7 \n\t"\
        "movd (%0), %%mm0 \n\t"\
        "add %2, %0 \n\t"\
        "movd (%0), %%mm1 \n\t"\
        "add %2, %0 \n\t"\
        "movd (%0), %%mm2 \n\t"\
        "add %2, %0 \n\t"\
        "movd (%0), %%mm3 \n\t"\
        "add %2, %0 \n\t"\
        "movd (%0), %%mm4 \n\t"\
        "add %2, %0 \n\t"\
        "punpcklbw %%mm7, %%mm0 \n\t"\
        "punpcklbw %%mm7, %%mm1 \n\t"\
        "punpcklbw %%mm7, %%mm2 \n\t"\
        "punpcklbw %%mm7, %%mm3 \n\t"\
        "punpcklbw %%mm7, %%mm4 \n\t"\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
        VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
        VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
        VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\
        VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
        \
        : "+a"(src), "+c"(dst)\
        : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
        : "memory"\
      );\
      if(h==16){\
        __asm__ volatile(\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\
            VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\
            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
            \
           : "+a"(src), "+c"(dst)\
           : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
           : "memory"\
        );\
      }\
      src += 4-(h+5)*srcStride;\
      dst += 4-h*dstStride;\
    }
00307
/**
 * Generate one family of CAVS quarter-pel functions.
 *
 * For a given OPNAME prefix and MMX suffix this defines:
 *  - OPNAME cavs_qpel8_h_MMX: horizontal filter over 8 rows of 8 pixels.
 *    Per pixel it computes
 *      ([%5]*(src[0] + src[1]) - src[-1] - src[2] + [%6]) >> 3
 *    with %5 = ff_pw_5 and %6 = ff_pw_4, packs with unsigned saturation and
 *    stores 8 bytes through OP. The row counter h lives in memory ("+m")
 *    and is decremented by the asm loop itself.
 *  - OPNAME cavs_qpel8or16_v{1,2,3}_MMX: vertical filters for an 8 pixel
 *    wide column of height h, built from QPEL_CAVSVNUM.
 *  - OPNAME cavs_qpel8_v{1,2,3}_MMX and cavs_qpel16_v{1,2,3}_MMX: fixed
 *    size wrappers; the 16 wide version runs two 8 wide columns of
 *    height 16.
 *  - OPNAME cavs_qpel16_h_MMX: four calls of the 8x8 horizontal filter, one
 *    per quadrant.
 *
 * @param OPNAME function name prefix (put_ or avg_ in this file)
 * @param OP     store macro taking (value, destination, scratch, size)
 * @param MMX    function name suffix
 */
#define QPEL_CAVS(OPNAME, OP, MMX)\
static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    int h=8;\
    __asm__ volatile(\
        "pxor %%mm7, %%mm7 \n\t"\
        "movq %5, %%mm6 \n\t"\
        "1: \n\t"\
        "movq (%0), %%mm0 \n\t"\
        "movq 1(%0), %%mm2 \n\t"\
        "movq %%mm0, %%mm1 \n\t"\
        "movq %%mm2, %%mm3 \n\t"\
        "punpcklbw %%mm7, %%mm0 \n\t"\
        "punpckhbw %%mm7, %%mm1 \n\t"\
        "punpcklbw %%mm7, %%mm2 \n\t"\
        "punpckhbw %%mm7, %%mm3 \n\t"\
        "paddw %%mm2, %%mm0 \n\t"\
        "paddw %%mm3, %%mm1 \n\t"\
        "pmullw %%mm6, %%mm0 \n\t"\
        "pmullw %%mm6, %%mm1 \n\t"\
        "movq -1(%0), %%mm2 \n\t"\
        "movq 2(%0), %%mm4 \n\t"\
        "movq %%mm2, %%mm3 \n\t"\
        "movq %%mm4, %%mm5 \n\t"\
        "punpcklbw %%mm7, %%mm2 \n\t"\
        "punpckhbw %%mm7, %%mm3 \n\t"\
        "punpcklbw %%mm7, %%mm4 \n\t"\
        "punpckhbw %%mm7, %%mm5 \n\t"\
        "paddw %%mm4, %%mm2 \n\t"\
        "paddw %%mm3, %%mm5 \n\t"\
        "psubw %%mm2, %%mm0 \n\t"\
        "psubw %%mm5, %%mm1 \n\t"\
        "movq %6, %%mm5 \n\t"\
        "paddw %%mm5, %%mm0 \n\t"\
        "paddw %%mm5, %%mm1 \n\t"\
        "psraw $3, %%mm0 \n\t"\
        "psraw $3, %%mm1 \n\t"\
        "packuswb %%mm1, %%mm0 \n\t"\
        OP(%%mm0, (%1),%%mm5, q) \
        "add %3, %0 \n\t"\
        "add %4, %1 \n\t"\
        "decl %2 \n\t"\
        " jnz 1b \n\t"\
        : "+a"(src), "+c"(dst), "+m"(h)\
        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
        : "memory"\
    );\
}\
\
static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
  QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
  QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5) \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
  QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
}\
\
static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}

/**
 * Generate the motion compensation entry points for one block size.
 *
 * Each generated function takes (dst, src, stride) and forwards to the
 * matching filter produced by QPEL_CAVS, passing stride for both the
 * destination and the source stride:
 *   _mc20_ -> horizontal filter
 *   _mc01_ -> vertical filter v1
 *   _mc02_ -> vertical filter v2
 *   _mc03_ -> vertical filter v3
 *
 * @param OPNAME function name prefix (put_ or avg_ in this file)
 * @param SIZE   block size used in the function names (8 or 16 here)
 * @param MMX    function name suffix
 */
#define CAVS_MC(OPNAME, SIZE, MMX) \
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
}\
\
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
}\
\
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
}\
\
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
}

/*
 * Store macros used as the OP argument of the filter macros. Each expands
 * to a string of assembly; the arguments are pasted in textually:
 *   a    register holding the freshly computed pixels
 *   b    destination memory operand
 *   temp scratch register (unused by PUT_OP)
 *   size mov suffix, d for 4 bytes or q for 8 bytes
 */

/* plain store: b = a */
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
/* load b into temp, combine it into a with pavgusb, store a back to b */
#define AVG_3DNOW_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp " \n\t"\
"pavgusb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
/* same sequence using pavgb instead of pavgusb */
#define AVG_MMX2_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp " \n\t"\
"pavgb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
00427
00428 QPEL_CAVS(put_, PUT_OP, 3dnow)
00429 QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
00430 QPEL_CAVS(put_, PUT_OP, mmx2)
00431 QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2)
00432
00433 CAVS_MC(put_, 8, 3dnow)
00434 CAVS_MC(put_, 16,3dnow)
00435 CAVS_MC(avg_, 8, 3dnow)
00436 CAVS_MC(avg_, 16,3dnow)
00437 CAVS_MC(put_, 8, mmx2)
00438 CAVS_MC(put_, 16,mmx2)
00439 CAVS_MC(avg_, 8, mmx2)
00440 CAVS_MC(avg_, 16,mmx2)
00441
00442 static void ff_cavsdsp_init_mmx2(CAVSDSPContext* c, AVCodecContext *avctx) {
00443 #define dspfunc(PFX, IDX, NUM) \
00444 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
00445 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
00446 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
00447 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
00448 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
00449
00450 dspfunc(put_cavs_qpel, 0, 16);
00451 dspfunc(put_cavs_qpel, 1, 8);
00452 dspfunc(avg_cavs_qpel, 0, 16);
00453 dspfunc(avg_cavs_qpel, 1, 8);
00454 #undef dspfunc
00455 c->cavs_idct8_add = cavs_idct8_add_mmx;
00456 }
00457
00458 static void ff_cavsdsp_init_3dnow(CAVSDSPContext* c, AVCodecContext *avctx) {
00459 #define dspfunc(PFX, IDX, NUM) \
00460 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
00461 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
00462 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
00463 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
00464 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \
00465
00466 dspfunc(put_cavs_qpel, 0, 16);
00467 dspfunc(put_cavs_qpel, 1, 8);
00468 dspfunc(avg_cavs_qpel, 0, 16);
00469 dspfunc(avg_cavs_qpel, 1, 8);
00470 #undef dspfunc
00471 c->cavs_idct8_add = cavs_idct8_add_mmx;
00472 }
00473
00474 void ff_cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx)
00475 {
00476 int mm_flags = av_get_cpu_flags();
00477
00478 if (mm_flags & AV_CPU_FLAG_MMX2) ff_cavsdsp_init_mmx2 (c, avctx);
00479 if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx);
00480 }