00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "libavutil/cpu.h"
00024 #include "libavutil/mem.h"
00025 #include "libavutil/x86/asm.h"
00026 #include "libavcodec/vp8dsp.h"
00027
00028 #if HAVE_YASM
00029
00030
00031
00032
/*
 * Motion-compensation (MC) routines defined outside this file.
 *
 * All of them share one signature:
 *   dst / dststride  - destination pointer and its row stride
 *   src / srcstride  - source pointer and its row stride
 *   height           - number of rows to process
 *   mx / my          - passed through unchanged by the wrappers in this file
 *                      (presumably the horizontal / vertical subpel position;
 *                      confirm against the implementations)
 *
 * Names follow ff_put_vp8_<filter><width>_<direction><taps>_<cpu>.
 */

/* epel, width 4, MMX2 */
void ff_put_vp8_epel4_h4_mmx2(uint8_t *dst, ptrdiff_t dststride,
                              uint8_t *src, ptrdiff_t srcstride,
                              int height, int mx, int my);
void ff_put_vp8_epel4_h6_mmx2(uint8_t *dst, ptrdiff_t dststride,
                              uint8_t *src, ptrdiff_t srcstride,
                              int height, int mx, int my);
void ff_put_vp8_epel4_v4_mmx2(uint8_t *dst, ptrdiff_t dststride,
                              uint8_t *src, ptrdiff_t srcstride,
                              int height, int mx, int my);
void ff_put_vp8_epel4_v6_mmx2(uint8_t *dst, ptrdiff_t dststride,
                              uint8_t *src, ptrdiff_t srcstride,
                              int height, int mx, int my);

/* epel, width 8, SSE2 */
void ff_put_vp8_epel8_h4_sse2(uint8_t *dst, ptrdiff_t dststride,
                              uint8_t *src, ptrdiff_t srcstride,
                              int height, int mx, int my);
void ff_put_vp8_epel8_h6_sse2(uint8_t *dst, ptrdiff_t dststride,
                              uint8_t *src, ptrdiff_t srcstride,
                              int height, int mx, int my);
void ff_put_vp8_epel8_v4_sse2(uint8_t *dst, ptrdiff_t dststride,
                              uint8_t *src, ptrdiff_t srcstride,
                              int height, int mx, int my);
void ff_put_vp8_epel8_v6_sse2(uint8_t *dst, ptrdiff_t dststride,
                              uint8_t *src, ptrdiff_t srcstride,
                              int height, int mx, int my);

/* epel, widths 4 and 8, SSSE3 */
void ff_put_vp8_epel4_h4_ssse3(uint8_t *dst, ptrdiff_t dststride,
                               uint8_t *src, ptrdiff_t srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel4_h6_ssse3(uint8_t *dst, ptrdiff_t dststride,
                               uint8_t *src, ptrdiff_t srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel4_v4_ssse3(uint8_t *dst, ptrdiff_t dststride,
                               uint8_t *src, ptrdiff_t srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel4_v6_ssse3(uint8_t *dst, ptrdiff_t dststride,
                               uint8_t *src, ptrdiff_t srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel8_h4_ssse3(uint8_t *dst, ptrdiff_t dststride,
                               uint8_t *src, ptrdiff_t srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel8_h6_ssse3(uint8_t *dst, ptrdiff_t dststride,
                               uint8_t *src, ptrdiff_t srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel8_v4_ssse3(uint8_t *dst, ptrdiff_t dststride,
                               uint8_t *src, ptrdiff_t srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel8_v6_ssse3(uint8_t *dst, ptrdiff_t dststride,
                               uint8_t *src, ptrdiff_t srcstride,
                               int height, int mx, int my);

/* bilinear, horizontal */
void ff_put_vp8_bilinear4_h_mmx2(uint8_t *dst, ptrdiff_t dststride,
                                 uint8_t *src, ptrdiff_t srcstride,
                                 int height, int mx, int my);
void ff_put_vp8_bilinear8_h_sse2(uint8_t *dst, ptrdiff_t dststride,
                                 uint8_t *src, ptrdiff_t srcstride,
                                 int height, int mx, int my);
void ff_put_vp8_bilinear4_h_ssse3(uint8_t *dst, ptrdiff_t dststride,
                                  uint8_t *src, ptrdiff_t srcstride,
                                  int height, int mx, int my);
void ff_put_vp8_bilinear8_h_ssse3(uint8_t *dst, ptrdiff_t dststride,
                                  uint8_t *src, ptrdiff_t srcstride,
                                  int height, int mx, int my);

/* bilinear, vertical */
void ff_put_vp8_bilinear4_v_mmx2(uint8_t *dst, ptrdiff_t dststride,
                                 uint8_t *src, ptrdiff_t srcstride,
                                 int height, int mx, int my);
void ff_put_vp8_bilinear8_v_sse2(uint8_t *dst, ptrdiff_t dststride,
                                 uint8_t *src, ptrdiff_t srcstride,
                                 int height, int mx, int my);
void ff_put_vp8_bilinear4_v_ssse3(uint8_t *dst, ptrdiff_t dststride,
                                  uint8_t *src, ptrdiff_t srcstride,
                                  int height, int mx, int my);
void ff_put_vp8_bilinear8_v_ssse3(uint8_t *dst, ptrdiff_t dststride,
                                  uint8_t *src, ptrdiff_t srcstride,
                                  int height, int mx, int my);

/* plain pixel routines installed in the [0][0] slot of the MC tables */
void ff_put_vp8_pixels8_mmx(uint8_t *dst, ptrdiff_t dststride,
                            uint8_t *src, ptrdiff_t srcstride,
                            int height, int mx, int my);
void ff_put_vp8_pixels16_mmx(uint8_t *dst, ptrdiff_t dststride,
                             uint8_t *src, ptrdiff_t srcstride,
                             int height, int mx, int my);
void ff_put_vp8_pixels16_sse(uint8_t *dst, ptrdiff_t dststride,
                             uint8_t *src, ptrdiff_t srcstride,
                             int height, int mx, int my);
00120
/*
 * TAP_W16(OPT, KIND, TAP): define the static function
 * ff_put_vp8_<KIND>16_<TAP>_<OPT>() on top of the 8-pixel-wide routine
 * ff_put_vp8_<KIND>8_<TAP>_<OPT>().  The narrow routine is run twice with
 * identical arguments, first at column offset 0 and then at column offset 8
 * of both dst and src.
 */
#define TAP_W16(OPT, KIND, TAP) \
static void ff_put_vp8_ ## KIND ## 16_ ## TAP ## _ ## OPT( \
    uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    int x; \
    for (x = 0; x < 16; x += 8) \
        ff_put_vp8_ ## KIND ## 8_ ## TAP ## _ ## OPT( \
            dst + x, dststride, src + x, srcstride, height, mx, my); \
}
/*
 * TAP_W8(OPT, KIND, TAP): define the static function
 * ff_put_vp8_<KIND>8_<TAP>_<OPT>() on top of the 4-pixel-wide routine
 * ff_put_vp8_<KIND>4_<TAP>_<OPT>().  The narrow routine is run twice with
 * identical arguments, first at column offset 0 and then at column offset 4
 * of both dst and src.
 */
#define TAP_W8(OPT, KIND, TAP) \
static void ff_put_vp8_ ## KIND ## 8_ ## TAP ## _ ## OPT( \
    uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    int x; \
    for (x = 0; x < 8; x += 4) \
        ff_put_vp8_ ## KIND ## 4_ ## TAP ## _ ## OPT( \
            dst + x, dststride, src + x, srcstride, height, mx, my); \
}
00141
00142 #if ARCH_X86_32
00143 TAP_W8 (mmx2, epel, h4)
00144 TAP_W8 (mmx2, epel, h6)
00145 TAP_W16(mmx2, epel, h6)
00146 TAP_W8 (mmx2, epel, v4)
00147 TAP_W8 (mmx2, epel, v6)
00148 TAP_W16(mmx2, epel, v6)
00149 TAP_W8 (mmx2, bilinear, h)
00150 TAP_W16(mmx2, bilinear, h)
00151 TAP_W8 (mmx2, bilinear, v)
00152 TAP_W16(mmx2, bilinear, v)
00153 #endif
00154
00155 TAP_W16(sse2, epel, h6)
00156 TAP_W16(sse2, epel, v6)
00157 TAP_W16(sse2, bilinear, h)
00158 TAP_W16(sse2, bilinear, v)
00159
00160 TAP_W16(ssse3, epel, h6)
00161 TAP_W16(ssse3, epel, v6)
00162 TAP_W16(ssse3, bilinear, h)
00163 TAP_W16(ssse3, bilinear, v)
00164
/*
 * HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT): define the static
 * two-pass function ff_put_vp8_epel<SIZE>_h<TAPNUMX>v<TAPNUMY>_<OPT>().
 *
 * Pass 1 runs the horizontal routine into an on-stack buffer whose row
 * stride is SIZE.  It starts (TAPNUMY / 2 - 1) rows above src and produces
 * height + TAPNUMY - 1 rows.
 * Pass 2 runs the vertical routine from that buffer into dst, starting at
 * buffer row (TAPNUMY / 2 - 1).
 *
 * The buffer holds MAXHEIGHT + TAPNUMY - 1 rows and is declared with
 * DECLARE_ALIGNED(ALIGN, ...); height must therefore not exceed MAXHEIGHT.
 */
#define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
    uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, rows)[(MAXHEIGHT + TAPNUMY - 1) * SIZE]; \
    const int lead = TAPNUMY / 2 - 1; \
    ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \
        rows, SIZE, src - srcstride * lead, srcstride, \
        height + TAPNUMY - 1, mx, my); \
    ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \
        dst, dststride, rows + SIZE * lead, SIZE, height, mx, my); \
}
00178
00179 #if ARCH_X86_32
00180 #define HVTAPMMX(x, y) \
00181 HVTAP(mmx2, 8, x, y, 4, 8) \
00182 HVTAP(mmx2, 8, x, y, 8, 16)
00183
00184 HVTAP(mmx2, 8, 6, 6, 16, 16)
00185 #else
00186 #define HVTAPMMX(x, y) \
00187 HVTAP(mmx2, 8, x, y, 4, 8)
00188 #endif
00189
00190 HVTAPMMX(4, 4)
00191 HVTAPMMX(4, 6)
00192 HVTAPMMX(6, 4)
00193 HVTAPMMX(6, 6)
00194
00195 #define HVTAPSSE2(x, y, w) \
00196 HVTAP(sse2, 16, x, y, w, 16) \
00197 HVTAP(ssse3, 16, x, y, w, 16)
00198
00199 HVTAPSSE2(4, 4, 8)
00200 HVTAPSSE2(4, 6, 8)
00201 HVTAPSSE2(6, 4, 8)
00202 HVTAPSSE2(6, 6, 8)
00203 HVTAPSSE2(6, 6, 16)
00204
00205 HVTAP(ssse3, 16, 4, 4, 4, 8)
00206 HVTAP(ssse3, 16, 4, 6, 4, 8)
00207 HVTAP(ssse3, 16, 6, 4, 4, 8)
00208 HVTAP(ssse3, 16, 6, 6, 4, 8)
00209
/*
 * HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT): define the static two-pass function
 * ff_put_vp8_bilinear<SIZE>_hv_<OPT>().
 *
 * Pass 1 runs the horizontal routine on height + 1 source rows into an
 * on-stack buffer with row stride SIZE.  Pass 2 runs the vertical routine
 * from the start of that buffer into dst for height rows.
 *
 * The buffer holds MAXHEIGHT + 2 rows and is declared with
 * DECLARE_ALIGNED(ALIGN, ...); height must therefore not exceed MAXHEIGHT.
 */
#define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
    uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, rows)[(MAXHEIGHT + 2) * SIZE]; \
    const int h_rows = height + 1; \
    ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \
        rows, SIZE, src, srcstride, h_rows, mx, my); \
    ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \
        dst, dststride, rows, SIZE, height, mx, my); \
}
00221
00222 HVBILIN(mmx2, 8, 4, 8)
00223 #if ARCH_X86_32
00224 HVBILIN(mmx2, 8, 8, 16)
00225 HVBILIN(mmx2, 8, 16, 16)
00226 #endif
00227 HVBILIN(sse2, 8, 8, 16)
00228 HVBILIN(sse2, 8, 16, 16)
00229 HVBILIN(ssse3, 8, 4, 8)
00230 HVBILIN(ssse3, 8, 8, 16)
00231 HVBILIN(ssse3, 8, 16, 16)
00232
00233 extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16],
00234 ptrdiff_t stride);
00235 extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16],
00236 ptrdiff_t stride);
00237 extern void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, DCTELEM block[4][16],
00238 ptrdiff_t stride);
00239 extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16],
00240 ptrdiff_t stride);
00241 extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16],
00242 ptrdiff_t stride);
00243 extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
00244 extern void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]);
00245 extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16],
00246 ptrdiff_t stride);
00247 extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16],
00248 ptrdiff_t stride);
00249
/*
 * DECLARE_LOOP_FILTER(NAME): declare the ten loop-filter entry points that
 * carry the CPU suffix NAME.  For each of the vertical (v) and horizontal (h)
 * directions there is:
 *   - a "simple" filter          (dst, stride, flim)
 *   - a 16y "inner" filter       (dst, stride, e, i, hvt)
 *   - an 8uv "inner" filter      (dstU, dstV, s, e, i, hvt)
 *   - a 16y "mbedge" filter      (dst, stride, e, i, hvt)
 *   - an 8uv "mbedge" filter     (dstU, dstV, s, e, i, hvt)
 * The functions themselves are defined outside this file.
 */
#define DECLARE_LOOP_FILTER(NAME) \
void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                          int flim); \
void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                          int flim); \
void ff_vp8_v_loop_filter16y_inner_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                            int e, int i, int hvt); \
void ff_vp8_h_loop_filter16y_inner_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                            int e, int i, int hvt); \
void ff_vp8_v_loop_filter8uv_inner_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                            ptrdiff_t s, \
                                            int e, int i, int hvt); \
void ff_vp8_h_loop_filter8uv_inner_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                            ptrdiff_t s, \
                                            int e, int i, int hvt); \
void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                             int e, int i, int hvt); \
void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                             int e, int i, int hvt); \
void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                             ptrdiff_t s, \
                                             int e, int i, int hvt); \
void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                             ptrdiff_t s, \
                                             int e, int i, int hvt);

/* One set of declarations per CPU suffix. */
DECLARE_LOOP_FILTER(mmx)
DECLARE_LOOP_FILTER(mmx2)
DECLARE_LOOP_FILTER(sse2)
DECLARE_LOOP_FILTER(ssse3)
DECLARE_LOOP_FILTER(sse4)
00291
00292 #endif
00293
/*
 * Table-filling helpers for ff_vp8dsp_init_x86().
 *
 * All three expect a pointer named `c` in scope whose target has the members
 * put_vp8_epel_pixels_tab / put_vp8_bilinear_pixels_tab, indexed as
 * [IDX][a][b].  SIZE and OPT are pasted into the function names, e.g.
 * ff_put_vp8_epel<SIZE>_h6_<OPT>.
 *
 * Each macro expands to a single statement (do { ... } while (0)), so it can
 * be used safely as the body of an unbraced if/else; invoke it with a
 * trailing semicolon.
 */

/* Install the six-tap-only entries: [0][2] = h6, [2][0] = v6, [2][2] = h6v6. */
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT; \
    } while (0)

/*
 * Install every epel entry except [0][0]: the entries that involve a four-tap
 * filter ([0][1], [1][0], [1][1], [1][2], [2][1]) plus everything
 * VP8_LUMA_MC_FUNC installs.
 */
#define VP8_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
        VP8_LUMA_MC_FUNC(IDX, SIZE, OPT); \
    } while (0)

/*
 * Install every bilinear entry except [0][0].  Index values 1 and 2 map to
 * the same function: [0][b] = h, [a][0] = v, [a][b] = hv for a, b in {1, 2}.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
    } while (0)
00316
00317
00318 av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
00319 {
00320 #if HAVE_YASM
00321 int mm_flags = av_get_cpu_flags();
00322
00323 if (mm_flags & AV_CPU_FLAG_MMX) {
00324 c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx;
00325 c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
00326 #if ARCH_X86_32
00327 c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx;
00328 c->vp8_idct_add = ff_vp8_idct_add_mmx;
00329 c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx;
00330 c->put_vp8_epel_pixels_tab[0][0][0] =
00331 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
00332 #endif
00333 c->put_vp8_epel_pixels_tab[1][0][0] =
00334 c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
00335
00336 #if ARCH_X86_32
00337 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
00338 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
00339
00340 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
00341 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
00342 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
00343 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
00344
00345 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx;
00346 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx;
00347 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx;
00348 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx;
00349 #endif
00350 }
00351
00352
00353
00354 if (mm_flags & AV_CPU_FLAG_MMXEXT) {
00355 VP8_MC_FUNC(2, 4, mmx2);
00356 VP8_BILINEAR_MC_FUNC(2, 4, mmx2);
00357 #if ARCH_X86_32
00358 VP8_LUMA_MC_FUNC(0, 16, mmx2);
00359 VP8_MC_FUNC(1, 8, mmx2);
00360 VP8_BILINEAR_MC_FUNC(0, 16, mmx2);
00361 VP8_BILINEAR_MC_FUNC(1, 8, mmx2);
00362
00363 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx2;
00364 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx2;
00365
00366 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx2;
00367 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx2;
00368 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx2;
00369 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx2;
00370
00371 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx2;
00372 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx2;
00373 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx2;
00374 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx2;
00375 #endif
00376 }
00377
00378 if (mm_flags & AV_CPU_FLAG_SSE) {
00379 c->vp8_idct_add = ff_vp8_idct_add_sse;
00380 c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
00381 c->put_vp8_epel_pixels_tab[0][0][0] =
00382 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
00383 }
00384
00385 if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) {
00386 VP8_LUMA_MC_FUNC(0, 16, sse2);
00387 VP8_MC_FUNC(1, 8, sse2);
00388 VP8_BILINEAR_MC_FUNC(0, 16, sse2);
00389 VP8_BILINEAR_MC_FUNC(1, 8, sse2);
00390
00391 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
00392
00393 #if ARCH_X86_64 || HAVE_ALIGNED_STACK
00394 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
00395 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
00396
00397 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2;
00398 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
00399 #endif
00400 }
00401
00402 if (mm_flags & AV_CPU_FLAG_SSE2) {
00403 c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;
00404
00405 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
00406
00407 #if ARCH_X86_64 || HAVE_ALIGNED_STACK
00408 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
00409 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
00410
00411 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
00412 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
00413 #endif
00414 }
00415
00416 if (mm_flags & AV_CPU_FLAG_SSSE3) {
00417 VP8_LUMA_MC_FUNC(0, 16, ssse3);
00418 VP8_MC_FUNC(1, 8, ssse3);
00419 VP8_MC_FUNC(2, 4, ssse3);
00420 VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
00421 VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
00422 VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
00423
00424 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
00425 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
00426
00427 #if ARCH_X86_64 || HAVE_ALIGNED_STACK
00428 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
00429 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
00430 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
00431 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
00432
00433 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3;
00434 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
00435 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
00436 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
00437 #endif
00438 }
00439
00440 if (mm_flags & AV_CPU_FLAG_SSE4) {
00441 c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;
00442
00443 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
00444 #if ARCH_X86_64 || HAVE_ALIGNED_STACK
00445 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4;
00446 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4;
00447 #endif
00448 }
00449 #endif
00450 }