00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "libavutil/cpu.h"
00024 #include "libavutil/x86_cpu.h"
00025 #include "libavcodec/vp8dsp.h"
00026
00027 #if HAVE_YASM
00028
00029
00030
00031
/*
 * Subpel ("epel") motion-compensation primitives that are defined outside
 * this file (presumably in the yasm sources, since everything here is guarded
 * by HAVE_YASM).  All of them share one signature.  The name encodes the block
 * width (4 or 8), the filter direction (h/v), the tap count (4 or 6) and the
 * instruction set.
 */

/* MMXEXT: 4 pixels wide */
void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, int dststride,
                                uint8_t *src, int srcstride,
                                int height, int mx, int my);
void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, int dststride,
                                uint8_t *src, int srcstride,
                                int height, int mx, int my);
void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, int dststride,
                                uint8_t *src, int srcstride,
                                int height, int mx, int my);
void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, int dststride,
                                uint8_t *src, int srcstride,
                                int height, int mx, int my);

/* SSE2: 8 pixels wide */
void ff_put_vp8_epel8_h4_sse2(uint8_t *dst, int dststride,
                              uint8_t *src, int srcstride,
                              int height, int mx, int my);
void ff_put_vp8_epel8_h6_sse2(uint8_t *dst, int dststride,
                              uint8_t *src, int srcstride,
                              int height, int mx, int my);
void ff_put_vp8_epel8_v4_sse2(uint8_t *dst, int dststride,
                              uint8_t *src, int srcstride,
                              int height, int mx, int my);
void ff_put_vp8_epel8_v6_sse2(uint8_t *dst, int dststride,
                              uint8_t *src, int srcstride,
                              int height, int mx, int my);

/* SSSE3: 4 and 8 pixels wide */
void ff_put_vp8_epel4_h4_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel4_h6_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel4_v4_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel4_v6_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel8_h4_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel8_h6_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel8_v4_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel8_v6_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
00082
/*
 * Bilinear motion-compensation primitives defined outside this file.
 * They use the same argument list as the epel routines.  Only the widths
 * declared here exist externally; wider variants are assembled from them
 * further down in this file.
 */

/* horizontal pass */
void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, int dststride,
                                   uint8_t *src, int srcstride,
                                   int height, int mx, int my);
void ff_put_vp8_bilinear8_h_sse2(uint8_t *dst, int dststride,
                                 uint8_t *src, int srcstride,
                                 int height, int mx, int my);
void ff_put_vp8_bilinear4_h_ssse3(uint8_t *dst, int dststride,
                                  uint8_t *src, int srcstride,
                                  int height, int mx, int my);
void ff_put_vp8_bilinear8_h_ssse3(uint8_t *dst, int dststride,
                                  uint8_t *src, int srcstride,
                                  int height, int mx, int my);

/* vertical pass */
void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride,
                                   uint8_t *src, int srcstride,
                                   int height, int mx, int my);
void ff_put_vp8_bilinear8_v_sse2(uint8_t *dst, int dststride,
                                 uint8_t *src, int srcstride,
                                 int height, int mx, int my);
void ff_put_vp8_bilinear4_v_ssse3(uint8_t *dst, int dststride,
                                  uint8_t *src, int srcstride,
                                  int height, int mx, int my);
void ff_put_vp8_bilinear8_v_ssse3(uint8_t *dst, int dststride,
                                  uint8_t *src, int srcstride,
                                  int height, int mx, int my);
00108
00109
/*
 * Block routines installed in the [0][0] slot of the MC tables by
 * ff_vp8dsp_init_x86() (presumably plain full-pel copies, given the
 * "pixels" naming).  Defined outside this file.
 */
void ff_put_vp8_pixels8_mmx(uint8_t *dst, int dststride,
                            uint8_t *src, int srcstride,
                            int height, int mx, int my);
void ff_put_vp8_pixels16_mmx(uint8_t *dst, int dststride,
                             uint8_t *src, int srcstride,
                             int height, int mx, int my);
void ff_put_vp8_pixels16_sse(uint8_t *dst, int dststride,
                             uint8_t *src, int srcstride,
                             int height, int mx, int my);
00119
/*
 * Generators for double-width single-pass filters.
 *
 * VP8_TAP_SPLIT() defines
 *     static void ff_put_vp8_<FILTERTYPE><FULL>_<TAPTYPE>_<OPT>(...)
 * which covers a FULL-pixel-wide block by calling the HALF-pixel-wide
 * routine of the same filter once per half, left half first, with dst and
 * src advanced by the same column offset.
 *
 * TAP_W16() builds a 16-wide function from the 8-wide one,
 * TAP_W8()  builds an  8-wide function from the 4-wide one.
 */
#define VP8_TAP_SPLIT(OPT, FILTERTYPE, TAPTYPE, FULL, HALF) \
static void ff_put_vp8_ ## FILTERTYPE ## FULL ## _ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst, int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    int x; \
    for (x = 0; x < FULL; x += HALF) \
        ff_put_vp8_ ## FILTERTYPE ## HALF ## _ ## TAPTYPE ## _ ## OPT( \
            dst + x, dststride, src + x, srcstride, height, mx, my); \
}
#define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
    VP8_TAP_SPLIT(OPT, FILTERTYPE, TAPTYPE, 16, 8)
#define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \
    VP8_TAP_SPLIT(OPT, FILTERTYPE, TAPTYPE, 8, 4)
00140
00141 TAP_W8 (mmxext, epel, h4)
00142 TAP_W8 (mmxext, epel, h6)
00143 TAP_W16(mmxext, epel, h6)
00144 TAP_W8 (mmxext, epel, v4)
00145 TAP_W8 (mmxext, epel, v6)
00146 TAP_W16(mmxext, epel, v6)
00147 TAP_W8 (mmxext, bilinear, h)
00148 TAP_W16(mmxext, bilinear, h)
00149 TAP_W8 (mmxext, bilinear, v)
00150 TAP_W16(mmxext, bilinear, v)
00151
00152 TAP_W16(sse2, epel, h6)
00153 TAP_W16(sse2, epel, v6)
00154 TAP_W16(sse2, bilinear, h)
00155 TAP_W16(sse2, bilinear, v)
00156
00157 TAP_W16(ssse3, epel, h6)
00158 TAP_W16(ssse3, epel, v6)
00159 TAP_W16(ssse3, bilinear, h)
00160 TAP_W16(ssse3, bilinear, v)
00161
/*
 * HVTAP() defines a two-pass subpel filter
 *     static void ff_put_vp8_epel<SIZE>_h<TAPNUMX>v<TAPNUMY>_<OPT>(...)
 * built from the single-pass _h<TAPNUMX>_ and _v<TAPNUMY>_ routines.
 *
 * The horizontal pass starts (TAPNUMY / 2 - 1) rows above src and writes
 * height + TAPNUMY - 1 rows into an ALIGN-aligned stack buffer whose row
 * stride is SIZE.  The vertical pass then reads that buffer, starting at the
 * row that corresponds to the original src row, and writes to dst.
 *
 * MAXHEIGHT sizes the buffer; nothing here checks height against it, so
 * callers must not pass a larger height.
 */
#define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
    uint8_t *dst, int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, rows)[SIZE * (MAXHEIGHT + TAPNUMY - 1)]; \
    const int lead = TAPNUMY / 2 - 1; \
    \
    ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \
        rows, SIZE, src - srcstride * lead, srcstride, \
        height + TAPNUMY - 1, mx, my); \
    ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \
        dst, dststride, rows + SIZE * lead, SIZE, height, mx, my); \
}
00175
00176 #define HVTAPMMX(x, y) \
00177 HVTAP(mmxext, 8, x, y, 4, 8) \
00178 HVTAP(mmxext, 8, x, y, 8, 16)
00179
00180 HVTAPMMX(4, 4)
00181 HVTAPMMX(4, 6)
00182 HVTAPMMX(6, 4)
00183 HVTAPMMX(6, 6)
00184 HVTAP(mmxext, 8, 6, 6, 16, 16)
00185
00186 #define HVTAPSSE2(x, y, w) \
00187 HVTAP(sse2, 16, x, y, w, 16) \
00188 HVTAP(ssse3, 16, x, y, w, 16)
00189
00190 HVTAPSSE2(4, 4, 8)
00191 HVTAPSSE2(4, 6, 8)
00192 HVTAPSSE2(6, 4, 8)
00193 HVTAPSSE2(6, 6, 8)
00194 HVTAPSSE2(6, 6, 16)
00195
00196 HVTAP(ssse3, 16, 4, 4, 4, 8)
00197 HVTAP(ssse3, 16, 4, 6, 4, 8)
00198 HVTAP(ssse3, 16, 6, 4, 4, 8)
00199 HVTAP(ssse3, 16, 6, 6, 4, 8)
00200
/*
 * HVBILIN() defines a two-pass bilinear filter
 *     static void ff_put_vp8_bilinear<SIZE>_hv_<OPT>(...)
 * built from the matching _h_ and _v_ routines.
 *
 * The horizontal pass writes height + 1 rows, starting at src itself, into
 * an ALIGN-aligned stack buffer with row stride SIZE.  The vertical pass
 * then reads that buffer from its first row and writes to dst.
 *
 * The buffer holds MAXHEIGHT + 2 rows, one more than the horizontal pass is
 * asked to produce at the maximum height.
 * NOTE(review): presumably slack for the callee; confirm against the asm.
 */
#define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
    uint8_t *dst, int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, rows)[SIZE * (MAXHEIGHT + 2)]; \
    const int rows_stride = SIZE; \
    \
    ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \
        rows, rows_stride, src, srcstride, height + 1, mx, my); \
    ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \
        dst, dststride, rows, rows_stride, height, mx, my); \
}
00212
00213 HVBILIN(mmxext, 8, 4, 8)
00214 HVBILIN(mmxext, 8, 8, 16)
00215 HVBILIN(mmxext, 8, 16, 16)
00216 HVBILIN(sse2, 8, 8, 16)
00217 HVBILIN(sse2, 8, 16, 16)
00218 HVBILIN(ssse3, 8, 4, 8)
00219 HVBILIN(ssse3, 8, 8, 16)
00220 HVBILIN(ssse3, 8, 16, 16)
00221
00222 extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
00223 extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride);
00224 extern void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, DCTELEM block[4][16], int stride);
00225 extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16], int stride);
00226 extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16], int stride);
00227 extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
00228 extern void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]);
00229 extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
00230 extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16], int stride);
00231
/*
 * DECLARE_LOOP_FILTER(NAME) declares the complete set of externally defined
 * loop-filter entry points for one instruction set: the two "simple"
 * filters, and the inner-edge and macroblock-edge filters for 16-wide luma
 * (16y) and for the two 8-wide chroma planes (8uv), each in a vertical (v)
 * and a horizontal (h) flavour.
 */
#define DECLARE_LOOP_FILTER(NAME) \
void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim); \
void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim); \
void ff_vp8_v_loop_filter16y_inner_ ## NAME(uint8_t *dst, int stride, \
                                            int e, int i, int hvt); \
void ff_vp8_h_loop_filter16y_inner_ ## NAME(uint8_t *dst, int stride, \
                                            int e, int i, int hvt); \
void ff_vp8_v_loop_filter8uv_inner_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                            int s, int e, int i, int hvt); \
void ff_vp8_h_loop_filter8uv_inner_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                            int s, int e, int i, int hvt); \
void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride, \
                                             int e, int i, int hvt); \
void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride, \
                                             int e, int i, int hvt); \
void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                             int s, int e, int i, int hvt); \
void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                             int s, int e, int i, int hvt);

/* One full set of declarations per supported instruction set. */
DECLARE_LOOP_FILTER(mmx)
DECLARE_LOOP_FILTER(mmxext)
DECLARE_LOOP_FILTER(sse2)
DECLARE_LOOP_FILTER(ssse3)
DECLARE_LOOP_FILTER(sse4)
00257
00258 #endif
00259
/*
 * Helpers that fill the motion-compensation function tables of the
 * VP8DSPContext.  All of them expect a pointer named `c` to be in scope at
 * the point of use.  Table entries are indexed as [IDX][a][b]; IDX selects
 * the block size (the init code below uses 0 for 16, 1 for 8, 2 for 4).
 *
 * Each macro is wrapped in do { ... } while (0) so that it expands to a
 * single statement and stays correct under an unbraced if/else.  Invoke it
 * as a statement followed by a semicolon.
 */

/* Install the 6-tap-only epel entries: h6, v6 and h6v6. */
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT; \
    } while (0)

/* Install every epel entry except [0][0]: the 4-tap and mixed-tap ones,
 * then the 6-tap ones via VP8_LUMA_MC_FUNC. */
#define VP8_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
        VP8_LUMA_MC_FUNC(IDX, SIZE, OPT); \
    } while (0)

/* Install every bilinear entry except [0][0].  Indices 1 and 2 map to the
 * same function in each direction. */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
    } while (0)
00282
00283
00284 av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
00285 {
00286 int mm_flags = av_get_cpu_flags();
00287
00288 #if HAVE_YASM
00289 if (mm_flags & AV_CPU_FLAG_MMX) {
00290 c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx;
00291 c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx;
00292 c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
00293 c->vp8_idct_add = ff_vp8_idct_add_mmx;
00294 c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx;
00295 c->put_vp8_epel_pixels_tab[0][0][0] =
00296 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
00297 c->put_vp8_epel_pixels_tab[1][0][0] =
00298 c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
00299
00300 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
00301 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
00302
00303 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
00304 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
00305 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
00306 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
00307
00308 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx;
00309 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx;
00310 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx;
00311 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx;
00312 }
00313
00314
00315
00316 if (mm_flags & AV_CPU_FLAG_MMX2) {
00317 VP8_LUMA_MC_FUNC(0, 16, mmxext);
00318 VP8_MC_FUNC(1, 8, mmxext);
00319 VP8_MC_FUNC(2, 4, mmxext);
00320 VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
00321 VP8_BILINEAR_MC_FUNC(1, 8, mmxext);
00322 VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
00323
00324 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
00325 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
00326
00327 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
00328 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
00329 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
00330 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
00331
00332 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext;
00333 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext;
00334 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
00335 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
00336 }
00337
00338 if (mm_flags & AV_CPU_FLAG_SSE) {
00339 c->vp8_idct_add = ff_vp8_idct_add_sse;
00340 c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
00341 c->put_vp8_epel_pixels_tab[0][0][0] =
00342 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
00343 }
00344
00345 if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) {
00346 VP8_LUMA_MC_FUNC(0, 16, sse2);
00347 VP8_MC_FUNC(1, 8, sse2);
00348 VP8_BILINEAR_MC_FUNC(0, 16, sse2);
00349 VP8_BILINEAR_MC_FUNC(1, 8, sse2);
00350
00351 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
00352
00353 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
00354 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
00355
00356 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2;
00357 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
00358 }
00359
00360 if (mm_flags & AV_CPU_FLAG_SSE2) {
00361 c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;
00362
00363 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
00364
00365 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
00366 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
00367
00368 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
00369 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
00370 }
00371
00372 if (mm_flags & AV_CPU_FLAG_SSSE3) {
00373 VP8_LUMA_MC_FUNC(0, 16, ssse3);
00374 VP8_MC_FUNC(1, 8, ssse3);
00375 VP8_MC_FUNC(2, 4, ssse3);
00376 VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
00377 VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
00378 VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
00379
00380 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
00381 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
00382
00383 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
00384 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
00385 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
00386 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
00387
00388 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3;
00389 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
00390 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
00391 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
00392 }
00393
00394 if (mm_flags & AV_CPU_FLAG_SSE4) {
00395 c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;
00396
00397 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
00398 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4;
00399 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4;
00400 }
00401 #endif
00402 }