/*
 * RV40 decoder motion compensation functions, x86-optimised
 *
 * NOTE(review): the original file header (copyright/license block) was
 * reduced to bare line numbers by the extraction; restore it from upstream.
 */
#include "libavutil/cpu.h"
#include "libavcodec/x86/dsputil_mmx.h"
#include "libavcodec/rv34dsp.h"
00031
00032 void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src,
00033 int stride, int h, int x, int y);
00034 void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src,
00035 int stride, int h, int x, int y);
00036 void ff_avg_rv40_chroma_mc8_3dnow(uint8_t *dst, uint8_t *src,
00037 int stride, int h, int x, int y);
00038
00039 void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
00040 int stride, int h, int x, int y);
00041 void ff_avg_rv40_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src,
00042 int stride, int h, int x, int y);
00043 void ff_avg_rv40_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src,
00044 int stride, int h, int x, int y);
00045
00046 #define DECLARE_WEIGHT(opt) \
00047 void ff_rv40_weight_func_rnd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \
00048 int w1, int w2, ptrdiff_t stride); \
00049 void ff_rv40_weight_func_rnd_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \
00050 int w1, int w2, ptrdiff_t stride); \
00051 void ff_rv40_weight_func_nornd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \
00052 int w1, int w2, ptrdiff_t stride); \
00053 void ff_rv40_weight_func_nornd_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \
00054 int w1, int w2, ptrdiff_t stride);
00055 DECLARE_WEIGHT(mmx2)
00056 DECLARE_WEIGHT(sse2)
00057 DECLARE_WEIGHT(ssse3)
00058
00067 #define QPEL_FUNC_DECL(OP, SIZE, PH, PV, OPT) \
00068 static void OP ## rv40_qpel ##SIZE ##_mc ##PH ##PV ##OPT(uint8_t *dst, \
00069 uint8_t *src, \
00070 int stride) \
00071 { \
00072 int i; \
00073 if (PH && PV) { \
00074 DECLARE_ALIGNED(16, uint8_t, tmp)[SIZE * (SIZE + 5)]; \
00075 uint8_t *tmpptr = tmp + SIZE * 2; \
00076 src -= stride * 2; \
00077 \
00078 for (i = 0; i < SIZE; i += LOOPSIZE) \
00079 ff_put_rv40_qpel_h ##OPT(tmp + i, SIZE, src + i, stride, \
00080 SIZE + 5, HCOFF(PH)); \
00081 for (i = 0; i < SIZE; i += LOOPSIZE) \
00082 ff_ ##OP ##rv40_qpel_v ##OPT(dst + i, stride, tmpptr + i, \
00083 SIZE, SIZE, VCOFF(PV)); \
00084 } else if (PV) { \
00085 for (i = 0; i < SIZE; i += LOOPSIZE) \
00086 ff_ ##OP ##rv40_qpel_v ## OPT(dst + i, stride, src + i, \
00087 stride, SIZE, VCOFF(PV)); \
00088 } else { \
00089 for (i = 0; i < SIZE; i += LOOPSIZE) \
00090 ff_ ##OP ##rv40_qpel_h ## OPT(dst + i, stride, src + i, \
00091 stride, SIZE, HCOFF(PH)); \
00092 } \
00093 };
00094
00097 #define QPEL_FUNCS_DECL(OP, PH, PV, OPT) \
00098 QPEL_FUNC_DECL(OP, 8, PH, PV, OPT) \
00099 QPEL_FUNC_DECL(OP, 16, PH, PV, OPT)
00100
00102 #define QPEL_MC_DECL(OP, OPT) \
00103 void ff_ ##OP ##rv40_qpel_h ##OPT(uint8_t *dst, ptrdiff_t dstStride, \
00104 const uint8_t *src, \
00105 ptrdiff_t srcStride, \
00106 int len, int m); \
00107 void ff_ ##OP ##rv40_qpel_v ##OPT(uint8_t *dst, ptrdiff_t dstStride, \
00108 const uint8_t *src, \
00109 ptrdiff_t srcStride, \
00110 int len, int m); \
00111 QPEL_FUNCS_DECL(OP, 0, 1, OPT) \
00112 QPEL_FUNCS_DECL(OP, 0, 3, OPT) \
00113 QPEL_FUNCS_DECL(OP, 1, 0, OPT) \
00114 QPEL_FUNCS_DECL(OP, 1, 1, OPT) \
00115 QPEL_FUNCS_DECL(OP, 1, 2, OPT) \
00116 QPEL_FUNCS_DECL(OP, 1, 3, OPT) \
00117 QPEL_FUNCS_DECL(OP, 2, 1, OPT) \
00118 QPEL_FUNCS_DECL(OP, 2, 2, OPT) \
00119 QPEL_FUNCS_DECL(OP, 2, 3, OPT) \
00120 QPEL_FUNCS_DECL(OP, 3, 0, OPT) \
00121 QPEL_FUNCS_DECL(OP, 3, 1, OPT) \
00122 QPEL_FUNCS_DECL(OP, 3, 2, OPT)
00123
00125 #define LOOPSIZE 8
00126 #define HCOFF(x) (32 * (x - 1))
00127 #define VCOFF(x) (32 * (x - 1))
00128 QPEL_MC_DECL(put_, _ssse3)
00129 QPEL_MC_DECL(avg_, _ssse3)
00130
00131 #undef LOOPSIZE
00132 #undef HCOFF
00133 #undef VCOFF
00134 #define LOOPSIZE 8
00135 #define HCOFF(x) (64 * (x - 1))
00136 #define VCOFF(x) (64 * (x - 1))
00137 QPEL_MC_DECL(put_, _sse2)
00138 QPEL_MC_DECL(avg_, _sse2)
00139
00140 #if ARCH_X86_32
00141 #undef LOOPSIZE
00142 #undef HCOFF
00143 #undef VCOFF
00144 #define LOOPSIZE 4
00145 #define HCOFF(x) (64 * (x - 1))
00146 #define VCOFF(x) (64 * (x - 1))
00147
00148 QPEL_MC_DECL(put_, _mmx)
00149
00150 #define ff_put_rv40_qpel_h_mmx2 ff_put_rv40_qpel_h_mmx
00151 #define ff_put_rv40_qpel_v_mmx2 ff_put_rv40_qpel_v_mmx
00152 QPEL_MC_DECL(avg_, _mmx2)
00153
00154 #define ff_put_rv40_qpel_h_3dnow ff_put_rv40_qpel_h_mmx
00155 #define ff_put_rv40_qpel_v_3dnow ff_put_rv40_qpel_v_mmx
00156 QPEL_MC_DECL(avg_, _3dnow)
00157 #endif
00158
00161 #define QPEL_FUNC_SET(OP, SIZE, PH, PV, OPT) \
00162 c-> OP ## pixels_tab[2 - SIZE / 8][4 * PV + PH] = OP ## rv40_qpel ##SIZE ## _mc ##PH ##PV ##OPT;
00163
00165 #define QPEL_FUNCS_SET(OP, PH, PV, OPT) \
00166 QPEL_FUNC_SET(OP, 8, PH, PV, OPT) \
00167 QPEL_FUNC_SET(OP, 16, PH, PV, OPT)
00168
00170 #define QPEL_MC_SET(OP, OPT) \
00171 QPEL_FUNCS_SET (OP, 0, 1, OPT) \
00172 QPEL_FUNCS_SET (OP, 0, 3, OPT) \
00173 QPEL_FUNCS_SET (OP, 1, 0, OPT) \
00174 QPEL_FUNCS_SET (OP, 1, 1, OPT) \
00175 QPEL_FUNCS_SET (OP, 1, 2, OPT) \
00176 QPEL_FUNCS_SET (OP, 1, 3, OPT) \
00177 QPEL_FUNCS_SET (OP, 2, 1, OPT) \
00178 QPEL_FUNCS_SET (OP, 2, 2, OPT) \
00179 QPEL_FUNCS_SET (OP, 2, 3, OPT) \
00180 QPEL_FUNCS_SET (OP, 3, 0, OPT) \
00181 QPEL_FUNCS_SET (OP, 3, 1, OPT) \
00182 QPEL_FUNCS_SET (OP, 3, 2, OPT)
00183
00185 void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
00186 {
00187 #if HAVE_YASM
00188 int mm_flags = av_get_cpu_flags();
00189
00190 if (mm_flags & AV_CPU_FLAG_MMX) {
00191 c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx;
00192 c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx;
00193 c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_mmx;
00194 c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_mmx;
00195 c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_mmx;
00196 c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_mmx;
00197 #if ARCH_X86_32
00198 QPEL_MC_SET(put_, _mmx)
00199 #endif
00200 }
00201 if (mm_flags & AV_CPU_FLAG_MMX2) {
00202 c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2;
00203 c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2;
00204 c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2;
00205 c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_mmx2;
00206 c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_mmx2;
00207 c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_mmx2;
00208 #if ARCH_X86_32
00209 QPEL_MC_SET(avg_, _mmx2)
00210 #endif
00211 } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
00212 c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow;
00213 c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow;
00214 #if ARCH_X86_32
00215 QPEL_MC_SET(avg_, _3dnow)
00216 #endif
00217 }
00218 if (mm_flags & AV_CPU_FLAG_SSE2) {
00219 c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2;
00220 c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2;
00221 c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2;
00222 c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_sse2;
00223 QPEL_MC_SET(put_, _sse2)
00224 QPEL_MC_SET(avg_, _sse2)
00225 }
00226 if (mm_flags & AV_CPU_FLAG_SSSE3) {
00227 c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3;
00228 c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3;
00229 c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3;
00230 c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_ssse3;
00231 QPEL_MC_SET(put_, _ssse3)
00232 QPEL_MC_SET(avg_, _ssse3)
00233 }
00234 #endif
00235 }