Go to the documentation of this file.
41 void ff_cavs_idct8_mmx(int16_t *
out,
const int16_t *in);
43 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *
block, ptrdiff_t
stride)
50 void ff_cavs_idct8_sse2(int16_t *
out,
const int16_t *in);
52 static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *
block, ptrdiff_t
stride)
61 #if (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE)
70 #define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
71 "movd (%0), "#F" \n\t"\
72 "movq "#C", %%mm6 \n\t"\
73 "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
74 "movq "#D", %%mm7 \n\t"\
75 "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
76 "psllw $3, "#E" \n\t"\
77 "psubw "#E", %%mm6 \n\t"\
78 "psraw $3, "#E" \n\t"\
79 "paddw %%mm7, %%mm6 \n\t"\
80 "paddw "#E", %%mm6 \n\t"\
81 "paddw "#B", "#B" \n\t"\
82 "pxor %%mm7, %%mm7 \n\t"\
84 "punpcklbw %%mm7, "#F" \n\t"\
85 "psubw "#B", %%mm6 \n\t"\
86 "psraw $1, "#B" \n\t"\
87 "psubw "#A", %%mm6 \n\t"\
88 "paddw "MANGLE(ADD)", %%mm6 \n\t"\
89 "psraw $7, %%mm6 \n\t"\
90 "packuswb %%mm6, %%mm6 \n\t"\
91 OP(%%mm6, (%1), A, d) \
95 #define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
96 "movd (%0), "#F" \n\t"\
97 "movq "#C", %%mm6 \n\t"\
98 "paddw "#D", %%mm6 \n\t"\
99 "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
101 "punpcklbw %%mm7, "#F" \n\t"\
102 "psubw "#B", %%mm6 \n\t"\
103 "psubw "#E", %%mm6 \n\t"\
104 "paddw "MANGLE(ADD)", %%mm6 \n\t"\
105 "psraw $3, %%mm6 \n\t"\
106 "packuswb %%mm6, %%mm6 \n\t"\
107 OP(%%mm6, (%1), A, d) \
111 #define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
112 "movd (%0), "#F" \n\t"\
113 "movq "#C", %%mm6 \n\t"\
114 "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
115 "movq "#D", %%mm7 \n\t"\
116 "pmullw "MANGLE(MUL1)", %%mm7\n\t"\
117 "psllw $3, "#B" \n\t"\
118 "psubw "#B", %%mm6 \n\t"\
119 "psraw $3, "#B" \n\t"\
120 "paddw %%mm7, %%mm6 \n\t"\
121 "paddw "#B", %%mm6 \n\t"\
122 "paddw "#E", "#E" \n\t"\
123 "pxor %%mm7, %%mm7 \n\t"\
125 "punpcklbw %%mm7, "#F" \n\t"\
126 "psubw "#E", %%mm6 \n\t"\
127 "psraw $1, "#E" \n\t"\
128 "psubw "#F", %%mm6 \n\t"\
129 "paddw "MANGLE(ADD)", %%mm6 \n\t"\
130 "psraw $7, %%mm6 \n\t"\
131 "packuswb %%mm6, %%mm6 \n\t"\
132 OP(%%mm6, (%1), A, d) \
136 #define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
142 "pxor %%mm7, %%mm7 \n\t"\
143 "movd (%0), %%mm0 \n\t"\
145 "movd (%0), %%mm1 \n\t"\
147 "movd (%0), %%mm2 \n\t"\
149 "movd (%0), %%mm3 \n\t"\
151 "movd (%0), %%mm4 \n\t"\
153 "punpcklbw %%mm7, %%mm0 \n\t"\
154 "punpcklbw %%mm7, %%mm1 \n\t"\
155 "punpcklbw %%mm7, %%mm2 \n\t"\
156 "punpcklbw %%mm7, %%mm3 \n\t"\
157 "punpcklbw %%mm7, %%mm4 \n\t"\
158 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
159 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
160 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
161 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
162 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
163 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
164 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
165 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
167 : "+a"(src), "+c"(dst)\
168 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
169 NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
174 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
175 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
176 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
177 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
178 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
179 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
180 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
181 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
183 : "+a"(src), "+c"(dst)\
184 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
185 NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
189 src += 4-(h+5)*srcStride;\
190 dst += 4-h*dstStride;\
193 #define QPEL_CAVS(OPNAME, OP, MMX)\
194 static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
198 "pxor %%mm7, %%mm7 \n\t"\
199 "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
201 "movq (%0), %%mm0 \n\t"\
202 "movq 1(%0), %%mm2 \n\t"\
203 "movq %%mm0, %%mm1 \n\t"\
204 "movq %%mm2, %%mm3 \n\t"\
205 "punpcklbw %%mm7, %%mm0 \n\t"\
206 "punpckhbw %%mm7, %%mm1 \n\t"\
207 "punpcklbw %%mm7, %%mm2 \n\t"\
208 "punpckhbw %%mm7, %%mm3 \n\t"\
209 "paddw %%mm2, %%mm0 \n\t"\
210 "paddw %%mm3, %%mm1 \n\t"\
211 "pmullw %%mm6, %%mm0 \n\t"\
212 "pmullw %%mm6, %%mm1 \n\t"\
213 "movq -1(%0), %%mm2 \n\t"\
214 "movq 2(%0), %%mm4 \n\t"\
215 "movq %%mm2, %%mm3 \n\t"\
216 "movq %%mm4, %%mm5 \n\t"\
217 "punpcklbw %%mm7, %%mm2 \n\t"\
218 "punpckhbw %%mm7, %%mm3 \n\t"\
219 "punpcklbw %%mm7, %%mm4 \n\t"\
220 "punpckhbw %%mm7, %%mm5 \n\t"\
221 "paddw %%mm4, %%mm2 \n\t"\
222 "paddw %%mm3, %%mm5 \n\t"\
223 "psubw %%mm2, %%mm0 \n\t"\
224 "psubw %%mm5, %%mm1 \n\t"\
225 "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\
226 "paddw %%mm5, %%mm0 \n\t"\
227 "paddw %%mm5, %%mm1 \n\t"\
228 "psraw $3, %%mm0 \n\t"\
229 "psraw $3, %%mm1 \n\t"\
230 "packuswb %%mm1, %%mm0 \n\t"\
231 OP(%%mm0, (%1),%%mm5, q) \
236 : "+a"(src), "+c"(dst), "+m"(h)\
237 : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
238 NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\
243 static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
245 QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
248 static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
250 QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_42) \
253 static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
255 QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
258 static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
260 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
262 static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
264 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
265 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
268 static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
270 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
272 static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
274 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
275 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
278 static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
280 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
282 static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
284 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
285 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
288 static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
290 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
291 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
294 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
295 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
298 #define CAVS_MC(OPNAME, SIZE, MMX) \
299 static void OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
301 OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
304 static void OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
306 OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
309 static void OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
311 OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
314 static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
316 OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
319 #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
320 #define AVG_3DNOW_OP(a,b,temp, size) \
321 "mov" #size " " #b ", " #temp " \n\t"\
322 "pavgusb " #temp ", " #a " \n\t"\
323 "mov" #size " " #a ", " #b " \n\t"
324 #define AVG_MMXEXT_OP(a, b, temp, size) \
325 "mov" #size " " #b ", " #temp " \n\t"\
326 "pavgb " #temp ", " #a " \n\t"\
327 "mov" #size " " #a ", " #b " \n\t"
331 #if HAVE_MMX_EXTERNAL
332 static void put_cavs_qpel8_mc00_mmx(uint8_t *dst,
const uint8_t *
src,
338 static void avg_cavs_qpel8_mc00_mmx(uint8_t *dst,
const uint8_t *
src,
344 static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst,
const uint8_t *
src,
350 static void put_cavs_qpel16_mc00_mmx(uint8_t *dst,
const uint8_t *
src,
356 static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst,
const uint8_t *
src,
362 static void avg_cavs_qpel16_mc00_mmxext(uint8_t *dst,
const uint8_t *
src,
368 static void put_cavs_qpel16_mc00_sse2(uint8_t *dst,
const uint8_t *
src,
374 static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst,
const uint8_t *
src,
384 #if HAVE_MMX_EXTERNAL
385 c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_mmx;
386 c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx;
387 c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmx;
388 c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmx;
390 c->cavs_idct8_add = cavs_idct8_add_mmx;
395 #define DSPFUNC(PFX, IDX, NUM, EXT) \
396 c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \
397 c->PFX ## _cavs_qpel_pixels_tab[IDX][ 4] = PFX ## _cavs_qpel ## NUM ## _mc01_ ## EXT; \
398 c->PFX ## _cavs_qpel_pixels_tab[IDX][ 8] = PFX ## _cavs_qpel ## NUM ## _mc02_ ## EXT; \
399 c->PFX ## _cavs_qpel_pixels_tab[IDX][12] = PFX ## _cavs_qpel ## NUM ## _mc03_ ## EXT; \
401 #if HAVE_MMXEXT_INLINE
402 QPEL_CAVS(put_, PUT_OP, mmxext)
403 QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)
411 #if HAVE_AMD3DNOW_INLINE
412 QPEL_CAVS(put_, PUT_OP, 3dnow)
413 QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
437 #if HAVE_AMD3DNOW_INLINE
439 cavsdsp_init_3dnow(
c, avctx);
441 #if HAVE_MMXEXT_INLINE
449 #if HAVE_MMX_EXTERNAL
451 c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmxext;
452 c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext;
455 #if HAVE_SSE2_EXTERNAL
457 c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2;
458 c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2;
460 c->cavs_idct8_add = cavs_idct8_add_sse2;
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
#define LOCAL_ALIGNED(a, t, v,...)
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
#define CAVS_MC(OPNAME, SIZE)
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
static double b2(void *priv, double x, double y)
#define EXTERNAL_SSE2(flags)
#define DSPFUNC(PFX, IDX, NUM, EXT)
main external API structure.
#define INLINE_MMXEXT(flags)
The exact code depends on how similar the blocks are and how related they are to the block
#define INLINE_AMD3DNOW(flags)
#define EXTERNAL_MMXEXT(flags)