Go to the documentation of this file.
39 #if HAVE_SSE2_EXTERNAL
/*
 * Prototype only: takes a read-only int16_t buffer `in` and a writable
 * int16_t buffer `out`. No definition is visible in this view.
 * NOTE(review): presumably implemented in external assembly, given the
 * HAVE_SSE2_EXTERNAL guard just above -- confirm.
 */
41 void ff_cavs_idct8_sse2(int16_t *
out,
const int16_t *in);
43 static void cavs_idct8_add_sse2(uint8_t *
dst, int16_t *
block, ptrdiff_t
stride)
52 #if HAVE_MMXEXT_INLINE
/*
 * Inline-asm template fragment (adjacent string literals) producing one
 * output row of a vertical filter.
 *
 * A..F are MMX register names; OP is a store macro (PUT_OP / AVG_MMXEXT_OP
 * style); ADD, MUL1 and MUL2 are symbol names referenced through MANGLE().
 *
 * Visible steps:
 *  - F is loaded with 4 bytes from (%0) and widened to 16-bit words against
 *    a zeroed mm7 (for use by later rows; F is not part of this row's sum).
 *  - mm6 accumulates, per word lane:
 *        (MUL1*C + MUL2*D - 7*E - 2*B - A + ADD) >> 7
 *    E is shifted left by 3, subtracted, shifted back and added once (net
 *    -7*E); B is doubled, subtracted, then halved back. Both are therefore
 *    left unchanged on exit, assuming the scaled values fit a 16-bit lane.
 *  - The result is packed with unsigned saturation and passed to
 *    OP(mm6, (%1), A, d); A is handed over as OP's scratch register.
 *
 * Clobbers mm6 and mm7 (mm7 is left zeroed).
 *
 * NOTE(review): the embedded listing numbers skip 74 and 83-85, so some
 * instructions (likely the %0/%1 pointer advances) are missing from this
 * view -- confirm against the complete file.
 */
61 #define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
62 "movd (%0), "#F" \n\t"\
63 "movq "#C", %%mm6 \n\t"\
64 "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
65 "movq "#D", %%mm7 \n\t"\
66 "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
67 "psllw $3, "#E" \n\t"\
68 "psubw "#E", %%mm6 \n\t"\
69 "psraw $3, "#E" \n\t"\
70 "paddw %%mm7, %%mm6 \n\t"\
71 "paddw "#E", %%mm6 \n\t"\
72 "paddw "#B", "#B" \n\t"\
73 "pxor %%mm7, %%mm7 \n\t"\
75 "punpcklbw %%mm7, "#F" \n\t"\
76 "psubw "#B", %%mm6 \n\t"\
77 "psraw $1, "#B" \n\t"\
78 "psubw "#A", %%mm6 \n\t"\
79 "paddw "MANGLE(ADD)", %%mm6 \n\t"\
80 "psraw $7, %%mm6 \n\t"\
81 "packuswb %%mm6, %%mm6 \n\t"\
82 OP(%%mm6, (%1), A, d) \
/*
 * Inline-asm template fragment producing one output row of a vertical
 * filter; same parameter list as QPEL_CAVSV1.
 *
 * Visible steps:
 *  - F is loaded with 4 bytes from (%0) and widened to 16-bit words using
 *    mm7. This fragment does not zero mm7 itself in the visible lines, so it
 *    relies on mm7 already being zero on entry.
 *  - mm6 accumulates, per word lane:
 *        ((C + D)*MUL1 - B - E + ADD) >> 3
 *  - The result is packed with unsigned saturation and passed to
 *    OP(mm6, (%1), A, d); A is only used as OP's scratch register here.
 *
 * MUL2 is accepted but not referenced in the visible lines.
 * Clobbers mm6.
 *
 * NOTE(review): the embedded listing numbers skip 91 and stop at 98, so some
 * instructions (likely the %0/%1 pointer advances) are missing from this
 * view -- confirm against the complete file.
 */
86 #define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
87 "movd (%0), "#F" \n\t"\
88 "movq "#C", %%mm6 \n\t"\
89 "paddw "#D", %%mm6 \n\t"\
90 "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
92 "punpcklbw %%mm7, "#F" \n\t"\
93 "psubw "#B", %%mm6 \n\t"\
94 "psubw "#E", %%mm6 \n\t"\
95 "paddw "MANGLE(ADD)", %%mm6 \n\t"\
96 "psraw $3, %%mm6 \n\t"\
97 "packuswb %%mm6, %%mm6 \n\t"\
98 OP(%%mm6, (%1), A, d) \
/*
 * Inline-asm template fragment producing one output row of a vertical
 * filter; the counterpart of QPEL_CAVSV1 with the tap weights applied in
 * the opposite order across the rows.
 *
 * Visible steps:
 *  - F is loaded with 4 bytes from (%0) and widened to 16-bit words against
 *    a zeroed mm7; unlike QPEL_CAVSV1, F takes part in this row's sum.
 *  - mm6 accumulates, per word lane:
 *        (MUL2*C + MUL1*D - 7*B - 2*E - F + ADD) >> 7
 *    B is shifted left by 3, subtracted, shifted back and added once (net
 *    -7*B); E is doubled, subtracted, then halved back. Both are therefore
 *    left unchanged on exit, assuming the scaled values fit a 16-bit lane.
 *  - The result is packed with unsigned saturation and passed to
 *    OP(mm6, (%1), A, d); A is only used as OP's scratch register here.
 *
 * Clobbers mm6 and mm7 (mm7 is left zeroed).
 *
 * NOTE(review): the embedded listing numbers skip 115 and stop at 123, so
 * some instructions (likely the %0/%1 pointer advances) are missing from
 * this view -- confirm against the complete file.
 */
102 #define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
103 "movd (%0), "#F" \n\t"\
104 "movq "#C", %%mm6 \n\t"\
105 "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
106 "movq "#D", %%mm7 \n\t"\
107 "pmullw "MANGLE(MUL1)", %%mm7\n\t"\
108 "psllw $3, "#B" \n\t"\
109 "psubw "#B", %%mm6 \n\t"\
110 "psraw $3, "#B" \n\t"\
111 "paddw %%mm7, %%mm6 \n\t"\
112 "paddw "#B", %%mm6 \n\t"\
113 "paddw "#E", "#E" \n\t"\
114 "pxor %%mm7, %%mm7 \n\t"\
116 "punpcklbw %%mm7, "#F" \n\t"\
117 "psubw "#E", %%mm6 \n\t"\
118 "psraw $1, "#E" \n\t"\
119 "psubw "#F", %%mm6 \n\t"\
120 "paddw "MANGLE(ADD)", %%mm6 \n\t"\
121 "psraw $7, %%mm6 \n\t"\
122 "packuswb %%mm6, %%mm6 \n\t"\
123 OP(%%mm6, (%1), A, d) \
/*
 * Body template for the vertical filter functions: expands the per-row
 * fragment VOP (one of the QPEL_CAVSV* macros) over a column of rows.
 *
 * Expects `src`, `dst`, `srcStride`, `dstStride` and `h` to be in scope at
 * the expansion site.
 *
 * What the visible lines show:
 *  - mm7 is zeroed, then mm0..mm4 are each loaded with 4 bytes from (%0) and
 *    widened to 16-bit words.
 *  - VOP is then invoked 8 times with the six registers mm0..mm5 rotated by
 *    one position per invocation, so each row reuses the five previous rows
 *    plus one newly loaded register.
 *  - A second group of 8 VOP invocations follows, continuing the same
 *    rotation, with an identical operand list.
 *  - Operands: %0 = src ("+a"), %1 = dst ("+c"), %2 = srcStride ("S"),
 *    %3 = dstStride ("r"), plus NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2).
 *  - Afterwards src and dst are moved 4 bytes right and back up by
 *    (h+5) and h rows respectively.
 *
 * NOTE(review): many lines are missing from this view (the embedded listing
 * numbers skip 128-132, 135, 137, ..., 157, 161-164, 173, 177-179, 182-183).
 * The pointer advances between the loads, the asm statement framing, any
 * loop and the condition guarding the second group are not visible --
 * presumably the second group runs only for h == 16 and the whole body
 * repeats per 4-pixel column; confirm against the complete file.
 */
127 #define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
133 "pxor %%mm7, %%mm7 \n\t"\
134 "movd (%0), %%mm0 \n\t"\
136 "movd (%0), %%mm1 \n\t"\
138 "movd (%0), %%mm2 \n\t"\
140 "movd (%0), %%mm3 \n\t"\
142 "movd (%0), %%mm4 \n\t"\
144 "punpcklbw %%mm7, %%mm0 \n\t"\
145 "punpcklbw %%mm7, %%mm1 \n\t"\
146 "punpcklbw %%mm7, %%mm2 \n\t"\
147 "punpcklbw %%mm7, %%mm3 \n\t"\
148 "punpcklbw %%mm7, %%mm4 \n\t"\
149 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
150 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
151 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
152 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
153 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
154 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
155 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
156 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
158 : "+a"(src), "+c"(dst)\
159 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
160 NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
165 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
166 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
167 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
168 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
169 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
170 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
171 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
172 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
174 : "+a"(src), "+c"(dst)\
175 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
176 NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
180 src += 4-(h+5)*srcStride;\
181 dst += 4-h*dstStride;\
/*
 * Generates the family of horizontal and vertical qpel filter functions for
 * one store flavour.
 *
 * OPNAME is the function-name prefix (instantiated with put_ / avg_), OP is
 * the store macro applied to each result, MMX is the name suffix.
 *
 * Functions defined (names shown without the OPNAME prefix / MMX suffix):
 *  - cavs_qpel8_h: horizontal filter on 8 pixels per row. Per row it loads
 *    8 bytes at src and src+1, widens both to 16-bit words (low/high
 *    halves), sums them and multiplies by ff_pw_5; it then loads src-1 and
 *    src+2, widens and sums them, subtracts that from the product, adds
 *    ff_pw_4, shifts right by 3, packs with unsigned saturation and stores
 *    via OP(mm0, (%1), mm5, q). Operands: %0 = src, %1 = dst, `h` as an
 *    in/out memory operand, srcStride in "d", dstStride in "S".
 *  - cavs_qpel8or16_v1 / _v2 / _v3: static inline wrappers around
 *    QPEL_CAVSVNUM with QPEL_CAVSV1 / V2 / V3 and the constant sets
 *    (ff_pw_64, ff_pw_96, ff_pw_42) / (ff_pw_4, ff_pw_5, ff_pw_42) /
 *    (ff_pw_64, ff_pw_96, ff_pw_42); they take the row count `h`.
 *  - cavs_qpel8_v1 / _v2 / _v3: call the matching 8or16 function once with
 *    h = 8.
 *  - cavs_qpel16_v1 / _v2 / _v3: call the matching 8or16 function twice with
 *    h = 16, for dst/src and for dst+8/src+8.
 *  - cavs_qpel16_h: calls cavs_qpel8_h for the left and right 8-pixel
 *    halves, twice.
 *
 * NOTE(review): braces, local declarations, the asm statement framing and
 * the row loop control of cavs_qpel8_h are missing from this view (the
 * embedded listing numbers skip e.g. 186-188, 191, 223-226). Listing lines
 * 283-284 between the two call pairs of cavs_qpel16_h are also missing --
 * presumably they advance src/dst to the lower half; confirm against the
 * complete file.
 */
184 #define QPEL_CAVS(OPNAME, OP, MMX)\
185 static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
189 "pxor %%mm7, %%mm7 \n\t"\
190 "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
192 "movq (%0), %%mm0 \n\t"\
193 "movq 1(%0), %%mm2 \n\t"\
194 "movq %%mm0, %%mm1 \n\t"\
195 "movq %%mm2, %%mm3 \n\t"\
196 "punpcklbw %%mm7, %%mm0 \n\t"\
197 "punpckhbw %%mm7, %%mm1 \n\t"\
198 "punpcklbw %%mm7, %%mm2 \n\t"\
199 "punpckhbw %%mm7, %%mm3 \n\t"\
200 "paddw %%mm2, %%mm0 \n\t"\
201 "paddw %%mm3, %%mm1 \n\t"\
202 "pmullw %%mm6, %%mm0 \n\t"\
203 "pmullw %%mm6, %%mm1 \n\t"\
204 "movq -1(%0), %%mm2 \n\t"\
205 "movq 2(%0), %%mm4 \n\t"\
206 "movq %%mm2, %%mm3 \n\t"\
207 "movq %%mm4, %%mm5 \n\t"\
208 "punpcklbw %%mm7, %%mm2 \n\t"\
209 "punpckhbw %%mm7, %%mm3 \n\t"\
210 "punpcklbw %%mm7, %%mm4 \n\t"\
211 "punpckhbw %%mm7, %%mm5 \n\t"\
212 "paddw %%mm4, %%mm2 \n\t"\
213 "paddw %%mm3, %%mm5 \n\t"\
214 "psubw %%mm2, %%mm0 \n\t"\
215 "psubw %%mm5, %%mm1 \n\t"\
216 "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\
217 "paddw %%mm5, %%mm0 \n\t"\
218 "paddw %%mm5, %%mm1 \n\t"\
219 "psraw $3, %%mm0 \n\t"\
220 "psraw $3, %%mm1 \n\t"\
221 "packuswb %%mm1, %%mm0 \n\t"\
222 OP(%%mm0, (%1),%%mm5, q) \
227 : "+a"(src), "+c"(dst), "+m"(h)\
228 : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
229 NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\
234 static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
236 QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
239 static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
241 QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_42) \
244 static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
246 QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
249 static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
251 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
253 static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
255 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
256 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
259 static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
261 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
263 static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
265 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
266 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
269 static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
271 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
273 static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
275 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
276 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
279 static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
281 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
282 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
285 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
286 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
/*
 * Generates the single-stride entry points that wrap the filter functions
 * produced by QPEL_CAVS for one block SIZE (8 or 16 in the visible uses).
 *
 * Each wrapper takes (dst, src, stride) and forwards `stride` as both the
 * destination and the source stride:
 *   _mc20_ -> horizontal filter   (_h_)
 *   _mc01_ -> vertical filter 1   (_v1_)
 *   _mc02_ -> vertical filter 2   (_v2_)
 *   _mc03_ -> vertical filter 3   (_v3_)
 *
 * NOTE(review): the brace lines of the generated functions are missing from
 * this view (the embedded listing numbers skip 291, 293-294, ...).
 */
289 #define CAVS_MC(OPNAME, SIZE, MMX) \
290 static void OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
292 OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
295 static void OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
297 OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
300 static void OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
302 OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
305 static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
307 OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
/*
 * Store flavour "put": expands to the asm text `mov<size> a, b`, i.e. a
 * plain move of register `a` to destination `b`. `size` is the mov suffix
 * (d or q in the visible uses); `temp` is accepted for signature parity with
 * AVG_MMXEXT_OP and is not used.
 */
310 #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
/*
 * Store flavour "avg": expands to three asm instructions that
 *   1. load the current destination `b` into the scratch register `temp`,
 *   2. byte-average it into `a` with pavgb,
 *   3. write `a` back to `b`.
 * `size` is the mov suffix (d or q in the visible uses). Both `a` and `temp`
 * are overwritten.
 */
311 #define AVG_MMXEXT_OP(a, b, temp, size) \
312 "mov" #size " " #b ", " #temp " \n\t"\
313 "pavgb " #temp ", " #a " \n\t"\
314 "mov" #size " " #a ", " #b " \n\t"
318 #if HAVE_MMX_EXTERNAL
319 static void put_cavs_qpel8_mc00_mmx(uint8_t *
dst,
const uint8_t *
src,
325 static void avg_cavs_qpel8_mc00_mmxext(uint8_t *
dst,
const uint8_t *
src,
331 static void put_cavs_qpel16_mc00_sse2(uint8_t *
dst,
const uint8_t *
src,
337 static void avg_cavs_qpel16_mc00_sse2(uint8_t *
dst,
const uint8_t *
src,
346 #if HAVE_MMX_EXTERNAL
347 c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx;
/*
 * Installs the four filter entry points generated by CAVS_MC into the
 * context `c` (must be in scope at the expansion site):
 *   PFX_cavs_qpel_pixels_tab[IDX][ 2] = mc20
 *   PFX_cavs_qpel_pixels_tab[IDX][ 4] = mc01
 *   PFX_cavs_qpel_pixels_tab[IDX][ 8] = mc02
 *   PFX_cavs_qpel_pixels_tab[IDX][12] = mc03
 * PFX selects put/avg, NUM the block size used in the function name, EXT
 * the instruction-set suffix.
 *
 * NOTE(review): the last visible line ends in a continuation backslash and
 * listing line 356 is not shown -- presumably a blank line terminating the
 * macro; confirm against the complete file.
 */
351 #define DSPFUNC(PFX, IDX, NUM, EXT) \
352 c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \
353 c->PFX ## _cavs_qpel_pixels_tab[IDX][ 4] = PFX ## _cavs_qpel ## NUM ## _mc01_ ## EXT; \
354 c->PFX ## _cavs_qpel_pixels_tab[IDX][ 8] = PFX ## _cavs_qpel ## NUM ## _mc02_ ## EXT; \
355 c->PFX ## _cavs_qpel_pixels_tab[IDX][12] = PFX ## _cavs_qpel ## NUM ## _mc03_ ## EXT; \
357 #if HAVE_MMXEXT_INLINE
358 QPEL_CAVS(put_, PUT_OP, mmxext)
359 QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)
374 #if HAVE_MMXEXT_INLINE
382 #if HAVE_MMX_EXTERNAL
384 c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext;
387 #if HAVE_SSE2_EXTERNAL
389 c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2;
390 c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2;
392 c->cavs_idct8_add = cavs_idct8_add_sse2;
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c)
#define LOCAL_ALIGNED(a, t, v,...)
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c)
void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
#define CAVS_MC(OPNAME, SIZE)
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
static double b2(void *priv, double x, double y)
#define EXTERNAL_SSE2(flags)
#define DSPFUNC(PFX, IDX, NUM, EXT)
#define INLINE_MMXEXT(flags)
The exact code depends on how similar the blocks are and how related they are to the block
#define EXTERNAL_MMXEXT(flags)