FFmpeg
cavsdsp.c
Go to the documentation of this file.
1 /*
2  * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
3  * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de>
4  *
5  * MMX-optimized DSP functions, based on H.264 optimizations by
6  * Michael Niedermayer and Loren Merritt
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "libavutil/attributes.h"
26 #include "libavutil/common.h"
27 #include "libavutil/cpu.h"
28 #include "libavutil/x86/asm.h"
29 #include "libavutil/x86/cpu.h"
30 #include "libavcodec/cavsdsp.h"
31 #include "libavcodec/idctdsp.h"
32 #include "constants.h"
33 #include "fpel.h"
34 #include "idctdsp.h"
35 #include "config.h"
36 
37 
38 #if HAVE_MMX_EXTERNAL
39 
40 void ff_cavs_idct8_mmx(int16_t *out, const int16_t *in);
41 
42 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, ptrdiff_t stride)
43 {
44  LOCAL_ALIGNED(16, int16_t, b2, [64]);
45  ff_cavs_idct8_mmx(b2, block);
47 }
48 
49 void ff_cavs_idct8_sse2(int16_t *out, const int16_t *in);
50 
51 static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride)
52 {
53  LOCAL_ALIGNED(16, int16_t, b2, [64]);
54  ff_cavs_idct8_sse2(b2, block);
56 }
57 
58 #endif /* HAVE_MMX_EXTERNAL */
59 
60 #if (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE)
61 
62 /*****************************************************************************
63  *
64  * motion compensation
65  *
66  ****************************************************************************/
67 
68 /*
 * One output row of the vertical filter [-1 -2 96 42 -7 0].
 *
 * A..F name six MMX registers; the taps above apply to A, B, C, D, E, F in
 * that order.  A..E must already hold source rows widened to 16-bit words.
 * F is loaded here from (%0) and widened, and %0 is advanced by %2, so that
 * F can act as a tap in a following invocation.
 *
 * mm6 accumulates C*MUL1 + D*MUL2 - 7*E - 2*B - A + ADD, is shifted right by
 * 7 and packed with unsigned saturation.  -7*E is built as -(E << 3) + E and
 * -2*B as -(B + B); E and B are shifted back afterwards so that their
 * original values survive this macro.  mm7 serves as scratch for the D
 * product and is zeroed again before the punpcklbw that widens F.
 *
 * OP stores the packed result to (%1), using A as its temporary (A is not
 * read again after the psubw); %1 is then advanced by %3.
 */
69 #define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
70  "movd (%0), "#F" \n\t"\
71  "movq "#C", %%mm6 \n\t"\
72  "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
73  "movq "#D", %%mm7 \n\t"\
74  "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
75  "psllw $3, "#E" \n\t"\
76  "psubw "#E", %%mm6 \n\t"\
77  "psraw $3, "#E" \n\t"\
78  "paddw %%mm7, %%mm6 \n\t"\
79  "paddw "#E", %%mm6 \n\t"\
80  "paddw "#B", "#B" \n\t"\
81  "pxor %%mm7, %%mm7 \n\t"\
82  "add %2, %0 \n\t"\
83  "punpcklbw %%mm7, "#F" \n\t"\
84  "psubw "#B", %%mm6 \n\t"\
85  "psraw $1, "#B" \n\t"\
86  "psubw "#A", %%mm6 \n\t"\
87  "paddw "MANGLE(ADD)", %%mm6 \n\t"\
88  "psraw $7, %%mm6 \n\t"\
89  "packuswb %%mm6, %%mm6 \n\t"\
90  OP(%%mm6, (%1), A, d) \
91  "add %3, %1 \n\t"
92 
93 /*
 * One output row of the vertical filter [ 0 -1 5 5 -1 0].
 *
 * Same register convention as the other vertical filter macros: the taps
 * apply to A, B, C, D, E, F in order, and F is loaded from (%0) and widened
 * here while %0 is advanced by %2.
 *
 * mm6 = ((C + D) * MUL1 - B - E + ADD) >> 3, packed with unsigned
 * saturation.  A and F do not enter the arithmetic (A is only handed to OP
 * as its temporary) and MUL2 is not referenced at all.
 *
 * This macro never writes mm7: it relies on mm7 being zero on entry for the
 * punpcklbw that widens F.
 *
 * OP stores the packed result to (%1); %1 is then advanced by %3.
 */
94 #define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
95  "movd (%0), "#F" \n\t"\
96  "movq "#C", %%mm6 \n\t"\
97  "paddw "#D", %%mm6 \n\t"\
98  "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
99  "add %2, %0 \n\t"\
100  "punpcklbw %%mm7, "#F" \n\t"\
101  "psubw "#B", %%mm6 \n\t"\
102  "psubw "#E", %%mm6 \n\t"\
103  "paddw "MANGLE(ADD)", %%mm6 \n\t"\
104  "psraw $3, %%mm6 \n\t"\
105  "packuswb %%mm6, %%mm6 \n\t"\
106  OP(%%mm6, (%1), A, d) \
107  "add %3, %1 \n\t"
108 
109 /*
 * One output row of the vertical filter [ 0 -7 42 96 -2 -1].
 *
 * Mirror image of the [-1 -2 96 42 -7 0] filter: the taps apply to
 * A, B, C, D, E, F in order, with C multiplied by MUL2 and D by MUL1.
 * F is loaded from (%0) and widened here, %0 is advanced by %2, and F is
 * used as the last tap of this very row.
 *
 * mm6 accumulates C*MUL2 + D*MUL1 - 7*B - 2*E - F + ADD, is shifted right by
 * 7 and packed with unsigned saturation.  -7*B is built as -(B << 3) + B and
 * -2*E as -(E + E); B and E are shifted back afterwards so that their
 * original values survive this macro.  mm7 serves as scratch for the D
 * product and is zeroed again before the punpcklbw that widens F.
 *
 * A does not enter the arithmetic; it is only handed to OP as its temporary.
 * OP stores the packed result to (%1); %1 is then advanced by %3.
 */
110 #define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
111  "movd (%0), "#F" \n\t"\
112  "movq "#C", %%mm6 \n\t"\
113  "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
114  "movq "#D", %%mm7 \n\t"\
115  "pmullw "MANGLE(MUL1)", %%mm7\n\t"\
116  "psllw $3, "#B" \n\t"\
117  "psubw "#B", %%mm6 \n\t"\
118  "psraw $3, "#B" \n\t"\
119  "paddw %%mm7, %%mm6 \n\t"\
120  "paddw "#B", %%mm6 \n\t"\
121  "paddw "#E", "#E" \n\t"\
122  "pxor %%mm7, %%mm7 \n\t"\
123  "add %2, %0 \n\t"\
124  "punpcklbw %%mm7, "#F" \n\t"\
125  "psubw "#E", %%mm6 \n\t"\
126  "psraw $1, "#E" \n\t"\
127  "psubw "#F", %%mm6 \n\t"\
128  "paddw "MANGLE(ADD)", %%mm6 \n\t"\
129  "psraw $7, %%mm6 \n\t"\
130  "packuswb %%mm6, %%mm6 \n\t"\
131  OP(%%mm6, (%1), A, d) \
132  "add %3, %1 \n\t"
133 
134 
/*
 * Function body shared by the vertical 8-or-16-row filter functions.
 *
 * Expects dst, src, dstStride, srcStride and h to be in scope where it is
 * expanded.  VOP is one of the per-row vertical filter macros, OP the store
 * macro, and ADD/MUL1/MUL2 the constants forwarded to VOP.
 *
 * Asm operands: %0 = src, %1 = dst, %2 = srcStride, %3 = dstStride.
 *
 * The block is processed as w = 2 column strips; every row is loaded with
 * movd, i.e. 4 bytes per strip.  For each strip src is first moved up by two
 * rows, five rows are preloaded into mm0..mm4 and widened against a zeroed
 * mm7, and eight VOP invocations then emit eight output rows, rotating the
 * register roles by one position per row.  When h == 16 a second asm
 * statement emits eight more rows.
 *
 * NOTE(review): the second asm statement continues with mm0..mm5 and mm7
 * exactly as the first one left them.  Nothing in the operand or clobber
 * lists expresses that dependency, so this relies on the compiler not
 * touching the MMX registers between the two statements.
 *
 * After a strip, src is rewound by the h + 5 rows that were loaded and dst by
 * the h rows that were stored, and both move 4 bytes to the right.
 */
135 #define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
136  int w= 2;\
137  src -= 2*srcStride;\
138  \
139  while(w--){\
140  __asm__ volatile(\
141  "pxor %%mm7, %%mm7 \n\t"\
142  "movd (%0), %%mm0 \n\t"\
143  "add %2, %0 \n\t"\
144  "movd (%0), %%mm1 \n\t"\
145  "add %2, %0 \n\t"\
146  "movd (%0), %%mm2 \n\t"\
147  "add %2, %0 \n\t"\
148  "movd (%0), %%mm3 \n\t"\
149  "add %2, %0 \n\t"\
150  "movd (%0), %%mm4 \n\t"\
151  "add %2, %0 \n\t"\
152  "punpcklbw %%mm7, %%mm0 \n\t"\
153  "punpcklbw %%mm7, %%mm1 \n\t"\
154  "punpcklbw %%mm7, %%mm2 \n\t"\
155  "punpcklbw %%mm7, %%mm3 \n\t"\
156  "punpcklbw %%mm7, %%mm4 \n\t"\
157  VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
158  VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
159  VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
160  VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
161  VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
162  VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
163  VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
164  VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
165  \
166  : "+a"(src), "+c"(dst)\
167  : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
168  NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
169  : "memory"\
170  );\
171  if(h==16){\
172  __asm__ volatile(\
173  VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
174  VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
175  VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
176  VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
177  VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
178  VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
179  VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
180  VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
181  \
182  : "+a"(src), "+c"(dst)\
183  : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
184  NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
185  : "memory"\
186  );\
187  }\
188  src += 4-(h+5)*srcStride;\
189  dst += 4-h*dstStride;\
190  }
191 
/*
 * Generate the horizontal and vertical filter functions for one variant.
 *
 * OPNAME is the function-name prefix, OP the store macro used to write each
 * result and MMX the function-name suffix.  All generated functions are
 * static:
 *
 *  - <OPNAME>cavs_qpel8_h_<MMX>: horizontal filter over 8 rows of 8 pixels.
 *    With s pointing at the current pixel, each output is
 *    ((s[0] + s[1]) * 5 - (s[-1] + s[2]) + 4) >> 3, packed with unsigned
 *    saturation.  Asm operands: %0 = src, %1 = dst, %2 = row counter h (kept
 *    in memory and decremented with decl), %3 = srcStride, %4 = dstStride.
 *    mm7 is held at zero for widening, mm6 holds ff_pw_5, and mm5 is reloaded
 *    with ff_pw_4 every row because it doubles as scratch and as OP's
 *    temporary.
 *
 *  - <OPNAME>cavs_qpel8or16_v{1,2,3}_<MMX>: the three vertical filters,
 *    expanded from QPEL_CAVSVNUM with the matching per-row macro and
 *    constants.  The v2 variant passes ff_pw_42 as MUL2 although
 *    QPEL_CAVSV2 does not reference MUL2.
 *
 *  - <OPNAME>cavs_qpel8_v{1,2,3}_<MMX> / cavs_qpel16_v{1,2,3}_<MMX>:
 *    fixed-height entry points.  The 16-wide versions run the 8-wide kernel
 *    with h = 16 on the left half and again at dst+8/src+8.
 *
 *  - <OPNAME>cavs_qpel16_h_<MMX>: four calls of the 8x8 horizontal filter,
 *    covering the top two and then the bottom two 8x8 quadrants.
 */
192 #define QPEL_CAVS(OPNAME, OP, MMX)\
193 static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
194 {\
195  int h=8;\
196  __asm__ volatile(\
197  "pxor %%mm7, %%mm7 \n\t"\
198  "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
199  "1: \n\t"\
200  "movq (%0), %%mm0 \n\t"\
201  "movq 1(%0), %%mm2 \n\t"\
202  "movq %%mm0, %%mm1 \n\t"\
203  "movq %%mm2, %%mm3 \n\t"\
204  "punpcklbw %%mm7, %%mm0 \n\t"\
205  "punpckhbw %%mm7, %%mm1 \n\t"\
206  "punpcklbw %%mm7, %%mm2 \n\t"\
207  "punpckhbw %%mm7, %%mm3 \n\t"\
208  "paddw %%mm2, %%mm0 \n\t"\
209  "paddw %%mm3, %%mm1 \n\t"\
210  "pmullw %%mm6, %%mm0 \n\t"\
211  "pmullw %%mm6, %%mm1 \n\t"\
212  "movq -1(%0), %%mm2 \n\t"\
213  "movq 2(%0), %%mm4 \n\t"\
214  "movq %%mm2, %%mm3 \n\t"\
215  "movq %%mm4, %%mm5 \n\t"\
216  "punpcklbw %%mm7, %%mm2 \n\t"\
217  "punpckhbw %%mm7, %%mm3 \n\t"\
218  "punpcklbw %%mm7, %%mm4 \n\t"\
219  "punpckhbw %%mm7, %%mm5 \n\t"\
220  "paddw %%mm4, %%mm2 \n\t"\
221  "paddw %%mm3, %%mm5 \n\t"\
222  "psubw %%mm2, %%mm0 \n\t"\
223  "psubw %%mm5, %%mm1 \n\t"\
224  "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\
225  "paddw %%mm5, %%mm0 \n\t"\
226  "paddw %%mm5, %%mm1 \n\t"\
227  "psraw $3, %%mm0 \n\t"\
228  "psraw $3, %%mm1 \n\t"\
229  "packuswb %%mm1, %%mm0 \n\t"\
230  OP(%%mm0, (%1),%%mm5, q) \
231  "add %3, %0 \n\t"\
232  "add %4, %1 \n\t"\
233  "decl %2 \n\t"\
234  " jnz 1b \n\t"\
235  : "+a"(src), "+c"(dst), "+m"(h)\
236  : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
237  NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\
238  : "memory"\
239  );\
240 }\
241 \
242 static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
243 { \
244  QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
245 }\
246 \
247 static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
248 { \
249  QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_42) \
250 }\
251 \
252 static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
253 { \
254  QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
255 }\
256 \
257 static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
258 { \
259  OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
260 }\
261 static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
262 { \
263  OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
264  OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
265 }\
266 \
267 static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
268 { \
269  OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
270 }\
271 static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
272 { \
273  OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
274  OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
275 }\
276 \
277 static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
278 { \
279  OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
280 }\
281 static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
282 { \
283  OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
284  OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
285 }\
286 \
287 static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
288 { \
289  OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
290  OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
291  src += 8*srcStride;\
292  dst += 8*dstStride;\
293  OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
294  OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
295 }\
296 
/*
 * Generate the single-stride entry points stored in the qpel function
 * tables, for one OPNAME prefix, block SIZE and MMX suffix.
 *
 * Each generated function takes one stride and passes it as both dstStride
 * and srcStride to the corresponding filter:
 *   mc20 -> horizontal filter (_h_)
 *   mc01 -> vertical filter 1 (_v1_)
 *   mc02 -> vertical filter 2 (_v2_)
 *   mc03 -> vertical filter 3 (_v3_)
 */
297 #define CAVS_MC(OPNAME, SIZE, MMX) \
298 static void OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
299 {\
300  OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
301 }\
302 \
303 static void OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
304 {\
305  OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
306 }\
307 \
308 static void OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
309 {\
310  OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
311 }\
312 \
313 static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
314 {\
315  OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
316 }\
317 
/* Store macro for the "put" variants: emits a single mov<size> from a to b.
 * The temp argument is accepted for signature parity with the averaging
 * store macros and is not used. */
318 #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
/* Store macro for the 3DNow! "avg" variants: loads the current contents of b
 * into temp, combines it with a using pavgusb (result in a), and writes a
 * back to b.  Both a and temp are overwritten. */
319 #define AVG_3DNOW_OP(a,b,temp, size) \
320 "mov" #size " " #b ", " #temp " \n\t"\
321 "pavgusb " #temp ", " #a " \n\t"\
322 "mov" #size " " #a ", " #b " \n\t"
/* Store macro for the MMXEXT "avg" variants: loads the current contents of b
 * into temp, combines it with a using pavgb (result in a), and writes a back
 * to b.  Both a and temp are overwritten. */
323 #define AVG_MMXEXT_OP(a, b, temp, size) \
324 "mov" #size " " #b ", " #temp " \n\t"\
325 "pavgb " #temp ", " #a " \n\t"\
326 "mov" #size " " #a ", " #b " \n\t"
327 
328 #endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */
329 
330 #if HAVE_MMX_EXTERNAL
/* Table entry [1][0] of put_cavs_qpel_pixels_tab for MMX: forwards to
 * ff_put_pixels8_mmx() with the height fixed at 8 rows. */
331 static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
332  ptrdiff_t stride)
333 {
334  ff_put_pixels8_mmx(dst, src, stride, 8);
335 }
336 
/* Table entry [1][0] of avg_cavs_qpel_pixels_tab for MMX: forwards to
 * ff_avg_pixels8_mmx() with the height fixed at 8 rows. */
337 static void avg_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
338  ptrdiff_t stride)
339 {
340  ff_avg_pixels8_mmx(dst, src, stride, 8);
341 }
342 
/* MMXEXT replacement for table entry [1][0] of avg_cavs_qpel_pixels_tab:
 * forwards to ff_avg_pixels8_mmxext() with the height fixed at 8 rows. */
343 static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src,
344  ptrdiff_t stride)
345 {
346  ff_avg_pixels8_mmxext(dst, src, stride, 8);
347 }
348 
/* Table entry [0][0] of put_cavs_qpel_pixels_tab for MMX: forwards to
 * ff_put_pixels16_mmx() with the height fixed at 16 rows. */
349 static void put_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src,
350  ptrdiff_t stride)
351 {
352  ff_put_pixels16_mmx(dst, src, stride, 16);
353 }
354 
/* Table entry [0][0] of avg_cavs_qpel_pixels_tab for MMX: forwards to
 * ff_avg_pixels16_mmx() with the height fixed at 16 rows. */
355 static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src,
356  ptrdiff_t stride)
357 {
358  ff_avg_pixels16_mmx(dst, src, stride, 16);
359 }
360 
/* MMXEXT replacement for table entry [0][0] of avg_cavs_qpel_pixels_tab:
 * forwards to ff_avg_pixels16_mmxext() with the height fixed at 16 rows. */
361 static void avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, const uint8_t *src,
362  ptrdiff_t stride)
363 {
364  ff_avg_pixels16_mmxext(dst, src, stride, 16);
365 }
366 
/* SSE2 replacement for table entry [0][0] of put_cavs_qpel_pixels_tab:
 * forwards to ff_put_pixels16_sse2() with the height fixed at 16 rows. */
367 static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
368  ptrdiff_t stride)
369 {
370  ff_put_pixels16_sse2(dst, src, stride, 16);
371 }
372 
/* SSE2 replacement for table entry [0][0] of avg_cavs_qpel_pixels_tab:
 * forwards to ff_avg_pixels16_sse2() with the height fixed at 16 rows. */
373 static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
374  ptrdiff_t stride)
375 {
376  ff_avg_pixels16_sse2(dst, src, stride, 16);
377 }
378 #endif
379 
381  AVCodecContext *avctx)
382 {
383 #if HAVE_MMX_EXTERNAL
384  c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_mmx;
385  c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx;
386  c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmx;
387  c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmx;
388 
389  c->cavs_idct8_add = cavs_idct8_add_mmx;
390  c->idct_perm = FF_IDCT_PERM_TRANSPOSE;
391 #endif /* HAVE_MMX_EXTERNAL */
392 }
393 
/*
 * Fill four entries of c->PFX_cavs_qpel_pixels_tab[IDX] with the generated
 * functions for block size NUM and implementation suffix EXT:
 * index 2 <- mc20, index 4 <- mc01, index 8 <- mc02, index 12 <- mc03.
 * Expects a context pointer named c at the expansion site.
 */
394 #define DSPFUNC(PFX, IDX, NUM, EXT) \
395  c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \
396  c->PFX ## _cavs_qpel_pixels_tab[IDX][ 4] = PFX ## _cavs_qpel ## NUM ## _mc01_ ## EXT; \
397  c->PFX ## _cavs_qpel_pixels_tab[IDX][ 8] = PFX ## _cavs_qpel ## NUM ## _mc02_ ## EXT; \
398  c->PFX ## _cavs_qpel_pixels_tab[IDX][12] = PFX ## _cavs_qpel ## NUM ## _mc03_ ## EXT; \
399 
400 #if HAVE_MMXEXT_INLINE
/* MMXEXT instantiation: the put_ and avg_ filter kernels (avg_ stores through
 * AVG_MMXEXT_OP), followed by the mc20/mc01/mc02/mc03 entry points for 8- and
 * 16-sized blocks. */
401 QPEL_CAVS(put_, PUT_OP, mmxext)
402 QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)
403 
404 CAVS_MC(put_, 8, mmxext)
405 CAVS_MC(put_, 16, mmxext)
406 CAVS_MC(avg_, 8, mmxext)
407 CAVS_MC(avg_, 16, mmxext)
408 #endif /* HAVE_MMXEXT_INLINE */
409 
410 #if HAVE_AMD3DNOW_INLINE
/* 3DNow! instantiation: the put_ and avg_ filter kernels (avg_ stores through
 * AVG_3DNOW_OP), followed by the mc20/mc01/mc02/mc03 entry points for 8- and
 * 16-sized blocks. */
411 QPEL_CAVS(put_, PUT_OP, 3dnow)
412 QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
413 
414 CAVS_MC(put_, 8, 3dnow)
415 CAVS_MC(put_, 16,3dnow)
416 CAVS_MC(avg_, 8, 3dnow)
417 CAVS_MC(avg_, 16,3dnow)
418 
/* Install the 3DNow! filter functions: via DSPFUNC, table indices 2, 4, 8
 * and 12 of the put and avg qpel tables are set for both the 16-sized
 * (row 0) and 8-sized (row 1) blocks.  avctx is not read. */
419 static av_cold void cavsdsp_init_3dnow(CAVSDSPContext *c,
420  AVCodecContext *avctx)
421 {
422  DSPFUNC(put, 0, 16, 3dnow);
423  DSPFUNC(put, 1, 8, 3dnow);
424  DSPFUNC(avg, 0, 16, 3dnow);
425  DSPFUNC(avg, 1, 8, 3dnow);
426 }
427 #endif /* HAVE_AMD3DNOW_INLINE */
428 
430 {
432 
433  if (X86_MMX(cpu_flags))
434  cavsdsp_init_mmx(c, avctx);
435 
436 #if HAVE_AMD3DNOW_INLINE
438  cavsdsp_init_3dnow(c, avctx);
439 #endif /* HAVE_AMD3DNOW_INLINE */
440 #if HAVE_MMXEXT_INLINE
441  if (INLINE_MMXEXT(cpu_flags)) {
442  DSPFUNC(put, 0, 16, mmxext);
443  DSPFUNC(put, 1, 8, mmxext);
444  DSPFUNC(avg, 0, 16, mmxext);
445  DSPFUNC(avg, 1, 8, mmxext);
446  }
447 #endif
448 #if HAVE_MMX_EXTERNAL
449  if (EXTERNAL_MMXEXT(cpu_flags)) {
450  c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmxext;
451  c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext;
452  }
453 #endif
454 #if HAVE_SSE2_EXTERNAL
455  if (EXTERNAL_SSE2(cpu_flags)) {
456  c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2;
457  c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2;
458 
459  c->cavs_idct8_add = cavs_idct8_add_sse2;
460  c->idct_perm = FF_IDCT_PERM_TRANSPOSE;
461  }
462 #endif
463 }
ff_put_pixels8_mmx
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
ff_put_pixels16_mmx
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
stride
int stride
Definition: mace.c:144
cpu.h
out
FILE * out
Definition: movenc.c:54
av_unused
#define av_unused
Definition: attributes.h:131
ff_avg_pixels8_mmx
void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
cavsdsp_init_mmx
static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx)
Definition: cavsdsp.c:380
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:50
ff_cavsdsp_init_x86
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
Definition: cavsdsp.c:429
CAVSDSPContext
Definition: cavsdsp.h:30
av_cold
#define av_cold
Definition: attributes.h:90
ff_avg_pixels8_mmxext
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
ff_put_pixels16_sse2
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
ff_add_pixels_clamped_mmx
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
ff_add_pixels_clamped_sse2
void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
src
#define src
Definition: vp8dsp.c:254
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
constants.h
ff_avg_pixels16_mmx
void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
CAVS_MC
#define CAVS_MC(OPNAME, SIZE)
Definition: cavsdsp.c:439
ff_avg_pixels16_sse2
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
cpu.h
ff_avg_pixels16_mmxext
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
asm.h
avg
#define avg(a, b, c, d)
Definition: colorspacedsp_template.c:28
b2
static double b2(void *priv, double x, double y)
Definition: vf_xfade.c:1333
attributes.h
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
in
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
Definition: audio_convert.c:326
common.h
uint8_t
uint8_t
Definition: audio_convert.c:194
idctdsp.h
FF_IDCT_PERM_TRANSPOSE
@ FF_IDCT_PERM_TRANSPOSE
Definition: idctdsp.h:41
DSPFUNC
#define DSPFUNC(PFX, IDX, NUM, EXT)
Definition: cavsdsp.c:394
AVCodecContext
main external API structure.
Definition: avcodec.h:526
fpel.h
INLINE_MMXEXT
#define INLINE_MMXEXT(flags)
Definition: cpu.h:87
LOCAL_ALIGNED
#define LOCAL_ALIGNED(a, t, v,...)
Definition: internal.h:114
X86_MMX
#define X86_MMX(flags)
Definition: cpu.h:30
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
INLINE_AMD3DNOW
#define INLINE_AMD3DNOW(flags)
Definition: cpu.h:84
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
cavsdsp.h