dsputil_template.c
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * DSP utils
28  */
29 
30 #include "bit_depth_template.c"
31 
32 static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
33 {
34  int i;
35  for(i=0; i<h; i++)
36  {
37  AV_WN2P(dst , AV_RN2P(src ));
38  dst+=dstStride;
39  src+=srcStride;
40  }
41 }
42 
43 static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
44 {
45  int i;
46  for(i=0; i<h; i++)
47  {
48  AV_WN4P(dst , AV_RN4P(src ));
49  dst+=dstStride;
50  src+=srcStride;
51  }
52 }
53 
54 static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
55 {
56  int i;
57  for(i=0; i<h; i++)
58  {
59  AV_WN4P(dst , AV_RN4P(src ));
60  AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
61  dst+=dstStride;
62  src+=srcStride;
63  }
64 }
65 
66 static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
67 {
68  int i;
69  for(i=0; i<h; i++)
70  {
71  AV_WN4P(dst , AV_RN4P(src ));
72  AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
73  AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
74  AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
75  dst+=dstStride;
76  src+=srcStride;
77  }
78 }
79 
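/* Illustrative sketch (editorial note, not part of the original file):
 * AV_RN4P/AV_WN4P above are unaligned packed reads/writes of four pixels at
 * the template's bit depth, so at BIT_DEPTH == 8 copy_block8 behaves like the
 * plain byte loop below.  The name example_copy_block8_bytes is hypothetical. */
static inline void example_copy_block8_bytes(uint8_t *dst, const uint8_t *src,
                                             int dstStride, int srcStride, int h)
{
    int i, j;
    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)      /* 8 pixels == 8 bytes at 8-bit depth */
            dst[j] = src[j];
        dst += dstStride;
        src += srcStride;
    }
}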
80 /* draw the edges of width 'w' of an image of size width, height */
81 //FIXME check that this is ok for mpeg4 interlaced
82 static void FUNCC(draw_edges)(uint8_t *p_buf, int p_wrap, int width, int height, int w, int h, int sides)
83 {
84  pixel *buf = (pixel*)p_buf;
85  int wrap = p_wrap / sizeof(pixel);
86  pixel *ptr, *last_line;
87  int i;
88 
89  /* left and right */
90  ptr = buf;
91  for(i=0;i<height;i++) {
92 #if BIT_DEPTH > 8
93  int j;
94  for (j = 0; j < w; j++) {
95  ptr[j-w] = ptr[0];
96  ptr[j+width] = ptr[width-1];
97  }
98 #else
99  memset(ptr - w, ptr[0], w);
100  memset(ptr + width, ptr[width-1], w);
101 #endif
102  ptr += wrap;
103  }
104 
105  /* top and bottom + corners */
106  buf -= w;
107  last_line = buf + (height - 1) * wrap;
108  if (sides & EDGE_TOP)
109  for(i = 0; i < h; i++)
110  memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
111  if (sides & EDGE_BOTTOM)
112  for (i = 0; i < h; i++)
113  memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
114 }
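/* Illustrative sketch (editorial): draw_edges pads a plane by replicating its
 * border pixels, e.g. with width = 4 and w = 2 a row "a b c d" becomes
 * "a a a b c d d d"; whole padded rows are then copied upwards/downwards for
 * the top and bottom borders.  A hypothetical single-row helper for the 8-bit
 * case, mirroring the two memset() calls above: */
static inline void example_pad_row_lr(uint8_t *row, int width, int w)
{
    memset(row - w,     row[0],         w); /* replicate leftmost pixel  */
    memset(row + width, row[width - 1], w); /* replicate rightmost pixel */
}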
115 
116 #define DCTELEM_FUNCS(dctcoef, suffix) \
117 static void FUNCC(get_pixels ## suffix)(DCTELEM *av_restrict _block, \
118  const uint8_t *_pixels, \
119  int line_size) \
120 { \
121  const pixel *pixels = (const pixel *) _pixels; \
122  dctcoef *av_restrict block = (dctcoef *) _block; \
123  int i; \
124  \
125  /* read the pixels */ \
126  for(i=0;i<8;i++) { \
127  block[0] = pixels[0]; \
128  block[1] = pixels[1]; \
129  block[2] = pixels[2]; \
130  block[3] = pixels[3]; \
131  block[4] = pixels[4]; \
132  block[5] = pixels[5]; \
133  block[6] = pixels[6]; \
134  block[7] = pixels[7]; \
135  pixels += line_size / sizeof(pixel); \
136  block += 8; \
137  } \
138 } \
139  \
140 static void FUNCC(add_pixels8 ## suffix)(uint8_t *av_restrict _pixels, \
141  DCTELEM *_block, \
142  int line_size) \
143 { \
144  int i; \
145  pixel *av_restrict pixels = (pixel *av_restrict)_pixels; \
146  dctcoef *block = (dctcoef*)_block; \
147  line_size /= sizeof(pixel); \
148  \
149  for(i=0;i<8;i++) { \
150  pixels[0] += block[0]; \
151  pixels[1] += block[1]; \
152  pixels[2] += block[2]; \
153  pixels[3] += block[3]; \
154  pixels[4] += block[4]; \
155  pixels[5] += block[5]; \
156  pixels[6] += block[6]; \
157  pixels[7] += block[7]; \
158  pixels += line_size; \
159  block += 8; \
160  } \
161 } \
162  \
163 static void FUNCC(add_pixels4 ## suffix)(uint8_t *av_restrict _pixels, \
164  DCTELEM *_block, \
165  int line_size) \
166 { \
167  int i; \
168  pixel *av_restrict pixels = (pixel *av_restrict)_pixels; \
169  dctcoef *block = (dctcoef*)_block; \
170  line_size /= sizeof(pixel); \
171  \
172  for(i=0;i<4;i++) { \
173  pixels[0] += block[0]; \
174  pixels[1] += block[1]; \
175  pixels[2] += block[2]; \
176  pixels[3] += block[3]; \
177  pixels += line_size; \
178  block += 4; \
179  } \
180 } \
181  \
182 static void FUNCC(clear_block ## suffix)(DCTELEM *block) \
183 { \
184  memset(block, 0, sizeof(dctcoef)*64); \
185 } \
186  \
187 /** \
188  * memset(blocks, 0, sizeof(DCTELEM)*6*64) \
189  */ \
190 static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks) \
191 { \
192  memset(blocks, 0, sizeof(dctcoef)*6*64); \
193 }
194 
195 DCTELEM_FUNCS(DCTELEM, _16)
196 #if BIT_DEPTH > 8
197 DCTELEM_FUNCS(dctcoef, _32)
198 #endif
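/* Illustrative sketch (editorial): get_pixels flattens an 8x8 block of pixels
 * into 64 consecutive DCT coefficients, and add_pixels8/add_pixels4 accumulate
 * a decoded residual back onto the picture.  A hypothetical scalar version of
 * get_pixels for 8-bit input and 16-bit coefficients: */
static inline void example_get_pixels_8bit(int16_t *block, const uint8_t *pixels,
                                           int line_size)
{
    int i, j;
    for (i = 0; i < 8; i++)
        for (j = 0; j < 8; j++)
            block[8 * i + j] = pixels[i * line_size + j];
}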
199 
200 #define PIXOP2(OPNAME, OP) \
201 static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
202  int i;\
203  for(i=0; i<h; i++){\
204  OP(*((pixel2*)(block )), AV_RN2P(pixels ));\
205  pixels+=line_size;\
206  block +=line_size;\
207  }\
208 }\
209 static void FUNCC(OPNAME ## _pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
210  int i;\
211  for(i=0; i<h; i++){\
212  OP(*((pixel4*)(block )), AV_RN4P(pixels ));\
213  pixels+=line_size;\
214  block +=line_size;\
215  }\
216 }\
217 static void FUNCC(OPNAME ## _pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
218  int i;\
219  for(i=0; i<h; i++){\
220  OP(*((pixel4*)(block )), AV_RN4P(pixels ));\
221  OP(*((pixel4*)(block+4*sizeof(pixel))), AV_RN4P(pixels+4*sizeof(pixel)));\
222  pixels+=line_size;\
223  block +=line_size;\
224  }\
225 }\
226 static inline void FUNCC(OPNAME ## _no_rnd_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
227  FUNCC(OPNAME ## _pixels8)(block, pixels, line_size, h);\
228 }\
229 \
230 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
231  int src_stride1, int src_stride2, int h){\
232  int i;\
233  for(i=0; i<h; i++){\
234  pixel4 a,b;\
235  a= AV_RN4P(&src1[i*src_stride1 ]);\
236  b= AV_RN4P(&src2[i*src_stride2 ]);\
237  OP(*((pixel4*)&dst[i*dst_stride ]), no_rnd_avg_pixel4(a, b));\
238  a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
239  b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
240  OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
241  }\
242 }\
243 \
244 static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
245  int src_stride1, int src_stride2, int h){\
246  int i;\
247  for(i=0; i<h; i++){\
248  pixel4 a,b;\
249  a= AV_RN4P(&src1[i*src_stride1 ]);\
250  b= AV_RN4P(&src2[i*src_stride2 ]);\
251  OP(*((pixel4*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
252  a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
253  b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
254  OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), rnd_avg_pixel4(a, b));\
255  }\
256 }\
257 \
258 static inline void FUNC(OPNAME ## _pixels4_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
259  int src_stride1, int src_stride2, int h){\
260  int i;\
261  for(i=0; i<h; i++){\
262  pixel4 a,b;\
263  a= AV_RN4P(&src1[i*src_stride1 ]);\
264  b= AV_RN4P(&src2[i*src_stride2 ]);\
265  OP(*((pixel4*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
266  }\
267 }\
268 \
269 static inline void FUNC(OPNAME ## _pixels2_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
270  int src_stride1, int src_stride2, int h){\
271  int i;\
272  for(i=0; i<h; i++){\
273  pixel4 a,b;\
274  a= AV_RN2P(&src1[i*src_stride1 ]);\
275  b= AV_RN2P(&src2[i*src_stride2 ]);\
276  OP(*((pixel2*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
277  }\
278 }\
279 \
280 static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
281  int src_stride1, int src_stride2, int h){\
282  FUNC(OPNAME ## _pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
283  FUNC(OPNAME ## _pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
284 }\
285 \
286 static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
287  int src_stride1, int src_stride2, int h){\
288  FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
289  FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
290 }\
291 \
292 static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
293  FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
294 }\
295 \
296 static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
297  FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
298 }\
299 \
300 static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
301  FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
302 }\
303 \
304 static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
305  FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
306 }\
307 \
308 static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
309  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
310  /* FIXME HIGH BIT DEPTH */\
311  int i;\
312  for(i=0; i<h; i++){\
313  uint32_t a, b, c, d, l0, l1, h0, h1;\
314  a= AV_RN32(&src1[i*src_stride1]);\
315  b= AV_RN32(&src2[i*src_stride2]);\
316  c= AV_RN32(&src3[i*src_stride3]);\
317  d= AV_RN32(&src4[i*src_stride4]);\
318  l0= (a&0x03030303UL)\
319  + (b&0x03030303UL)\
320  + 0x02020202UL;\
321  h0= ((a&0xFCFCFCFCUL)>>2)\
322  + ((b&0xFCFCFCFCUL)>>2);\
323  l1= (c&0x03030303UL)\
324  + (d&0x03030303UL);\
325  h1= ((c&0xFCFCFCFCUL)>>2)\
326  + ((d&0xFCFCFCFCUL)>>2);\
327  OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
328  a= AV_RN32(&src1[i*src_stride1+4]);\
329  b= AV_RN32(&src2[i*src_stride2+4]);\
330  c= AV_RN32(&src3[i*src_stride3+4]);\
331  d= AV_RN32(&src4[i*src_stride4+4]);\
332  l0= (a&0x03030303UL)\
333  + (b&0x03030303UL)\
334  + 0x02020202UL;\
335  h0= ((a&0xFCFCFCFCUL)>>2)\
336  + ((b&0xFCFCFCFCUL)>>2);\
337  l1= (c&0x03030303UL)\
338  + (d&0x03030303UL);\
339  h1= ((c&0xFCFCFCFCUL)>>2)\
340  + ((d&0xFCFCFCFCUL)>>2);\
341  OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
342  }\
343 }\
344 \
345 static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
346  FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
347 }\
348 \
349 static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
350  FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
351 }\
352 \
353 static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
354  FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
355 }\
356 \
357 static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
358  FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
359 }\
360 \
361 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
362  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
363  /* FIXME HIGH BIT DEPTH*/\
364  int i;\
365  for(i=0; i<h; i++){\
366  uint32_t a, b, c, d, l0, l1, h0, h1;\
367  a= AV_RN32(&src1[i*src_stride1]);\
368  b= AV_RN32(&src2[i*src_stride2]);\
369  c= AV_RN32(&src3[i*src_stride3]);\
370  d= AV_RN32(&src4[i*src_stride4]);\
371  l0= (a&0x03030303UL)\
372  + (b&0x03030303UL)\
373  + 0x01010101UL;\
374  h0= ((a&0xFCFCFCFCUL)>>2)\
375  + ((b&0xFCFCFCFCUL)>>2);\
376  l1= (c&0x03030303UL)\
377  + (d&0x03030303UL);\
378  h1= ((c&0xFCFCFCFCUL)>>2)\
379  + ((d&0xFCFCFCFCUL)>>2);\
380  OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
381  a= AV_RN32(&src1[i*src_stride1+4]);\
382  b= AV_RN32(&src2[i*src_stride2+4]);\
383  c= AV_RN32(&src3[i*src_stride3+4]);\
384  d= AV_RN32(&src4[i*src_stride4+4]);\
385  l0= (a&0x03030303UL)\
386  + (b&0x03030303UL)\
387  + 0x01010101UL;\
388  h0= ((a&0xFCFCFCFCUL)>>2)\
389  + ((b&0xFCFCFCFCUL)>>2);\
390  l1= (c&0x03030303UL)\
391  + (d&0x03030303UL);\
392  h1= ((c&0xFCFCFCFCUL)>>2)\
393  + ((d&0xFCFCFCFCUL)>>2);\
394  OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
395  }\
396 }\
397 static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
398  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
399  FUNC(OPNAME ## _pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
400  FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
401 }\
402 static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
403  int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
404  FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
405  FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
406 }\
407 \
408 static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *p_block, const uint8_t *p_pixels, int line_size, int h)\
409 {\
410  int i, a0, b0, a1, b1;\
411  pixel *block = (pixel*)p_block;\
412  const pixel *pixels = (const pixel*)p_pixels;\
413  line_size >>= sizeof(pixel)-1;\
414  a0= pixels[0];\
415  b0= pixels[1] + 2;\
416  a0 += b0;\
417  b0 += pixels[2];\
418 \
419  pixels+=line_size;\
420  for(i=0; i<h; i+=2){\
421  a1= pixels[0];\
422  b1= pixels[1];\
423  a1 += b1;\
424  b1 += pixels[2];\
425 \
426  block[0]= (a1+a0)>>2; /* FIXME non put */\
427  block[1]= (b1+b0)>>2;\
428 \
429  pixels+=line_size;\
430  block +=line_size;\
431 \
432  a0= pixels[0];\
433  b0= pixels[1] + 2;\
434  a0 += b0;\
435  b0 += pixels[2];\
436 \
437  block[0]= (a1+a0)>>2;\
438  block[1]= (b1+b0)>>2;\
439  pixels+=line_size;\
440  block +=line_size;\
441  }\
442 }\
443 \
444 static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
445 {\
446  /* FIXME HIGH BIT DEPTH */\
447  int i;\
448  const uint32_t a= AV_RN32(pixels );\
449  const uint32_t b= AV_RN32(pixels+1);\
450  uint32_t l0= (a&0x03030303UL)\
451  + (b&0x03030303UL)\
452  + 0x02020202UL;\
453  uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
454  + ((b&0xFCFCFCFCUL)>>2);\
455  uint32_t l1,h1;\
456 \
457  pixels+=line_size;\
458  for(i=0; i<h; i+=2){\
459  uint32_t a= AV_RN32(pixels );\
460  uint32_t b= AV_RN32(pixels+1);\
461  l1= (a&0x03030303UL)\
462  + (b&0x03030303UL);\
463  h1= ((a&0xFCFCFCFCUL)>>2)\
464  + ((b&0xFCFCFCFCUL)>>2);\
465  OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
466  pixels+=line_size;\
467  block +=line_size;\
468  a= AV_RN32(pixels );\
469  b= AV_RN32(pixels+1);\
470  l0= (a&0x03030303UL)\
471  + (b&0x03030303UL)\
472  + 0x02020202UL;\
473  h0= ((a&0xFCFCFCFCUL)>>2)\
474  + ((b&0xFCFCFCFCUL)>>2);\
475  OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
476  pixels+=line_size;\
477  block +=line_size;\
478  }\
479 }\
480 \
481 static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
482 {\
483  /* FIXME HIGH BIT DEPTH */\
484  int j;\
485  for(j=0; j<2; j++){\
486  int i;\
487  const uint32_t a= AV_RN32(pixels );\
488  const uint32_t b= AV_RN32(pixels+1);\
489  uint32_t l0= (a&0x03030303UL)\
490  + (b&0x03030303UL)\
491  + 0x02020202UL;\
492  uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
493  + ((b&0xFCFCFCFCUL)>>2);\
494  uint32_t l1,h1;\
495 \
496  pixels+=line_size;\
497  for(i=0; i<h; i+=2){\
498  uint32_t a= AV_RN32(pixels );\
499  uint32_t b= AV_RN32(pixels+1);\
500  l1= (a&0x03030303UL)\
501  + (b&0x03030303UL);\
502  h1= ((a&0xFCFCFCFCUL)>>2)\
503  + ((b&0xFCFCFCFCUL)>>2);\
504  OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
505  pixels+=line_size;\
506  block +=line_size;\
507  a= AV_RN32(pixels );\
508  b= AV_RN32(pixels+1);\
509  l0= (a&0x03030303UL)\
510  + (b&0x03030303UL)\
511  + 0x02020202UL;\
512  h0= ((a&0xFCFCFCFCUL)>>2)\
513  + ((b&0xFCFCFCFCUL)>>2);\
514  OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
515  pixels+=line_size;\
516  block +=line_size;\
517  }\
518  pixels+=4-line_size*(h+1);\
519  block +=4-line_size*h;\
520  }\
521 }\
522 \
523 static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
524 {\
525  /* FIXME HIGH BIT DEPTH */\
526  int j;\
527  for(j=0; j<2; j++){\
528  int i;\
529  const uint32_t a= AV_RN32(pixels );\
530  const uint32_t b= AV_RN32(pixels+1);\
531  uint32_t l0= (a&0x03030303UL)\
532  + (b&0x03030303UL)\
533  + 0x01010101UL;\
534  uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
535  + ((b&0xFCFCFCFCUL)>>2);\
536  uint32_t l1,h1;\
537 \
538  pixels+=line_size;\
539  for(i=0; i<h; i+=2){\
540  uint32_t a= AV_RN32(pixels );\
541  uint32_t b= AV_RN32(pixels+1);\
542  l1= (a&0x03030303UL)\
543  + (b&0x03030303UL);\
544  h1= ((a&0xFCFCFCFCUL)>>2)\
545  + ((b&0xFCFCFCFCUL)>>2);\
546  OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
547  pixels+=line_size;\
548  block +=line_size;\
549  a= AV_RN32(pixels );\
550  b= AV_RN32(pixels+1);\
551  l0= (a&0x03030303UL)\
552  + (b&0x03030303UL)\
553  + 0x01010101UL;\
554  h0= ((a&0xFCFCFCFCUL)>>2)\
555  + ((b&0xFCFCFCFCUL)>>2);\
556  OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
557  pixels+=line_size;\
558  block +=line_size;\
559  }\
560  pixels+=4-line_size*(h+1);\
561  block +=4-line_size*h;\
562  }\
563 }\
564 \
565 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
566 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
567 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
568 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
569 av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
570 CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
571 CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
572 CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\
573 
574 #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
575 #define op_put(a, b) a = b
576 
577 PIXOP2(avg, op_avg)
578 PIXOP2(put, op_put)
579 #undef op_avg
580 #undef op_put
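/* Illustrative note (editorial): the PIXOP2 kernels above process 2, 4 or 8
 * pixels per iteration as packed words.  rnd_avg_pixel4/no_rnd_avg_pixel4
 * (defined in the bit-depth headers) average all lanes of such a word at once;
 * for 8-bit pixels the rounded variant follows the classic SWAR identity
 * sketched below, while the _l4 and _xy2 kernels use the 0x03030303/0xFCFCFCFC
 * masks to split each byte into its low 2 and high 6 bits so that four-way
 * sums cannot carry between lanes.  example_rnd_avg32 is a hypothetical name. */
static inline uint32_t example_rnd_avg32(uint32_t a, uint32_t b)
{
    /* per-byte (a + b + 1) >> 1 without inter-byte carries */
    return (a | b) - (((a ^ b) & ~0x01010101U) >> 1);
}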
582 #define put_no_rnd_pixels8_c put_pixels8_c
583 #define put_no_rnd_pixels16_c put_pixels16_c
584 
585 static void FUNCC(put_no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
586  FUNC(put_no_rnd_pixels16_l2)(dst, a, b, stride, stride, stride, h);
587 }
588 
589 static void FUNCC(put_no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
590  FUNC(put_no_rnd_pixels8_l2)(dst, a, b, stride, stride, stride, h);
591 }
592 
593 #define H264_CHROMA_MC(OPNAME, OP)\
594 static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
595  pixel *dst = (pixel*)p_dst;\
596  pixel *src = (pixel*)p_src;\
597  const int A=(8-x)*(8-y);\
598  const int B=( x)*(8-y);\
599  const int C=(8-x)*( y);\
600  const int D=( x)*( y);\
601  int i;\
602  stride >>= sizeof(pixel)-1;\
603  \
604  av_assert2(x<8 && y<8 && x>=0 && y>=0);\
605 \
606  if(D){\
607  for(i=0; i<h; i++){\
608  OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
609  OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
610  dst+= stride;\
611  src+= stride;\
612  }\
613  }else{\
614  const int E= B+C;\
615  const int step= C ? stride : 1;\
616  for(i=0; i<h; i++){\
617  OP(dst[0], (A*src[0] + E*src[step+0]));\
618  OP(dst[1], (A*src[1] + E*src[step+1]));\
619  dst+= stride;\
620  src+= stride;\
621  }\
622  }\
623 }\
624 \
625 static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
626  pixel *dst = (pixel*)p_dst;\
627  pixel *src = (pixel*)p_src;\
628  const int A=(8-x)*(8-y);\
629  const int B=( x)*(8-y);\
630  const int C=(8-x)*( y);\
631  const int D=( x)*( y);\
632  int i;\
633  stride >>= sizeof(pixel)-1;\
634  \
635  av_assert2(x<8 && y<8 && x>=0 && y>=0);\
636 \
637  if(D){\
638  for(i=0; i<h; i++){\
639  OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
640  OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
641  OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
642  OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
643  dst+= stride;\
644  src+= stride;\
645  }\
646  }else{\
647  const int E= B+C;\
648  const int step= C ? stride : 1;\
649  for(i=0; i<h; i++){\
650  OP(dst[0], (A*src[0] + E*src[step+0]));\
651  OP(dst[1], (A*src[1] + E*src[step+1]));\
652  OP(dst[2], (A*src[2] + E*src[step+2]));\
653  OP(dst[3], (A*src[3] + E*src[step+3]));\
654  dst+= stride;\
655  src+= stride;\
656  }\
657  }\
658 }\
659 \
660 static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
661  pixel *dst = (pixel*)p_dst;\
662  pixel *src = (pixel*)p_src;\
663  const int A=(8-x)*(8-y);\
664  const int B=( x)*(8-y);\
665  const int C=(8-x)*( y);\
666  const int D=( x)*( y);\
667  int i;\
668  stride >>= sizeof(pixel)-1;\
669  \
670  av_assert2(x<8 && y<8 && x>=0 && y>=0);\
671 \
672  if(D){\
673  for(i=0; i<h; i++){\
674  OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
675  OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
676  OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
677  OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
678  OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
679  OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
680  OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
681  OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
682  dst+= stride;\
683  src+= stride;\
684  }\
685  }else{\
686  const int E= B+C;\
687  const int step= C ? stride : 1;\
688  for(i=0; i<h; i++){\
689  OP(dst[0], (A*src[0] + E*src[step+0]));\
690  OP(dst[1], (A*src[1] + E*src[step+1]));\
691  OP(dst[2], (A*src[2] + E*src[step+2]));\
692  OP(dst[3], (A*src[3] + E*src[step+3]));\
693  OP(dst[4], (A*src[4] + E*src[step+4]));\
694  OP(dst[5], (A*src[5] + E*src[step+5]));\
695  OP(dst[6], (A*src[6] + E*src[step+6]));\
696  OP(dst[7], (A*src[7] + E*src[step+7]));\
697  dst+= stride;\
698  src+= stride;\
699  }\
700  }\
701 }
702 
703 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
704 #define op_put(a, b) a = (((b) + 32)>>6)
705 
706 H264_CHROMA_MC(put_ , op_put)
707 H264_CHROMA_MC(avg_ , op_avg)
708 #undef op_avg
709 #undef op_put
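/* Illustrative sketch (editorial): the chroma MC above is 1/8-pel bilinear
 * interpolation; the weights A, B, C, D always sum to 64, so op_put's
 * ((b) + 32) >> 6 is a rounded division by 64.  One output sample for the
 * general case, assuming 8-bit input (example_chroma_bilinear is a
 * hypothetical helper, not FFmpeg API): */
static inline uint8_t example_chroma_bilinear(const uint8_t *src, int stride,
                                              int x, int y)
{
    const int A = (8 - x) * (8 - y), B = x * (8 - y);
    const int C = (8 - x) * y,       D = x * y;
    return (A * src[0] + B * src[1] + C * src[stride] + D * src[stride + 1] + 32) >> 6;
}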
710 
711 #define H264_LOWPASS(OPNAME, OP, OP2) \
712 static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
713  const int h=2;\
714  INIT_CLIP\
715  int i;\
716  pixel *dst = (pixel*)p_dst;\
717  pixel *src = (pixel*)p_src;\
718  dstStride >>= sizeof(pixel)-1;\
719  srcStride >>= sizeof(pixel)-1;\
720  for(i=0; i<h; i++)\
721  {\
722  OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
723  OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
724  dst+=dstStride;\
725  src+=srcStride;\
726  }\
727 }\
728 \
729 static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
730  const int w=2;\
731  INIT_CLIP\
732  int i;\
733  pixel *dst = (pixel*)p_dst;\
734  pixel *src = (pixel*)p_src;\
735  dstStride >>= sizeof(pixel)-1;\
736  srcStride >>= sizeof(pixel)-1;\
737  for(i=0; i<w; i++)\
738  {\
739  const int srcB= src[-2*srcStride];\
740  const int srcA= src[-1*srcStride];\
741  const int src0= src[0 *srcStride];\
742  const int src1= src[1 *srcStride];\
743  const int src2= src[2 *srcStride];\
744  const int src3= src[3 *srcStride];\
745  const int src4= src[4 *srcStride];\
746  OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
747  OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
748  dst++;\
749  src++;\
750  }\
751 }\
752 \
753 static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *p_dst, pixeltmp *tmp, uint8_t *p_src, int dstStride, int tmpStride, int srcStride){\
754  const int h=2;\
755  const int w=2;\
756  const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
757  INIT_CLIP\
758  int i;\
759  pixel *dst = (pixel*)p_dst;\
760  pixel *src = (pixel*)p_src;\
761  dstStride >>= sizeof(pixel)-1;\
762  srcStride >>= sizeof(pixel)-1;\
763  src -= 2*srcStride;\
764  for(i=0; i<h+5; i++)\
765  {\
766  tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
767  tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
768  tmp+=tmpStride;\
769  src+=srcStride;\
770  }\
771  tmp -= tmpStride*(h+5-2);\
772  for(i=0; i<w; i++)\
773  {\
774  const int tmpB= tmp[-2*tmpStride] - pad;\
775  const int tmpA= tmp[-1*tmpStride] - pad;\
776  const int tmp0= tmp[0 *tmpStride] - pad;\
777  const int tmp1= tmp[1 *tmpStride] - pad;\
778  const int tmp2= tmp[2 *tmpStride] - pad;\
779  const int tmp3= tmp[3 *tmpStride] - pad;\
780  const int tmp4= tmp[4 *tmpStride] - pad;\
781  OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
782  OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
783  dst++;\
784  tmp++;\
785  }\
786 }\
787 static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
788  const int h=4;\
789  INIT_CLIP\
790  int i;\
791  pixel *dst = (pixel*)p_dst;\
792  pixel *src = (pixel*)p_src;\
793  dstStride >>= sizeof(pixel)-1;\
794  srcStride >>= sizeof(pixel)-1;\
795  for(i=0; i<h; i++)\
796  {\
797  OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
798  OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
799  OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
800  OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
801  dst+=dstStride;\
802  src+=srcStride;\
803  }\
804 }\
805 \
806 static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
807  const int w=4;\
808  INIT_CLIP\
809  int i;\
810  pixel *dst = (pixel*)p_dst;\
811  pixel *src = (pixel*)p_src;\
812  dstStride >>= sizeof(pixel)-1;\
813  srcStride >>= sizeof(pixel)-1;\
814  for(i=0; i<w; i++)\
815  {\
816  const int srcB= src[-2*srcStride];\
817  const int srcA= src[-1*srcStride];\
818  const int src0= src[0 *srcStride];\
819  const int src1= src[1 *srcStride];\
820  const int src2= src[2 *srcStride];\
821  const int src3= src[3 *srcStride];\
822  const int src4= src[4 *srcStride];\
823  const int src5= src[5 *srcStride];\
824  const int src6= src[6 *srcStride];\
825  OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
826  OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
827  OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
828  OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
829  dst++;\
830  src++;\
831  }\
832 }\
833 \
834 static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *p_dst, pixeltmp *tmp, uint8_t *p_src, int dstStride, int tmpStride, int srcStride){\
835  const int h=4;\
836  const int w=4;\
837  const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
838  INIT_CLIP\
839  int i;\
840  pixel *dst = (pixel*)p_dst;\
841  pixel *src = (pixel*)p_src;\
842  dstStride >>= sizeof(pixel)-1;\
843  srcStride >>= sizeof(pixel)-1;\
844  src -= 2*srcStride;\
845  for(i=0; i<h+5; i++)\
846  {\
847  tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
848  tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
849  tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]) + pad;\
850  tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]) + pad;\
851  tmp+=tmpStride;\
852  src+=srcStride;\
853  }\
854  tmp -= tmpStride*(h+5-2);\
855  for(i=0; i<w; i++)\
856  {\
857  const int tmpB= tmp[-2*tmpStride] - pad;\
858  const int tmpA= tmp[-1*tmpStride] - pad;\
859  const int tmp0= tmp[0 *tmpStride] - pad;\
860  const int tmp1= tmp[1 *tmpStride] - pad;\
861  const int tmp2= tmp[2 *tmpStride] - pad;\
862  const int tmp3= tmp[3 *tmpStride] - pad;\
863  const int tmp4= tmp[4 *tmpStride] - pad;\
864  const int tmp5= tmp[5 *tmpStride] - pad;\
865  const int tmp6= tmp[6 *tmpStride] - pad;\
866  OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
867  OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
868  OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
869  OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
870  dst++;\
871  tmp++;\
872  }\
873 }\
874 \
875 static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
876  const int h=8;\
877  INIT_CLIP\
878  int i;\
879  pixel *dst = (pixel*)p_dst;\
880  pixel *src = (pixel*)p_src;\
881  dstStride >>= sizeof(pixel)-1;\
882  srcStride >>= sizeof(pixel)-1;\
883  for(i=0; i<h; i++)\
884  {\
885  OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
886  OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
887  OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
888  OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
889  OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
890  OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
891  OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
892  OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
893  dst+=dstStride;\
894  src+=srcStride;\
895  }\
896 }\
897 \
898 static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *p_dst, uint8_t *p_src, int dstStride, int srcStride){\
899  const int w=8;\
900  INIT_CLIP\
901  int i;\
902  pixel *dst = (pixel*)p_dst;\
903  pixel *src = (pixel*)p_src;\
904  dstStride >>= sizeof(pixel)-1;\
905  srcStride >>= sizeof(pixel)-1;\
906  for(i=0; i<w; i++)\
907  {\
908  const int srcB= src[-2*srcStride];\
909  const int srcA= src[-1*srcStride];\
910  const int src0= src[0 *srcStride];\
911  const int src1= src[1 *srcStride];\
912  const int src2= src[2 *srcStride];\
913  const int src3= src[3 *srcStride];\
914  const int src4= src[4 *srcStride];\
915  const int src5= src[5 *srcStride];\
916  const int src6= src[6 *srcStride];\
917  const int src7= src[7 *srcStride];\
918  const int src8= src[8 *srcStride];\
919  const int src9= src[9 *srcStride];\
920  const int src10=src[10*srcStride];\
921  OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
922  OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
923  OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
924  OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
925  OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
926  OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
927  OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
928  OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
929  dst++;\
930  src++;\
931  }\
932 }\
933 \
934 static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *p_dst, pixeltmp *tmp, uint8_t *p_src, int dstStride, int tmpStride, int srcStride){\
935  const int h=8;\
936  const int w=8;\
937  const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
938  INIT_CLIP\
939  int i;\
940  pixel *dst = (pixel*)p_dst;\
941  pixel *src = (pixel*)p_src;\
942  dstStride >>= sizeof(pixel)-1;\
943  srcStride >>= sizeof(pixel)-1;\
944  src -= 2*srcStride;\
945  for(i=0; i<h+5; i++)\
946  {\
947  tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]) + pad;\
948  tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]) + pad;\
949  tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]) + pad;\
950  tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]) + pad;\
951  tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]) + pad;\
952  tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]) + pad;\
953  tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]) + pad;\
954  tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]) + pad;\
955  tmp+=tmpStride;\
956  src+=srcStride;\
957  }\
958  tmp -= tmpStride*(h+5-2);\
959  for(i=0; i<w; i++)\
960  {\
961  const int tmpB= tmp[-2*tmpStride] - pad;\
962  const int tmpA= tmp[-1*tmpStride] - pad;\
963  const int tmp0= tmp[0 *tmpStride] - pad;\
964  const int tmp1= tmp[1 *tmpStride] - pad;\
965  const int tmp2= tmp[2 *tmpStride] - pad;\
966  const int tmp3= tmp[3 *tmpStride] - pad;\
967  const int tmp4= tmp[4 *tmpStride] - pad;\
968  const int tmp5= tmp[5 *tmpStride] - pad;\
969  const int tmp6= tmp[6 *tmpStride] - pad;\
970  const int tmp7= tmp[7 *tmpStride] - pad;\
971  const int tmp8= tmp[8 *tmpStride] - pad;\
972  const int tmp9= tmp[9 *tmpStride] - pad;\
973  const int tmp10=tmp[10*tmpStride] - pad;\
974  OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
975  OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
976  OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
977  OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
978  OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
979  OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
980  OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
981  OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
982  dst++;\
983  tmp++;\
984  }\
985 }\
986 \
987 static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
988  FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
989  FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
990  src += 8*srcStride;\
991  dst += 8*dstStride;\
992  FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
993  FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
994 }\
995 \
996 static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
997  FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
998  FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
999  src += 8*srcStride;\
1000  dst += 8*dstStride;\
1001  FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
1002  FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
1003 }\
1004 \
1005 static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, pixeltmp *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1006  FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
1007  FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
1008  src += 8*srcStride;\
1009  dst += 8*dstStride;\
1010  FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
1011  FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
1012 }\
1013 
1014 #define H264_MC(OPNAME, SIZE) \
1015 static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, int stride){\
1016  FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
1017 }\
1018 \
1019 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, int stride){\
1020  uint8_t half[SIZE*SIZE*sizeof(pixel)];\
1021  FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
1022  FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
1023 }\
1024 \
1025 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, int stride){\
1026  FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
1027 }\
1028 \
1029 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, int stride){\
1030  uint8_t half[SIZE*SIZE*sizeof(pixel)];\
1031  FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
1032  FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
1033 }\
1034 \
1035 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, int stride){\
1036  uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
1037  uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
1038  uint8_t half[SIZE*SIZE*sizeof(pixel)];\
1039  FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
1040  FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
1041  FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
1042 }\
1043 \
1044 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, int stride){\
1045  uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
1046  uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
1047  FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
1048  FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
1049 }\
1050 \
1051 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, int stride){\
1052  uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
1053  uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
1054  uint8_t half[SIZE*SIZE*sizeof(pixel)];\
1055  FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
1056  FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
1057  FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
1058 }\
1059 \
1060 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, int stride){\
1061  uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
1062  uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
1063  uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
1064  uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
1065  FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
1066  FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
1067  FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
1068  FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
1069 }\
1070 \
1071 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, int stride){\
1072  uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
1073  uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
1074  uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
1075  uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
1076  FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
1077  FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
1078  FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
1079  FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
1080 }\
1081 \
1082 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, int stride){\
1083  uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
1084  uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
1085  uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
1086  uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
1087  FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
1088  FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
1089  FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
1090  FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
1091 }\
1092 \
1093 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, int stride){\
1094  uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
1095  uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
1096  uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
1097  uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
1098  FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
1099  FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
1100  FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
1101  FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
1102 }\
1103 \
1104 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, int stride){\
1105  pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
1106  FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
1107 }\
1108 \
1109 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, int stride){\
1110  pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
1111  uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
1112  uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
1113  FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
1114  FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
1115  FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
1116 }\
1117 \
1118 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, int stride){\
1119  pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
1120  uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
1121  uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
1122  FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
1123  FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
1124  FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
1125 }\
1126 \
1127 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, int stride){\
1128  uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
1129  uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
1130  pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
1131  uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
1132  uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
1133  FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
1134  FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
1135  FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
1136  FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
1137 }\
1138 \
1139 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, int stride){\
1140  uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
1141  uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
1142  pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
1143  uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
1144  uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
1145  FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
1146  FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
1147  FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
1148  FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
1149 }\
1150 
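/* Illustrative note (editorial): H264_MC expands the 16 quarter-pel positions
 * _mcXY, where X and Y are the quarter-sample offsets within a pixel.
 * mc00 is a plain copy and mc20/mc02/mc22 use the h/v/hv lowpass filters
 * directly; the remaining positions average two planes (the source, a shifted
 * source, or half-pel filtered planes) with the _l2 helpers, e.g. for mc11
 * each output sample is roughly
 *     dst[i] = (halfH[i] + halfV[i] + 1) >> 1
 * where halfH and halfV are the horizontal and vertical half-pel planes. */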
1151 #define op_avg(a, b) a = (((a)+CLIP(((b) + 16)>>5)+1)>>1)
1152 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
1153 #define op_put(a, b) a = CLIP(((b) + 16)>>5)
1154 #define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
1155 #define op2_put(a, b) a = CLIP(((b) + 512)>>10)
1156 
1157 H264_LOWPASS(put_ , op_put, op2_put)
1158 H264_LOWPASS(avg_ , op_avg, op2_avg)
1159 H264_MC(put_, 2)
1160 H264_MC(put_, 4)
1161 H264_MC(put_, 8)
1162 H264_MC(put_, 16)
1163 H264_MC(avg_, 4)
1164 H264_MC(avg_, 8)
1165 H264_MC(avg_, 16)
1166 
1167 #undef op_avg
1168 #undef op_put
1169 #undef op2_avg
1170 #undef op2_put
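/* Illustrative sketch (editorial): H264_LOWPASS implements H.264's 6-tap
 * half-sample filter with taps (1, -5, 20, 20, -5, 1).  The tap sum is 32,
 * so op_put rounds with ((b) + 16) >> 5; the combined hv path runs the filter
 * twice (gain 32 * 32 = 1024), hence op2_put's ((b) + 512) >> 10.  One
 * horizontal half-pel sample, assuming 8-bit input; the caller clips the
 * result to [0, 255] (CLIP in the real code): */
static inline int example_h264_halfpel_h(const uint8_t *src)
{
    int v = (src[0] + src[1]) * 20 - (src[-1] + src[2]) * 5 + (src[-2] + src[3]);
    return (v + 16) >> 5;
}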
1172 #if BIT_DEPTH == 8
1173 # define put_h264_qpel8_mc00_8_c ff_put_pixels8x8_8_c
1174 # define avg_h264_qpel8_mc00_8_c ff_avg_pixels8x8_8_c
1175 # define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
1176 # define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
1177 #elif BIT_DEPTH == 9
1178 # define put_h264_qpel8_mc00_9_c ff_put_pixels8x8_9_c
1179 # define avg_h264_qpel8_mc00_9_c ff_avg_pixels8x8_9_c
1180 # define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
1181 # define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
1182 #elif BIT_DEPTH == 10
1183 # define put_h264_qpel8_mc00_10_c ff_put_pixels8x8_10_c
1184 # define avg_h264_qpel8_mc00_10_c ff_avg_pixels8x8_10_c
1185 # define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
1186 # define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
1187 #elif BIT_DEPTH == 12
1188 # define put_h264_qpel8_mc00_12_c ff_put_pixels8x8_12_c
1189 # define avg_h264_qpel8_mc00_12_c ff_avg_pixels8x8_12_c
1190 # define put_h264_qpel16_mc00_12_c ff_put_pixels16x16_12_c
1191 # define avg_h264_qpel16_mc00_12_c ff_avg_pixels16x16_12_c
1192 #elif BIT_DEPTH == 14
1193 # define put_h264_qpel8_mc00_14_c ff_put_pixels8x8_14_c
1194 # define avg_h264_qpel8_mc00_14_c ff_avg_pixels8x8_14_c
1195 # define put_h264_qpel16_mc00_14_c ff_put_pixels16x16_14_c
1196 # define avg_h264_qpel16_mc00_14_c ff_avg_pixels16x16_14_c
1197 #endif
1198 
1199 void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
1200  FUNCC(put_pixels8)(dst, src, stride, 8);
1201 }
1202 void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
1203  FUNCC(avg_pixels8)(dst, src, stride, 8);
1204 }
1205 void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
1206  FUNCC(put_pixels16)(dst, src, stride, 16);
1207 }
1208 void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
1209  FUNCC(avg_pixels16)(dst, src, stride, 16);
1210 }
1211