/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG-4 part10 prediction functions.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "libavutil/intreadwrite.h"

#include "mathops.h"

#include "bit_depth_template.c"
33 
34 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright,
35  ptrdiff_t _stride)
36 {
37  pixel *src = (pixel*)_src;
38  int stride = _stride>>(sizeof(pixel)-1);
39  const pixel4 a= AV_RN4PA(src-stride);
40 
41  AV_WN4PA(src+0*stride, a);
42  AV_WN4PA(src+1*stride, a);
43  AV_WN4PA(src+2*stride, a);
44  AV_WN4PA(src+3*stride, a);
45 }
46 
47 static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,
48  ptrdiff_t _stride)
49 {
50  pixel *src = (pixel*)_src;
51  int stride = _stride>>(sizeof(pixel)-1);
56 }
57 
58 static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,
59  ptrdiff_t _stride)
60 {
61  pixel *src = (pixel*)_src;
62  int stride = _stride>>(sizeof(pixel)-1);
63  const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
64  + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
65  const pixel4 a = PIXEL_SPLAT_X4(dc);
66 
67  AV_WN4PA(src+0*stride, a);
68  AV_WN4PA(src+1*stride, a);
69  AV_WN4PA(src+2*stride, a);
70  AV_WN4PA(src+3*stride, a);
71 }
72 
73 static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,
74  ptrdiff_t _stride)
75 {
76  pixel *src = (pixel*)_src;
77  int stride = _stride>>(sizeof(pixel)-1);
78  const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
79  const pixel4 a = PIXEL_SPLAT_X4(dc);
80 
81  AV_WN4PA(src+0*stride, a);
82  AV_WN4PA(src+1*stride, a);
83  AV_WN4PA(src+2*stride, a);
84  AV_WN4PA(src+3*stride, a);
85 }
86 
87 static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,
88  ptrdiff_t _stride)
89 {
90  pixel *src = (pixel*)_src;
91  int stride = _stride>>(sizeof(pixel)-1);
92  const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
93  const pixel4 a = PIXEL_SPLAT_X4(dc);
94 
95  AV_WN4PA(src+0*stride, a);
96  AV_WN4PA(src+1*stride, a);
97  AV_WN4PA(src+2*stride, a);
98  AV_WN4PA(src+3*stride, a);
99 }
100 
101 static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright,
102  ptrdiff_t _stride)
103 {
104  pixel *src = (pixel*)_src;
105  int stride = _stride>>(sizeof(pixel)-1);
106  const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
107 
108  AV_WN4PA(src+0*stride, a);
109  AV_WN4PA(src+1*stride, a);
110  AV_WN4PA(src+2*stride, a);
111  AV_WN4PA(src+3*stride, a);
112 }
113 
114 static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright,
115  ptrdiff_t _stride)
116 {
117  pixel *src = (pixel*)_src;
118  int stride = _stride>>(sizeof(pixel)-1);
119  const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
120 
121  AV_WN4PA(src+0*stride, a);
122  AV_WN4PA(src+1*stride, a);
123  AV_WN4PA(src+2*stride, a);
124  AV_WN4PA(src+3*stride, a);
125 }
126 
127 static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright,
128  ptrdiff_t _stride)
129 {
130  pixel *src = (pixel*)_src;
131  int stride = _stride>>(sizeof(pixel)-1);
132  const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
133 
134  AV_WN4PA(src+0*stride, a);
135  AV_WN4PA(src+1*stride, a);
136  AV_WN4PA(src+2*stride, a);
137  AV_WN4PA(src+3*stride, a);
138 }
139 
140 
/* Neighbour loaders for the 4x4 directional predictors. Each expands to
 * four const locals (t4..t7, l4..l7, l0..l3, t0..t3); av_unused silences
 * warnings in predictors that do not consume every value. They assume
 * `src`, `stride` and (for the top-right edge) `topright` are in scope. */
#define LOAD_TOP_RIGHT_EDGE\
    const unsigned av_unused t4 = topright[0];\
    const unsigned av_unused t5 = topright[1];\
    const unsigned av_unused t6 = topright[2];\
    const unsigned av_unused t7 = topright[3];\

#define LOAD_DOWN_LEFT_EDGE\
    const unsigned av_unused l4 = src[-1+4*stride];\
    const unsigned av_unused l5 = src[-1+5*stride];\
    const unsigned av_unused l6 = src[-1+6*stride];\
    const unsigned av_unused l7 = src[-1+7*stride];\

#define LOAD_LEFT_EDGE\
    const unsigned av_unused l0 = src[-1+0*stride];\
    const unsigned av_unused l1 = src[-1+1*stride];\
    const unsigned av_unused l2 = src[-1+2*stride];\
    const unsigned av_unused l3 = src[-1+3*stride];\

#define LOAD_TOP_EDGE\
    const unsigned av_unused t0 = src[ 0-1*stride];\
    const unsigned av_unused t1 = src[ 1-1*stride];\
    const unsigned av_unused t2 = src[ 2-1*stride];\
    const unsigned av_unused t3 = src[ 3-1*stride];\

164 
165 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
166  ptrdiff_t _stride)
167 {
168  pixel *src = (pixel*)_src;
169  int stride = _stride>>(sizeof(pixel)-1);
170  const int lt= src[-1-1*stride];
173 
174  src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
175  src[0+2*stride]=
176  src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
177  src[0+1*stride]=
178  src[1+2*stride]=
179  src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
180  src[0+0*stride]=
181  src[1+1*stride]=
182  src[2+2*stride]=
183  src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
184  src[1+0*stride]=
185  src[2+1*stride]=
186  src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
187  src[2+0*stride]=
188  src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
189  src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
190 }
191 
192 static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,
193  ptrdiff_t _stride)
194 {
195  pixel *src = (pixel*)_src;
196  const pixel *topright = (const pixel*)_topright;
197  int stride = _stride>>(sizeof(pixel)-1);
200 // LOAD_LEFT_EDGE
201 
202  src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
203  src[1+0*stride]=
204  src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
205  src[2+0*stride]=
206  src[1+1*stride]=
207  src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
208  src[3+0*stride]=
209  src[2+1*stride]=
210  src[1+2*stride]=
211  src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
212  src[3+1*stride]=
213  src[2+2*stride]=
214  src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
215  src[3+2*stride]=
216  src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
217  src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
218 }
219 
221  const uint8_t *topright,
222  ptrdiff_t _stride)
223 {
224  pixel *src = (pixel*)_src;
225  int stride = _stride>>(sizeof(pixel)-1);
226  const int lt= src[-1-1*stride];
229 
230  src[0+0*stride]=
231  src[1+2*stride]=(lt + t0 + 1)>>1;
232  src[1+0*stride]=
233  src[2+2*stride]=(t0 + t1 + 1)>>1;
234  src[2+0*stride]=
235  src[3+2*stride]=(t1 + t2 + 1)>>1;
236  src[3+0*stride]=(t2 + t3 + 1)>>1;
237  src[0+1*stride]=
238  src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
239  src[1+1*stride]=
240  src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
241  src[2+1*stride]=
242  src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
243  src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
244  src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
245  src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
246 }
247 
249  const uint8_t *_topright,
250  ptrdiff_t _stride)
251 {
252  pixel *src = (pixel*)_src;
253  const pixel *topright = (const pixel*)_topright;
254  int stride = _stride>>(sizeof(pixel)-1);
257 
258  src[0+0*stride]=(t0 + t1 + 1)>>1;
259  src[1+0*stride]=
260  src[0+2*stride]=(t1 + t2 + 1)>>1;
261  src[2+0*stride]=
262  src[1+2*stride]=(t2 + t3 + 1)>>1;
263  src[3+0*stride]=
264  src[2+2*stride]=(t3 + t4+ 1)>>1;
265  src[3+2*stride]=(t4 + t5+ 1)>>1;
266  src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
267  src[1+1*stride]=
268  src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
269  src[2+1*stride]=
270  src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
271  src[3+1*stride]=
272  src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
273  src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
274 }
275 
276 static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
277  ptrdiff_t _stride)
278 {
279  pixel *src = (pixel*)_src;
280  int stride = _stride>>(sizeof(pixel)-1);
282 
283  src[0+0*stride]=(l0 + l1 + 1)>>1;
284  src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
285  src[2+0*stride]=
286  src[0+1*stride]=(l1 + l2 + 1)>>1;
287  src[3+0*stride]=
288  src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
289  src[2+1*stride]=
290  src[0+2*stride]=(l2 + l3 + 1)>>1;
291  src[3+1*stride]=
292  src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
293  src[3+2*stride]=
294  src[1+3*stride]=
295  src[0+3*stride]=
296  src[2+2*stride]=
297  src[2+3*stride]=
298  src[3+3*stride]=l3;
299 }
300 
302  const uint8_t *topright,
303  ptrdiff_t _stride)
304 {
305  pixel *src = (pixel*)_src;
306  int stride = _stride>>(sizeof(pixel)-1);
307  const int lt= src[-1-1*stride];
310 
311  src[0+0*stride]=
312  src[2+1*stride]=(lt + l0 + 1)>>1;
313  src[1+0*stride]=
314  src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
315  src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
316  src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
317  src[0+1*stride]=
318  src[2+2*stride]=(l0 + l1 + 1)>>1;
319  src[1+1*stride]=
320  src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
321  src[0+2*stride]=
322  src[2+3*stride]=(l1 + l2+ 1)>>1;
323  src[1+2*stride]=
324  src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
325  src[0+3*stride]=(l2 + l3 + 1)>>1;
326  src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
327 }
328 
329 static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
330 {
331  int i;
332  pixel *src = (pixel*)_src;
333  int stride = _stride>>(sizeof(pixel)-1);
334  const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
335  const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
336  const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
337  const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);
338 
339  for(i=0; i<16; i++){
340  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
341  AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
342  AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
343  AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
344  }
345 }
346 
347 static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
348 {
349  int i;
350  pixel *src = (pixel*)_src;
351  stride >>= sizeof(pixel)-1;
352 
353  for(i=0; i<16; i++){
354  const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
355 
356  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
357  AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
358  AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
359  AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
360  }
361 }
362 
/* Fill a 16x16 block with the splatted DC value v; expects `src` and
 * `stride` (in pixels) plus a declared loop counter `i` in scope.
 * NOTE: advances `src` by 16 rows as a side effect. */
#define PREDICT_16x16_DC(v)\
    for(i=0; i<16; i++){\
        AV_WN4PA(src+ 0, v);\
        AV_WN4PA(src+ 4, v);\
        AV_WN4PA(src+ 8, v);\
        AV_WN4PA(src+12, v);\
        src += stride;\
    }
371 
372 static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)
373 {
374  int i, dc=0;
375  pixel *src = (pixel*)_src;
376  pixel4 dcsplat;
377  stride >>= sizeof(pixel)-1;
378 
379  for(i=0;i<16; i++){
380  dc+= src[-1+i*stride];
381  }
382 
383  for(i=0;i<16; i++){
384  dc+= src[i-stride];
385  }
386 
387  dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
388  PREDICT_16x16_DC(dcsplat);
389 }
390 
391 static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
392 {
393  int i, dc=0;
394  pixel *src = (pixel*)_src;
395  pixel4 dcsplat;
396  stride >>= sizeof(pixel)-1;
397 
398  for(i=0;i<16; i++){
399  dc+= src[-1+i*stride];
400  }
401 
402  dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
403  PREDICT_16x16_DC(dcsplat);
404 }
405 
406 static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
407 {
408  int i, dc=0;
409  pixel *src = (pixel*)_src;
410  pixel4 dcsplat;
411  stride >>= sizeof(pixel)-1;
412 
413  for(i=0;i<16; i++){
414  dc+= src[i-stride];
415  }
416 
417  dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
418  PREDICT_16x16_DC(dcsplat);
419 }
420 
421 #define PRED16x16_X(n, v) \
422 static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
423 {\
424  int i;\
425  pixel *src = (pixel*)_src;\
426  stride >>= sizeof(pixel)-1;\
427  PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
428 }
429 
430 PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
431 PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
432 PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
433 
434 static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,
435  ptrdiff_t _stride,
436  const int svq3,
437  const int rv40)
438 {
439  int i, j, k;
440  int a;
441  INIT_CLIP
442  pixel *src = (pixel*)_src;
443  int stride = _stride>>(sizeof(pixel)-1);
444  const pixel * const src0 = src +7-stride;
445  const pixel * src1 = src +8*stride-1;
446  const pixel * src2 = src1-2*stride; // == src+6*stride-1;
447  int H = src0[1] - src0[-1];
448  int V = src1[0] - src2[ 0];
449  for(k=2; k<=8; ++k) {
450  src1 += stride; src2 -= stride;
451  H += k*(src0[k] - src0[-k]);
452  V += k*(src1[0] - src2[ 0]);
453  }
454  if(svq3){
455  H = ( 5*(H/4) ) / 16;
456  V = ( 5*(V/4) ) / 16;
457 
458  /* required for 100% accuracy */
459  i = H; H = V; V = i;
460  }else if(rv40){
461  H = ( H + (H>>2) ) >> 4;
462  V = ( V + (V>>2) ) >> 4;
463  }else{
464  H = ( 5*H+32 ) >> 6;
465  V = ( 5*V+32 ) >> 6;
466  }
467 
468  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
469  for(j=16; j>0; --j) {
470  int b = a;
471  a += V;
472  for(i=-16; i<0; i+=4) {
473  src[16+i] = CLIP((b ) >> 5);
474  src[17+i] = CLIP((b+ H) >> 5);
475  src[18+i] = CLIP((b+2*H) >> 5);
476  src[19+i] = CLIP((b+3*H) >> 5);
477  b += 4*H;
478  }
479  src += stride;
480  }
481 }
482 
483 static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride)
484 {
485  FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
486 }
487 
488 static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)
489 {
490  int i;
491  pixel *src = (pixel*)_src;
492  int stride = _stride>>(sizeof(pixel)-1);
493  const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
494  const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
495 
496  for(i=0; i<8; i++){
497  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
498  AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
499  }
500 }
501 
502 static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
503 {
504  int i;
505  pixel *src = (pixel*)_src;
506  int stride = _stride>>(sizeof(pixel)-1);
507  const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
508  const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
509 
510  for(i=0; i<16; i++){
511  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
512  AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
513  }
514 }
515 
516 static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)
517 {
518  int i;
519  pixel *src = (pixel*)_src;
520  stride >>= sizeof(pixel)-1;
521 
522  for(i=0; i<8; i++){
523  const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
524  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
525  AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
526  }
527 }
528 
529 static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
530 {
531  int i;
532  pixel *src = (pixel*)_src;
533  stride >>= sizeof(pixel)-1;
534  for(i=0; i<16; i++){
535  const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
536  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
537  AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
538  }
539 }
540 
541 #define PRED8x8_X(n, v)\
542 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
543 {\
544  int i;\
545  const pixel4 a = PIXEL_SPLAT_X4(v);\
546  pixel *src = (pixel*)_src;\
547  stride >>= sizeof(pixel)-1;\
548  for(i=0; i<8; i++){\
549  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
550  AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
551  }\
552 }
553 
554 PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
555 PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
556 PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
557 
558 static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride)
559 {
560  FUNCC(pred8x8_128_dc)(_src, stride);
561  FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
562 }
563 
564 static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)
565 {
566  int i;
567  int dc0, dc2;
568  pixel4 dc0splat, dc2splat;
569  pixel *src = (pixel*)_src;
570  stride >>= sizeof(pixel)-1;
571 
572  dc0=dc2=0;
573  for(i=0;i<4; i++){
574  dc0+= src[-1+i*stride];
575  dc2+= src[-1+(i+4)*stride];
576  }
577  dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
578  dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
579 
580  for(i=0; i<4; i++){
581  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
582  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
583  }
584  for(i=4; i<8; i++){
585  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
586  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
587  }
588 }
589 
590 static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
591 {
592  FUNCC(pred8x8_left_dc)(_src, stride);
593  FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
594 }
595 
596 static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)
597 {
598  int i;
599  int dc0, dc1;
600  pixel4 dc0splat, dc1splat;
601  pixel *src = (pixel*)_src;
602  stride >>= sizeof(pixel)-1;
603 
604  dc0=dc1=0;
605  for(i=0;i<4; i++){
606  dc0+= src[i-stride];
607  dc1+= src[4+i-stride];
608  }
609  dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
610  dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
611 
612  for(i=0; i<4; i++){
613  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
614  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
615  }
616  for(i=4; i<8; i++){
617  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
618  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
619  }
620 }
621 
622 static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
623 {
624  int i;
625  int dc0, dc1;
626  pixel4 dc0splat, dc1splat;
627  pixel *src = (pixel*)_src;
628  stride >>= sizeof(pixel)-1;
629 
630  dc0=dc1=0;
631  for(i=0;i<4; i++){
632  dc0+= src[i-stride];
633  dc1+= src[4+i-stride];
634  }
635  dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
636  dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
637 
638  for(i=0; i<16; i++){
639  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
640  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
641  }
642 }
643 
644 static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)
645 {
646  int i;
647  int dc0, dc1, dc2;
648  pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
649  pixel *src = (pixel*)_src;
650  stride >>= sizeof(pixel)-1;
651 
652  dc0=dc1=dc2=0;
653  for(i=0;i<4; i++){
654  dc0+= src[-1+i*stride] + src[i-stride];
655  dc1+= src[4+i-stride];
656  dc2+= src[-1+(i+4)*stride];
657  }
658  dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
659  dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
660  dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
661  dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
662 
663  for(i=0; i<4; i++){
664  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
665  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
666  }
667  for(i=4; i<8; i++){
668  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
669  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
670  }
671 }
672 
673 static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)
674 {
675  int i;
676  int dc0, dc1, dc2, dc3, dc4;
677  pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
678  pixel *src = (pixel*)_src;
679  stride >>= sizeof(pixel)-1;
680 
681  dc0=dc1=dc2=dc3=dc4=0;
682  for(i=0;i<4; i++){
683  dc0+= src[-1+i*stride] + src[i-stride];
684  dc1+= src[4+i-stride];
685  dc2+= src[-1+(i+4)*stride];
686  dc3+= src[-1+(i+8)*stride];
687  dc4+= src[-1+(i+12)*stride];
688  }
689  dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
690  dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
691  dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
692  dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
693  dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
694  dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
695  dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
696  dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);
697 
698  for(i=0; i<4; i++){
699  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
700  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
701  }
702  for(i=4; i<8; i++){
703  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
704  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
705  }
706  for(i=8; i<12; i++){
707  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
708  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
709  }
710  for(i=12; i<16; i++){
711  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
712  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
713  }
714 }
715 
716 //the following 4 function should not be optimized!
717 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
718 {
721 }
722 
723 static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
724 {
727 }
728 
729 static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
730 {
733 }
734 
735 static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
736 {
739 }
740 
741 static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
742 {
743  FUNCC(pred8x8_left_dc)(src, stride);
745  FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
746 }
747 
748 static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
749 {
752  FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
753 }
754 
755 static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
756 {
757  FUNCC(pred8x8_left_dc)(src, stride);
759  FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
760 }
761 
762 static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
763 {
766  FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
767 }
768 
769 static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)
770 {
771  int j, k;
772  int a;
773  INIT_CLIP
774  pixel *src = (pixel*)_src;
775  int stride = _stride>>(sizeof(pixel)-1);
776  const pixel * const src0 = src +3-stride;
777  const pixel * src1 = src +4*stride-1;
778  const pixel * src2 = src1-2*stride; // == src+2*stride-1;
779  int H = src0[1] - src0[-1];
780  int V = src1[0] - src2[ 0];
781  for(k=2; k<=4; ++k) {
782  src1 += stride; src2 -= stride;
783  H += k*(src0[k] - src0[-k]);
784  V += k*(src1[0] - src2[ 0]);
785  }
786  H = ( 17*H+16 ) >> 5;
787  V = ( 17*V+16 ) >> 5;
788 
789  a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
790  for(j=8; j>0; --j) {
791  int b = a;
792  a += V;
793  src[0] = CLIP((b ) >> 5);
794  src[1] = CLIP((b+ H) >> 5);
795  src[2] = CLIP((b+2*H) >> 5);
796  src[3] = CLIP((b+3*H) >> 5);
797  src[4] = CLIP((b+4*H) >> 5);
798  src[5] = CLIP((b+5*H) >> 5);
799  src[6] = CLIP((b+6*H) >> 5);
800  src[7] = CLIP((b+7*H) >> 5);
801  src += stride;
802  }
803 }
804 
805 static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride)
806 {
807  int j, k;
808  int a;
809  INIT_CLIP
810  pixel *src = (pixel*)_src;
811  int stride = _stride>>(sizeof(pixel)-1);
812  const pixel * const src0 = src +3-stride;
813  const pixel * src1 = src +8*stride-1;
814  const pixel * src2 = src1-2*stride; // == src+6*stride-1;
815  int H = src0[1] - src0[-1];
816  int V = src1[0] - src2[ 0];
817 
818  for (k = 2; k <= 4; ++k) {
819  src1 += stride; src2 -= stride;
820  H += k*(src0[k] - src0[-k]);
821  V += k*(src1[0] - src2[ 0]);
822  }
823  for (; k <= 8; ++k) {
824  src1 += stride; src2 -= stride;
825  V += k*(src1[0] - src2[0]);
826  }
827 
828  H = (17*H+16) >> 5;
829  V = (5*V+32) >> 6;
830 
831  a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
832  for(j=16; j>0; --j) {
833  int b = a;
834  a += V;
835  src[0] = CLIP((b ) >> 5);
836  src[1] = CLIP((b+ H) >> 5);
837  src[2] = CLIP((b+2*H) >> 5);
838  src[3] = CLIP((b+3*H) >> 5);
839  src[4] = CLIP((b+4*H) >> 5);
840  src[5] = CLIP((b+5*H) >> 5);
841  src[6] = CLIP((b+6*H) >> 5);
842  src[7] = CLIP((b+7*H) >> 5);
843  src += stride;
844  }
845 }
846 
/* Helpers for the 8x8 luma predictors: SRC addresses a pixel relative to
 * the block origin; the LOAD macros compute the 3-tap-filtered edge
 * samples (l0..l7, t0..t15, lt) mandated by H.264 8x8 intra prediction,
 * falling back to edge replication when top-left/top-right neighbours
 * are unavailable. */
#define SRC(x,y) src[(x)+(y)*stride]
#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

/* Fill the 8x8 block with the splatted DC value v. */
#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        AV_WN4PA(((pixel4*)src)+0, v); \
        AV_WN4PA(((pixel4*)src)+1, v); \
        src += stride; \
    }
884 
885 static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,
886  int has_topright, ptrdiff_t _stride)
887 {
888  pixel *src = (pixel*)_src;
889  int stride = _stride>>(sizeof(pixel)-1);
890 
892 }
893 static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,
894  int has_topright, ptrdiff_t _stride)
895 {
896  pixel *src = (pixel*)_src;
897  int stride = _stride>>(sizeof(pixel)-1);
898 
900  const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
902 }
903 static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,
904  int has_topright, ptrdiff_t _stride)
905 {
906  pixel *src = (pixel*)_src;
907  int stride = _stride>>(sizeof(pixel)-1);
908 
910  const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
912 }
913 static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,
914  int has_topright, ptrdiff_t _stride)
915 {
916  pixel *src = (pixel*)_src;
917  int stride = _stride>>(sizeof(pixel)-1);
918 
921  const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
922  +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
924 }
925 static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,
926  int has_topright, ptrdiff_t _stride)
927 {
928  pixel *src = (pixel*)_src;
929  int stride = _stride>>(sizeof(pixel)-1);
930  pixel4 a;
931 
933 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
934  AV_WN4PA(src+y*stride, a); \
935  AV_WN4PA(src+y*stride+4, a);
936  ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
937 #undef ROW
938 }
939 static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,
940  int has_topright, ptrdiff_t _stride)
941 {
942  int y;
943  pixel *src = (pixel*)_src;
944  int stride = _stride>>(sizeof(pixel)-1);
945  pixel4 a, b;
946 
948  src[0] = t0;
949  src[1] = t1;
950  src[2] = t2;
951  src[3] = t3;
952  src[4] = t4;
953  src[5] = t5;
954  src[6] = t6;
955  src[7] = t7;
956  a = AV_RN4PA(((pixel4*)src)+0);
957  b = AV_RN4PA(((pixel4*)src)+1);
958  for( y = 1; y < 8; y++ ) {
959  AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
960  AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
961  }
962 }
963 static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,
964  int has_topright, ptrdiff_t _stride)
965 {
966  pixel *src = (pixel*)_src;
967  int stride = _stride>>(sizeof(pixel)-1);
970  SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
971  SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
972  SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
973  SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
974  SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
975  SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
976  SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
977  SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
978  SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
979  SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
980  SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
981  SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
982  SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
983  SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
984  SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
985 }
986 static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
987  int has_topright, ptrdiff_t _stride)
988 {
989  pixel *src = (pixel*)_src;
990  int stride = _stride>>(sizeof(pixel)-1);
994  SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
995  SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
996  SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
997  SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
998  SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
999  SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
1000  SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
1001  SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
1002  SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
1003  SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
1004  SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
1005  SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
1006  SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
1007  SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
1008  SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
1009 }
1010 static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
1011  int has_topright, ptrdiff_t _stride)
1012 {
1013  pixel *src = (pixel*)_src;
1014  int stride = _stride>>(sizeof(pixel)-1);
1018  SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
1019  SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
1020  SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
1021  SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
1022  SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
1023  SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
1024  SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
1025  SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
1026  SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
1027  SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
1028  SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
1029  SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
1030  SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
1031  SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
1032  SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
1033  SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
1034  SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
1035  SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
1036  SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
1037  SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
1038  SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1039  SRC(7,0)= (t6 + t7 + 1) >> 1;
1040 }
1041 static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
1042  int has_topright, ptrdiff_t _stride)
1043 {
1044  pixel *src = (pixel*)_src;
1045  int stride = _stride>>(sizeof(pixel)-1);
1049  SRC(0,7)= (l6 + l7 + 1) >> 1;
1050  SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
1051  SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
1052  SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
1053  SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
1054  SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
1055  SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
1056  SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
1057  SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
1058  SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
1059  SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
1060  SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
1061  SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
1062  SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
1063  SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
1064  SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
1065  SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
1066  SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
1067  SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
1068  SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
1069  SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
1070  SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
1071 }
1072 static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
1073  int has_topright, ptrdiff_t _stride)
1074 {
1075  pixel *src = (pixel*)_src;
1076  int stride = _stride>>(sizeof(pixel)-1);
1079  SRC(0,0)= (t0 + t1 + 1) >> 1;
1080  SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
1081  SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
1082  SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
1083  SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
1084  SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
1085  SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
1086  SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
1087  SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
1088  SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
1089  SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
1090  SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1091  SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
1092  SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
1093  SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
1094  SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
1095  SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
1096  SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
1097  SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
1098  SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
1099  SRC(7,6)= (t10 + t11 + 1) >> 1;
1100  SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
1101 }
1102 static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
1103  int has_topright, ptrdiff_t _stride)
1104 {
1105  pixel *src = (pixel*)_src;
1106  int stride = _stride>>(sizeof(pixel)-1);
1108  SRC(0,0)= (l0 + l1 + 1) >> 1;
1109  SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
1110  SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
1111  SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
1112  SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
1113  SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
1114  SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
1115  SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
1116  SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
1117  SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
1118  SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
1119  SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
1120  SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
1121  SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
1122  SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
1123  SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
1124  SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
1125  SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
1126 }
1127 
1128 static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1129  int has_topright, ptrdiff_t _stride)
1130 {
1131  int i;
1132  pixel *src = (pixel*)_src;
1133  const dctcoef *block = (const dctcoef*)_block;
1134  pixel pix[8];
1135  int stride = _stride>>(sizeof(pixel)-1);
1137 
1138  pix[0] = t0;
1139  pix[1] = t1;
1140  pix[2] = t2;
1141  pix[3] = t3;
1142  pix[4] = t4;
1143  pix[5] = t5;
1144  pix[6] = t6;
1145  pix[7] = t7;
1146 
1147  for(i=0; i<8; i++){
1148  pixel v = pix[i];
1149  src[0*stride]= v += block[0];
1150  src[1*stride]= v += block[8];
1151  src[2*stride]= v += block[16];
1152  src[3*stride]= v += block[24];
1153  src[4*stride]= v += block[32];
1154  src[5*stride]= v += block[40];
1155  src[6*stride]= v += block[48];
1156  src[7*stride]= v + block[56];
1157  src++;
1158  block++;
1159  }
1160 
1161  memset(_block, 0, sizeof(dctcoef) * 64);
1162 }
1163 
1164 static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1165  int has_topright, ptrdiff_t _stride)
1166 {
1167  int i;
1168  pixel *src = (pixel*)_src;
1169  const dctcoef *block = (const dctcoef*)_block;
1170  pixel pix[8];
1171  int stride = _stride>>(sizeof(pixel)-1);
1173 
1174  pix[0] = l0;
1175  pix[1] = l1;
1176  pix[2] = l2;
1177  pix[3] = l3;
1178  pix[4] = l4;
1179  pix[5] = l5;
1180  pix[6] = l6;
1181  pix[7] = l7;
1182 
1183  for(i=0; i<8; i++){
1184  pixel v = pix[i];
1185  src[0]= v += block[0];
1186  src[1]= v += block[1];
1187  src[2]= v += block[2];
1188  src[3]= v += block[3];
1189  src[4]= v += block[4];
1190  src[5]= v += block[5];
1191  src[6]= v += block[6];
1192  src[7]= v + block[7];
1193  src+= stride;
1194  block+= 8;
1195  }
1196 
1197  memset(_block, 0, sizeof(dctcoef) * 64);
1198 }
1199 
1200 #undef PREDICT_8x8_LOAD_LEFT
1201 #undef PREDICT_8x8_LOAD_TOP
1202 #undef PREDICT_8x8_LOAD_TOPLEFT
1203 #undef PREDICT_8x8_LOAD_TOPRIGHT
1204 #undef PREDICT_8x8_DC
1205 #undef PTR
1206 #undef PT
1207 #undef PL
1208 #undef SRC
1209 
1210 static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block,
1211  ptrdiff_t stride)
1212 {
1213  int i;
1214  pixel *pix = (pixel*)_pix;
1215  const dctcoef *block = (const dctcoef*)_block;
1216  stride >>= sizeof(pixel)-1;
1217  pix -= stride;
1218  for(i=0; i<4; i++){
1219  pixel v = pix[0];
1220  pix[1*stride]= v += block[0];
1221  pix[2*stride]= v += block[4];
1222  pix[3*stride]= v += block[8];
1223  pix[4*stride]= v + block[12];
1224  pix++;
1225  block++;
1226  }
1227 
1228  memset(_block, 0, sizeof(dctcoef) * 16);
1229 }
1230 
1231 static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block,
1232  ptrdiff_t stride)
1233 {
1234  int i;
1235  pixel *pix = (pixel*)_pix;
1236  const dctcoef *block = (const dctcoef*)_block;
1237  stride >>= sizeof(pixel)-1;
1238  for(i=0; i<4; i++){
1239  pixel v = pix[-1];
1240  pix[0]= v += block[0];
1241  pix[1]= v += block[1];
1242  pix[2]= v += block[2];
1243  pix[3]= v + block[3];
1244  pix+= stride;
1245  block+= 4;
1246  }
1247 
1248  memset(_block, 0, sizeof(dctcoef) * 16);
1249 }
1250 
1251 static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block,
1252  ptrdiff_t stride)
1253 {
1254  int i;
1255  pixel *pix = (pixel*)_pix;
1256  const dctcoef *block = (const dctcoef*)_block;
1257  stride >>= sizeof(pixel)-1;
1258  pix -= stride;
1259  for(i=0; i<8; i++){
1260  pixel v = pix[0];
1261  pix[1*stride]= v += block[0];
1262  pix[2*stride]= v += block[8];
1263  pix[3*stride]= v += block[16];
1264  pix[4*stride]= v += block[24];
1265  pix[5*stride]= v += block[32];
1266  pix[6*stride]= v += block[40];
1267  pix[7*stride]= v += block[48];
1268  pix[8*stride]= v + block[56];
1269  pix++;
1270  block++;
1271  }
1272 
1273  memset(_block, 0, sizeof(dctcoef) * 64);
1274 }
1275 
1276 static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block,
1277  ptrdiff_t stride)
1278 {
1279  int i;
1280  pixel *pix = (pixel*)_pix;
1281  const dctcoef *block = (const dctcoef*)_block;
1282  stride >>= sizeof(pixel)-1;
1283  for(i=0; i<8; i++){
1284  pixel v = pix[-1];
1285  pix[0]= v += block[0];
1286  pix[1]= v += block[1];
1287  pix[2]= v += block[2];
1288  pix[3]= v += block[3];
1289  pix[4]= v += block[4];
1290  pix[5]= v += block[5];
1291  pix[6]= v += block[6];
1292  pix[7]= v + block[7];
1293  pix+= stride;
1294  block+= 8;
1295  }
1296 
1297  memset(_block, 0, sizeof(dctcoef) * 64);
1298 }
1299 
1300 static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
1301  int16_t *block,
1302  ptrdiff_t stride)
1303 {
1304  int i;
1305  for(i=0; i<16; i++)
1306  FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1307 }
1308 
1310  const int *block_offset,
1311  int16_t *block,
1312  ptrdiff_t stride)
1313 {
1314  int i;
1315  for(i=0; i<16; i++)
1316  FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1317 }
1318 
1319 static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
1320  int16_t *block, ptrdiff_t stride)
1321 {
1322  int i;
1323  for(i=0; i<4; i++)
1324  FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1325 }
1326 
1327 static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
1328  int16_t *block, ptrdiff_t stride)
1329 {
1330  int i;
1331  for(i=0; i<4; i++)
1332  FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1333  for(i=4; i<8; i++)
1334  FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1335 }
1336 
1337 static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
1338  int16_t *block,
1339  ptrdiff_t stride)
1340 {
1341  int i;
1342  for(i=0; i<4; i++)
1343  FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1344 }
1345 
1347  const int *block_offset,
1348  int16_t *block, ptrdiff_t stride)
1349 {
1350  int i;
1351  for(i=0; i<4; i++)
1352  FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1353  for(i=4; i<8; i++)
1354  FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1355 }
pred8x8_vertical_add
static void FUNCC() pred8x8_vertical_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1319
pred8x8_mad_cow_dc_0l0
static void FUNC() pred8x8_mad_cow_dc_0l0(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:755
pred8x8_horizontal_add
static void FUNCC() pred8x8_horizontal_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1337
stride
int stride
Definition: mace.c:144
pred4x4_127_dc
static void FUNCC() pred4x4_127_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:114
pred8x8_plane
static void FUNCC() pred8x8_plane(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:769
pred16x16_horizontal_add
static void FUNCC() pred16x16_horizontal_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1309
BIT_DEPTH
#define BIT_DEPTH
Definition: bit_depth_template.c:24
pred8x8_mad_cow_dc_0lt
static void FUNC() pred8x8_mad_cow_dc_0lt(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:729
pred16x16_plane
static void FUNCC() pred16x16_plane(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:483
PREDICT_8x8_LOAD_LEFT
#define PREDICT_8x8_LOAD_LEFT
Definition: h264pred_template.c:850
pred8x8_top_dc
static void FUNCC() pred8x8_top_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:596
PREDICT_8x8_LOAD_TOPRIGHT
#define PREDICT_8x8_LOAD_TOPRIGHT
Definition: h264pred_template.c:867
pred8x16_dc
static void FUNCC() pred8x16_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:673
pred8x16_mad_cow_dc_0l0
static void FUNC() pred8x16_mad_cow_dc_0l0(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:762
pred8x16_horizontal_add
static void FUNCC() pred8x16_horizontal_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1346
pred8x8l_top_dc
static void FUNCC() pred8x8l_top_dc(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:903
t0
#define t0
Definition: regdef.h:28
b
#define b
Definition: input.c:41
LOAD_TOP_EDGE
#define LOAD_TOP_EDGE
Definition: h264pred_template.c:159
t1
#define t1
Definition: regdef.h:29
pred16x16_left_dc
static void FUNCC() pred16x16_left_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:391
pred8x8l_vertical_filter_add
static void FUNCC() pred8x8l_vertical_filter_add(uint8_t *_src, int16_t *_block, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1128
PREDICT_16x16_DC
#define PREDICT_16x16_DC(v)
Definition: h264pred_template.c:363
t10
#define t10
Definition: regdef.h:55
pixel4
#define pixel4
Definition: bit_depth_template.c:83
LOAD_LEFT_EDGE
#define LOAD_LEFT_EDGE
Definition: h264pred_template.c:153
dctcoef
#define dctcoef
Definition: bit_depth_template.c:84
pred4x4_left_dc
static void FUNCC() pred4x4_left_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:73
pred4x4_horizontal_up
static void FUNCC() pred4x4_horizontal_up(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:276
INIT_CLIP
#define INIT_CLIP
Definition: bit_depth_template.c:87
src
#define src
Definition: vp8dsp.c:254
pred4x4_top_dc
static void FUNCC() pred4x4_top_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:87
pred4x4_down_right
static void FUNCC() pred4x4_down_right(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:165
pred8x8_vertical
static void FUNCC() pred8x8_vertical(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:488
t15
static int t15(InterplayACMContext *s, unsigned ind, unsigned col)
Definition: interplayacm.c:325
pred16x16_top_dc
static void FUNCC() pred16x16_top_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:406
pred8x8l_left_dc
static void FUNCC() pred8x8l_left_dc(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:893
intreadwrite.h
pred8x16_mad_cow_dc_l00
static void FUNC() pred8x16_mad_cow_dc_l00(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:748
a
a
Definition: h264pred_template.c:468
AV_WN4PA
#define AV_WN4PA
Definition: bit_depth_template.c:95
t7
#define t7
Definition: regdef.h:35
pred4x4_horizontal
static void FUNCC() pred4x4_horizontal(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:47
pred8x8_mad_cow_dc_l0t
static void FUNC() pred8x8_mad_cow_dc_l0t(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:717
pred8x16_vertical_add
static void FUNCC() pred8x16_vertical_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1327
PIXEL_SPLAT_X4
#define PIXEL_SPLAT_X4(x)
Definition: bit_depth_template.c:96
PREDICT_8x8_DC
#define PREDICT_8x8_DC(v)
Definition: h264pred_template.c:877
pred4x4_horizontal_add
static void FUNCC() pred4x4_horizontal_add(uint8_t *_pix, int16_t *_block, ptrdiff_t stride)
Definition: h264pred_template.c:1231
pred8x16_mad_cow_dc_0lt
static void FUNC() pred8x16_mad_cow_dc_0lt(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:735
LOAD_TOP_RIGHT_EDGE
#define LOAD_TOP_RIGHT_EDGE
Definition: h264pred_template.c:141
pred8x8l_horizontal_add
static void FUNCC() pred8x8l_horizontal_add(uint8_t *_pix, int16_t *_block, ptrdiff_t stride)
Definition: h264pred_template.c:1276
NULL
#define NULL
Definition: coverity.c:32
pred8x8_dc
static void FUNCC() pred8x8_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:644
t5
#define t5
Definition: regdef.h:33
pixel
uint8_t pixel
Definition: tiny_ssim.c:42
t6
#define t6
Definition: regdef.h:34
pred8x8l_horizontal_up
static void FUNCC() pred8x8l_horizontal_up(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1102
pred8x8l_down_right
static void FUNCC() pred8x8l_down_right(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:986
H
F H1 F F H1 F F F F H1<-F-------F-------F v v v H2 H3 H2 ^ ^ ^ F-------F-------F-> H1<-F-------F-------F|||||||||F H1 F|||||||||F H1 Funavailable fullpel samples(outside the picture for example) shall be equalto the closest available fullpel sampleSmaller pel interpolation:--------------------------if diag_mc is set then points which lie on a line between 2 vertically, horizontally or diagonally adjacent halfpel points shall be interpolatedlinearly with rounding to nearest and halfway values rounded up.points which lie on 2 diagonals at the same time should only use the onediagonal not containing the fullpel point F--> O q O<--h1-> O q O<--F v \/v \/v O O O O O O O|/|\|q q q q q|/|\|O O O O O O O ^/\ ^/\ ^ h2--> O q O<--h3-> O q O<--h2 v \/v \/v O O O O O O O|\|/|q q q q q|\|/|O O O O O O O ^/\ ^/\ ^ F--> O q O<--h1-> O q O<--Fthe remaining points shall be bilinearly interpolated from theup to 4 surrounding halfpel and fullpel points, again rounding should be tonearest and halfway values rounded upcompliant Snow decoders MUST support 1-1/8 pel luma and 1/2-1/16 pel chromainterpolation at leastOverlapped block motion compensation:-------------------------------------FIXMELL band prediction:===================Each sample in the LL0 subband is predicted by the median of the left, top andleft+top-topleft samples, samples outside the subband shall be considered tobe 0. 
To reverse this prediction in the decoder apply the following.for(y=0;y< height;y++){ for(x=0;x< width;x++){ sample[y][x]+=median(sample[y-1][x], sample[y][x-1], sample[y-1][x]+sample[y][x-1]-sample[y-1][x-1]);}}sample[-1][ *]=sample[ *][-1]=0;width, height here are the width and height of the LL0 subband not of the finalvideoDequantization:===============FIXMEWavelet Transform:==================Snow supports 2 wavelet transforms, the symmetric biorthogonal 5/3 integertransform and an integer approximation of the symmetric biorthogonal 9/7daubechies wavelet.2D IDWT(inverse discrete wavelet transform) --------------------------------------------The 2D IDWT applies a 2D filter recursively, each time combining the4 lowest frequency subbands into a single subband until only 1 subbandremains.The 2D filter is done by first applying a 1D filter in the vertical directionand then applying it in the horizontal one. --------------- --------------- --------------- ---------------|LL0|HL0|||||||||||||---+---|HL1||L0|H0|HL1||LL1|HL1|||||LH0|HH0|||||||||||||-------+-------|-> L1 H1 LH1 HH1 LH1 HH1 LH1 HH1 this can end with a L or a H
Definition: snow.txt:555
pred16x16_horizontal
static void FUNCC() pred16x16_horizontal(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:347
pred4x4_dc
static void FUNCC() pred4x4_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:58
mathops.h
bit_depth_template.c
pred8x8_mad_cow_dc_l00
static void FUNC() pred8x8_mad_cow_dc_l00(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:741
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
pred4x4_horizontal_down
static void FUNCC() pred4x4_horizontal_down(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:301
FUNCC
#define FUNCC(a)
Definition: bit_depth_template.c:105
t11
#define t11
Definition: regdef.h:56
pred16x16_vertical_add
static void FUNCC() pred16x16_vertical_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1300
pred8x8_horizontal
static void FUNCC() pred8x8_horizontal(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:516
pred8x8l_horizontal
static void FUNCC() pred8x8l_horizontal(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:925
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
PREDICT_8x8_LOAD_TOP
#define PREDICT_8x8_LOAD_TOP
Definition: h264pred_template.c:858
t12
#define t12
Definition: regdef.h:58
AV_RN4PA
#define AV_RN4PA
Definition: bit_depth_template.c:92
t8
#define t8
Definition: regdef.h:53
PRED8x8_X
#define PRED8x8_X(n, v)
Definition: h264pred_template.c:541
pred8x8l_down_left
static void FUNCC() pred8x8l_down_left(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:963
SRC
#define SRC(x, y)
Definition: h264pred_template.c:847
pred8x16_plane
static void FUNCC() pred8x16_plane(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:805
src0
#define src0
Definition: h264pred.c:138
pred4x4_down_left
static void FUNCC() pred4x4_down_left(uint8_t *_src, const uint8_t *_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:192
src1
#define src1
Definition: h264pred.c:139
ROW
#define ROW(y)
pred8x8l_dc
static void FUNCC() pred8x8l_dc(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:913
pred8x8l_vertical_right
static void FUNCC() pred8x8l_vertical_right(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1010
pred4x4_vertical_right
static void FUNCC() pred4x4_vertical_right(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:220
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
pred8x16_top_dc
static void FUNCC() pred8x16_top_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:622
t4
#define t4
Definition: regdef.h:32
t3
#define t3
Definition: regdef.h:31
pred8x16_left_dc
static void FUNCC() pred8x16_left_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:590
uint8_t
uint8_t
Definition: audio_convert.c:194
PRED16x16_X
#define PRED16x16_X(n, v)
Definition: h264pred_template.c:421
pred4x4_129_dc
static void FUNCC() pred4x4_129_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:127
dc2splat
<<(BIT_DEPTH-1))+0) PRED8x8_X(129,(1<<(BIT_DEPTH-1))+1) static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride) { FUNCC(pred8x8_128_dc)(_src, stride);FUNCC(pred8x8_128_dc)(_src+8 *stride, stride);} static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride) { int i;int dc0, dc2;pixel4 dc0splat, dc2splat;pixel *src=(pixel *) _src;stride >>=sizeof(pixel) -1;dc0=dc2=0;for(i=0;i< 4;i++){ dc0+=src[-1+i *stride];dc2+=src[-1+(i+4) *stride];} dc0splat=PIXEL_SPLAT_X4((dc0+2)>> dc2splat
Definition: h264pred_template.c:578
pred16x16_vertical
static void FUNCC() pred16x16_vertical(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:329
pred4x4_128_dc
static void FUNCC() pred4x4_128_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:101
t2
#define t2
Definition: regdef.h:30
pred8x16_mad_cow_dc_l0t
static void FUNC() pred8x16_mad_cow_dc_l0t(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:723
FUNC
#define FUNC(a)
Definition: bit_depth_template.c:104
pred8x8l_vertical_add
static void FUNCC() pred8x8l_vertical_add(uint8_t *_pix, int16_t *_block, ptrdiff_t stride)
Definition: h264pred_template.c:1251
pred8x8l_horizontal_filter_add
static void FUNCC() pred8x8l_horizontal_filter_add(uint8_t *_src, int16_t *_block, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1164
pred8x8l_128_dc
static void FUNCC() pred8x8l_128_dc(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:885
pred4x4_vertical
static void FUNCC() pred4x4_vertical(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:34
pred16x16_dc
static void FUNCC() pred16x16_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:372
pred4x4_vertical_left
static void FUNCC() pred4x4_vertical_left(uint8_t *_src, const uint8_t *_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:248
t9
#define t9
Definition: regdef.h:54
CLIP
@ CLIP
Definition: qdrw.c:36
pred8x8l_horizontal_down
static void FUNCC() pred8x8l_horizontal_down(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1041
pred8x8l_vertical_left
static void FUNCC() pred8x8l_vertical_left(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1072
pred8x8l_vertical
static void FUNCC() pred8x8l_vertical(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:939
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
pred8x16_vertical
static void FUNCC() pred8x16_vertical(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:502
pred8x16_horizontal
static void FUNCC() pred8x16_horizontal(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:529
PREDICT_8x8_LOAD_TOPLEFT
#define PREDICT_8x8_LOAD_TOPLEFT
Definition: h264pred_template.c:874
V
<<(BIT_DEPTH-1))+0) PRED16x16_X(129,(1<<(BIT_DEPTH-1))+1) static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, ptrdiff_t _stride, const int svq3, const int rv40) { int i, j, k;int a;INIT_CLIP pixel *src=(pixel *) _src;int stride=_stride >>(sizeof(pixel) -1);const pixel *const src0=src+7-stride;const pixel *src1=src+8 *stride-1;const pixel *src2=src1-2 *stride;int H=src0[1] - src0[-1];int V=src1[0] - src2[0];for(k=2;k<=8;++k) { src1+=stride;src2 -=stride;H+=k *(src0[k] - src0[-k]);V+=k *(src1[0] - src2[0]);} if(svq3){ H=(5 *(H/4))/16;V=(5 *(V/4))/16;i=H;H=V;V=i;}else if(rv40){ H=(H+(H >> V
Definition: h264pred_template.c:462
pred4x4_vertical_add
static void FUNCC() pred4x4_vertical_add(uint8_t *_pix, int16_t *_block, ptrdiff_t stride)
Definition: h264pred_template.c:1210