FFmpeg
vc1dsp.c
Go to the documentation of this file.
1 /*
2  * VC-1 and WMV3 decoder - DSP functions
3  * Copyright (c) 2006 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * VC-1 and WMV3 decoder
25  */
26 
27 #include "libavutil/avassert.h"
28 #include "libavutil/common.h"
29 #include "libavutil/intreadwrite.h"
30 #include "h264chroma.h"
31 #include "qpeldsp.h"
32 #include "rnd_avg.h"
33 #include "vc1dsp.h"
34 #include "startcode.h"
35 
/**
 * Apply the overlap transform across a horizontal edge.
 *
 * Processes 8 adjacent columns starting at src. In each column the two
 * samples above the edge (src[-2 * stride], src[-stride]) and the two samples
 * at/below it (src[0], src[stride]) are smoothed in place.
 *
 * @param src    pointer to the first sample below the edge
 * @param stride distance in bytes between vertically adjacent samples
 */
static void vc1_v_overlap_c(uint8_t *src, int stride)
{
    int col;

    for (col = 0; col < 8; col++) {
        uint8_t *p = src + col;
        /* rounding term alternates per column: 1 on even, 0 on odd columns */
        const int rnd = !(col & 1);
        const int a   = p[-2 * stride];
        const int b   = p[-stride];
        const int c   = p[0];
        const int d   = p[stride];
        const int d1  = (a - d + 3 + rnd) >> 3;
        const int d2  = (a - d + b - c + 4 - rnd) >> 3;

        p[-2 * stride] = a - d1;
        p[-stride]     = av_clip_uint8(b - d2);
        p[0]           = av_clip_uint8(c + d2);
        p[stride]      = d + d1;
    }
}
59 
/**
 * Apply the overlap transform across a vertical edge.
 *
 * Processes 8 consecutive rows starting at src. In each row the two samples
 * left of the edge (src[-2], src[-1]) and the two samples at/right of it
 * (src[0], src[1]) are smoothed in place.
 *
 * @param src    pointer to the first sample right of the edge
 * @param stride distance in bytes between vertically adjacent samples
 */
static void vc1_h_overlap_c(uint8_t *src, int stride)
{
    int row;

    for (row = 0; row < 8; row++, src += stride) {
        /* rounding term alternates per row: 1 on even, 0 on odd rows */
        const int rnd = !(row & 1);
        const int a   = src[-2];
        const int b   = src[-1];
        const int c   = src[0];
        const int d   = src[1];
        const int d1  = (a - d + 3 + rnd) >> 3;
        const int d2  = (a - d + b - c + 4 - rnd) >> 3;

        src[-2] = a - d1;
        src[-1] = av_clip_uint8(b - d2);
        src[0]  = av_clip_uint8(c + d2);
        src[1]  = d + d1;
    }
}
83 
/**
 * Overlap transform on 16-bit samples across the edge between two blocks
 * stacked vertically.
 *
 * For each of 8 columns, the elements at offsets 48 and 56 of top and at
 * offsets 0 and 8 of bottom are filtered in place (these are the last two
 * rows of top and the first two rows of bottom if both blocks hold 8 values
 * per row).
 *
 * @param top    upper block of 16-bit samples
 * @param bottom lower block of 16-bit samples
 */
static void vc1_v_s_overlap_c(int16_t *top, int16_t *bottom)
{
    int col;

    for (col = 0; col < 8; col++) {
        /* rounding pair is (4, 3) on even columns and (3, 4) on odd ones */
        const int rnd1 = (col & 1) ? 3 : 4;
        const int rnd2 = 7 - rnd1;
        const int a    = top[48 + col];
        const int b    = top[56 + col];
        const int c    = bottom[col];
        const int d    = bottom[8 + col];
        const int d1   = a - d;
        const int d2   = a - d + b - c;

        top[48 + col]   = ((a * 8) - d1 + rnd1) >> 3;
        top[56 + col]   = ((b * 8) - d2 + rnd2) >> 3;
        bottom[col]     = ((c * 8) + d2 + rnd1) >> 3;
        bottom[8 + col] = ((d * 8) + d1 + rnd2) >> 3;
    }
}
109 
/**
 * Overlap transform on 16-bit samples across the edge between two blocks
 * placed side by side.
 *
 * For each of 8 rows, left[6], left[7], right[0] and right[1] are filtered
 * in place; the pointers then advance by their respective strides.
 *
 * @param left         left block of 16-bit samples
 * @param right        right block of 16-bit samples
 * @param left_stride  element distance between consecutive rows of left
 * @param right_stride element distance between consecutive rows of right
 * @param flags        bit 1 selects the initial rounding pair ((3, 4) when
 *                     set, (4, 3) otherwise); bit 0 makes the pair swap
 *                     after every row
 */
static void vc1_h_s_overlap_c(int16_t *left, int16_t *right, int left_stride, int right_stride, int flags)
{
    /* 3 ^ 7 == 4 and 4 ^ 7 == 3, so XOR with 7 swaps the two rounding values */
    const int flip = (flags & 1) ? 7 : 0;
    int rnd1 = (flags & 2) ? 3 : 4;
    int rnd2 = 7 - rnd1;
    int row;

    for (row = 0; row < 8; row++) {
        const int a  = left[6];
        const int b  = left[7];
        const int c  = right[0];
        const int d  = right[1];
        const int d1 = a - d;
        const int d2 = a - d + b - c;

        left[6]  = ((a * 8) - d1 + rnd1) >> 3;
        left[7]  = ((b * 8) - d2 + rnd2) >> 3;
        right[0] = ((c * 8) + d2 + rnd1) >> 3;
        right[1] = ((d * 8) + d1 + rnd2) >> 3;

        left  += left_stride;
        right += right_stride;
        rnd1  ^= flip;
        rnd2  ^= flip;
    }
}
138 
139 /**
140  * VC-1 in-loop deblocking filter for one line
141  * @param src source block type
142  * @param stride block stride
143  * @param pq block quantizer
144  * @return whether other 3 pairs should be filtered or not
145  * @see 8.6
146  */
148 {
149  int a0 = (2 * (src[-2 * stride] - src[1 * stride]) -
150  5 * (src[-1 * stride] - src[0 * stride]) + 4) >> 3;
151  int a0_sign = a0 >> 31; /* Store sign */
152 
153  a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
154  if (a0 < pq) {
155  int a1 = FFABS((2 * (src[-4 * stride] - src[-1 * stride]) -
156  5 * (src[-3 * stride] - src[-2 * stride]) + 4) >> 3);
157  int a2 = FFABS((2 * (src[ 0 * stride] - src[ 3 * stride]) -
158  5 * (src[ 1 * stride] - src[ 2 * stride]) + 4) >> 3);
159  if (a1 < a0 || a2 < a0) {
160  int clip = src[-1 * stride] - src[0 * stride];
161  int clip_sign = clip >> 31;
162 
163  clip = ((clip ^ clip_sign) - clip_sign) >> 1;
164  if (clip) {
165  int a3 = FFMIN(a1, a2);
166  int d = 5 * (a3 - a0);
167  int d_sign = (d >> 31);
168 
169  d = ((d ^ d_sign) - d_sign) >> 3;
170  d_sign ^= a0_sign;
171 
172  if (d_sign ^ clip_sign)
173  d = 0;
174  else {
175  d = FFMIN(d, clip);
176  d = (d ^ d_sign) - d_sign; /* Restore sign */
177  src[-1 * stride] = av_clip_uint8(src[-1 * stride] - d);
178  src[ 0 * stride] = av_clip_uint8(src[ 0 * stride] + d);
179  }
180  return 1;
181  }
182  }
183  }
184  return 0;
185 }
186 
/**
 * VC-1 in-loop deblocking filter along one edge.
 *
 * The edge is handled in groups of four lines. The third line of each group
 * is filtered first; the remaining three lines are filtered only when that
 * call returns non-zero.
 *
 * @param src    pointer to the first line of the edge
 * @param step   distance between consecutive lines along the edge
 * @param stride distance between samples across the edge (passed on to
 *               vc1_filter_line)
 * @param len    edge length to filter, handled in groups of 4 lines
 * @param pq     block quantizer
 * @see 8.6
 */
static inline void vc1_loop_filter(uint8_t *src, int step, int stride,
                                   int len, int pq)
{
    int done;

    for (done = 0; done < len; done += 4, src += step * 4) {
        /* the third line decides whether the rest of the group is filtered */
        if (!vc1_filter_line(src + 2 * step, stride, pq))
            continue;

        vc1_filter_line(src + 0 * step, stride, pq);
        vc1_filter_line(src + 1 * step, stride, pq);
        vc1_filter_line(src + 3 * step, stride, pq);
    }
}
212 
/* Loop filter over 4 lines that are 1 byte apart; samples across the edge
 * are `stride` bytes apart. */
static void vc1_v_loop_filter4_c(uint8_t *src, int stride, int pq)
{
    const int step = 1;
    const int len  = 4;

    vc1_loop_filter(src, step, stride, len, pq);
}
217 
/* Loop filter over 4 lines that are `stride` bytes apart; samples across the
 * edge are 1 byte apart. */
static void vc1_h_loop_filter4_c(uint8_t *src, int stride, int pq)
{
    const int tap_distance = 1;
    const int len          = 4;

    vc1_loop_filter(src, stride, tap_distance, len, pq);
}
222 
/* Loop filter over 8 lines that are 1 byte apart; samples across the edge
 * are `stride` bytes apart. */
static void vc1_v_loop_filter8_c(uint8_t *src, int stride, int pq)
{
    const int step = 1;
    const int len  = 8;

    vc1_loop_filter(src, step, stride, len, pq);
}
227 
/* Loop filter over 8 lines that are `stride` bytes apart; samples across the
 * edge are 1 byte apart. */
static void vc1_h_loop_filter8_c(uint8_t *src, int stride, int pq)
{
    const int tap_distance = 1;
    const int len          = 8;

    vc1_loop_filter(src, stride, tap_distance, len, pq);
}
232 
/* Loop filter over 16 lines that are 1 byte apart; samples across the edge
 * are `stride` bytes apart. */
static void vc1_v_loop_filter16_c(uint8_t *src, int stride, int pq)
{
    const int step = 1;
    const int len  = 16;

    vc1_loop_filter(src, step, stride, len, pq);
}
237 
/* Loop filter over 16 lines that are `stride` bytes apart; samples across
 * the edge are 1 byte apart. */
static void vc1_h_loop_filter16_c(uint8_t *src, int stride, int pq)
{
    const int tap_distance = 1;
    const int len          = 16;

    vc1_loop_filter(src, stride, tap_distance, len, pq);
}
242 
/**
 * DC-only inverse transform for an 8x8 block.
 *
 * Scales block[0] in two rounding steps and adds the result, with clipping
 * to 0..255, to every sample of an 8-wide, 8-high area of dest.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficient block; only block[0] is read
 */
static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int row, col;
    int dc = block[0];

    dc = (3 * dc +  1) >> 1;
    dc = (3 * dc + 16) >> 5;

    for (row = 0; row < 8; row++, dest += stride)
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
}
264 
/**
 * In-place 8x8 inverse transform.
 *
 * The first pass reads column n of block and writes the eight results
 * consecutively into a temporary buffer, with rounding 4 and shift 3. The
 * second pass reads column n of the temporary buffer and writes column n of
 * block, with rounding 64 and shift 7; the last four outputs of each column
 * get an extra +1 before the shift.
 *
 * @param block 64 coefficients, replaced by the transform output
 */
static void vc1_inv_trans_8x8_c(int16_t block[64])
{
    int16_t temp[64];
    int n;

    /* first pass: block column n -> temp[8 * n .. 8 * n + 7] */
    for (n = 0; n < 8; n++) {
        const int16_t *in  = block + n;
        int16_t       *out = temp + 8 * n;
        const int e0 = 12 * (in[ 0] + in[32]) + 4;
        const int e1 = 12 * (in[ 0] - in[32]) + 4;
        const int e2 = 16 * in[16] +  6 * in[48];
        const int e3 =  6 * in[16] - 16 * in[48];
        const int s0 = e0 + e2;
        const int s1 = e1 + e3;
        const int s2 = e1 - e3;
        const int s3 = e0 - e2;
        const int o0 = 16 * in[ 8] + 15 * in[24] +  9 * in[40] +  4 * in[56];
        const int o1 = 15 * in[ 8] -  4 * in[24] - 16 * in[40] -  9 * in[56];
        const int o2 =  9 * in[ 8] - 16 * in[24] +  4 * in[40] + 15 * in[56];
        const int o3 =  4 * in[ 8] -  9 * in[24] + 15 * in[40] - 16 * in[56];

        out[0] = (s0 + o0) >> 3;
        out[1] = (s1 + o1) >> 3;
        out[2] = (s2 + o2) >> 3;
        out[3] = (s3 + o3) >> 3;
        out[4] = (s3 - o3) >> 3;
        out[5] = (s2 - o2) >> 3;
        out[6] = (s1 - o1) >> 3;
        out[7] = (s0 - o0) >> 3;
    }

    /* second pass: temp column n -> block column n */
    for (n = 0; n < 8; n++) {
        const int16_t *in  = temp + n;
        int16_t       *out = block + n;
        const int e0 = 12 * (in[ 0] + in[32]) + 64;
        const int e1 = 12 * (in[ 0] - in[32]) + 64;
        const int e2 = 16 * in[16] +  6 * in[48];
        const int e3 =  6 * in[16] - 16 * in[48];
        const int s0 = e0 + e2;
        const int s1 = e1 + e3;
        const int s2 = e1 - e3;
        const int s3 = e0 - e2;
        const int o0 = 16 * in[ 8] + 15 * in[24] +  9 * in[40] +  4 * in[56];
        const int o1 = 15 * in[ 8] -  4 * in[24] - 16 * in[40] -  9 * in[56];
        const int o2 =  9 * in[ 8] - 16 * in[24] +  4 * in[40] + 15 * in[56];
        const int o3 =  4 * in[ 8] -  9 * in[24] + 15 * in[40] - 16 * in[56];

        out[ 0] = (s0 + o0) >> 7;
        out[ 8] = (s1 + o1) >> 7;
        out[16] = (s2 + o2) >> 7;
        out[24] = (s3 + o3) >> 7;
        out[32] = (s3 - o3 + 1) >> 7;
        out[40] = (s2 - o2 + 1) >> 7;
        out[48] = (s1 - o1 + 1) >> 7;
        out[56] = (s0 - o0 + 1) >> 7;
    }
}
333 
/**
 * DC-only inverse transform for an 8x4 part of a block.
 *
 * Scales block[0] in two rounding steps and adds the result, with clipping
 * to 0..255, to every sample of an 8-wide, 4-high area of dest.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficient block; only block[0] is read
 */
static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int row, col;
    int dc = block[0];

    dc = ( 3 * dc +  1) >> 1;
    dc = (17 * dc + 64) >> 7;

    for (row = 0; row < 4; row++, dest += stride)
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
}
355 
/**
 * Inverse transform of an 8x4 part of a block, added to dest.
 *
 * The first pass runs an 8-point transform in place on each of the first
 * 4 rows of block (8 coefficients per row). The second pass runs a 4-point
 * transform down each of the 8 columns and adds the result, with clipping
 * to 0..255, to the destination samples.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficients; the first 4 rows are overwritten by pass one
 */
static void vc1_inv_trans_8x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int n;

    /* first pass: 8-point transform on rows, in place */
    for (n = 0; n < 4; n++) {
        int16_t *row = block + 8 * n;
        const int e0 = 12 * (row[0] + row[4]) + 4;
        const int e1 = 12 * (row[0] - row[4]) + 4;
        const int e2 = 16 * row[2] +  6 * row[6];
        const int e3 =  6 * row[2] - 16 * row[6];
        const int s0 = e0 + e2;
        const int s1 = e1 + e3;
        const int s2 = e1 - e3;
        const int s3 = e0 - e2;
        const int o0 = 16 * row[1] + 15 * row[3] +  9 * row[5] +  4 * row[7];
        const int o1 = 15 * row[1] -  4 * row[3] - 16 * row[5] -  9 * row[7];
        const int o2 =  9 * row[1] - 16 * row[3] +  4 * row[5] + 15 * row[7];
        const int o3 =  4 * row[1] -  9 * row[3] + 15 * row[5] - 16 * row[7];

        row[0] = (s0 + o0) >> 3;
        row[1] = (s1 + o1) >> 3;
        row[2] = (s2 + o2) >> 3;
        row[3] = (s3 + o3) >> 3;
        row[4] = (s3 - o3) >> 3;
        row[5] = (s2 - o2) >> 3;
        row[6] = (s1 - o1) >> 3;
        row[7] = (s0 - o0) >> 3;
    }

    /* second pass: 4-point transform on columns, accumulated into dest */
    for (n = 0; n < 8; n++) {
        const int16_t *col = block + n;
        uint8_t       *out = dest + n;
        const int sum = 17 * (col[ 0] + col[16]) + 64;
        const int dif = 17 * (col[ 0] - col[16]) + 64;
        const int o0  = 22 * col[ 8] + 10 * col[24];
        const int o1  = 22 * col[24] - 10 * col[ 8];

        out[0 * stride] = av_clip_uint8(out[0 * stride] + ((sum + o0) >> 7));
        out[1 * stride] = av_clip_uint8(out[1 * stride] + ((dif - o1) >> 7));
        out[2 * stride] = av_clip_uint8(out[2 * stride] + ((dif + o1) >> 7));
        out[3 * stride] = av_clip_uint8(out[3 * stride] + ((sum - o0) >> 7));
    }
}
410 
/**
 * DC-only inverse transform for a 4x8 part of a block.
 *
 * Scales block[0] in two rounding steps and adds the result, with clipping
 * to 0..255, to every sample of a 4-wide, 8-high area of dest.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficient block; only block[0] is read
 */
static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int row, col;
    int dc = block[0];

    dc = (17 * dc +  4) >> 3;
    dc = (12 * dc + 64) >> 7;

    for (row = 0; row < 8; row++, dest += stride)
        for (col = 0; col < 4; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
}
428 
/**
 * Inverse transform of a 4x8 part of a block, added to dest.
 *
 * The first pass runs a 4-point transform in place on the first 4
 * coefficients of each of the 8 rows of block (rows are 8 coefficients
 * apart). The second pass runs an 8-point transform down each of the first
 * 4 columns and adds the result, with clipping to 0..255, to dest.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficients; partially overwritten by pass one
 */
static void vc1_inv_trans_4x8_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int n;

    /* first pass: 4-point transform on rows, in place */
    for (n = 0; n < 8; n++) {
        int16_t *row = block + 8 * n;
        const int sum = 17 * (row[0] + row[2]) + 4;
        const int dif = 17 * (row[0] - row[2]) + 4;
        const int o0  = 22 * row[1] + 10 * row[3];
        const int o1  = 22 * row[3] - 10 * row[1];

        row[0] = (sum + o0) >> 3;
        row[1] = (dif - o1) >> 3;
        row[2] = (dif + o1) >> 3;
        row[3] = (sum - o0) >> 3;
    }

    /* second pass: 8-point transform on columns, accumulated into dest */
    for (n = 0; n < 4; n++) {
        const int16_t *col = block + n;
        uint8_t       *out = dest + n;
        const int e0 = 12 * (col[ 0] + col[32]) + 64;
        const int e1 = 12 * (col[ 0] - col[32]) + 64;
        const int e2 = 16 * col[16] +  6 * col[48];
        const int e3 =  6 * col[16] - 16 * col[48];
        const int s0 = e0 + e2;
        const int s1 = e1 + e3;
        const int s2 = e1 - e3;
        const int s3 = e0 - e2;
        const int o0 = 16 * col[ 8] + 15 * col[24] +  9 * col[40] +  4 * col[56];
        const int o1 = 15 * col[ 8] -  4 * col[24] - 16 * col[40] -  9 * col[56];
        const int o2 =  9 * col[ 8] - 16 * col[24] +  4 * col[40] + 15 * col[56];
        const int o3 =  4 * col[ 8] -  9 * col[24] + 15 * col[40] - 16 * col[56];

        out[0 * stride] = av_clip_uint8(out[0 * stride] + ((s0 + o0)     >> 7));
        out[1 * stride] = av_clip_uint8(out[1 * stride] + ((s1 + o1)     >> 7));
        out[2 * stride] = av_clip_uint8(out[2 * stride] + ((s2 + o2)     >> 7));
        out[3 * stride] = av_clip_uint8(out[3 * stride] + ((s3 + o3)     >> 7));
        out[4 * stride] = av_clip_uint8(out[4 * stride] + ((s3 - o3 + 1) >> 7));
        out[5 * stride] = av_clip_uint8(out[5 * stride] + ((s2 - o2 + 1) >> 7));
        out[6 * stride] = av_clip_uint8(out[6 * stride] + ((s1 - o1 + 1) >> 7));
        out[7 * stride] = av_clip_uint8(out[7 * stride] + ((s0 - o0 + 1) >> 7));
    }
}
483 
/**
 * DC-only inverse transform for a 4x4 part of a block.
 *
 * Scales block[0] in two rounding steps and adds the result, with clipping
 * to 0..255, to every sample of a 4-wide, 4-high area of dest.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficient block; only block[0] is read
 */
static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int row, col;
    int dc = block[0];

    dc = (17 * dc +  4) >> 3;
    dc = (17 * dc + 64) >> 7;

    for (row = 0; row < 4; row++, dest += stride)
        for (col = 0; col < 4; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
}
501 
/**
 * Inverse transform of a 4x4 part of a block, added to dest.
 *
 * The first pass runs a 4-point transform in place on the first 4
 * coefficients of each of the first 4 rows of block (rows are 8 coefficients
 * apart). The second pass runs the same 4-point transform down each of the
 * 4 columns and adds the result, with clipping to 0..255, to dest.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficients; partially overwritten by pass one
 */
static void vc1_inv_trans_4x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int n;

    /* first pass: rows, in place (rounding 4, shift 3) */
    for (n = 0; n < 4; n++) {
        int16_t *row = block + 8 * n;
        const int sum = 17 * (row[0] + row[2]) + 4;
        const int dif = 17 * (row[0] - row[2]) + 4;
        const int o0  = 22 * row[1] + 10 * row[3];
        const int o1  = 22 * row[3] - 10 * row[1];

        row[0] = (sum + o0) >> 3;
        row[1] = (dif - o1) >> 3;
        row[2] = (dif + o1) >> 3;
        row[3] = (sum - o0) >> 3;
    }

    /* second pass: columns, accumulated into dest (rounding 64, shift 7) */
    for (n = 0; n < 4; n++) {
        const int16_t *col = block + n;
        uint8_t       *out = dest + n;
        const int sum = 17 * (col[0] + col[16]) + 64;
        const int dif = 17 * (col[0] - col[16]) + 64;
        const int o0  = 22 * col[8]  + 10 * col[24];
        const int o1  = 22 * col[24] - 10 * col[8];

        out[0 * stride] = av_clip_uint8(out[0 * stride] + ((sum + o0) >> 7));
        out[1 * stride] = av_clip_uint8(out[1 * stride] + ((dif - o1) >> 7));
        out[2 * stride] = av_clip_uint8(out[2 * stride] + ((dif + o1) >> 7));
        out[3 * stride] = av_clip_uint8(out[3 * stride] + ((sum - o0) >> 7));
    }
}
541 
542 /* motion compensation functions */
543 
544 /* Filter in case of 2 filters */
545 #define VC1_MSPEL_FILTER_16B(DIR, TYPE) \
546 static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, \
547  int stride, \
548  int mode) \
549 { \
550  switch(mode) { \
551  case 0: /* no shift - should not occur */ \
552  return 0; \
553  case 1: /* 1/4 shift */ \
554  return -4 * src[-stride] + 53 * src[0] + \
555  18 * src[stride] - 3 * src[stride * 2]; \
556  case 2: /* 1/2 shift */ \
557  return -1 * src[-stride] + 9 * src[0] + \
558  9 * src[stride] - 1 * src[stride * 2]; \
559  case 3: /* 3/4 shift */ \
560  return -3 * src[-stride] + 18 * src[0] + \
561  53 * src[stride] - 4 * src[stride * 2]; \
562  } \
563  return 0; /* should not occur */ \
564 }
565 
567 VC1_MSPEL_FILTER_16B(hor, int16_t)
568 
569 /* Filter used to interpolate fractional pel values */
571  int mode, int r)
572 {
573  switch (mode) {
574  case 0: // no shift
575  return src[0];
576  case 1: // 1/4 shift
577  return (-4 * src[-stride] + 53 * src[0] +
578  18 * src[stride] - 3 * src[stride * 2] + 32 - r) >> 6;
579  case 2: // 1/2 shift
580  return (-1 * src[-stride] + 9 * src[0] +
581  9 * src[stride] - 1 * src[stride * 2] + 8 - r) >> 4;
582  case 3: // 3/4 shift
583  return (-3 * src[-stride] + 18 * src[0] +
584  53 * src[stride] - 4 * src[stride * 2] + 32 - r) >> 6;
585  }
586  return 0; // should not occur
587 }
588 
/*
 * Motion compensation with bicubic interpolation.
 *
 * For a given OP / OP4 / OPNAME this generates:
 *   - <OPNAME>vc1_mspel_mc():    8x8 block
 *   - <OPNAME>vc1_mspel_mc_16(): 16x16 block
 *   - <OPNAME>pixels8x8_c() and <OPNAME>pixels16x16_c(): no interpolation,
 *     4 bytes at a time through OP4
 *
 * In the interpolating functions, vmode selects the filter applied across
 * rows (tap distance = stride) and hmode the one applied within a row (tap
 * distance = 1). When both are non-zero, the row-crossing filter output is
 * kept as 16-bit intermediates that start one column left of the block and
 * span three columns more than its width, and the in-row filter then runs
 * on those intermediates.
 */
#define VC1_MSPEL_MC(OP, OP4, OPNAME)                                         \
static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst,             \
                                                    const uint8_t *src,       \
                                                    ptrdiff_t stride,         \
                                                    int hmode,                \
                                                    int vmode,                \
                                                    int rnd)                  \
{                                                                             \
    int row, col;                                                             \
                                                                              \
    if (!vmode) { /* in-row filter only */                                    \
        for (row = 0; row < 8; row++) {                                       \
            for (col = 0; col < 8; col++)                                     \
                OP(dst[col], vc1_mspel_filter(src + col, 1, hmode, rnd));     \
            dst += stride;                                                    \
            src += stride;                                                    \
        }                                                                     \
        return;                                                               \
    }                                                                         \
                                                                              \
    if (!hmode) { /* row-crossing filter only, straight to dst */             \
        const int r = 1 - rnd;                                                \
                                                                              \
        for (row = 0; row < 8; row++) {                                       \
            for (col = 0; col < 8; col++)                                     \
                OP(dst[col], vc1_mspel_filter(src + col, stride, vmode, r));  \
            src += stride;                                                    \
            dst += stride;                                                    \
        }                                                                     \
        return;                                                               \
    }                                                                         \
                                                                              \
    { /* both filters: row-crossing pass into tmp, then in-row pass */        \
        static const int shift_value[] = { 0, 5, 1, 5 };                      \
        const int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;     \
        int16_t tmp[11 * 8], *tptr = tmp;                                     \
        int r = (1 << (shift - 1)) + rnd - 1;                                 \
                                                                              \
        src -= 1;                                                             \
        for (row = 0; row < 8; row++) {                                       \
            for (col = 0; col < 11; col++)                                    \
                tptr[col] = (vc1_mspel_ver_filter_16bits(src + col, stride, vmode) + r) >> shift; \
            src  += stride;                                                   \
            tptr += 11;                                                       \
        }                                                                     \
                                                                              \
        r    = 64 - rnd;                                                      \
        tptr = tmp + 1;                                                       \
        for (row = 0; row < 8; row++) {                                       \
            for (col = 0; col < 8; col++)                                     \
                OP(dst[col], (vc1_mspel_hor_filter_16bits(tptr + col, 1, hmode) + r) >> 7); \
            dst  += stride;                                                   \
            tptr += 11;                                                       \
        }                                                                     \
    }                                                                         \
}                                                                             \
static av_always_inline void OPNAME ## vc1_mspel_mc_16(uint8_t *dst,          \
                                                       const uint8_t *src,    \
                                                       ptrdiff_t stride,      \
                                                       int hmode,             \
                                                       int vmode,             \
                                                       int rnd)               \
{                                                                             \
    int row, col;                                                             \
                                                                              \
    if (!vmode) { /* in-row filter only */                                    \
        for (row = 0; row < 16; row++) {                                      \
            for (col = 0; col < 16; col++)                                    \
                OP(dst[col], vc1_mspel_filter(src + col, 1, hmode, rnd));     \
            dst += stride;                                                    \
            src += stride;                                                    \
        }                                                                     \
        return;                                                               \
    }                                                                         \
                                                                              \
    if (!hmode) { /* row-crossing filter only, straight to dst */             \
        const int r = 1 - rnd;                                                \
                                                                              \
        for (row = 0; row < 16; row++) {                                      \
            for (col = 0; col < 16; col++)                                    \
                OP(dst[col], vc1_mspel_filter(src + col, stride, vmode, r));  \
            src += stride;                                                    \
            dst += stride;                                                    \
        }                                                                     \
        return;                                                               \
    }                                                                         \
                                                                              \
    { /* both filters: row-crossing pass into tmp, then in-row pass */        \
        static const int shift_value[] = { 0, 5, 1, 5 };                      \
        const int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;     \
        int16_t tmp[19 * 16], *tptr = tmp;                                    \
        int r = (1 << (shift - 1)) + rnd - 1;                                 \
                                                                              \
        src -= 1;                                                             \
        for (row = 0; row < 16; row++) {                                      \
            for (col = 0; col < 19; col++)                                    \
                tptr[col] = (vc1_mspel_ver_filter_16bits(src + col, stride, vmode) + r) >> shift; \
            src  += stride;                                                   \
            tptr += 19;                                                       \
        }                                                                     \
                                                                              \
        r    = 64 - rnd;                                                      \
        tptr = tmp + 1;                                                       \
        for (row = 0; row < 16; row++) {                                      \
            for (col = 0; col < 16; col++)                                    \
                OP(dst[col], (vc1_mspel_hor_filter_16bits(tptr + col, 1, hmode) + r) >> 7); \
            dst  += stride;                                                   \
            tptr += 19;                                                       \
        }                                                                     \
    }                                                                         \
}                                                                             \
static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){ \
    int row, w;                                                               \
    for (row = 0; row < 8; row++) {                                           \
        for (w = 0; w < 8; w += 4)                                            \
            OP4(*(uint32_t*)(block + w), AV_RN32(pixels + w));                \
        pixels += line_size;                                                  \
        block  += line_size;                                                  \
    }                                                                         \
}                                                                             \
static void OPNAME ## pixels16x16_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){ \
    int row, w;                                                               \
    for (row = 0; row < 16; row++) {                                          \
        for (w = 0; w < 16; w += 4)                                           \
            OP4(*(uint32_t*)(block + w), AV_RN32(pixels + w));                \
        pixels += line_size;                                                  \
        block  += line_size;                                                  \
    }                                                                         \
}
727 
/* Per-sample store operations: plain store or rounded average with the
 * current destination value, after clipping the new value to 0..255. */
#define op_put(dst, val)  (dst) = av_clip_uint8(val)
#define op_avg(dst, val)  (dst) = ((dst) + av_clip_uint8(val) + 1) >> 1
/* Same two operations on packed 32-bit words. */
#define op4_avg(dst, val) (dst) = rnd_avg32(dst, val)
#define op4_put(dst, val) (dst) = (val)
732 
735 
/*
 * Pixel functions - thin entry points into vc1_mspel_mc.
 *
 * PUT_VC1_MSPEL(HM, VM) defines the put/avg wrappers for 8x8 and 16x16
 * blocks with the two mode arguments fixed to HM and VM.
 */
#define PUT_VC1_MSPEL(HM, VM)                                                 \
static void put_vc1_mspel_mc ## HM ## VM ## _c(uint8_t *dst,                  \
                                               const uint8_t *src,            \
                                               ptrdiff_t stride, int rnd)     \
{                                                                             \
    put_vc1_mspel_mc(dst, src, stride, HM, VM, rnd);                          \
}                                                                             \
static void put_vc1_mspel_mc ## HM ## VM ## _16_c(uint8_t *dst,               \
                                                  const uint8_t *src,         \
                                                  ptrdiff_t stride, int rnd)  \
{                                                                             \
    put_vc1_mspel_mc_16(dst, src, stride, HM, VM, rnd);                       \
}                                                                             \
static void avg_vc1_mspel_mc ## HM ## VM ## _c(uint8_t *dst,                  \
                                               const uint8_t *src,            \
                                               ptrdiff_t stride, int rnd)     \
{                                                                             \
    avg_vc1_mspel_mc(dst, src, stride, HM, VM, rnd);                          \
}                                                                             \
static void avg_vc1_mspel_mc ## HM ## VM ## _16_c(uint8_t *dst,               \
                                                  const uint8_t *src,         \
                                                  ptrdiff_t stride, int rnd)  \
{                                                                             \
    avg_vc1_mspel_mc_16(dst, src, stride, HM, VM, rnd);                       \
}

/* every (first, second) mode pair except (0, 0) */
PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
782 
/* Bilinear blend of the 2x2 neighbourhood at column a; expects A, B, C, D,
 * src and stride to be in scope at the point of use. */
#define chroma_mc(a) \
    ((A * src[a] + B * src[a + 1] + \
      C * src[stride + a] + D * src[stride + a + 1] + 32 - 4) >> 6)

/**
 * Chroma motion compensation for an 8-sample-wide block, storing the result.
 *
 * @param dst    destination, h rows of 8 samples
 * @param src    source; one extra column and one extra row are read
 * @param stride distance in bytes between rows, used for both dst and src
 * @param h      number of rows
 * @param x      horizontal weight, 0..7
 * @param y      vertical weight, 0..7
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
                                        uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B =      (x) * (8 - y);
    const int C = (8 - x) *      (y);
    const int D =      (x) *      (y);
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++, dst += stride, src += stride)
        for (col = 0; col < 8; col++)
            dst[col] = chroma_mc(col);
}
811 
813  ptrdiff_t stride, int h, int x, int y)
814 {
815  const int A = (8 - x) * (8 - y);
816  const int B = (x) * (8 - y);
817  const int C = (8 - x) * (y);
818  const int D = (x) * (y);
819  int i;
820 
821  av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
822 
823  for (i = 0; i < h; i++) {
824  dst[0] = chroma_mc(0);
825  dst[1] = chroma_mc(1);
826  dst[2] = chroma_mc(2);
827  dst[3] = chroma_mc(3);
828  dst += stride;
829  src += stride;
830  }
831 }
832 
/* Average of two values, rounding up on ties. */
#define avg2(a, b) (((a) + (b) + 1) >> 1)

/**
 * Chroma motion compensation for an 8-sample-wide block, averaging the
 * interpolated value with what is already in dst.
 *
 * @param dst    destination, h rows of 8 samples (read and written)
 * @param src    source; one extra column and one extra row are read
 * @param stride distance in bytes between rows, used for both dst and src
 * @param h      number of rows
 * @param x      horizontal weight, 0..7
 * @param y      vertical weight, 0..7
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
                                        uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B =      (x) * (8 - y);
    const int C = (8 - x) *      (y);
    const int D =      (x) *      (y);
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++, dst += stride, src += stride)
        for (col = 0; col < 8; col++)
            dst[col] = avg2(dst[col], chroma_mc(col));
}
859 
/**
 * Chroma motion compensation for a 4-sample-wide block, averaging the
 * interpolated value with what is already in dst.
 *
 * @param dst    destination, h rows of 4 samples (read and written)
 * @param src    source; one extra column and one extra row are read
 * @param stride distance in bytes between rows, used for both dst and src
 * @param h      number of rows
 * @param x      horizontal weight, 0..7
 * @param y      vertical weight, 0..7
 */
static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */,
                                        uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B =      (x) * (8 - y);
    const int C = (8 - x) *      (y);
    const int D =      (x) *      (y);
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++, dst += stride, src += stride)
        for (col = 0; col < 4; col++)
            dst[col] = avg2(dst[col], chroma_mc(col));
}
881 
882 #if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
883 
/**
 * Resample one line with linear interpolation.
 *
 * offset is a 16.16 fixed-point position into src: the upper bits select the
 * sample pair, the low 16 bits are the blend weight of the second sample.
 *
 * @param dst     output, count samples are written
 * @param src     input line
 * @param offset  starting position (16.16 fixed point)
 * @param advance position increment per output sample (16.16 fixed point)
 * @param count   number of output samples
 */
static void sprite_h_c(uint8_t *dst, const uint8_t *src, int offset,
                       int advance, int count)
{
    for (; count; count--, offset += advance) {
        const uint8_t *pair = src + (offset >> 16);
        const int frac = offset & 0xFFFF;
        const int a    = pair[0];
        const int b    = pair[1];

        *dst++ = a + ((b - a) * frac >> 16);
    }
}
894 
895 static av_always_inline void sprite_v_template(uint8_t *dst,
896  const uint8_t *src1a,
897  const uint8_t *src1b,
898  int offset1,
899  int two_sprites,
900  const uint8_t *src2a,
901  const uint8_t *src2b,
902  int offset2,
903  int alpha, int scaled,
904  int width)
905 {
906  int a1, b1, a2, b2;
907  while (width--) {
908  a1 = *src1a++;
909  if (scaled) {
910  b1 = *src1b++;
911  a1 = a1 + ((b1 - a1) * offset1 >> 16);
912  }
913  if (two_sprites) {
914  a2 = *src2a++;
915  if (scaled > 1) {
916  b2 = *src2b++;
917  a2 = a2 + ((b2 - a2) * offset2 >> 16);
918  }
919  a1 = a1 + ((a2 - a1) * alpha >> 16);
920  }
921  *dst++ = a1;
922  }
923 }
924 
/* One sprite, interpolated between src1a and src1b with weight offset. */
static void sprite_v_single_c(uint8_t *dst, const uint8_t *src1a,
                              const uint8_t *src1b,
                              int offset, int width)
{
    const int two_sprites = 0;
    const int scaled      = 1;

    sprite_v_template(dst, src1a, src1b, offset, two_sprites,
                      NULL, NULL, 0, 0, scaled, width);
}
931 
/* Two sprites blended with weight alpha; neither sprite is interpolated. */
static void sprite_v_double_noscale_c(uint8_t *dst, const uint8_t *src1a,
                                      const uint8_t *src2a,
                                      int alpha, int width)
{
    const int two_sprites = 1;
    const int scaled      = 0;

    sprite_v_template(dst, src1a, NULL, 0, two_sprites,
                      src2a, NULL, 0, alpha, scaled, width);
}
938 
/* Two sprites blended with weight alpha; only the first is interpolated. */
static void sprite_v_double_onescale_c(uint8_t *dst,
                                       const uint8_t *src1a,
                                       const uint8_t *src1b,
                                       int offset1,
                                       const uint8_t *src2a,
                                       int alpha, int width)
{
    const int two_sprites = 1;
    const int scaled      = 1;

    sprite_v_template(dst, src1a, src1b, offset1, two_sprites,
                      src2a, NULL, 0, alpha, scaled, width);
}
949 
/* Two sprites blended with weight alpha; both sprites are interpolated. */
static void sprite_v_double_twoscale_c(uint8_t *dst,
                                       const uint8_t *src1a,
                                       const uint8_t *src1b,
                                       int offset1,
                                       const uint8_t *src2a,
                                       const uint8_t *src2b,
                                       int offset2,
                                       int alpha,
                                       int width)
{
    const int two_sprites = 1;
    const int scaled      = 2;

    sprite_v_template(dst, src1a, src1b, offset1, two_sprites,
                      src2a, src2b, offset2, alpha, scaled, width);
}
963 
964 #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
/* Install the put/avg functions for mode pair (HM, VM) at table slot
 * HM + 4 * VM: table [0] gets the _16_c variants, table [1] the _c ones.
 * Expects a variable named dsp in scope; the last statement has no
 * semicolon so the caller supplies it. */
#define FN_ASSIGN(HM, VM)                                                      \
    dsp->put_vc1_mspel_pixels_tab[0][HM+4*VM] = put_vc1_mspel_mc##HM##VM##_16_c; \
    dsp->put_vc1_mspel_pixels_tab[1][HM+4*VM] = put_vc1_mspel_mc##HM##VM##_c;  \
    dsp->avg_vc1_mspel_pixels_tab[0][HM+4*VM] = avg_vc1_mspel_mc##HM##VM##_16_c; \
    dsp->avg_vc1_mspel_pixels_tab[1][HM+4*VM] = avg_vc1_mspel_mc##HM##VM##_c
970 
972 {
981 
986 
993 
994  dsp->put_vc1_mspel_pixels_tab[0][0] = put_pixels16x16_c;
995  dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_pixels16x16_c;
996  dsp->put_vc1_mspel_pixels_tab[1][0] = put_pixels8x8_c;
997  dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_pixels8x8_c;
998  FN_ASSIGN(0, 1);
999  FN_ASSIGN(0, 2);
1000  FN_ASSIGN(0, 3);
1001 
1002  FN_ASSIGN(1, 0);
1003  FN_ASSIGN(1, 1);
1004  FN_ASSIGN(1, 2);
1005  FN_ASSIGN(1, 3);
1006 
1007  FN_ASSIGN(2, 0);
1008  FN_ASSIGN(2, 1);
1009  FN_ASSIGN(2, 2);
1010  FN_ASSIGN(2, 3);
1011 
1012  FN_ASSIGN(3, 0);
1013  FN_ASSIGN(3, 1);
1014  FN_ASSIGN(3, 2);
1015  FN_ASSIGN(3, 3);
1016 
1021 
1022 #if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
1023  dsp->sprite_h = sprite_h_c;
1024  dsp->sprite_v_single = sprite_v_single_c;
1025  dsp->sprite_v_double_noscale = sprite_v_double_noscale_c;
1026  dsp->sprite_v_double_onescale = sprite_v_double_onescale_c;
1027  dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c;
1028 #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
1029 
1031 
1032  if (ARCH_AARCH64)
1034  if (ARCH_ARM)
1035  ff_vc1dsp_init_arm(dsp);
1036  if (ARCH_PPC)
1037  ff_vc1dsp_init_ppc(dsp);
1038  if (ARCH_X86)
1039  ff_vc1dsp_init_x86(dsp);
1040  if (ARCH_MIPS)
1041  ff_vc1dsp_init_mips(dsp);
1042 }
VC1DSPContext::sprite_v_double_noscale
void(* sprite_v_double_noscale)(uint8_t *dst, const uint8_t *src1a, const uint8_t *src2a, int alpha, int width)
Definition: vc1dsp.h:69
vc1_v_loop_filter8_c
static void vc1_v_loop_filter8_c(uint8_t *src, int stride, int pq)
Definition: vc1dsp.c:223
stride
int stride
Definition: mace.c:144
VC1_MSPEL_MC
#define VC1_MSPEL_MC(OP, OP4, OPNAME)
Definition: vc1dsp.c:590
vc1_mspel_filter
static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
Definition: vc1dsp.c:570
r
const char * r
Definition: vf_curves.c:114
vc1_inv_trans_8x8_c
static void vc1_inv_trans_8x8_c(int16_t block[64])
Definition: vc1dsp.c:265
vc1_inv_trans_4x8_dc_c
static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:412
vc1dsp.h
ff_vc1dsp_init_aarch64
av_cold void ff_vc1dsp_init_aarch64(VC1DSPContext *dsp)
Definition: vc1dsp_init_aarch64.c:37
VC1DSPContext::vc1_h_loop_filter8
void(* vc1_h_loop_filter8)(uint8_t *src, int stride, int pq)
Definition: vc1dsp.h:52
vc1_inv_trans_8x4_dc_c
static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:335
VC1DSPContext::vc1_inv_trans_4x4
void(* vc1_inv_trans_4x4)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:40
VC1DSPContext::avg_vc1_mspel_pixels_tab
vc1op_pixels_func avg_vc1_mspel_pixels_tab[2][16]
Definition: vc1dsp.h:60
VC1DSPContext::avg_no_rnd_vc1_chroma_pixels_tab
h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3]
Definition: vc1dsp.h:64
vc1_v_overlap_c
static void vc1_v_overlap_c(uint8_t *src, int stride)
Definition: vc1dsp.c:37
VC1DSPContext::vc1_h_overlap
void(* vc1_h_overlap)(uint8_t *src, int stride)
Definition: vc1dsp.h:46
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
b
#define b
Definition: input.c:41
ff_startcode_find_candidate_c
int ff_startcode_find_candidate_c(const uint8_t *buf, int size)
Definition: startcode.c:31
vc1_loop_filter
static void vc1_loop_filter(uint8_t *src, int step, int stride, int len, int pq)
VC-1 in-loop deblocking filter.
Definition: vc1dsp.c:196
vc1_h_overlap_c
static void vc1_h_overlap_c(uint8_t *src, int stride)
Definition: vc1dsp.c:61
t1
#define t1
Definition: regdef.h:29
VC1DSPContext::vc1_inv_trans_8x8_dc
void(* vc1_inv_trans_8x8_dc)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:41
ff_vc1dsp_init_mips
av_cold void ff_vc1dsp_init_mips(VC1DSPContext *dsp)
Definition: vc1dsp_init_mips.c:122
VC1DSPContext::put_no_rnd_vc1_chroma_pixels_tab
h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3]
Definition: vc1dsp.h:63
VC1DSPContext::vc1_inv_trans_4x4_dc
void(* vc1_inv_trans_4x4_dc)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:44
D
D(D(float, sse)
Definition: rematrix_init.c:28
ff_vc1dsp_init_arm
av_cold void ff_vc1dsp_init_arm(VC1DSPContext *dsp)
Definition: vc1dsp_init_arm.c:27
A
#define A(x)
Definition: vp56_arith.h:28
b1
static double b1(void *priv, double x, double y)
Definition: vf_xfade.c:1332
FN_ASSIGN
#define FN_ASSIGN(X, Y)
Definition: vc1dsp.c:965
op_avg
#define op_avg(a, b)
Definition: vc1dsp.c:729
a1
#define a1
Definition: regdef.h:47
vc1_inv_trans_8x4_c
static void vc1_inv_trans_8x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:356
C
s EdgeDetect Foobar g libavfilter vf_edgedetect c libavfilter vf_foobar c edit libavfilter and add an entry for foobar following the pattern of the other filters edit libavfilter allfilters and add an entry for foobar following the pattern of the other filters configure make j< whatever > ffmpeg ffmpeg i you should get a foobar png with Lena edge detected That s your new playground is ready Some little details about what s going which in turn will define variables for the build system and the C
Definition: writing_filters.txt:58
avassert.h
rnd
#define rnd()
Definition: checkasm.h:107
VC1DSPContext::vc1_inv_trans_8x4_dc
void(* vc1_inv_trans_8x4_dc)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:42
av_cold
#define av_cold
Definition: attributes.h:90
avg2
#define avg2(a, b)
Definition: vc1dsp.c:833
VC1DSPContext::sprite_v_double_twoscale
void(* sprite_v_double_twoscale)(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1, const uint8_t *src2a, const uint8_t *src2b, int offset2, int alpha, int width)
Definition: vc1dsp.h:72
width
#define width
intreadwrite.h
VC1DSPContext::sprite_h
void(* sprite_h)(uint8_t *dst, const uint8_t *src, int offset, int advance, int count)
Definition: vc1dsp.h:67
VC1DSPContext::vc1_v_loop_filter16
void(* vc1_v_loop_filter16)(uint8_t *src, int stride, int pq)
Definition: vc1dsp.h:53
vc1_v_loop_filter16_c
static void vc1_v_loop_filter16_c(uint8_t *src, int stride, int pq)
Definition: vc1dsp.c:233
t7
#define t7
Definition: regdef.h:35
put_no_rnd_vc1_chroma_mc8_c
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y)
Definition: vc1dsp.c:786
VC1DSPContext::vc1_v_overlap
void(* vc1_v_overlap)(uint8_t *src, int stride)
Definition: vc1dsp.h:45
avg_no_rnd_vc1_chroma_mc4_c
static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y)
Definition: vc1dsp.c:860
avg_no_rnd_vc1_chroma_mc8_c
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y)
Definition: vc1dsp.c:834
FFABS
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior, as their absolute values are not representable ...
Definition: common.h:72
VC1DSPContext::vc1_h_loop_filter4
void(* vc1_h_loop_filter4)(uint8_t *src, int stride, int pq)
Definition: vc1dsp.h:50
op4_avg
#define op4_avg(a, b)
Definition: vc1dsp.c:730
NULL
#define NULL
Definition: coverity.c:32
vc1_inv_trans_8x8_dc_c
static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:244
chroma_mc
#define chroma_mc(a)
Definition: vc1dsp.c:783
VC1DSPContext::vc1_inv_trans_8x4
void(* vc1_inv_trans_8x4)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:38
t5
#define t5
Definition: regdef.h:33
t6
#define t6
Definition: regdef.h:34
vc1_h_loop_filter16_c
static void vc1_h_loop_filter16_c(uint8_t *src, int stride, int pq)
Definition: vc1dsp.c:238
src
#define src
Definition: vp8dsp.c:254
qpeldsp.h
VC1DSPContext::vc1_h_loop_filter16
void(* vc1_h_loop_filter16)(uint8_t *src, int stride, int pq)
Definition: vc1dsp.h:54
VC1DSPContext::vc1_inv_trans_4x8_dc
void(* vc1_inv_trans_4x8_dc)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:43
VC1DSPContext::vc1_h_s_overlap
void(* vc1_h_s_overlap)(int16_t *left, int16_t *right, int left_stride, int right_stride, int flags)
Definition: vc1dsp.h:48
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_vc1dsp_init_x86
void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
Definition: vc1dsp_init.c:105
startcode.h
VC1_MSPEL_FILTER_16B
#define VC1_MSPEL_FILTER_16B(DIR, TYPE)
Definition: vc1dsp.c:545
vc1_v_loop_filter4_c
static void vc1_v_loop_filter4_c(uint8_t *src, int stride, int pq)
Definition: vc1dsp.c:213
VC1DSPContext::vc1_v_loop_filter4
void(* vc1_v_loop_filter4)(uint8_t *src, int stride, int pq)
Definition: vc1dsp.h:49
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
PUT_VC1_MSPEL
#define PUT_VC1_MSPEL(a, b)
Definition: vc1dsp.c:738
vc1_inv_trans_4x8_c
static void vc1_inv_trans_4x8_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:429
h264chroma.h
t8
#define t8
Definition: regdef.h:53
vc1_inv_trans_4x4_dc_c
static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:485
b2
static double b2(void *priv, double x, double y)
Definition: vf_xfade.c:1333
FFMIN
#define FFMIN(a, b)
Definition: common.h:96
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
VC1DSPContext::sprite_v_single
void(* sprite_v_single)(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset, int width)
Definition: vc1dsp.h:68
a0
#define a0
Definition: regdef.h:46
VC1DSPContext::vc1_inv_trans_8x8
void(* vc1_inv_trans_8x8)(int16_t *b)
Definition: vc1dsp.h:37
VC1DSPContext::startcode_find_candidate
int(* startcode_find_candidate)(const uint8_t *buf, int size)
Search buf from the start for up to size bytes.
Definition: vc1dsp.h:82
av_assert2
#define av_assert2(cond)
assert() equivalent for checks that do lie in speed-critical code.
Definition: avassert.h:64
VC1DSPContext::sprite_v_double_onescale
void(* sprite_v_double_onescale)(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1, const uint8_t *src2a, int alpha, int width)
Definition: vc1dsp.h:70
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
t4
#define t4
Definition: regdef.h:32
t3
#define t3
Definition: regdef.h:31
op_put
#define op_put(a, b)
Definition: vc1dsp.c:728
a2
#define a2
Definition: regdef.h:48
common.h
av_always_inline
#define av_always_inline
Definition: attributes.h:49
uint8_t
uint8_t
Definition: audio_convert.c:194
len
int len
Definition: vorbis_enc_data.h:452
vc1_filter_line
static av_always_inline int vc1_filter_line(uint8_t *src, int stride, int pq)
VC-1 in-loop deblocking filter for one line.
Definition: vc1dsp.c:147
rnd_avg.h
vc1_h_loop_filter8_c
static void vc1_h_loop_filter8_c(uint8_t *src, int stride, int pq)
Definition: vc1dsp.c:228
VC1DSPContext
Definition: vc1dsp.h:35
VC1DSPContext::put_vc1_mspel_pixels_tab
vc1op_pixels_func put_vc1_mspel_pixels_tab[2][16]
Definition: vc1dsp.h:59
vc1_v_s_overlap_c
static void vc1_v_s_overlap_c(int16_t *top, int16_t *bottom)
Definition: vc1dsp.c:84
left
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left
Definition: snow.txt:386
B
#define B
Definition: huffyuvdsp.h:32
VC1DSPContext::vc1_v_loop_filter8
void(* vc1_v_loop_filter8)(uint8_t *src, int stride, int pq)
Definition: vc1dsp.h:51
t2
#define t2
Definition: regdef.h:30
op4_put
#define op4_put(a, b)
Definition: vc1dsp.c:731
mode
mode
Definition: ebur128.h:83
vc1_inv_trans_4x4_c
static void vc1_inv_trans_4x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:502
temp
else temp
Definition: vf_mcdeint.c:256
vc1_h_s_overlap_c
static void vc1_h_s_overlap_c(int16_t *left, int16_t *right, int left_stride, int right_stride, int flags)
Definition: vc1dsp.c:110
put_no_rnd_vc1_chroma_mc4_c
static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y)
Definition: vc1dsp.c:812
ff_vc1dsp_init_ppc
av_cold void ff_vc1dsp_init_ppc(VC1DSPContext *dsp)
Definition: vc1dsp_altivec.c:354
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
ff_vc1dsp_init
av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
Definition: vc1dsp.c:971
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:565
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038
VC1DSPContext::vc1_inv_trans_4x8
void(* vc1_inv_trans_4x8)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:39
a3
#define a3
Definition: regdef.h:49
clip
static double clip(void *opaque, double val)
Clip value val in the minval - maxval range.
Definition: vf_lut.c:162
VC1DSPContext::vc1_v_s_overlap
void(* vc1_v_s_overlap)(int16_t *top, int16_t *bottom)
Definition: vc1dsp.h:47
vc1_h_loop_filter4_c
static void vc1_h_loop_filter4_c(uint8_t *src, int stride, int pq)
Definition: vc1dsp.c:218