vp8dsp.c
/*
 * Copyright (c) 2016 Martin Storsjo
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <stdbool.h>
#include <string.h>

#include "config_components.h"
#include "libavcodec/vp8dsp.h"

#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem_internal.h"

#include "checkasm.h"

#define PIXEL_STRIDE 16

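// Fill a 4x4 src and dst area with random pixels and store their difference
// in coef[], i.e. the residual that the transform tests below operate on.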
#define randomize_buffers(src, dst, stride, coef)             \
    do {                                                      \
        int x, y;                                             \
        for (y = 0; y < 4; y++) {                             \
            AV_WN32A((src) + y * (stride), rnd());            \
            AV_WN32A((dst) + y * (stride), rnd());            \
            for (x = 0; x < 4; x++)                           \
                (coef)[y * 4 + x] = (src)[y * (stride) + x] - \
                                    (dst)[y * (stride) + x];  \
        }                                                     \
    } while (0)

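// Forward 4x4 DCT (row pass followed by a column pass with final rounding),
// used to turn the random residual produced by randomize_buffers() into
// realistic coefficients for the inverse transform tests below.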
static void dct4x4(int16_t *coef)
{
    int i;
    for (i = 0; i < 4; i++) {
        const int a1 = (coef[i*4 + 0] + coef[i*4 + 3]) * 8;
        const int b1 = (coef[i*4 + 1] + coef[i*4 + 2]) * 8;
        const int c1 = (coef[i*4 + 1] - coef[i*4 + 2]) * 8;
        const int d1 = (coef[i*4 + 0] - coef[i*4 + 3]) * 8;
        coef[i*4 + 0] =  a1 + b1;
        coef[i*4 + 1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12;
        coef[i*4 + 2] =  a1 - b1;
        coef[i*4 + 3] = (d1 * 2217 - c1 * 5352 +  7500) >> 12;
    }
    for (i = 0; i < 4; i++) {
        const int a1 = coef[i + 0*4] + coef[i + 3*4];
        const int b1 = coef[i + 1*4] + coef[i + 2*4];
        const int c1 = coef[i + 1*4] - coef[i + 2*4];
        const int d1 = coef[i + 0*4] - coef[i + 3*4];
        coef[i + 0*4] =  (a1 + b1 + 7) >> 4;
        coef[i + 1*4] = ((c1 * 2217 + d1 * 5352 + 12000) >> 16) + !!d1;
        coef[i + 2*4] =  (a1 - b1 + 7) >> 4;
        coef[i + 3*4] =  (d1 * 2217 - c1 * 5352 + 51000) >> 16;
    }
}

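// Forward 4x4 Walsh-Hadamard transform, applied to the 16 luma DC values in
// check_luma_dc_wht(); the decoder's vp8_luma_dc_wht() computes its inverse.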
static void wht4x4(int16_t *coef)
{
    int i;
    for (i = 0; i < 4; i++) {
        int a1 = coef[0 * 4 + i];
        int b1 = coef[1 * 4 + i];
        int c1 = coef[2 * 4 + i];
        int d1 = coef[3 * 4 + i];
        int e1;
        a1 += b1;
        d1 -= c1;
        e1 = (a1 - d1) >> 1;
        b1 = e1 - b1;
        c1 = e1 - c1;
        a1 -= c1;
        d1 += b1;
        coef[0 * 4 + i] = a1;
        coef[1 * 4 + i] = c1;
        coef[2 * 4 + i] = d1;
        coef[3 * 4 + i] = b1;
    }
    for (i = 0; i < 4; i++) {
        int a1 = coef[i * 4 + 0];
        int b1 = coef[i * 4 + 1];
        int c1 = coef[i * 4 + 2];
        int d1 = coef[i * 4 + 3];
        int e1;
        a1 += b1;
        d1 -= c1;
        e1 = (a1 - d1) >> 1;
        b1 = e1 - b1;
        c1 = e1 - c1;
        a1 -= c1;
        d1 += b1;
        coef[i * 4 + 0] = a1 * 2;
        coef[i * 4 + 1] = c1 * 2;
        coef[i * 4 + 2] = d1 * 2;
        coef[i * 4 + 3] = b1 * 2;
    }
}

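// Test vp8_idct_add and vp8_idct_dc_add: a random residual is forward
// transformed with dct4x4() above and then fed to both the C reference and
// the assembly implementation. The reconstructed pixels and the coefficient
// buffer are both compared, so the two versions must also leave the block in
// the same state.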
static void check_idct(VP8DSPContext *d, bool is_vp7)
{
    LOCAL_ALIGNED_16(uint8_t, src,  [4 * 4]);
    LOCAL_ALIGNED_16(uint8_t, dst,  [4 * 4]);
    LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4]);
    LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4]);
    LOCAL_ALIGNED_16(int16_t, coef, [4 * 4]);
    LOCAL_ALIGNED_16(int16_t, subcoef0, [4 * 4]);
    LOCAL_ALIGNED_16(int16_t, subcoef1, [4 * 4]);
    int dc;
    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, ptrdiff_t stride);

    randomize_buffers(src, dst, 4, coef);

    dct4x4(coef);

    for (dc = 0; dc <= 1; dc++) {
        void (*idct)(uint8_t *, int16_t *, ptrdiff_t) = dc ? d->vp8_idct_dc_add : d->vp8_idct_add;

        if (check_func(idct, "vp%d_idct_%sadd", 8 - is_vp7, dc ? "dc_" : "")) {
            if (dc) {
                memset(subcoef0, 0, 4 * 4 * sizeof(int16_t));
                subcoef0[0] = coef[0];
            } else {
                memcpy(subcoef0, coef, 4 * 4 * sizeof(int16_t));
            }
            memcpy(dst0, dst, 4 * 4);
            memcpy(dst1, dst, 4 * 4);
            memcpy(subcoef1, subcoef0, 4 * 4 * sizeof(int16_t));
            // Note, this uses a pixel stride of 4, even though the real decoder uses
            // a stride that is a multiple of 16. If optimizations want to take
            // advantage of that, this test needs to be updated to make it more like
            // the h264dsp tests.
            call_ref(dst0, subcoef0, 4);
            call_new(dst1, subcoef1, 4);
            if (memcmp(dst0, dst1, 4 * 4) ||
                memcmp(subcoef0, subcoef1, 4 * 4 * sizeof(int16_t)))
                fail();

            bench_new(dst1, subcoef1, 4);
        }
    }
}

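// Test vp8_idct_dc_add4y (four luma blocks side by side in a 16-pixel-wide
// row) and vp8_idct_dc_add4uv (four chroma blocks in a 2x2 layout with a
// stride of 8); each sub-block keeps only its DC coefficient.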
static void check_idct_dc4(VP8DSPContext *d, bool is_vp7)
{
    LOCAL_ALIGNED_16(uint8_t, src,  [4 * 4 * 4]);
    LOCAL_ALIGNED_16(uint8_t, dst,  [4 * 4 * 4]);
    LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4 * 4]);
    LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4 * 4]);
    LOCAL_ALIGNED_16(int16_t, coef,     [4], [4 * 4]);
    LOCAL_ALIGNED_16(int16_t, subcoef0, [4], [4 * 4]);
    LOCAL_ALIGNED_16(int16_t, subcoef1, [4], [4 * 4]);
    int i, chroma;
    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);

    for (chroma = 0; chroma <= 1; chroma++) {
        void (*idct4dc)(uint8_t *, int16_t[4][16], ptrdiff_t) = chroma ? d->vp8_idct_dc_add4uv : d->vp8_idct_dc_add4y;
        if (check_func(idct4dc, "vp%d_idct_dc_add4%s", 8 - is_vp7, chroma ? "uv" : "y")) {
            ptrdiff_t stride = chroma ? 8 : 16;
            int w = chroma ? 2 : 4;
            for (i = 0; i < 4; i++) {
                int blockx = 4 * (i % w);
                int blocky = 4 * (i / w);
                randomize_buffers(src + stride * blocky + blockx, dst + stride * blocky + blockx, stride, coef[i]);
                dct4x4(coef[i]);
                memset(&coef[i][1], 0, 15 * sizeof(int16_t));
            }

            memcpy(dst0, dst, 4 * 4 * 4);
            memcpy(dst1, dst, 4 * 4 * 4);
            memcpy(subcoef0, coef, 4 * 4 * 4 * sizeof(int16_t));
            memcpy(subcoef1, coef, 4 * 4 * 4 * sizeof(int16_t));
            call_ref(dst0, subcoef0, stride);
            call_new(dst1, subcoef1, stride);
            if (memcmp(dst0, dst1, 4 * 4 * 4) ||
                memcmp(subcoef0, subcoef1, 4 * 4 * 4 * sizeof(int16_t)))
                fail();
            bench_new(dst1, subcoef1, stride);
        }
    }
}

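// Test vp8_luma_dc_wht and vp8_luma_dc_wht_dc: the DC coefficients of the 16
// luma blocks of a macroblock are gathered into dc[], forward transformed
// with wht4x4(), and the inverse WHT (which scatters its results back into
// block[y][x][0]) is compared between the C reference and the assembly.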
static void check_luma_dc_wht(VP8DSPContext *d, bool is_vp7)
{
    LOCAL_ALIGNED_16(int16_t, dc,  [4 * 4]);
    LOCAL_ALIGNED_16(int16_t, dc0, [4 * 4]);
    LOCAL_ALIGNED_16(int16_t, dc1, [4 * 4]);
    int16_t block[4][4][16];
    LOCAL_ALIGNED_16(int16_t, block0, [4], [4][16]);
    LOCAL_ALIGNED_16(int16_t, block1, [4], [4][16]);
    int dc_only;
    int blockx, blocky;
    declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t block[4][4][16], int16_t dc[16]);

    for (blocky = 0; blocky < 4; blocky++) {
        for (blockx = 0; blockx < 4; blockx++) {
            uint8_t src[16], dst[16];
            randomize_buffers(src, dst, 4, block[blocky][blockx]);

            dct4x4(block[blocky][blockx]);
            dc[blocky * 4 + blockx] = block[blocky][blockx][0];
            block[blocky][blockx][0] = rnd();
        }
    }
    wht4x4(dc);

    for (dc_only = 0; dc_only <= 1; dc_only++) {
        void (*idct)(int16_t [4][4][16], int16_t [16]) = dc_only ? d->vp8_luma_dc_wht_dc : d->vp8_luma_dc_wht;

        if (check_func(idct, "vp%d_luma_dc_wht%s", 8 - is_vp7, dc_only ? "_dc" : "")) {
            if (dc_only) {
                memset(dc0, 0, 16 * sizeof(int16_t));
                dc0[0] = dc[0];
            } else {
                memcpy(dc0, dc, 16 * sizeof(int16_t));
            }
            memcpy(dc1, dc0, 16 * sizeof(int16_t));
            memcpy(block0, block, 4 * 4 * 16 * sizeof(int16_t));
            memcpy(block1, block, 4 * 4 * 16 * sizeof(int16_t));
            call_ref(block0, dc0);
            call_new(block1, dc1);
            if (memcmp(block0, block1, 4 * 4 * 16 * sizeof(int16_t)) ||
                memcmp(dc0, dc1, 16 * sizeof(int16_t)))
                fail();
            bench_new(block1, dc1);
        }
    }
}

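// Source buffer sizing for the MC tests: blocks narrower than 16 pixels can
// be up to twice as tall as they are wide (see the size/height derivation in
// check_mc()), hence the extra doubling for size < 16; the +5 covers the
// additional rows read by the 6-tap subpel filter. This evaluates to
// (16+5)*32 bytes for 16- and 8-pixel-wide blocks and (8+5)*32 bytes for
// 4-pixel-wide ones, well within the 32*32 byte buffer.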
#define SRC_BUF_STRIDE 32
#define SRC_BUF_SIZE (((size << (size < 16)) + 5) * SRC_BUF_STRIDE)
// The mc subpixel interpolation filter needs the 2 previous pixels in either
// direction; the +1 makes sure the actual load addresses are always
// unaligned.
#define src (buf + 2 * SRC_BUF_STRIDE + 2 + 1)

#undef randomize_buffers
#define randomize_buffers()                     \
    do {                                        \
        int k;                                  \
        for (k = 0; k < SRC_BUF_SIZE; k += 4) { \
            AV_WN32A(buf + k, rnd());           \
        }                                       \
    } while (0)

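// Test the motion compensation functions (pixels/epel/bilin put). The k loop
// covers the seven VP8 block shapes (16x16, 16x8, 8x16, 8x8, 8x4, 4x8, 4x4),
// dx/dy select the horizontal/vertical subpel filters, and mx/my are drawn so
// that they match the chosen filter variant. The first row and column of the
// source get an extreme 0x00/0xff pattern to provoke overflows in the filter.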
static void check_mc(VP8DSPContext *d)
{
    LOCAL_ALIGNED_16(uint8_t, buf, [32 * 32]);
    BUF_RECT(uint8_t, dst0, 16, 16);
    BUF_RECT(uint8_t, dst1, 16, 16);
    int type, k, dx, dy;
    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t,
                      const uint8_t *, ptrdiff_t, int, int, int);

    for (type = 0; type < 2; type++) {
        for (k = 1; k < 8; k++) {
            int hsize  = k / 3;
            int size   = 16 >> hsize;
            int height = (size << 1) >> (k % 3);
            for (dy = 0; dy < 3; dy++) {
                for (dx = 0; dx < 3; dx++) {
                    char str[100];
                    // The function under test, selected from the epel or bilinear table
                    vp8_mc_func func = (type ? d->put_vp8_bilinear_pixels_tab
                                             : d->put_vp8_epel_pixels_tab)[hsize][dy][dx];

                    if (dx || dy) {
                        if (type == 0) {
                            static const char *dx_names[] = { "", "h4", "h6" };
                            static const char *dy_names[] = { "", "v4", "v6" };
                            snprintf(str, sizeof(str), "epel%d_%s%s", size, dx_names[dx], dy_names[dy]);
                        } else {
                            snprintf(str, sizeof(str), "bilin%d_%s%s", size, dx ? "h" : "", dy ? "v" : "");
                        }
                    } else {
                        snprintf(str, sizeof(str), "pixels%d", size);
                    }

                    if (check_func(func, "vp8_put_%s", str)) {
                        int mx, my;
                        int i;
                        if (type == 0) {
                            mx = dx == 2 ? 2 + 2 * (rnd() % 3) : dx == 1 ? 1 + 2 * (rnd() % 4) : 0;
                            my = dy == 2 ? 2 + 2 * (rnd() % 3) : dy == 1 ? 1 + 2 * (rnd() % 4) : 0;
                        } else {
                            mx = dx ? 1 + (rnd() % 7) : 0;
                            my = dy ? 1 + (rnd() % 7) : 0;
                        }
                        randomize_buffers();
                        for (i = -2; i <= 3; i++) {
                            int val = (i == -1 || i == 2) ? 0 : 0xff;
                            // Set pixels in the first row and column to the maximum pattern,
                            // to test for potential overflows in the filter.
                            src[i]                  = val;
                            src[i * SRC_BUF_STRIDE] = val;
                        }
                        CLEAR_BUF_RECT(dst0);
                        CLEAR_BUF_RECT(dst1);
                        call_ref(dst0, dst0_stride, src, SRC_BUF_STRIDE, height, mx, my);
                        call_new(dst1, dst1_stride, src, SRC_BUF_STRIDE, height, mx, my);
                        checkasm_check_padded(uint8_t, dst0, dst0_stride, dst1, dst1_stride, size, height, "dst");
                        bench_new(dst1, dst1_stride, src, SRC_BUF_STRIDE, height, mx, my);
                    }
                }
            }
        }
    }
}

#undef randomize_buffers

#define setpx(a, b, c) buf[(a) + (b) * jstride] = av_clip_uint8(c)
// Set the pixel to c +/- [0,d]
#define setdx(a, b, c, d) setpx(a, b, c - (d) + (rnd() % ((d) * 2 + 1)))
// Set the pixel to c +/- [d,d+e] (making sure it won't be clipped)
#define setdx2(a, b, o, c, d, e) setpx(a, b, o = c + ((d) + (rnd() % (e))) * (c >= 128 ? -1 : 1))

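// Set up the pixels on both sides of a filtered edge (p3..p0 on one side,
// q0..q3 on the other, along either a row or a column depending on dir) so
// that the edge and interior limits flim_E and flim_I are respected, while
// force_hev controls whether the high edge variance threshold is exceeded:
// 1 forces it on for every row, -1 keeps it off everywhere, and 0 gives the
// mix described in the comment inside the loop.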
static void randomize_loopfilter_buffers(int lineoff, int str,
                                         int dir, int flim_E, int flim_I,
                                         int hev_thresh, uint8_t *buf,
                                         int force_hev)
{
    uint32_t mask = 0xff;
    int off = dir ? lineoff : lineoff * str;
    int istride = dir ? 1 : str;
    int jstride = dir ? str : 1;
    int i;
    for (i = 0; i < 8; i += 2) {
        // Row 0 will trigger hev for q0/q1, row 2 will trigger hev for p0/p1,
        // rows 4 and 6 will not trigger hev.
        // force_hev 1 will make sure all rows trigger hev, while force_hev -1
        // makes none of them trigger it.
        int idx = off + i * istride, p2, p1, p0, q0, q1, q2;
        setpx(idx, 0, q0 = rnd() & mask);
        if ((i == 0 && force_hev >= 0) || force_hev > 0)
            setdx2(idx, 1, q1, q0, hev_thresh + 1, flim_I - hev_thresh - 1);
        else
            setdx(idx, 1, q1 = q0, hev_thresh);
        setdx(idx, 2, q2 = q1, flim_I);
        setdx(idx, 3, q2, flim_I);
        setdx(idx, -1, p0 = q0, flim_E >> 2);
        if ((i == 2 && force_hev >= 0) || force_hev > 0)
            setdx2(idx, -2, p1, p0, hev_thresh + 1, flim_I - hev_thresh - 1);
        else
            setdx(idx, -2, p1 = p0, hev_thresh);
        setdx(idx, -3, p2 = p1, flim_I);
        setdx(idx, -4, p2, flim_I);
    }
}

// Fill the buffer with random pixels
static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
{
    int x, y;
    for (y = 0; y < h; y++)
        for (x = 0; x < w; x++)
            buf[y * stride + x] = rnd() & 0xff;
}

#define randomize_buffers(buf, lineoff, str, force_hev) \
    randomize_loopfilter_buffers(lineoff, str, dir, flim_E, flim_I, hev_thresh, buf, force_hev)

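// Test the full and inner ("_inner") 16-pixel luma loop filters, both the
// horizontal and the vertical variant, with hev forced off, mixed and on.
// The whole 16x16 area around the filtered edge is compared, so writes
// outside the intended region are detected as well.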
static void check_loopfilter_16y(VP8DSPContext *d, bool is_vp7)
{
    LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
    LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
    int dir, edge, force_hev;
    int flim_E = 20, flim_I = 10, hev_thresh = 7;
    declare_func(void, uint8_t *, ptrdiff_t, int, int, int);

    for (dir = 0; dir < 2; dir++) {
        int midoff = dir ? 4 * 16 : 4;
        int midoff_aligned = dir ? 4 * 16 : 16;
        uint8_t *buf0 = base0 + midoff_aligned;
        uint8_t *buf1 = base1 + midoff_aligned;
        for (edge = 0; edge < 2; edge++) {
            void (*func)(uint8_t *, ptrdiff_t, int, int, int) = NULL;
            switch (dir << 1 | edge) {
            case (0 << 1) | 0: func = d->vp8_h_loop_filter16y;       break;
            case (1 << 1) | 0: func = d->vp8_v_loop_filter16y;       break;
            case (0 << 1) | 1: func = d->vp8_h_loop_filter16y_inner; break;
            case (1 << 1) | 1: func = d->vp8_v_loop_filter16y_inner; break;
            }
            if (check_func(func, "vp%d_loop_filter16y%s_%s", 8 - is_vp7, edge ? "_inner" : "", dir ? "v" : "h")) {
                for (force_hev = -1; force_hev <= 1; force_hev++) {
                    fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
                    randomize_buffers(buf0, 0, 16, force_hev);
                    randomize_buffers(buf0, 8, 16, force_hev);
                    memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
                    call_ref(buf0, 16, flim_E, flim_I, hev_thresh);
                    call_new(buf1, 16, flim_E, flim_I, hev_thresh);
                    if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
                        fail();
                }
                fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
                randomize_buffers(buf0, 0, 16, 0);
                randomize_buffers(buf0, 8, 16, 0);
                bench_new(buf0, 16, flim_E, flim_I, hev_thresh);
            }
        }
    }
}

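// Same as check_loopfilter_16y, but for the 8-pixel chroma filters, which
// process the U and V planes in a single call.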
static void check_loopfilter_8uv(VP8DSPContext *d, bool is_vp7)
{
    LOCAL_ALIGNED_16(uint8_t, base0u, [32 + 16 * 16]);
    LOCAL_ALIGNED_16(uint8_t, base0v, [32 + 16 * 16]);
    LOCAL_ALIGNED_16(uint8_t, base1u, [32 + 16 * 16]);
    LOCAL_ALIGNED_16(uint8_t, base1v, [32 + 16 * 16]);
    int dir, edge, force_hev;
    int flim_E = 20, flim_I = 10, hev_thresh = 7;
    declare_func(void, uint8_t *, uint8_t *, ptrdiff_t, int, int, int);

    for (dir = 0; dir < 2; dir++) {
        int midoff = dir ? 4 * 16 : 4;
        int midoff_aligned = dir ? 4 * 16 : 16;
        uint8_t *buf0u = base0u + midoff_aligned;
        uint8_t *buf0v = base0v + midoff_aligned;
        uint8_t *buf1u = base1u + midoff_aligned;
        uint8_t *buf1v = base1v + midoff_aligned;
        for (edge = 0; edge < 2; edge++) {
            void (*func)(uint8_t *, uint8_t *, ptrdiff_t, int, int, int) = NULL;
            switch (dir << 1 | edge) {
            case (0 << 1) | 0: func = d->vp8_h_loop_filter8uv;       break;
            case (1 << 1) | 0: func = d->vp8_v_loop_filter8uv;       break;
            case (0 << 1) | 1: func = d->vp8_h_loop_filter8uv_inner; break;
            case (1 << 1) | 1: func = d->vp8_v_loop_filter8uv_inner; break;
            }
            if (check_func(func, "vp%d_loop_filter8uv%s_%s", 8 - is_vp7, edge ? "_inner" : "", dir ? "v" : "h")) {
                for (force_hev = -1; force_hev <= 1; force_hev++) {
                    fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
                    fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
                    randomize_buffers(buf0u, 0, 16, force_hev);
                    randomize_buffers(buf0v, 0, 16, force_hev);
                    memcpy(buf1u - midoff, buf0u - midoff, 16 * 16);
                    memcpy(buf1v - midoff, buf0v - midoff, 16 * 16);

                    call_ref(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
                    call_new(buf1u, buf1v, 16, flim_E, flim_I, hev_thresh);
                    if (memcmp(buf0u - midoff, buf1u - midoff, 16 * 16) ||
                        memcmp(buf0v - midoff, buf1v - midoff, 16 * 16))
                        fail();
                }
                fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
                fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
                randomize_buffers(buf0u, 0, 16, 0);
                randomize_buffers(buf0v, 0, 16, 0);
                bench_new(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
            }
        }
    }
}

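// Test the simple loop filter, which only takes the edge limit; flim_I and
// hev_thresh are declared here solely so the randomize_buffers macro can use
// them to set up the input pixels.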
static void check_loopfilter_simple(VP8DSPContext *d, bool is_vp7)
{
    LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
    LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
    int dir;
    int flim_E = 20, flim_I = 30, hev_thresh = 0;
    declare_func(void, uint8_t *, ptrdiff_t, int);

    for (dir = 0; dir < 2; dir++) {
        int midoff = dir ? 4 * 16 : 4;
        int midoff_aligned = dir ? 4 * 16 : 16;
        uint8_t *buf0 = base0 + midoff_aligned;
        uint8_t *buf1 = base1 + midoff_aligned;
        void (*func)(uint8_t *, ptrdiff_t, int) = dir ? d->vp8_v_loop_filter_simple : d->vp8_h_loop_filter_simple;
        if (check_func(func, "vp%d_loop_filter_simple_%s", 8 - is_vp7, dir ? "v" : "h")) {
            fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
            randomize_buffers(buf0, 0, 16, -1);
            randomize_buffers(buf0, 8, 16, -1);
            memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
            call_ref(buf0, 16, flim_E);
            call_new(buf1, 16, flim_E);
            if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
                fail();
            bench_new(buf0, 16, flim_E);
        }
    }
}

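// VP7 and VP8 share the VP8DSPContext: the MC functions are common to both
// codecs and are checked once via ff_vp78dsp_init(), while the transform and
// loop filter functions are checked per codec below.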
static void checkasm_check_vp78dsp(VP8DSPContext *d, bool is_vp7)
{
#if CONFIG_VP7_DECODER
    if (is_vp7)
        ff_vp7dsp_init(d);
    else
#endif
        ff_vp8dsp_init(d);
    check_idct(d, is_vp7);
    check_idct_dc4(d, is_vp7);
    check_luma_dc_wht(d, is_vp7);
    report("idct");
    check_loopfilter_16y(d, is_vp7);
    check_loopfilter_8uv(d, is_vp7);
    check_loopfilter_simple(d, is_vp7);
    report("loopfilter");
}

void checkasm_check_vp8dsp(void)
{
    VP8DSPContext d;

    ff_vp78dsp_init(&d);
    check_mc(&d);
    report("mc");
    checkasm_check_vp78dsp(&d, false);
#if CONFIG_VP7_DECODER
    checkasm_check_vp78dsp(&d, true);
#endif
}