FFmpeg
hevc_pel.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Henrik Gramner
3  * Copyright (c) 2021 Josh Dekker
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #include <string.h>
23 #include "checkasm.h"
24 #include "libavcodec/hevcdsp.h"
25 #include "libavutil/common.h"
26 #include "libavutil/internal.h"
27 #include "libavutil/intreadwrite.h"
28 
/* Masks applied to each random 32-bit source word; indexed by bit_depth - 8. */
static const uint32_t pixel_mask[]   = { 0xffffffff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
/* Masks applied to each random 32-bit word of the int16_t reference buffers; indexed by bit_depth - 8. */
static const uint32_t pixel_mask16[] = { 0x00ff00ff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
/* Block dimension for each size index; the tests iterate over indices 1..9, entry 0 is a placeholder. */
static const int sizes[]   = { -1, 4, 6, 8, 12, 16, 24, 32, 48, 64 };
/* Parameter sets for the weighted tests; each list ends at the first negative entry. */
static const int weights[] = { 0, 128, 255, -1 };
static const int denoms[]  = { 0, 7, 12, -1 };
static const int offsets[] = { 0, 255, -1 };
35 
36 #define SIZEOF_PIXEL ((bit_depth + 7) / 8)
37 #define BUF_SIZE (2 * MAX_PB_SIZE * (2 * 4 + MAX_PB_SIZE))
38 
/*
 * Fill the scratch buffers with random data, 32 bits at a time.
 *
 * buf0/buf1 receive the same word, masked by pixel_mask[bit_depth - 8],
 * over BUF_SIZE + SRC_EXTRA bytes.  dst0/dst1 receive the same unmasked
 * word over the first BUF_SIZE bytes only.  Expects bit_depth, buf0, buf1,
 * dst0 and dst1 to be in scope at the point of use.
 */
#define randomize_buffers()                                    \
    do {                                                       \
        const uint32_t src_mask = pixel_mask[bit_depth - 8];   \
        int pos = 0;                                           \
        while (pos < BUF_SIZE + SRC_EXTRA) {                   \
            uint32_t word = rnd() & src_mask;                  \
            AV_WN32A(buf0 + pos, word);                        \
            AV_WN32A(buf1 + pos, word);                        \
            /* the destination buffers have no SRC_EXTRA tail */ \
            if (pos < BUF_SIZE) {                              \
                word = rnd();                                  \
                AV_WN32A(dst0 + pos, word);                    \
                AV_WN32A(dst1 + pos, word);                    \
            }                                                  \
            pos += 4;                                          \
        }                                                      \
    } while (0)
54 
/*
 * Randomize the pixel buffers (via randomize_buffers()) and then fill the
 * int16_t reference buffers ref0/ref1 with identical random words masked by
 * pixel_mask16[bit_depth - 8].
 *
 * The whole expansion is a single do { } while (0) statement so that the
 * macro stays one statement when used as the body of an un-braced if/else
 * or loop.  ref0/ref1 are int16_t pointers, so each 32-bit store covers two
 * elements and the index advances by 2.
 */
#define randomize_buffers_ref()                                   \
    do {                                                          \
        uint32_t ref_mask = pixel_mask16[bit_depth - 8];          \
        int ref_k;                                                \
        randomize_buffers();                                      \
        for (ref_k = 0; ref_k < BUF_SIZE; ref_k += 2) {           \
            uint32_t ref_r = rnd() & ref_mask;                    \
            AV_WN32A(ref0 + ref_k, ref_r);                        \
            AV_WN32A(ref1 + ref_k, ref_r);                        \
        }                                                         \
    } while (0)
66 
67 #define src0 (buf0 + 2 * 4 * MAX_PB_SIZE) /* hevc qpel functions read data from negative src pointer offsets */
68 #define src1 (buf1 + 2 * 4 * MAX_PB_SIZE)
69 
70 /* FIXME: Does the need for SRC_EXTRA for these tests indicate a bug? */
71 #define SRC_EXTRA 8
72 
73 static void checkasm_check_hevc_qpel(void)
74 {
75  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
76  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
77  LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
78  LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
79 
81  int size, bit_depth, i, j, row;
82  declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
83  int height, intptr_t mx, intptr_t my, int width);
84 
85  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
87 
88  for (i = 0; i < 2; i++) {
89  for (j = 0; j < 2; j++) {
90  for (size = 1; size < 10; size++) {
91  const char *type;
92  switch ((j << 1) | i) {
93  case 0: type = "pel_pixels"; break; // 0 0
94  case 1: type = "qpel_h"; break; // 0 1
95  case 2: type = "qpel_v"; break; // 1 0
96  case 3: type = "qpel_hv"; break; // 1 1
97  }
98 
99  if (check_func(h.put_hevc_qpel[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
100  int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
102  call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
103  call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
104  for (row = 0; row < size[sizes]; row++) {
105  if (memcmp(dstw0 + row * MAX_PB_SIZE, dstw1 + row * MAX_PB_SIZE, sizes[size] * SIZEOF_PIXEL))
106  fail();
107  }
108  bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
109  }
110  }
111  }
112  }
113  }
114  report("qpel");
115 }
116 
118 {
119  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
120  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
121  LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
122  LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
123 
125  int size, bit_depth, i, j;
126  declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
127  int height, intptr_t mx, intptr_t my, int width);
128 
129  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
131 
132  for (i = 0; i < 2; i++) {
133  for (j = 0; j < 2; j++) {
134  for (size = 1; size < 10; size++) {
135  const char *type;
136  switch ((j << 1) | i) {
137  case 0: type = "pel_uni_pixels"; break; // 0 0
138  case 1: type = "qpel_uni_h"; break; // 0 1
139  case 2: type = "qpel_uni_v"; break; // 1 0
140  case 3: type = "qpel_uni_hv"; break; // 1 1
141  }
142 
143  if (check_func(h.put_hevc_qpel_uni[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
147  if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
148  fail();
150  }
151  }
152  }
153  }
154  }
155  report("qpel_uni");
156 }
157 
159 {
160  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
161  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
162  LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
163  LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
164 
166  int size, bit_depth, i, j;
167  const int *denom, *wx, *ox;
168  declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
169  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
170 
171  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
173 
174  for (i = 0; i < 2; i++) {
175  for (j = 0; j < 2; j++) {
176  for (size = 1; size < 10; size++) {
177  const char *type;
178  switch ((j << 1) | i) {
179  case 0: type = "pel_uni_w_pixels"; break; // 0 0
180  case 1: type = "qpel_uni_w_h"; break; // 0 1
181  case 2: type = "qpel_uni_w_v"; break; // 1 0
182  case 3: type = "qpel_uni_w_hv"; break; // 1 1
183  }
184 
185  if (check_func(h.put_hevc_qpel_uni_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
186  for (denom = denoms; *denom >= 0; denom++) {
187  for (wx = weights; *wx >= 0; wx++) {
188  for (ox = offsets; *ox >= 0; ox++) {
190  call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
191  call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
192  if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
193  fail();
194  bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
195  }
196  }
197  }
198  }
199  }
200  }
201  }
202  }
203  report("qpel_uni_w");
204 }
205 
207 {
208  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
209  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
210  LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
211  LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
212  LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
213  LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
214 
216  int size, bit_depth, i, j;
217  declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
218  int16_t *src2,
219  int height, intptr_t mx, intptr_t my, int width);
220 
221  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
223 
224  for (i = 0; i < 2; i++) {
225  for (j = 0; j < 2; j++) {
226  for (size = 1; size < 10; size++) {
227  const char *type;
228  switch ((j << 1) | i) {
229  case 0: type = "pel_bi_pixels"; break; // 0 0
230  case 1: type = "qpel_bi_h"; break; // 0 1
231  case 2: type = "qpel_bi_v"; break; // 1 0
232  case 3: type = "qpel_bi_hv"; break; // 1 1
233  }
234 
235  if (check_func(h.put_hevc_qpel_bi[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
237  call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], i, j, sizes[size]);
238  call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
239  if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
240  fail();
242  }
243  }
244  }
245  }
246  }
247  report("qpel_bi");
248 }
249 
251 {
252  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
253  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
254  LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
255  LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
256  LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
257  LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
258 
260  int size, bit_depth, i, j;
261  const int *denom, *wx, *ox;
262  declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
263  int16_t *src2,
264  int height, int denom, int wx0, int wx1,
265  int ox0, int ox1, intptr_t mx, intptr_t my, int width);
266 
267  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
269 
270  for (i = 0; i < 2; i++) {
271  for (j = 0; j < 2; j++) {
272  for (size = 1; size < 10; size++) {
273  const char *type;
274  switch ((j << 1) | i) {
275  case 0: type = "pel_bi_w_pixels"; break; // 0 0
276  case 1: type = "qpel_bi_w_h"; break; // 0 1
277  case 2: type = "qpel_bi_w_v"; break; // 1 0
278  case 3: type = "qpel_bi_w_hv"; break; // 1 1
279  }
280 
281  if (check_func(h.put_hevc_qpel_bi_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
282  for (denom = denoms; *denom >= 0; denom++) {
283  for (wx = weights; *wx >= 0; wx++) {
284  for (ox = offsets; *ox >= 0; ox++) {
286  call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
287  call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
288  if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
289  fail();
290  bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
291  }
292  }
293  }
294  }
295  }
296  }
297  }
298  }
299  report("qpel_bi_w");
300 }
301 
302 #undef SRC_EXTRA
303 #define SRC_EXTRA 0
304 
305 static void checkasm_check_hevc_epel(void)
306 {
307  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
308  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
309  LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
310  LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
311 
313  int size, bit_depth, i, j, row;
314  declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
315  int height, intptr_t mx, intptr_t my, int width);
316 
317  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
319 
320  for (i = 0; i < 2; i++) {
321  for (j = 0; j < 2; j++) {
322  for (size = 1; size < 10; size++) {
323  const char *type;
324  switch ((j << 1) | i) {
325  case 0: type = "pel_pixels"; break; // 0 0
326  case 1: type = "epel_h"; break; // 0 1
327  case 2: type = "epel_v"; break; // 1 0
328  case 3: type = "epel_hv"; break; // 1 1
329  }
330 
331  if (check_func(h.put_hevc_epel[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
332  int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
334  call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
335  call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
336  for (row = 0; row < size[sizes]; row++) {
337  if (memcmp(dstw0 + row * MAX_PB_SIZE, dstw1 + row * MAX_PB_SIZE, sizes[size] * SIZEOF_PIXEL))
338  fail();
339  }
340  bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
341  }
342  }
343  }
344  }
345  }
346  report("epel");
347 }
348 
350 {
351  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
352  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
353  LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
354  LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
355 
357  int size, bit_depth, i, j;
358  declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
359  int height, intptr_t mx, intptr_t my, int width);
360 
361  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
363 
364  for (i = 0; i < 2; i++) {
365  for (j = 0; j < 2; j++) {
366  for (size = 1; size < 10; size++) {
367  const char *type;
368  switch ((j << 1) | i) {
369  case 0: type = "pel_uni_pixels"; break; // 0 0
370  case 1: type = "epel_uni_h"; break; // 0 1
371  case 2: type = "epel_uni_v"; break; // 1 0
372  case 3: type = "epel_uni_hv"; break; // 1 1
373  }
374 
375  if (check_func(h.put_hevc_epel_uni[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
379  if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
380  fail();
382  }
383  }
384  }
385  }
386  }
387  report("epel_uni");
388 }
389 
391 {
392  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
393  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
394  LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
395  LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
396 
398  int size, bit_depth, i, j;
399  const int *denom, *wx, *ox;
400  declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
401  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
402 
403  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
405 
406  for (i = 0; i < 2; i++) {
407  for (j = 0; j < 2; j++) {
408  for (size = 1; size < 10; size++) {
409  const char *type;
410  switch ((j << 1) | i) {
411  case 0: type = "pel_uni_w_pixels"; break; // 0 0
412  case 1: type = "epel_uni_w_h"; break; // 0 1
413  case 2: type = "epel_uni_w_v"; break; // 1 0
414  case 3: type = "epel_uni_w_hv"; break; // 1 1
415  }
416 
417  if (check_func(h.put_hevc_epel_uni_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
418  for (denom = denoms; *denom >= 0; denom++) {
419  for (wx = weights; *wx >= 0; wx++) {
420  for (ox = offsets; *ox >= 0; ox++) {
422  call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
423  call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
424  if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
425  fail();
426  bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
427  }
428  }
429  }
430  }
431  }
432  }
433  }
434  }
435  report("epel_uni_w");
436 }
437 
439 {
440  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
441  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
442  LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
443  LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
444  LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
445  LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
446 
448  int size, bit_depth, i, j;
449  declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
450  int16_t *src2,
451  int height, intptr_t mx, intptr_t my, int width);
452 
453  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
455 
456  for (i = 0; i < 2; i++) {
457  for (j = 0; j < 2; j++) {
458  for (size = 1; size < 10; size++) {
459  const char *type;
460  switch ((j << 1) | i) {
461  case 0: type = "pel_bi_pixels"; break; // 0 0
462  case 1: type = "epel_bi_h"; break; // 0 1
463  case 2: type = "epel_bi_v"; break; // 1 0
464  case 3: type = "epel_bi_hv"; break; // 1 1
465  }
466 
467  if (check_func(h.put_hevc_epel_bi[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
469  call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], i, j, sizes[size]);
470  call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
471  if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
472  fail();
474  }
475  }
476  }
477  }
478  }
479  report("epel_bi");
480 }
481 
483 {
484  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
485  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
486  LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
487  LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
488  LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
489  LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
490 
492  int size, bit_depth, i, j;
493  const int *denom, *wx, *ox;
494  declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
495  int16_t *src2,
496  int height, int denom, int wx0, int wx1,
497  int ox0, int ox1, intptr_t mx, intptr_t my, int width);
498 
499  for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
501 
502  for (i = 0; i < 2; i++) {
503  for (j = 0; j < 2; j++) {
504  for (size = 1; size < 10; size++) {
505  const char *type;
506  switch ((j << 1) | i) {
507  case 0: type = "pel_bi_w_pixels"; break; // 0 0
508  case 1: type = "epel_bi_w_h"; break; // 0 1
509  case 2: type = "epel_bi_w_v"; break; // 1 0
510  case 3: type = "epel_bi_w_hv"; break; // 1 1
511  }
512 
513  if (check_func(h.put_hevc_epel_bi_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
514  for (denom = denoms; *denom >= 0; denom++) {
515  for (wx = weights; *wx >= 0; wx++) {
516  for (ox = offsets; *ox >= 0; ox++) {
518  call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
519  call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
520  if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
521  fail();
522  bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
523  }
524  }
525  }
526  }
527  }
528  }
529  }
530  }
531  report("epel_bi_w");
532 }
533 
/**
 * Entry point of the HEVC pel test group: runs each of the qpel and epel
 * sub-tests defined in this file in turn.
 *
 * NOTE(review): the call order is reconstructed from the definition order
 * of the static test functions - confirm against the upstream file.
 */
void checkasm_check_hevc_pel(void)
{
    checkasm_check_hevc_qpel();
    checkasm_check_hevc_qpel_uni();
    checkasm_check_hevc_qpel_uni_w();
    checkasm_check_hevc_qpel_bi();
    checkasm_check_hevc_qpel_bi_w();
    checkasm_check_hevc_epel();
    checkasm_check_hevc_epel_uni();
    checkasm_check_hevc_epel_uni_w();
    checkasm_check_hevc_epel_bi();
    checkasm_check_hevc_epel_bi_w();
}
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:131
bit_depth
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:227
checkasm_check_hevc_qpel_bi
static void checkasm_check_hevc_qpel_bi(void)
Definition: hevc_pel.c:206
checkasm_check_hevc_epel_bi_w
static void checkasm_check_hevc_epel_bi_w(void)
Definition: hevc_pel.c:482
checkasm_check_hevc_epel_uni
static void checkasm_check_hevc_epel_uni(void)
Definition: hevc_pel.c:349
src0
#define src0
Definition: hevc_pel.c:67
pixel_mask16
static const uint32_t pixel_mask16[]
Definition: hevc_pel.c:30
check_func
#define check_func(func,...)
Definition: checkasm.h:125
checkasm_check_hevc_epel
static void checkasm_check_hevc_epel(void)
Definition: hevc_pel.c:305
call_ref
#define call_ref(...)
Definition: checkasm.h:140
fail
#define fail()
Definition: checkasm.h:134
checkasm.h
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
checkasm_check_hevc_qpel
static void checkasm_check_hevc_qpel(void)
Definition: hevc_pel.c:73
SRC_EXTRA
#define SRC_EXTRA
Definition: hevc_pel.c:303
checkasm_check_hevc_pel
void checkasm_check_hevc_pel(void)
Definition: hevc_pel.c:534
width
#define width
intreadwrite.h
offsets
static const int offsets[]
Definition: hevc_pel.c:34
SIZEOF_PIXEL
#define SIZEOF_PIXEL
Definition: hevc_pel.c:36
hevcdsp.h
checkasm_check_hevc_qpel_uni_w
static void checkasm_check_hevc_qpel_uni_w(void)
Definition: hevc_pel.c:158
call_new
#define call_new(...)
Definition: checkasm.h:222
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:135
size
int size
Definition: twinvq_data.h:10344
height
#define height
HEVCDSPContext
Definition: hevcdsp.h:47
report
#define report
Definition: checkasm.h:137
bench_new
#define bench_new(...)
Definition: checkasm.h:287
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
internal.h
MAX_PB_SIZE
#define MAX_PB_SIZE
Definition: hevcdsp.h:32
src2
const pixel * src2
Definition: h264pred_template.c:422
weights
static const int weights[]
Definition: hevc_pel.c:32
common.h
checkasm_check_hevc_epel_bi
static void checkasm_check_hevc_epel_bi(void)
Definition: hevc_pel.c:438
checkasm_check_hevc_qpel_uni
static void checkasm_check_hevc_qpel_uni(void)
Definition: hevc_pel.c:117
ff_hevc_dsp_init
void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
Definition: hevcdsp.c:126
randomize_buffers_ref
#define randomize_buffers_ref()
Definition: hevc_pel.c:55
pixel_mask
static const uint32_t pixel_mask[]
Definition: hevc_pel.c:29
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:29
denoms
static const int denoms[]
Definition: hevc_pel.c:33
AV_CPU_FLAG_MMXEXT
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
Definition: cpu.h:30
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
BUF_SIZE
#define BUF_SIZE
Definition: hevc_pel.c:37
sizes
static const int sizes[]
Definition: hevc_pel.c:31
checkasm_check_hevc_qpel_bi_w
static void checkasm_check_hevc_qpel_bi_w(void)
Definition: hevc_pel.c:250
src1
#define src1
Definition: hevc_pel.c:68
h
h
Definition: vp9dsp_template.c:2038
checkasm_check_hevc_epel_uni_w
static void checkasm_check_hevc_epel_uni_w(void)
Definition: hevc_pel.c:390
randomize_buffers
#define randomize_buffers()
Definition: hevc_pel.c:39