FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hevcdsp_template.c
Go to the documentation of this file.
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "get_bits.h"
24 #include "hevc.h"
25 
26 #include "bit_depth_template.c"
27 #include "hevcdsp.h"
28 
29 
30 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
31  GetBitContext *gb, int pcm_bit_depth)
32 {
33  int x, y;
34  pixel *dst = (pixel *)_dst;
35 
36  stride /= sizeof(pixel);
37 
38  for (y = 0; y < height; y++) {
39  for (x = 0; x < width; x++)
40  dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
41  dst += stride;
42  }
43 }
44 
45 static av_always_inline void FUNC(transquant_bypass)(uint8_t *_dst, int16_t *coeffs,
46  ptrdiff_t stride, int size)
47 {
48  int x, y;
49  pixel *dst = (pixel *)_dst;
50 
51  stride /= sizeof(pixel);
52 
53  for (y = 0; y < size; y++) {
54  for (x = 0; x < size; x++) {
55  dst[x] = av_clip_pixel(dst[x] + *coeffs);
56  coeffs++;
57  }
58  dst += stride;
59  }
60 }
61 
/**
 * Add a 4x4 block of coefficients to the destination with clipping.
 * Thin wrapper that forwards to transquant_bypass with size 4.
 */
62 static void FUNC(transform_add4x4)(uint8_t *_dst, int16_t *coeffs,
63  ptrdiff_t stride)
64 {
65  FUNC(transquant_bypass)(_dst, coeffs, stride, 4);
66 }
67 
/**
 * Add an 8x8 block of coefficients to the destination with clipping.
 * Thin wrapper that forwards to transquant_bypass with size 8.
 */
68 static void FUNC(transform_add8x8)(uint8_t *_dst, int16_t *coeffs,
69  ptrdiff_t stride)
70 {
71  FUNC(transquant_bypass)(_dst, coeffs, stride, 8);
72 }
73 
/**
 * Add a 16x16 block of coefficients to the destination with clipping.
 * Thin wrapper that forwards to transquant_bypass with size 16.
 */
74 static void FUNC(transform_add16x16)(uint8_t *_dst, int16_t *coeffs,
75  ptrdiff_t stride)
76 {
77  FUNC(transquant_bypass)(_dst, coeffs, stride, 16);
78 }
79 
/**
 * Add a 32x32 block of coefficients to the destination with clipping.
 * Thin wrapper that forwards to transquant_bypass with size 32.
 */
80 static void FUNC(transform_add32x32)(uint8_t *_dst, int16_t *coeffs,
81  ptrdiff_t stride)
82 {
83  FUNC(transquant_bypass)(_dst, coeffs, stride, 32);
84 }
85 
86 
87 static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
88 {
89  int16_t *coeffs = (int16_t *) _coeffs;
90  int x, y;
91  int size = 1 << log2_size;
92 
93  if (mode) {
94  coeffs += size;
95  for (y = 0; y < size - 1; y++) {
96  for (x = 0; x < size; x++)
97  coeffs[x] += coeffs[x - size];
98  coeffs += size;
99  }
100  } else {
101  for (y = 0; y < size; y++) {
102  for (x = 1; x < size; x++)
103  coeffs[x] += coeffs[x - 1];
104  coeffs += size;
105  }
106  }
107 }
108 
109 static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size)
110 {
111  int shift = 15 - BIT_DEPTH - log2_size;
112  int x, y;
113  int size = 1 << log2_size;
114  int16_t *coeffs = _coeffs;
115 
116 
117  if (shift > 0) {
118  int offset = 1 << (shift - 1);
119  for (y = 0; y < size; y++) {
120  for (x = 0; x < size; x++) {
121  *coeffs = (*coeffs + offset) >> shift;
122  coeffs++;
123  }
124  }
125  } else {
126  for (y = 0; y < size; y++) {
127  for (x = 0; x < size; x++) {
128  *coeffs = *coeffs << -shift;
129  coeffs++;
130  }
131  }
132  }
133 }
134 
/*
 * Store helpers handed to the TR_* macros as their `assign` argument.
 *
 * SET           - plain assignment.
 * SCALE         - add the rounding term `add`, shift right by `shift`, then
 *                 clip to the int16 range.
 * ADD_AND_SCALE - same scaling, but the result is added to the current
 *                 value of dst and clipped to the pixel range.
 *
 * SCALE and ADD_AND_SCALE read the variables `add` and `shift` from the
 * scope in which they are expanded.
 */
135 #define SET(dst, x) (dst) = (x)
136 #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
137 #define ADD_AND_SCALE(dst, x) \
138  (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
139 
/*
 * One 4-point pass of the 4x4 luma transform (constants 29, 55, 74).
 *
 * Reads src[0], src[step], src[2*step], src[3*step] and stores the four
 * results through `assign` at the same offsets of dst.
 *
 * The macro is safe for dst == src: c0..c3 are computed before anything is
 * stored, and the only store that still reads src directly (dst[2*step]) is
 * issued first, before any input has been overwritten.
 */
140 #define TR_4x4_LUMA(dst, src, step, assign) \
141  do { \
142  int c0 = src[0 * step] + src[2 * step]; \
143  int c1 = src[2 * step] + src[3 * step]; \
144  int c2 = src[0 * step] - src[3 * step]; \
145  int c3 = 74 * src[1 * step]; \
146  \
147  assign(dst[2 * step], 74 * (src[0 * step] - \
148  src[2 * step] + \
149  src[3 * step])); \
150  assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \
151  assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \
152  assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \
153  } while (0)
154 
155 static void FUNC(transform_4x4_luma)(int16_t *coeffs)
156 {
157  int i;
158  int shift = 7;
159  int add = 1 << (shift - 1);
160  int16_t *src = coeffs;
161 
162  for (i = 0; i < 4; i++) {
163  TR_4x4_LUMA(src, src, 4, SCALE);
164  src++;
165  }
166 
167  shift = 20 - BIT_DEPTH;
168  add = 1 << (shift - 1);
169  for (i = 0; i < 4; i++) {
170  TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
171  coeffs += 4;
172  }
173 }
174 
175 #undef TR_4x4_LUMA
176 
/*
 * TR_4 / TR_8 / TR_16 / TR_32: one N-point transform pass, built
 * recursively from an even and an odd half.
 *
 *   dst, dstep - output array and the distance between outputs
 *   src, sstep - input array and the distance between inputs
 *   assign     - store macro (SET, SCALE or ADD_AND_SCALE)
 *   end        - exclusive upper bound on the odd input indices that are
 *                accumulated
 *
 * TR_4 is the base case: constants are hard-coded and `end` is never
 * referenced in its body.
 */
177 #define TR_4(dst, src, dstep, sstep, assign, end) \
178  do { \
179  const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
180  const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
181  const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
182  const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
183  \
184  assign(dst[0 * dstep], e0 + o0); \
185  assign(dst[1 * dstep], e1 + o1); \
186  assign(dst[2 * dstep], e1 - o1); \
187  assign(dst[3 * dstep], e0 - o0); \
188  } while (0)
189 
/*
 * TR_8: the odd half o_8 is accumulated from the odd inputs (j = 1, 3, ...
 * below `end`) weighted by rows 4*j of the `transform` table; the even half
 * e_8 is a TR_4 over every second input. Outputs i and 7 - i are the sum
 * and the difference of the two halves.
 */
190 #define TR_8(dst, src, dstep, sstep, assign, end) \
191  do { \
192  int i, j; \
193  int e_8[4]; \
194  int o_8[4] = { 0 }; \
195  for (i = 0; i < 4; i++) \
196  for (j = 1; j < end; j += 2) \
197  o_8[i] += transform[4 * j][i] * src[j * sstep]; \
198  TR_4(e_8, src, 1, 2 * sstep, SET, 4); \
199  \
200  for (i = 0; i < 4; i++) { \
201  assign(dst[i * dstep], e_8[i] + o_8[i]); \
202  assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \
203  } \
204  } while (0)
205 
/*
 * TR_16: same scheme with rows 2*j of `transform` for the odd half and a
 * TR_8 (called with a fixed `end` of 8) over every second input for the
 * even half.
 */
206 #define TR_16(dst, src, dstep, sstep, assign, end) \
207  do { \
208  int i, j; \
209  int e_16[8]; \
210  int o_16[8] = { 0 }; \
211  for (i = 0; i < 8; i++) \
212  for (j = 1; j < end; j += 2) \
213  o_16[i] += transform[2 * j][i] * src[j * sstep]; \
214  TR_8(e_16, src, 1, 2 * sstep, SET, 8); \
215  \
216  for (i = 0; i < 8; i++) { \
217  assign(dst[i * dstep], e_16[i] + o_16[i]); \
218  assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \
219  } \
220  } while (0)
221 
/*
 * TR_32: same scheme with rows j of `transform` for the odd half and a
 * TR_16 over every second input for the even half; here the bound is
 * forwarded as end/2.
 */
222 #define TR_32(dst, src, dstep, sstep, assign, end) \
223  do { \
224  int i, j; \
225  int e_32[16]; \
226  int o_32[16] = { 0 }; \
227  for (i = 0; i < 16; i++) \
228  for (j = 1; j < end; j += 2) \
229  o_32[i] += transform[j][i] * src[j * sstep]; \
230  TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \
231  \
232  for (i = 0; i < 16; i++) { \
233  assign(dst[i * dstep], e_32[i] + o_32[i]); \
234  assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \
235  } \
236  } while (0)
237 
/*
 * Local-variable declarations spliced into the functions generated by IDCT.
 *
 * They rely on a variable `col_limit` being in scope:
 *   limit  = min(col_limit, H)      (not declared by the 4-point variant)
 *   limit2 = min(col_limit + 4, H)
 * The 16- and 32-point variants reuse the 8-point declarations.
 */
238 #define IDCT_VAR4(H) \
239  int limit2 = FFMIN(col_limit + 4, H)
240 #define IDCT_VAR8(H) \
241  int limit = FFMIN(col_limit, H); \
242  int limit2 = FFMIN(col_limit + 4, H)
243 #define IDCT_VAR16(H) IDCT_VAR8(H)
244 #define IDCT_VAR32(H) IDCT_VAR8(H)
245 
/*
 * Generate idct_HxH(coeffs, col_limit): an in-place two-pass transform of
 * an H x H coefficient block.
 *
 * Pass 1 walks the H columns (input and output step H) with shift 7 and
 * uses limit2 as the `end` bound of TR_H. Whenever limit2 is still below H,
 * it is lowered by 4 at every column index that is a non-zero multiple
 * of 4.
 *
 * Pass 2 walks the H rows (step 1) with shift 20 - BIT_DEPTH and uses
 * `limit` as the bound.
 *
 * Both passes store through SCALE, which reads the local `add` and `shift`.
 *
 * NOTE: for H == 4, IDCT_VAR4 declares no `limit`. This only works because
 * TR_4 never references its `end` parameter, so the identifier does not
 * appear in the expanded code.
 */
246 #define IDCT(H) \
247 static void FUNC(idct_##H ##x ##H )( \
248  int16_t *coeffs, int col_limit) { \
249  int i; \
250  int shift = 7; \
251  int add = 1 << (shift - 1); \
252  int16_t *src = coeffs; \
253  IDCT_VAR ##H(H); \
254  \
255  for (i = 0; i < H; i++) { \
256  TR_ ## H(src, src, H, H, SCALE, limit2); \
257  if (limit2 < H && i%4 == 0 && !!i) \
258  limit2 -= 4; \
259  src++; \
260  } \
261  \
262  shift = 20 - BIT_DEPTH; \
263  add = 1 << (shift - 1); \
264  for (i = 0; i < H; i++) { \
265  TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \
266  coeffs += H; \
267  } \
268 }
269 
/*
 * Generate idct_HxH_dc(coeffs): fill the whole H x H block with a single
 * value derived from coeffs[0].
 *
 * The value is ((coeffs[0] + 1) >> 1), rounded and shifted right by
 * 14 - BIT_DEPTH. It is computed once, before any element is overwritten.
 */
#define IDCT_DC(H)                                                            \
static void FUNC(idct_##H ##x ##H ##_dc)(                                     \
                   int16_t *coeffs) {                                         \
    const int shift = 14 - BIT_DEPTH;                                         \
    const int add   = 1 << (shift - 1);                                       \
    const int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift;                \
    int k;                                                                    \
                                                                              \
    for (k = 0; k < (H) * (H); k++)                                           \
        coeffs[k] = coeff;                                                    \
}
284 
/* Instantiate the two-pass transforms idct_4x4 ... idct_32x32. */
285 IDCT( 4)
286 IDCT( 8)
287 IDCT(16)
288 IDCT(32)
289 
/* Instantiate the DC-only variants idct_4x4_dc ... idct_32x32_dc. */
290 IDCT_DC( 4)
291 IDCT_DC( 8)
292 IDCT_DC(16)
293 IDCT_DC(32)
294 
/* The transform and store helper macros are not needed past this point. */
295 #undef TR_4
296 #undef TR_8
297 #undef TR_16
298 #undef TR_32
299 
300 #undef SET
301 #undef SCALE
302 #undef ADD_AND_SCALE
303 
304 static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src,
305  ptrdiff_t stride_dst, ptrdiff_t stride_src,
306  int16_t *sao_offset_val, int sao_left_class,
307  int width, int height)
308 {
309  pixel *dst = (pixel *)_dst;
310  pixel *src = (pixel *)_src;
311  int offset_table[32] = { 0 };
312  int k, y, x;
313  int shift = BIT_DEPTH - 5;
314 
315  stride_dst /= sizeof(pixel);
316  stride_src /= sizeof(pixel);
317 
318  for (k = 0; k < 4; k++)
319  offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
320  for (y = 0; y < height; y++) {
321  for (x = 0; x < width; x++)
322  dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
323  dst += stride_dst;
324  src += stride_src;
325  }
326 }
327 
328 #define CMP(a, b) (((a) > (b)) - ((a) < (b)))
329 
330 static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
331  int eo, int width, int height) {
332 
333  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
334  static const int8_t pos[4][2][2] = {
335  { { -1, 0 }, { 1, 0 } }, // horizontal
336  { { 0, -1 }, { 0, 1 } }, // vertical
337  { { -1, -1 }, { 1, 1 } }, // 45 degree
338  { { 1, -1 }, { -1, 1 } }, // 135 degree
339  };
340  pixel *dst = (pixel *)_dst;
341  pixel *src = (pixel *)_src;
342  int a_stride, b_stride;
343  int x, y;
344  ptrdiff_t stride_src = (2*MAX_PB_SIZE + FF_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel);
345  stride_dst /= sizeof(pixel);
346 
347  a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
348  b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src;
349  for (y = 0; y < height; y++) {
350  for (x = 0; x < width; x++) {
351  int diff0 = CMP(src[x], src[x + a_stride]);
352  int diff1 = CMP(src[x], src[x + b_stride]);
353  int offset_val = edge_idx[2 + diff0 + diff1];
354  dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]);
355  }
356  src += stride_src;
357  dst += stride_dst;
358  }
359 }
360 
361 static void FUNC(sao_edge_restore_0)(uint8_t *_dst, uint8_t *_src,
362  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
363  int *borders, int _width, int _height,
364  int c_idx, uint8_t *vert_edge,
365  uint8_t *horiz_edge, uint8_t *diag_edge)
366 {
367  int x, y;
368  pixel *dst = (pixel *)_dst;
369  pixel *src = (pixel *)_src;
370  int16_t *sao_offset_val = sao->offset_val[c_idx];
371  int sao_eo_class = sao->eo_class[c_idx];
372  int init_x = 0, width = _width, height = _height;
373 
374  stride_dst /= sizeof(pixel);
375  stride_src /= sizeof(pixel);
376 
377  if (sao_eo_class != SAO_EO_VERT) {
378  if (borders[0]) {
379  int offset_val = sao_offset_val[0];
380  for (y = 0; y < height; y++) {
381  dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
382  }
383  init_x = 1;
384  }
385  if (borders[2]) {
386  int offset_val = sao_offset_val[0];
387  int offset = width - 1;
388  for (x = 0; x < height; x++) {
389  dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
390  }
391  width--;
392  }
393  }
394  if (sao_eo_class != SAO_EO_HORIZ) {
395  if (borders[1]) {
396  int offset_val = sao_offset_val[0];
397  for (x = init_x; x < width; x++)
398  dst[x] = av_clip_pixel(src[x] + offset_val);
399  }
400  if (borders[3]) {
401  int offset_val = sao_offset_val[0];
402  int y_stride_dst = stride_dst * (height - 1);
403  int y_stride_src = stride_src * (height - 1);
404  for (x = init_x; x < width; x++)
405  dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
406  height--;
407  }
408  }
409 }
410 
411 static void FUNC(sao_edge_restore_1)(uint8_t *_dst, uint8_t *_src,
412  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
413  int *borders, int _width, int _height,
414  int c_idx, uint8_t *vert_edge,
415  uint8_t *horiz_edge, uint8_t *diag_edge)
416 {
417  int x, y;
418  pixel *dst = (pixel *)_dst;
419  pixel *src = (pixel *)_src;
420  int16_t *sao_offset_val = sao->offset_val[c_idx];
421  int sao_eo_class = sao->eo_class[c_idx];
422  int init_x = 0, init_y = 0, width = _width, height = _height;
423 
424  stride_dst /= sizeof(pixel);
425  stride_src /= sizeof(pixel);
426 
427  if (sao_eo_class != SAO_EO_VERT) {
428  if (borders[0]) {
429  int offset_val = sao_offset_val[0];
430  for (y = 0; y < height; y++) {
431  dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
432  }
433  init_x = 1;
434  }
435  if (borders[2]) {
436  int offset_val = sao_offset_val[0];
437  int offset = width - 1;
438  for (x = 0; x < height; x++) {
439  dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
440  }
441  width--;
442  }
443  }
444  if (sao_eo_class != SAO_EO_HORIZ) {
445  if (borders[1]) {
446  int offset_val = sao_offset_val[0];
447  for (x = init_x; x < width; x++)
448  dst[x] = av_clip_pixel(src[x] + offset_val);
449  init_y = 1;
450  }
451  if (borders[3]) {
452  int offset_val = sao_offset_val[0];
453  int y_stride_dst = stride_dst * (height - 1);
454  int y_stride_src = stride_src * (height - 1);
455  for (x = init_x; x < width; x++)
456  dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
457  height--;
458  }
459  }
460 
461  {
462  int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
463  int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D && !borders[1] && !borders[2];
464  int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
465  int save_lower_left = !diag_edge[3] && sao_eo_class == SAO_EO_45D && !borders[0] && !borders[3];
466 
467  // Restore pixels that can't be modified
468  if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
469  for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
470  dst[y*stride_dst] = src[y*stride_src];
471  }
472  if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
473  for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
474  dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
475  }
476 
477  if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
478  for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
479  dst[x] = src[x];
480  }
481  if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
482  for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
483  dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
484  }
485  if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
486  dst[0] = src[0];
487  if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
488  dst[width-1] = src[width-1];
489  if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
490  dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
491  if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
492  dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
493 
494  }
495 }
496 
497 #undef CMP
498 
499 ////////////////////////////////////////////////////////////////////////////////
500 // Whole-sample ("pel") prediction: put_hevc_pel_* (no interpolation filter)
501 ////////////////////////////////////////////////////////////////////////////////
502 static void FUNC(put_hevc_pel_pixels)(int16_t *dst,
503  uint8_t *_src, ptrdiff_t _srcstride,
504  int height, intptr_t mx, intptr_t my, int width)
505 {
506  int x, y;
507  pixel *src = (pixel *)_src;
508  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
509 
510  for (y = 0; y < height; y++) {
511  for (x = 0; x < width; x++)
512  dst[x] = src[x] << (14 - BIT_DEPTH);
513  src += srcstride;
514  dst += MAX_PB_SIZE;
515  }
516 }
517 
518 static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
519  int height, intptr_t mx, intptr_t my, int width)
520 {
521  int y;
522  pixel *src = (pixel *)_src;
523  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
524  pixel *dst = (pixel *)_dst;
525  ptrdiff_t dststride = _dststride / sizeof(pixel);
526 
527  for (y = 0; y < height; y++) {
528  memcpy(dst, src, width * sizeof(pixel));
529  src += srcstride;
530  dst += dststride;
531  }
532 }
533 
534 static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
535  int16_t *src2,
536  int height, intptr_t mx, intptr_t my, int width)
537 {
538  int x, y;
539  pixel *src = (pixel *)_src;
540  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
541  pixel *dst = (pixel *)_dst;
542  ptrdiff_t dststride = _dststride / sizeof(pixel);
543 
544  int shift = 14 + 1 - BIT_DEPTH;
545 #if BIT_DEPTH < 14
546  int offset = 1 << (shift - 1);
547 #else
548  int offset = 0;
549 #endif
550 
551  for (y = 0; y < height; y++) {
552  for (x = 0; x < width; x++)
553  dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
554  src += srcstride;
555  dst += dststride;
556  src2 += MAX_PB_SIZE;
557  }
558 }
559 
560 static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
561  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
562 {
563  int x, y;
564  pixel *src = (pixel *)_src;
565  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
566  pixel *dst = (pixel *)_dst;
567  ptrdiff_t dststride = _dststride / sizeof(pixel);
568  int shift = denom + 14 - BIT_DEPTH;
569 #if BIT_DEPTH < 14
570  int offset = 1 << (shift - 1);
571 #else
572  int offset = 0;
573 #endif
574 
575  ox = ox * (1 << (BIT_DEPTH - 8));
576  for (y = 0; y < height; y++) {
577  for (x = 0; x < width; x++)
578  dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
579  src += srcstride;
580  dst += dststride;
581  }
582 }
583 
584 static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
585  int16_t *src2,
586  int height, int denom, int wx0, int wx1,
587  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
588 {
589  int x, y;
590  pixel *src = (pixel *)_src;
591  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
592  pixel *dst = (pixel *)_dst;
593  ptrdiff_t dststride = _dststride / sizeof(pixel);
594 
595  int shift = 14 + 1 - BIT_DEPTH;
596  int log2Wd = denom + shift - 1;
597 
598  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
599  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
600  for (y = 0; y < height; y++) {
601  for (x = 0; x < width; x++) {
602  dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
603  }
604  src += srcstride;
605  dst += dststride;
606  src2 += MAX_PB_SIZE;
607  }
608 }
609 
610 ////////////////////////////////////////////////////////////////////////////////
611 // Quarter-pel ("qpel") interpolation: put_hevc_qpel_* built on the 8-tap QPEL_FILTER
612 ////////////////////////////////////////////////////////////////////////////////
/*
 * 8-tap filter sum around position x of src.
 *
 * The taps are src[x - 3*stride] ... src[x + 4*stride], weighted by
 * filter[0] ... filter[7]. Use stride 1 to filter along a row, or the line
 * pitch to filter down a column.
 *
 * The macro reads the variables `filter` and `x` from the scope in which it
 * is expanded.
 */
613 #define QPEL_FILTER(src, stride) \
614  (filter[0] * src[x - 3 * stride] + \
615  filter[1] * src[x - 2 * stride] + \
616  filter[2] * src[x - stride] + \
617  filter[3] * src[x ] + \
618  filter[4] * src[x + stride] + \
619  filter[5] * src[x + 2 * stride] + \
620  filter[6] * src[x + 3 * stride] + \
621  filter[7] * src[x + 4 * stride])
622 
623 static void FUNC(put_hevc_qpel_h)(int16_t *dst,
624  uint8_t *_src, ptrdiff_t _srcstride,
625  int height, intptr_t mx, intptr_t my, int width)
626 {
627  int x, y;
628  pixel *src = (pixel*)_src;
629  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
630  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
631  for (y = 0; y < height; y++) {
632  for (x = 0; x < width; x++)
633  dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
634  src += srcstride;
635  dst += MAX_PB_SIZE;
636  }
637 }
638 
639 static void FUNC(put_hevc_qpel_v)(int16_t *dst,
640  uint8_t *_src, ptrdiff_t _srcstride,
641  int height, intptr_t mx, intptr_t my, int width)
642 {
643  int x, y;
644  pixel *src = (pixel*)_src;
645  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
646  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
647  for (y = 0; y < height; y++) {
648  for (x = 0; x < width; x++)
649  dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
650  src += srcstride;
651  dst += MAX_PB_SIZE;
652  }
653 }
654 
655 static void FUNC(put_hevc_qpel_hv)(int16_t *dst,
656  uint8_t *_src,
657  ptrdiff_t _srcstride,
658  int height, intptr_t mx,
659  intptr_t my, int width)
660 {
661  int x, y;
662  const int8_t *filter;
663  pixel *src = (pixel*)_src;
664  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
665  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
666  int16_t *tmp = tmp_array;
667 
668  src -= QPEL_EXTRA_BEFORE * srcstride;
669  filter = ff_hevc_qpel_filters[mx - 1];
670  for (y = 0; y < height + QPEL_EXTRA; y++) {
671  for (x = 0; x < width; x++)
672  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
673  src += srcstride;
674  tmp += MAX_PB_SIZE;
675  }
676 
677  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
678  filter = ff_hevc_qpel_filters[my - 1];
679  for (y = 0; y < height; y++) {
680  for (x = 0; x < width; x++)
681  dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
682  tmp += MAX_PB_SIZE;
683  dst += MAX_PB_SIZE;
684  }
685 }
686 
687 static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride,
688  uint8_t *_src, ptrdiff_t _srcstride,
689  int height, intptr_t mx, intptr_t my, int width)
690 {
691  int x, y;
692  pixel *src = (pixel*)_src;
693  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
694  pixel *dst = (pixel *)_dst;
695  ptrdiff_t dststride = _dststride / sizeof(pixel);
696  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
697  int shift = 14 - BIT_DEPTH;
698 
699 #if BIT_DEPTH < 14
700  int offset = 1 << (shift - 1);
701 #else
702  int offset = 0;
703 #endif
704 
705  for (y = 0; y < height; y++) {
706  for (x = 0; x < width; x++)
707  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
708  src += srcstride;
709  dst += dststride;
710  }
711 }
712 
713 static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
714  int16_t *src2,
715  int height, intptr_t mx, intptr_t my, int width)
716 {
717  int x, y;
718  pixel *src = (pixel*)_src;
719  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
720  pixel *dst = (pixel *)_dst;
721  ptrdiff_t dststride = _dststride / sizeof(pixel);
722 
723  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
724 
725  int shift = 14 + 1 - BIT_DEPTH;
726 #if BIT_DEPTH < 14
727  int offset = 1 << (shift - 1);
728 #else
729  int offset = 0;
730 #endif
731 
732  for (y = 0; y < height; y++) {
733  for (x = 0; x < width; x++)
734  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
735  src += srcstride;
736  dst += dststride;
737  src2 += MAX_PB_SIZE;
738  }
739 }
740 
741 static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride,
742  uint8_t *_src, ptrdiff_t _srcstride,
743  int height, intptr_t mx, intptr_t my, int width)
744 {
745  int x, y;
746  pixel *src = (pixel*)_src;
747  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
748  pixel *dst = (pixel *)_dst;
749  ptrdiff_t dststride = _dststride / sizeof(pixel);
750  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
751  int shift = 14 - BIT_DEPTH;
752 
753 #if BIT_DEPTH < 14
754  int offset = 1 << (shift - 1);
755 #else
756  int offset = 0;
757 #endif
758 
759  for (y = 0; y < height; y++) {
760  for (x = 0; x < width; x++)
761  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
762  src += srcstride;
763  dst += dststride;
764  }
765 }
766 
767 
768 static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
769  int16_t *src2,
770  int height, intptr_t mx, intptr_t my, int width)
771 {
772  int x, y;
773  pixel *src = (pixel*)_src;
774  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
775  pixel *dst = (pixel *)_dst;
776  ptrdiff_t dststride = _dststride / sizeof(pixel);
777 
778  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
779 
780  int shift = 14 + 1 - BIT_DEPTH;
781 #if BIT_DEPTH < 14
782  int offset = 1 << (shift - 1);
783 #else
784  int offset = 0;
785 #endif
786 
787  for (y = 0; y < height; y++) {
788  for (x = 0; x < width; x++)
789  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
790  src += srcstride;
791  dst += dststride;
792  src2 += MAX_PB_SIZE;
793  }
794 }
795 
796 static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride,
797  uint8_t *_src, ptrdiff_t _srcstride,
798  int height, intptr_t mx, intptr_t my, int width)
799 {
800  int x, y;
801  const int8_t *filter;
802  pixel *src = (pixel*)_src;
803  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
804  pixel *dst = (pixel *)_dst;
805  ptrdiff_t dststride = _dststride / sizeof(pixel);
806  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
807  int16_t *tmp = tmp_array;
808  int shift = 14 - BIT_DEPTH;
809 
810 #if BIT_DEPTH < 14
811  int offset = 1 << (shift - 1);
812 #else
813  int offset = 0;
814 #endif
815 
816  src -= QPEL_EXTRA_BEFORE * srcstride;
817  filter = ff_hevc_qpel_filters[mx - 1];
818  for (y = 0; y < height + QPEL_EXTRA; y++) {
819  for (x = 0; x < width; x++)
820  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
821  src += srcstride;
822  tmp += MAX_PB_SIZE;
823  }
824 
825  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
826  filter = ff_hevc_qpel_filters[my - 1];
827 
828  for (y = 0; y < height; y++) {
829  for (x = 0; x < width; x++)
830  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
831  tmp += MAX_PB_SIZE;
832  dst += dststride;
833  }
834 }
835 
836 static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
837  int16_t *src2,
838  int height, intptr_t mx, intptr_t my, int width)
839 {
840  int x, y;
841  const int8_t *filter;
842  pixel *src = (pixel*)_src;
843  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
844  pixel *dst = (pixel *)_dst;
845  ptrdiff_t dststride = _dststride / sizeof(pixel);
846  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
847  int16_t *tmp = tmp_array;
848  int shift = 14 + 1 - BIT_DEPTH;
849 #if BIT_DEPTH < 14
850  int offset = 1 << (shift - 1);
851 #else
852  int offset = 0;
853 #endif
854 
855  src -= QPEL_EXTRA_BEFORE * srcstride;
856  filter = ff_hevc_qpel_filters[mx - 1];
857  for (y = 0; y < height + QPEL_EXTRA; y++) {
858  for (x = 0; x < width; x++)
859  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
860  src += srcstride;
861  tmp += MAX_PB_SIZE;
862  }
863 
864  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
865  filter = ff_hevc_qpel_filters[my - 1];
866 
867  for (y = 0; y < height; y++) {
868  for (x = 0; x < width; x++)
869  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
870  tmp += MAX_PB_SIZE;
871  dst += dststride;
872  src2 += MAX_PB_SIZE;
873  }
874 }
875 
876 static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
877  uint8_t *_src, ptrdiff_t _srcstride,
878  int height, int denom, int wx, int ox,
879  intptr_t mx, intptr_t my, int width)
880 {
881  int x, y;
882  pixel *src = (pixel*)_src;
883  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
884  pixel *dst = (pixel *)_dst;
885  ptrdiff_t dststride = _dststride / sizeof(pixel);
886  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
887  int shift = denom + 14 - BIT_DEPTH;
888 #if BIT_DEPTH < 14
889  int offset = 1 << (shift - 1);
890 #else
891  int offset = 0;
892 #endif
893 
894  ox = ox * (1 << (BIT_DEPTH - 8));
895  for (y = 0; y < height; y++) {
896  for (x = 0; x < width; x++)
897  dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
898  src += srcstride;
899  dst += dststride;
900  }
901 }
902 
903 static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
904  int16_t *src2,
905  int height, int denom, int wx0, int wx1,
906  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
907 {
908  int x, y;
909  pixel *src = (pixel*)_src;
910  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
911  pixel *dst = (pixel *)_dst;
912  ptrdiff_t dststride = _dststride / sizeof(pixel);
913 
914  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
915 
916  int shift = 14 + 1 - BIT_DEPTH;
917  int log2Wd = denom + shift - 1;
918 
919  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
920  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
921  for (y = 0; y < height; y++) {
922  for (x = 0; x < width; x++)
923  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
924  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
925  src += srcstride;
926  dst += dststride;
927  src2 += MAX_PB_SIZE;
928  }
929 }
930 
931 static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
932  uint8_t *_src, ptrdiff_t _srcstride,
933  int height, int denom, int wx, int ox,
934  intptr_t mx, intptr_t my, int width)
935 {
936  int x, y;
937  pixel *src = (pixel*)_src;
938  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
939  pixel *dst = (pixel *)_dst;
940  ptrdiff_t dststride = _dststride / sizeof(pixel);
941  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
942  int shift = denom + 14 - BIT_DEPTH;
943 #if BIT_DEPTH < 14
944  int offset = 1 << (shift - 1);
945 #else
946  int offset = 0;
947 #endif
948 
949  ox = ox * (1 << (BIT_DEPTH - 8));
950  for (y = 0; y < height; y++) {
951  for (x = 0; x < width; x++)
952  dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
953  src += srcstride;
954  dst += dststride;
955  }
956 }
957 
958 static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
959  int16_t *src2,
960  int height, int denom, int wx0, int wx1,
961  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
962 {
963  int x, y;
964  pixel *src = (pixel*)_src;
965  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
966  pixel *dst = (pixel *)_dst;
967  ptrdiff_t dststride = _dststride / sizeof(pixel);
968 
969  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
970 
971  int shift = 14 + 1 - BIT_DEPTH;
972  int log2Wd = denom + shift - 1;
973 
974  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
975  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
976  for (y = 0; y < height; y++) {
977  for (x = 0; x < width; x++)
978  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
979  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
980  src += srcstride;
981  dst += dststride;
982  src2 += MAX_PB_SIZE;
983  }
984 }
985 
986 static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
987  uint8_t *_src, ptrdiff_t _srcstride,
988  int height, int denom, int wx, int ox,
989  intptr_t mx, intptr_t my, int width)
990 {
991  int x, y;
992  const int8_t *filter;
993  pixel *src = (pixel*)_src;
994  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
995  pixel *dst = (pixel *)_dst;
996  ptrdiff_t dststride = _dststride / sizeof(pixel);
997  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
998  int16_t *tmp = tmp_array;
999  int shift = denom + 14 - BIT_DEPTH;
1000 #if BIT_DEPTH < 14
1001  int offset = 1 << (shift - 1);
1002 #else
1003  int offset = 0;
1004 #endif
1005 
1006  src -= QPEL_EXTRA_BEFORE * srcstride;
1007  filter = ff_hevc_qpel_filters[mx - 1];
1008  for (y = 0; y < height + QPEL_EXTRA; y++) {
1009  for (x = 0; x < width; x++)
1010  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1011  src += srcstride;
1012  tmp += MAX_PB_SIZE;
1013  }
1014 
1015  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1016  filter = ff_hevc_qpel_filters[my - 1];
1017 
1018  ox = ox * (1 << (BIT_DEPTH - 8));
1019  for (y = 0; y < height; y++) {
1020  for (x = 0; x < width; x++)
1021  dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1022  tmp += MAX_PB_SIZE;
1023  dst += dststride;
1024  }
1025 }
1026 
1027 static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1028  int16_t *src2,
1029  int height, int denom, int wx0, int wx1,
1030  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1031 {
1032  int x, y;
1033  const int8_t *filter;
1034  pixel *src = (pixel*)_src;
1035  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1036  pixel *dst = (pixel *)_dst;
1037  ptrdiff_t dststride = _dststride / sizeof(pixel);
1038  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
1039  int16_t *tmp = tmp_array;
1040  int shift = 14 + 1 - BIT_DEPTH;
1041  int log2Wd = denom + shift - 1;
1042 
1043  src -= QPEL_EXTRA_BEFORE * srcstride;
1044  filter = ff_hevc_qpel_filters[mx - 1];
1045  for (y = 0; y < height + QPEL_EXTRA; y++) {
1046  for (x = 0; x < width; x++)
1047  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1048  src += srcstride;
1049  tmp += MAX_PB_SIZE;
1050  }
1051 
1052  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1053  filter = ff_hevc_qpel_filters[my - 1];
1054 
1055  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1056  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1057  for (y = 0; y < height; y++) {
1058  for (x = 0; x < width; x++)
1059  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1060  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1061  tmp += MAX_PB_SIZE;
1062  dst += dststride;
1063  src2 += MAX_PB_SIZE;
1064  }
1065 }
1066 
////////////////////////////////////////////////////////////////////////////////
// 4-tap EPEL interpolation
////////////////////////////////////////////////////////////////////////////////
/*
 * Weighted sum of four samples around src[x], spaced `stride` apart
 * (one before, the current one, and two after).
 *
 * Relies on two names from the expansion site: `filter` (the four
 * coefficients) and `x` (the current column).  Both arguments are
 * parenthesised so that compound expressions expand correctly.
 */
#define EPEL_FILTER(src, stride)                                               \
    (filter[0] * (src)[x - (stride)] +                                         \
     filter[1] * (src)[x] +                                                    \
     filter[2] * (src)[x + (stride)] +                                         \
     filter[3] * (src)[x + 2 * (stride)])
1075 
1076 static void FUNC(put_hevc_epel_h)(int16_t *dst,
1077  uint8_t *_src, ptrdiff_t _srcstride,
1078  int height, intptr_t mx, intptr_t my, int width)
1079 {
1080  int x, y;
1081  pixel *src = (pixel *)_src;
1082  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1083  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1084  for (y = 0; y < height; y++) {
1085  for (x = 0; x < width; x++)
1086  dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1087  src += srcstride;
1088  dst += MAX_PB_SIZE;
1089  }
1090 }
1091 
1092 static void FUNC(put_hevc_epel_v)(int16_t *dst,
1093  uint8_t *_src, ptrdiff_t _srcstride,
1094  int height, intptr_t mx, intptr_t my, int width)
1095 {
1096  int x, y;
1097  pixel *src = (pixel *)_src;
1098  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1099  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1100 
1101  for (y = 0; y < height; y++) {
1102  for (x = 0; x < width; x++)
1103  dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
1104  src += srcstride;
1105  dst += MAX_PB_SIZE;
1106  }
1107 }
1108 
1109 static void FUNC(put_hevc_epel_hv)(int16_t *dst,
1110  uint8_t *_src, ptrdiff_t _srcstride,
1111  int height, intptr_t mx, intptr_t my, int width)
1112 {
1113  int x, y;
1114  pixel *src = (pixel *)_src;
1115  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1116  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1117  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1118  int16_t *tmp = tmp_array;
1119 
1120  src -= EPEL_EXTRA_BEFORE * srcstride;
1121 
1122  for (y = 0; y < height + EPEL_EXTRA; y++) {
1123  for (x = 0; x < width; x++)
1124  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1125  src += srcstride;
1126  tmp += MAX_PB_SIZE;
1127  }
1128 
1129  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1130  filter = ff_hevc_epel_filters[my - 1];
1131 
1132  for (y = 0; y < height; y++) {
1133  for (x = 0; x < width; x++)
1134  dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
1135  tmp += MAX_PB_SIZE;
1136  dst += MAX_PB_SIZE;
1137  }
1138 }
1139 
1140 static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1141  int height, intptr_t mx, intptr_t my, int width)
1142 {
1143  int x, y;
1144  pixel *src = (pixel *)_src;
1145  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1146  pixel *dst = (pixel *)_dst;
1147  ptrdiff_t dststride = _dststride / sizeof(pixel);
1148  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1149  int shift = 14 - BIT_DEPTH;
1150 #if BIT_DEPTH < 14
1151  int offset = 1 << (shift - 1);
1152 #else
1153  int offset = 0;
1154 #endif
1155 
1156  for (y = 0; y < height; y++) {
1157  for (x = 0; x < width; x++)
1158  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
1159  src += srcstride;
1160  dst += dststride;
1161  }
1162 }
1163 
1164 static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1165  int16_t *src2,
1166  int height, intptr_t mx, intptr_t my, int width)
1167 {
1168  int x, y;
1169  pixel *src = (pixel *)_src;
1170  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1171  pixel *dst = (pixel *)_dst;
1172  ptrdiff_t dststride = _dststride / sizeof(pixel);
1173  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1174  int shift = 14 + 1 - BIT_DEPTH;
1175 #if BIT_DEPTH < 14
1176  int offset = 1 << (shift - 1);
1177 #else
1178  int offset = 0;
1179 #endif
1180 
1181  for (y = 0; y < height; y++) {
1182  for (x = 0; x < width; x++) {
1183  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1184  }
1185  dst += dststride;
1186  src += srcstride;
1187  src2 += MAX_PB_SIZE;
1188  }
1189 }
1190 
1191 static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1192  int height, intptr_t mx, intptr_t my, int width)
1193 {
1194  int x, y;
1195  pixel *src = (pixel *)_src;
1196  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1197  pixel *dst = (pixel *)_dst;
1198  ptrdiff_t dststride = _dststride / sizeof(pixel);
1199  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1200  int shift = 14 - BIT_DEPTH;
1201 #if BIT_DEPTH < 14
1202  int offset = 1 << (shift - 1);
1203 #else
1204  int offset = 0;
1205 #endif
1206 
1207  for (y = 0; y < height; y++) {
1208  for (x = 0; x < width; x++)
1209  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
1210  src += srcstride;
1211  dst += dststride;
1212  }
1213 }
1214 
1215 static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1216  int16_t *src2,
1217  int height, intptr_t mx, intptr_t my, int width)
1218 {
1219  int x, y;
1220  pixel *src = (pixel *)_src;
1221  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1222  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1223  pixel *dst = (pixel *)_dst;
1224  ptrdiff_t dststride = _dststride / sizeof(pixel);
1225  int shift = 14 + 1 - BIT_DEPTH;
1226 #if BIT_DEPTH < 14
1227  int offset = 1 << (shift - 1);
1228 #else
1229  int offset = 0;
1230 #endif
1231 
1232  for (y = 0; y < height; y++) {
1233  for (x = 0; x < width; x++)
1234  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1235  dst += dststride;
1236  src += srcstride;
1237  src2 += MAX_PB_SIZE;
1238  }
1239 }
1240 
1241 static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1242  int height, intptr_t mx, intptr_t my, int width)
1243 {
1244  int x, y;
1245  pixel *src = (pixel *)_src;
1246  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1247  pixel *dst = (pixel *)_dst;
1248  ptrdiff_t dststride = _dststride / sizeof(pixel);
1249  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1250  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1251  int16_t *tmp = tmp_array;
1252  int shift = 14 - BIT_DEPTH;
1253 #if BIT_DEPTH < 14
1254  int offset = 1 << (shift - 1);
1255 #else
1256  int offset = 0;
1257 #endif
1258 
1259  src -= EPEL_EXTRA_BEFORE * srcstride;
1260 
1261  for (y = 0; y < height + EPEL_EXTRA; y++) {
1262  for (x = 0; x < width; x++)
1263  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1264  src += srcstride;
1265  tmp += MAX_PB_SIZE;
1266  }
1267 
1268  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1269  filter = ff_hevc_epel_filters[my - 1];
1270 
1271  for (y = 0; y < height; y++) {
1272  for (x = 0; x < width; x++)
1273  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
1274  tmp += MAX_PB_SIZE;
1275  dst += dststride;
1276  }
1277 }
1278 
1279 static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1280  int16_t *src2,
1281  int height, intptr_t mx, intptr_t my, int width)
1282 {
1283  int x, y;
1284  pixel *src = (pixel *)_src;
1285  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1286  pixel *dst = (pixel *)_dst;
1287  ptrdiff_t dststride = _dststride / sizeof(pixel);
1288  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1289  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1290  int16_t *tmp = tmp_array;
1291  int shift = 14 + 1 - BIT_DEPTH;
1292 #if BIT_DEPTH < 14
1293  int offset = 1 << (shift - 1);
1294 #else
1295  int offset = 0;
1296 #endif
1297 
1298  src -= EPEL_EXTRA_BEFORE * srcstride;
1299 
1300  for (y = 0; y < height + EPEL_EXTRA; y++) {
1301  for (x = 0; x < width; x++)
1302  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1303  src += srcstride;
1304  tmp += MAX_PB_SIZE;
1305  }
1306 
1307  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1308  filter = ff_hevc_epel_filters[my - 1];
1309 
1310  for (y = 0; y < height; y++) {
1311  for (x = 0; x < width; x++)
1312  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
1313  tmp += MAX_PB_SIZE;
1314  dst += dststride;
1315  src2 += MAX_PB_SIZE;
1316  }
1317 }
1318 
1319 static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1320  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1321 {
1322  int x, y;
1323  pixel *src = (pixel *)_src;
1324  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1325  pixel *dst = (pixel *)_dst;
1326  ptrdiff_t dststride = _dststride / sizeof(pixel);
1327  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1328  int shift = denom + 14 - BIT_DEPTH;
1329 #if BIT_DEPTH < 14
1330  int offset = 1 << (shift - 1);
1331 #else
1332  int offset = 0;
1333 #endif
1334 
1335  ox = ox * (1 << (BIT_DEPTH - 8));
1336  for (y = 0; y < height; y++) {
1337  for (x = 0; x < width; x++) {
1338  dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1339  }
1340  dst += dststride;
1341  src += srcstride;
1342  }
1343 }
1344 
1345 static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1346  int16_t *src2,
1347  int height, int denom, int wx0, int wx1,
1348  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1349 {
1350  int x, y;
1351  pixel *src = (pixel *)_src;
1352  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1353  pixel *dst = (pixel *)_dst;
1354  ptrdiff_t dststride = _dststride / sizeof(pixel);
1355  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1356  int shift = 14 + 1 - BIT_DEPTH;
1357  int log2Wd = denom + shift - 1;
1358 
1359  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1360  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1361  for (y = 0; y < height; y++) {
1362  for (x = 0; x < width; x++)
1363  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1364  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1365  src += srcstride;
1366  dst += dststride;
1367  src2 += MAX_PB_SIZE;
1368  }
1369 }
1370 
1371 static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1372  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1373 {
1374  int x, y;
1375  pixel *src = (pixel *)_src;
1376  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1377  pixel *dst = (pixel *)_dst;
1378  ptrdiff_t dststride = _dststride / sizeof(pixel);
1379  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1380  int shift = denom + 14 - BIT_DEPTH;
1381 #if BIT_DEPTH < 14
1382  int offset = 1 << (shift - 1);
1383 #else
1384  int offset = 0;
1385 #endif
1386 
1387  ox = ox * (1 << (BIT_DEPTH - 8));
1388  for (y = 0; y < height; y++) {
1389  for (x = 0; x < width; x++) {
1390  dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1391  }
1392  dst += dststride;
1393  src += srcstride;
1394  }
1395 }
1396 
1397 static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1398  int16_t *src2,
1399  int height, int denom, int wx0, int wx1,
1400  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1401 {
1402  int x, y;
1403  pixel *src = (pixel *)_src;
1404  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1405  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1406  pixel *dst = (pixel *)_dst;
1407  ptrdiff_t dststride = _dststride / sizeof(pixel);
1408  int shift = 14 + 1 - BIT_DEPTH;
1409  int log2Wd = denom + shift - 1;
1410 
1411  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1412  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1413  for (y = 0; y < height; y++) {
1414  for (x = 0; x < width; x++)
1415  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1416  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1417  src += srcstride;
1418  dst += dststride;
1419  src2 += MAX_PB_SIZE;
1420  }
1421 }
1422 
1423 static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1424  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1425 {
1426  int x, y;
1427  pixel *src = (pixel *)_src;
1428  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1429  pixel *dst = (pixel *)_dst;
1430  ptrdiff_t dststride = _dststride / sizeof(pixel);
1431  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1432  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1433  int16_t *tmp = tmp_array;
1434  int shift = denom + 14 - BIT_DEPTH;
1435 #if BIT_DEPTH < 14
1436  int offset = 1 << (shift - 1);
1437 #else
1438  int offset = 0;
1439 #endif
1440 
1441  src -= EPEL_EXTRA_BEFORE * srcstride;
1442 
1443  for (y = 0; y < height + EPEL_EXTRA; y++) {
1444  for (x = 0; x < width; x++)
1445  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1446  src += srcstride;
1447  tmp += MAX_PB_SIZE;
1448  }
1449 
1450  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1451  filter = ff_hevc_epel_filters[my - 1];
1452 
1453  ox = ox * (1 << (BIT_DEPTH - 8));
1454  for (y = 0; y < height; y++) {
1455  for (x = 0; x < width; x++)
1456  dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1457  tmp += MAX_PB_SIZE;
1458  dst += dststride;
1459  }
1460 }
1461 
1462 static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1463  int16_t *src2,
1464  int height, int denom, int wx0, int wx1,
1465  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1466 {
1467  int x, y;
1468  pixel *src = (pixel *)_src;
1469  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1470  pixel *dst = (pixel *)_dst;
1471  ptrdiff_t dststride = _dststride / sizeof(pixel);
1472  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1473  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1474  int16_t *tmp = tmp_array;
1475  int shift = 14 + 1 - BIT_DEPTH;
1476  int log2Wd = denom + shift - 1;
1477 
1478  src -= EPEL_EXTRA_BEFORE * srcstride;
1479 
1480  for (y = 0; y < height + EPEL_EXTRA; y++) {
1481  for (x = 0; x < width; x++)
1482  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1483  src += srcstride;
1484  tmp += MAX_PB_SIZE;
1485  }
1486 
1487  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1488  filter = ff_hevc_epel_filters[my - 1];
1489 
1490  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1491  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1492  for (y = 0; y < height; y++) {
1493  for (x = 0; x < width; x++)
1494  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1495  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1496  tmp += MAX_PB_SIZE;
1497  dst += dststride;
1498  src2 += MAX_PB_SIZE;
1499  }
1500 }// line zero
/*
 * Sample accessors for the loop filters.  They expand to elements of the
 * local `pix` pointer and rely on `xstride` / `ystride` being in scope.
 * Pn sits (n + 1) * xstride before pix, Qn sits n * xstride after it.
 */
#define P3 (pix[-4 * xstride])
#define P2 (pix[-3 * xstride])
#define P1 (pix[-2 * xstride])
#define P0 (pix[-1 * xstride])
#define Q0 (pix[0 * xstride])
#define Q1 (pix[1 * xstride])
#define Q2 (pix[2 * xstride])
#define Q3 (pix[3 * xstride])

/* Same taps three lines further along (3 * ystride); used only for the
 * deblocking decision. */
#define TP3 (pix[3 * ystride - 4 * xstride])
#define TP2 (pix[3 * ystride - 3 * xstride])
#define TP1 (pix[3 * ystride - 2 * xstride])
#define TP0 (pix[3 * ystride - 1 * xstride])
#define TQ0 (pix[3 * ystride + 0 * xstride])
#define TQ1 (pix[3 * ystride + 1 * xstride])
#define TQ2 (pix[3 * ystride + 2 * xstride])
#define TQ3 (pix[3 * ystride + 3 * xstride])
1519 
1521  ptrdiff_t _xstride, ptrdiff_t _ystride,
1522  int beta, int *_tc,
1523  uint8_t *_no_p, uint8_t *_no_q)
1524 {
1525  int d, j;
1526  pixel *pix = (pixel *)_pix;
1527  ptrdiff_t xstride = _xstride / sizeof(pixel);
1528  ptrdiff_t ystride = _ystride / sizeof(pixel);
1529 
1530  beta <<= BIT_DEPTH - 8;
1531 
1532  for (j = 0; j < 2; j++) {
1533  const int dp0 = abs(P2 - 2 * P1 + P0);
1534  const int dq0 = abs(Q2 - 2 * Q1 + Q0);
1535  const int dp3 = abs(TP2 - 2 * TP1 + TP0);
1536  const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
1537  const int d0 = dp0 + dq0;
1538  const int d3 = dp3 + dq3;
1539  const int tc = _tc[j] << (BIT_DEPTH - 8);
1540  const int no_p = _no_p[j];
1541  const int no_q = _no_q[j];
1542 
1543  if (d0 + d3 >= beta) {
1544  pix += 4 * ystride;
1545  continue;
1546  } else {
1547  const int beta_3 = beta >> 3;
1548  const int beta_2 = beta >> 2;
1549  const int tc25 = ((tc * 5 + 1) >> 1);
1550 
1551  if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
1552  abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
1553  (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
1554  // strong filtering
1555  const int tc2 = tc << 1;
1556  for (d = 0; d < 4; d++) {
1557  const int p3 = P3;
1558  const int p2 = P2;
1559  const int p1 = P1;
1560  const int p0 = P0;
1561  const int q0 = Q0;
1562  const int q1 = Q1;
1563  const int q2 = Q2;
1564  const int q3 = Q3;
1565  if (!no_p) {
1566  P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
1567  P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
1568  P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
1569  }
1570  if (!no_q) {
1571  Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
1572  Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
1573  Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
1574  }
1575  pix += ystride;
1576  }
1577  } else { // normal filtering
1578  int nd_p = 1;
1579  int nd_q = 1;
1580  const int tc_2 = tc >> 1;
1581  if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
1582  nd_p = 2;
1583  if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
1584  nd_q = 2;
1585 
1586  for (d = 0; d < 4; d++) {
1587  const int p2 = P2;
1588  const int p1 = P1;
1589  const int p0 = P0;
1590  const int q0 = Q0;
1591  const int q1 = Q1;
1592  const int q2 = Q2;
1593  int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
1594  if (abs(delta0) < 10 * tc) {
1595  delta0 = av_clip(delta0, -tc, tc);
1596  if (!no_p)
1597  P0 = av_clip_pixel(p0 + delta0);
1598  if (!no_q)
1599  Q0 = av_clip_pixel(q0 - delta0);
1600  if (!no_p && nd_p > 1) {
1601  const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
1602  P1 = av_clip_pixel(p1 + deltap1);
1603  }
1604  if (!no_q && nd_q > 1) {
1605  const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
1606  Q1 = av_clip_pixel(q1 + deltaq1);
1607  }
1608  }
1609  pix += ystride;
1610  }
1611  }
1612  }
1613  }
1614 }
1615 
1616 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
1617  ptrdiff_t _ystride, int *_tc,
1618  uint8_t *_no_p, uint8_t *_no_q)
1619 {
1620  int d, j, no_p, no_q;
1621  pixel *pix = (pixel *)_pix;
1622  ptrdiff_t xstride = _xstride / sizeof(pixel);
1623  ptrdiff_t ystride = _ystride / sizeof(pixel);
1624 
1625  for (j = 0; j < 2; j++) {
1626  const int tc = _tc[j] << (BIT_DEPTH - 8);
1627  if (tc <= 0) {
1628  pix += 4 * ystride;
1629  continue;
1630  }
1631  no_p = _no_p[j];
1632  no_q = _no_q[j];
1633 
1634  for (d = 0; d < 4; d++) {
1635  int delta0;
1636  const int p1 = P1;
1637  const int p0 = P0;
1638  const int q0 = Q0;
1639  const int q1 = Q1;
1640  delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
1641  if (!no_p)
1642  P0 = av_clip_pixel(p0 + delta0);
1643  if (!no_q)
1644  Q0 = av_clip_pixel(q0 - delta0);
1645  pix += ystride;
1646  }
1647  }
1648 }
1649 
1650 static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1651  int32_t *tc, uint8_t *no_p,
1652  uint8_t *no_q)
1653 {
1654  FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
1655 }
1656 
1657 static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1658  int32_t *tc, uint8_t *no_p,
1659  uint8_t *no_q)
1660 {
1661  FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
1662 }
1663 
1664 static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1665  int beta, int32_t *tc, uint8_t *no_p,
1666  uint8_t *no_q)
1667 {
1668  FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
1669  beta, tc, no_p, no_q);
1670 }
1671 
1672 static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1673  int beta, int32_t *tc, uint8_t *no_p,
1674  uint8_t *no_q)
1675 {
1676  FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
1677  beta, tc, no_p, no_q);
1678 }
1679 
/* Drop the loop-filter sample accessors so their short names do not leak
 * beyond this point. */
#undef Q3
#undef Q2
#undef Q1
#undef Q0
#undef P0
#undef P1
#undef P2
#undef P3

#undef TQ3
#undef TQ2
#undef TQ1
#undef TQ0
#undef TP0
#undef TP1
#undef TP2
#undef TP3