FFmpeg
vp9recon.c
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/avassert.h"
25 #include "libavutil/frame.h"
26 #include "libavutil/mem_internal.h"
27 
28 #include "progressframe.h"
29 #include "videodsp.h"
30 #include "vp9data.h"
31 #include "vp9dec.h"
32 
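/*
 * Build the top (*a) and left (l) edge arrays used by the intra predictors.
 * The prediction mode is remapped through mode_conv[] when a neighbouring
 * edge is unavailable (frame or tile border), and missing samples are
 * synthesized from fixed values: (128 << (bpp - 8)) - 1 (127 at 8 bpp) for a
 * missing top edge, (128 << (bpp - 8)) + 1 (129) for a missing left edge.
 * Returns the (possibly remapped) prediction mode.
 */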
33 static av_always_inline int check_intra_mode(VP9TileData *td, int mode, uint8_t **a,
34  uint8_t *dst_edge, ptrdiff_t stride_edge,
35  uint8_t *dst_inner, ptrdiff_t stride_inner,
36  uint8_t *l, int col, int x, int w,
37  int row, int y, enum TxfmMode tx,
38  int p, int ss_h, int ss_v, int bytesperpixel)
39 {
40  const VP9Context *s = td->s;
41  int have_top = row > 0 || y > 0;
42  int have_left = col > td->tile_col_start || x > 0;
43  int have_right = x < w - 1;
44  int bpp = s->s.h.bpp;
45  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
46  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
47  { DC_127_PRED, VERT_PRED } },
48  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
49  { HOR_PRED, HOR_PRED } },
50  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
51  { LEFT_DC_PRED, DC_PRED } },
52  [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
53  { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
54  [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
55  { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
56  [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
57  { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
58  [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
59  { HOR_DOWN_PRED, HOR_DOWN_PRED } },
60  [VERT_LEFT_PRED] = { { VERT_LEFT_PRED, VERT_LEFT_PRED },
61  { VERT_LEFT_PRED, VERT_LEFT_PRED } },
62  [HOR_UP_PRED] = { { HOR_UP_PRED, HOR_UP_PRED },
63  { HOR_UP_PRED, HOR_UP_PRED } },
64  [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
65  { HOR_PRED, TM_VP8_PRED } },
66  };
67  static const struct {
68  uint8_t needs_left:1;
69  uint8_t needs_top:1;
70  uint8_t needs_topleft:1;
71  uint8_t needs_topright:1;
72  uint8_t invert_left:1;
73  } edges[N_INTRA_PRED_MODES] = {
74  [VERT_PRED] = { .needs_top = 1 },
75  [HOR_PRED] = { .needs_left = 1 },
76  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
77  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
78  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
79  .needs_topleft = 1 },
80  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
81  .needs_topleft = 1 },
82  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1,
83  .needs_topleft = 1 },
84  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
85  [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
86  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1,
87  .needs_topleft = 1 },
88  [LEFT_DC_PRED] = { .needs_left = 1 },
89  [TOP_DC_PRED] = { .needs_top = 1 },
90  [DC_128_PRED] = { 0 },
91  [DC_127_PRED] = { 0 },
92  [DC_129_PRED] = { 0 }
93  };
94 
95  av_assert2(mode >= 0 && mode < 10);
96  mode = mode_conv[mode][have_left][have_top];
97  if (edges[mode].needs_top) {
98  uint8_t *top, *topleft;
99  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
100  int n_px_need_tr = 0;
101 
102  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
103  n_px_need_tr = 4;
104 
105  // if top of sb64-row, use s->intra_pred_data[] instead of
106  // dst[-stride] for intra prediction (it contains pre- instead of
107  // post-loopfilter data)
108  if (have_top) {
109  top = !(row & 7) && !y ?
110  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
111  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
112  if (have_left)
113  topleft = !(row & 7) && !y ?
114  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
115  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
116  &dst_inner[-stride_inner];
117  }
118 
119  if (have_top &&
120  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
121  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
122  n_px_need + n_px_need_tr <= n_px_have) {
123  *a = top;
124  } else {
125  if (have_top) {
126  if (n_px_need <= n_px_have) {
127  memcpy(*a, top, n_px_need * bytesperpixel);
128  } else {
129 #define memset_bpp(c, i1, v, i2, num) do { \
130  if (bytesperpixel == 1) { \
131  memset(&(c)[(i1)], (v)[(i2)], (num)); \
132  } else { \
133  int n, val = AV_RN16A(&(v)[(i2) * 2]); \
134  for (n = 0; n < (num); n++) { \
135  AV_WN16A(&(c)[((i1) + n) * 2], val); \
136  } \
137  } \
138 } while (0)
139  memcpy(*a, top, n_px_have * bytesperpixel);
140  memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
141  }
142  } else {
143 #define memset_val(c, val, num) do { \
144  if (bytesperpixel == 1) { \
145  memset((c), (val), (num)); \
146  } else { \
147  int n; \
148  for (n = 0; n < (num); n++) { \
149  AV_WN16A(&(c)[n * 2], (val)); \
150  } \
151  } \
152 } while (0)
153  memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
154  }
155  if (edges[mode].needs_topleft) {
156  if (have_left && have_top) {
157 #define assign_bpp(c, i1, v, i2) do { \
158  if (bytesperpixel == 1) { \
159  (c)[(i1)] = (v)[(i2)]; \
160  } else { \
161  AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
162  } \
163 } while (0)
164  assign_bpp(*a, -1, topleft, -1);
165  } else {
166 #define assign_val(c, i, v) do { \
167  if (bytesperpixel == 1) { \
168  (c)[(i)] = (v); \
169  } else { \
170  AV_WN16A(&(c)[(i) * 2], (v)); \
171  } \
172 } while (0)
173  assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
174  }
175  }
176  if (tx == TX_4X4 && edges[mode].needs_topright) {
177  if (have_top && have_right &&
178  n_px_need + n_px_need_tr <= n_px_have) {
179  memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
180  } else {
181  memset_bpp(*a, 4, *a, 3, 4);
182  }
183  }
184  }
185  }
186  if (edges[mode].needs_left) {
187  if (have_left) {
188  int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
189  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
190  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
191 
192  if (edges[mode].invert_left) {
193  if (n_px_need <= n_px_have) {
194  for (i = 0; i < n_px_need; i++)
195  assign_bpp(l, i, &dst[i * stride], -1);
196  } else {
197  for (i = 0; i < n_px_have; i++)
198  assign_bpp(l, i, &dst[i * stride], -1);
199  memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
200  }
201  } else {
202  if (n_px_need <= n_px_have) {
203  for (i = 0; i < n_px_need; i++)
204  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
205  } else {
206  for (i = 0; i < n_px_have; i++)
207  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
208  memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
209  }
210  }
211  } else {
212  memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
213  }
214  }
215 
216  return mode;
217 }
218 
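/*
 * Reconstruct an intra-coded block: for every transform block of the luma
 * plane and then of both chroma planes, set up the prediction edges with
 * check_intra_mode(), run the intra predictor, and add the inverse transform
 * of the residual whenever the end-of-block (eob) count is non-zero.
 */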
219 static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off,
220  ptrdiff_t uv_off, int bytesperpixel)
221 {
222  const VP9Context *s = td->s;
223  VP9Block *b = td->b;
224  int row = td->row, col = td->col;
225  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
226  int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
227  int end_x = FFMIN(2 * (s->cols - col), w4);
228  int end_y = FFMIN(2 * (s->rows - row), h4);
229  int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
230  int uvstep1d = 1 << b->uvtx, p;
231  uint8_t *dst = td->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
232  LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
233  LOCAL_ALIGNED_32(uint8_t, l, [64]);
234 
235  for (n = 0, y = 0; y < end_y; y += step1d) {
236  uint8_t *ptr = dst, *ptr_r = dst_r;
237  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
238  ptr_r += 4 * step1d * bytesperpixel, n += step) {
239  int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
240  y * 2 + x : 0];
241  uint8_t *a = &a_buf[32];
242  enum TxfmType txtp = ff_vp9_intra_txfm_type[mode];
243  int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
244 
245  mode = check_intra_mode(td, mode, &a, ptr_r,
246  s->s.frames[CUR_FRAME].tf.f->linesize[0],
247  ptr, td->y_stride, l,
248  col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
249  s->dsp.intra_pred[b->tx][mode](ptr, td->y_stride, l, a);
250  if (eob)
251  s->dsp.itxfm_add[tx][txtp](ptr, td->y_stride,
252  td->block + 16 * n * bytesperpixel, eob);
253  }
254  dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
255  dst += 4 * step1d * td->y_stride;
256  }
257 
258  // U/V
259  w4 >>= s->ss_h;
260  end_x >>= s->ss_h;
261  end_y >>= s->ss_v;
262  step = 1 << (b->uvtx * 2);
263  for (p = 0; p < 2; p++) {
264  dst = td->dst[1 + p];
265  dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
266  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
267  uint8_t *ptr = dst, *ptr_r = dst_r;
268  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
269  ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
270  int mode = b->uvmode;
271  uint8_t *a = &a_buf[32];
272  int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
273 
274  mode = check_intra_mode(td, mode, &a, ptr_r,
275  s->s.frames[CUR_FRAME].tf.f->linesize[1],
276  ptr, td->uv_stride, l, col, x, w4, row, y,
277  b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
278  s->dsp.intra_pred[b->uvtx][mode](ptr, td->uv_stride, l, a);
279  if (eob)
280  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
281  td->uvblock[p] + 16 * n * bytesperpixel, eob);
282  }
283  dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
284  dst += 4 * uvstep1d * td->uv_stride;
285  }
286  }
287 }
288 
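/*
 * Public entry points. bytesperpixel is pinned to a compile-time constant so
 * that the av_always_inline intra_recon() body is specialized once for the
 * 8 bpp path and once for the 16 bpp (high bit depth) path; callers select
 * the variant according to the stream's bit depth.
 */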
289 void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
290 {
291  intra_recon(td, y_off, uv_off, 1);
292 }
293 
294 void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
295 {
296  intra_recon(td, y_off, uv_off, 2);
297 }
298 
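/*
 * Luma motion compensation for the common case where the reference frame has
 * the same dimensions as the current frame. Waits via ff_progress_frame_await()
 * until the required reference rows are decoded, and falls back to
 * vdsp.emulated_edge_mc() when the subpel filter would read outside the frame.
 */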
299 static av_always_inline void mc_luma_unscaled(VP9TileData *td, const vp9_mc_func (*mc)[2],
300  uint8_t *dst, ptrdiff_t dst_stride,
301  const uint8_t *ref, ptrdiff_t ref_stride,
302  const ProgressFrame *ref_frame,
303  ptrdiff_t y, ptrdiff_t x, const VP9mv *mv,
304  int bw, int bh, int w, int h, int bytesperpixel)
305 {
306  const VP9Context *s = td->s;
307  int mx = mv->x, my = mv->y, th;
308 
309  y += my >> 3;
310  x += mx >> 3;
311  ref += y * ref_stride + x * bytesperpixel;
312  mx &= 7;
313  my &= 7;
314  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
315  // we use +7 because the last 7 pixels of each sbrow can be changed in
316  // the longest loopfilter of the next sbrow
317  th = (y + bh + 4 * !!my + 7) >> 6;
318  ff_progress_frame_await(ref_frame, FFMAX(th, 0));
319  // The arm/aarch64 _hv filters read one more row than what actually is
320  // needed, so switch to emulated edge one pixel sooner vertically
321  // (!!my * 5) than horizontally (!!mx * 4).
322  // The arm/aarch64 _h filters read one more pixel than what actually is
323  // needed, so switch to emulated edge if that would read beyond the bottom
324  // right block.
325  if (x < !!mx * 3 || y < !!my * 3 ||
326  ((ARCH_AARCH64 || ARCH_ARM) && (x + !!mx * 5 > w - bw) && (y + !!my * 5 + 1 > h - bh)) ||
327  x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
328  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
329  ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
330  160, ref_stride,
331  bw + !!mx * 7, bh + !!my * 7,
332  x - !!mx * 3, y - !!my * 3, w, h);
333  ref = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
334  ref_stride = 160;
335  }
336  mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
337 }
338 
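/*
 * Chroma (U/V) counterpart of mc_luma_unscaled(): the motion vector is
 * converted to 1/16-pel units in the chroma sampling grid and both planes are
 * filtered with the same MC function, sharing the emulated-edge fallback.
 */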
339 static av_always_inline void mc_chroma_unscaled(VP9TileData *td, const vp9_mc_func (*mc)[2],
340  uint8_t *dst_u, uint8_t *dst_v,
341  ptrdiff_t dst_stride,
342  const uint8_t *ref_u, ptrdiff_t src_stride_u,
343  const uint8_t *ref_v, ptrdiff_t src_stride_v,
344  const ProgressFrame *ref_frame,
345  ptrdiff_t y, ptrdiff_t x, const VP9mv *mv,
346  int bw, int bh, int w, int h, int bytesperpixel)
347 {
348  const VP9Context *s = td->s;
349  int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th;
350 
351  y += my >> 4;
352  x += mx >> 4;
353  ref_u += y * src_stride_u + x * bytesperpixel;
354  ref_v += y * src_stride_v + x * bytesperpixel;
355  mx &= 15;
356  my &= 15;
357  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
358  // we use +7 because the last 7 pixels of each sbrow can be changed in
359  // the longest loopfilter of the next sbrow
360  th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
361  ff_progress_frame_await(ref_frame, FFMAX(th, 0));
362  // The arm/aarch64 _hv filters read one more row than what actually is
363  // needed, so switch to emulated edge one pixel sooner vertically
364  // (!!my * 5) than horizontally (!!mx * 4).
365  // The arm/aarch64 _h filters read one more pixel than what actually is
366  // needed, so switch to emulated edge if that would read beyond the bottom
367  // right block.
368  if (x < !!mx * 3 || y < !!my * 3 ||
369  ((ARCH_AARCH64 || ARCH_ARM) && (x + !!mx * 5 > w - bw) && (y + !!my * 5 + 1 > h - bh)) ||
370  x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
371  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
372  ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
373  160, src_stride_u,
374  bw + !!mx * 7, bh + !!my * 7,
375  x - !!mx * 3, y - !!my * 3, w, h);
376  ref_u = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
377  mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
378 
379  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
380  ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
381  160, src_stride_v,
382  bw + !!mx * 7, bh + !!my * 7,
383  x - !!mx * 3, y - !!my * 3, w, h);
384  ref_v = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
385  mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
386  } else {
387  mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
388  mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
389  }
390 }
391 
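/*
 * Instantiate the unscaled inter prediction code: mc_luma_dir()/mc_chroma_dir()
 * are mapped to the unscaled MC helpers above and vp9_mc_template.c is
 * included once per bit depth, generating inter_pred_8bpp() and
 * inter_pred_16bpp() as used by inter_recon() below.
 */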
392 #define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
393  px, py, pw, ph, bw, bh, w, h, i) \
394  mc_luma_unscaled(td, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
395  mv, bw, bh, w, h, bytesperpixel)
396 #define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
397  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
398  mc_chroma_unscaled(td, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
399  row, col, mv, bw, bh, w, h, bytesperpixel)
400 #define SCALED 0
401 #define FN(x) x##_8bpp
402 #define BYTES_PER_PIXEL 1
403 #include "vp9_mc_template.c"
404 #undef FN
405 #undef BYTES_PER_PIXEL
406 #define FN(x) x##_16bpp
407 #define BYTES_PER_PIXEL 2
408 #include "vp9_mc_template.c"
409 #undef mc_luma_dir
410 #undef mc_chroma_dir
411 #undef FN
412 #undef BYTES_PER_PIXEL
413 #undef SCALED
414 
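/*
 * Luma motion compensation when the reference frame is scaled relative to the
 * current frame: the block position and motion vector are mapped into the
 * reference grid using the per-reference scale[]/step[] factors and the
 * scaled MC function (smc) is used; references of identical size fall back to
 * mc_luma_unscaled().
 */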
415 static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func smc,
416  const vp9_mc_func (*mc)[2],
417  uint8_t *dst, ptrdiff_t dst_stride,
418  const uint8_t *ref, ptrdiff_t ref_stride,
419  const ProgressFrame *ref_frame,
420  ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv,
421  int px, int py, int pw, int ph,
422  int bw, int bh, int w, int h, int bytesperpixel,
423  const uint16_t *scale, const uint8_t *step)
424 {
425  const VP9Context *s = td->s;
426  if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
427  s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
428  mc_luma_unscaled(td, mc, dst, dst_stride, ref, ref_stride, ref_frame,
429  y, x, in_mv, bw, bh, w, h, bytesperpixel);
430  } else {
431 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
432  int mx, my;
433  int refbw_m1, refbh_m1;
434  int th;
435  VP9mv mv;
436 
437  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
438  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
439  // BUG libvpx seems to scale the two components separately. This introduces
440  // rounding errors but we have to reproduce them to be exactly compatible
441  // with the output from libvpx...
442  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
443  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
444 
445  y = my >> 4;
446  x = mx >> 4;
447  ref += y * ref_stride + x * bytesperpixel;
448  mx &= 15;
449  my &= 15;
450  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
451  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
452  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
453  // we use +7 because the last 7 pixels of each sbrow can be changed in
454  // the longest loopfilter of the next sbrow
455  th = (y + refbh_m1 + 4 + 7) >> 6;
456  ff_progress_frame_await(ref_frame, FFMAX(th, 0));
457  // The arm/aarch64 _hv filters read one more row than what actually is
458  // needed, so switch to emulated edge one pixel sooner vertically
459  // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
460  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
461  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
462  ref - 3 * ref_stride - 3 * bytesperpixel,
463  288, ref_stride,
464  refbw_m1 + 8, refbh_m1 + 8,
465  x - 3, y - 3, w, h);
466  ref = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
467  ref_stride = 288;
468  }
469  smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
470  }
471 }
472 
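/*
 * Chroma (U/V) counterpart of mc_luma_scaled(). With subsampled chroma the x
 * and y components are clipped and scaled separately, reproducing known
 * libvpx rounding behaviour (see the BUG comments in the body) so the output
 * stays bit-exact with libvpx.
 */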
473 static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_func smc,
474  const vp9_mc_func (*mc)[2],
475  uint8_t *dst_u, uint8_t *dst_v,
476  ptrdiff_t dst_stride,
477  const uint8_t *ref_u, ptrdiff_t src_stride_u,
478  const uint8_t *ref_v, ptrdiff_t src_stride_v,
479  const ProgressFrame *ref_frame,
480  ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv,
481  int px, int py, int pw, int ph,
482  int bw, int bh, int w, int h, int bytesperpixel,
483  const uint16_t *scale, const uint8_t *step)
484 {
485  const VP9Context *s = td->s;
486  if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
487  s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
488  mc_chroma_unscaled(td, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
489  ref_v, src_stride_v, ref_frame,
490  y, x, in_mv, bw, bh, w, h, bytesperpixel);
491  } else {
492  int mx, my;
493  int refbw_m1, refbh_m1;
494  int th;
495  VP9mv mv;
496 
497  if (s->ss_h) {
498  // BUG https://code.google.com/p/webm/issues/detail?id=820
499  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
500  mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
501  } else {
502  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
503  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
504  }
505  if (s->ss_v) {
506  // BUG https://code.google.com/p/webm/issues/detail?id=820
507  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
508  my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
509  } else {
510  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
511  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
512  }
513 #undef scale_mv
514  y = my >> 4;
515  x = mx >> 4;
516  ref_u += y * src_stride_u + x * bytesperpixel;
517  ref_v += y * src_stride_v + x * bytesperpixel;
518  mx &= 15;
519  my &= 15;
520  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
521  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
522  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
523  // we use +7 because the last 7 pixels of each sbrow can be changed in
524  // the longest loopfilter of the next sbrow
525  th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
526  ff_progress_frame_await(ref_frame, FFMAX(th, 0));
527  // The arm/aarch64 _hv filters read one more row than what actually is
528  // needed, so switch to emulated edge one pixel sooner vertically
529  // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
530  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
531  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
532  ref_u - 3 * src_stride_u - 3 * bytesperpixel,
533  288, src_stride_u,
534  refbw_m1 + 8, refbh_m1 + 8,
535  x - 3, y - 3, w, h);
536  ref_u = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
537  smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
538 
539  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
540  ref_v - 3 * src_stride_v - 3 * bytesperpixel,
541  288, src_stride_v,
542  refbw_m1 + 8, refbh_m1 + 8,
543  x - 3, y - 3, w, h);
544  ref_v = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
545  smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
546  } else {
547  smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
548  smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
549  }
550  }
551 }
552 
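/*
 * Instantiate the scaled inter prediction code: the same template is included
 * again with the scaled MC wrappers, generating inter_pred_scaled_8bpp() and
 * inter_pred_scaled_16bpp().
 */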
553 #define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
554  px, py, pw, ph, bw, bh, w, h, i) \
555  mc_luma_scaled(td, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
556  mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
557  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
558 #define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
559  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
560  mc_chroma_scaled(td, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
561  row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
562  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
563 #define SCALED 1
564 #define FN(x) x##_scaled_8bpp
565 #define BYTES_PER_PIXEL 1
566 #include "vp9_mc_template.c"
567 #undef FN
568 #undef BYTES_PER_PIXEL
569 #define FN(x) x##_scaled_16bpp
570 #define BYTES_PER_PIXEL 2
571 #include "vp9_mc_template.c"
572 #undef mc_luma_dir
573 #undef mc_chroma_dir
574 #undef FN
575 #undef BYTES_PER_PIXEL
576 #undef SCALED
577 
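/*
 * Reconstruct an inter-coded block: reject references with invalid scaling,
 * run scaled or unscaled inter prediction depending on the per-reference
 * mvscale[] factors, and, unless the block is skipped, add the inverse
 * transform of the luma and chroma residuals (mirroring the itxfm loops of
 * intra_recon()).
 */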
578 static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel)
579 {
580  const VP9Context *s = td->s;
581  VP9Block *b = td->b;
582  int row = td->row, col = td->col;
583 
584  if (s->mvscale[b->ref[0]][0] == REF_INVALID_SCALE ||
585  (b->comp && s->mvscale[b->ref[1]][0] == REF_INVALID_SCALE)) {
586  if (!s->td->error_info) {
587  s->td->error_info = AVERROR_INVALIDDATA;
588  av_log(NULL, AV_LOG_ERROR, "Bitstream not supported, "
589  "reference frame has invalid dimensions\n");
590  }
591  return;
592  }
593 
594  if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
595  if (bytesperpixel == 1) {
596  inter_pred_scaled_8bpp(td);
597  } else {
598  inter_pred_scaled_16bpp(td);
599  }
600  } else {
601  if (bytesperpixel == 1) {
602  inter_pred_8bpp(td);
603  } else {
604  inter_pred_16bpp(td);
605  }
606  }
607 
608  if (!b->skip) {
609  /* mostly copied intra_recon() */
610 
611  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
612  int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
613  int end_x = FFMIN(2 * (s->cols - col), w4);
614  int end_y = FFMIN(2 * (s->rows - row), h4);
615  int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
616  int uvstep1d = 1 << b->uvtx, p;
617  uint8_t *dst = td->dst[0];
618 
619  // y itxfm add
620  for (n = 0, y = 0; y < end_y; y += step1d) {
621  uint8_t *ptr = dst;
622  for (x = 0; x < end_x; x += step1d,
623  ptr += 4 * step1d * bytesperpixel, n += step) {
624  int eob = b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
625 
626  if (eob)
627  s->dsp.itxfm_add[tx][DCT_DCT](ptr, td->y_stride,
628  td->block + 16 * n * bytesperpixel, eob);
629  }
630  dst += 4 * td->y_stride * step1d;
631  }
632 
633  // uv itxfm add
634  end_x >>= s->ss_h;
635  end_y >>= s->ss_v;
636  step = 1 << (b->uvtx * 2);
637  for (p = 0; p < 2; p++) {
638  dst = td->dst[p + 1];
639  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
640  uint8_t *ptr = dst;
641  for (x = 0; x < end_x; x += uvstep1d,
642  ptr += 4 * uvstep1d * bytesperpixel, n += step) {
643  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
644 
645  if (eob)
646  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
647  td->uvblock[p] + 16 * n * bytesperpixel, eob);
648  }
649  dst += 4 * uvstep1d * td->uv_stride;
650  }
651  }
652  }
653 }
654 
655 void ff_vp9_inter_recon_8bpp(VP9TileData *td)
656 {
657  inter_recon(td, 1);
658 }
659 
660 void ff_vp9_inter_recon_16bpp(VP9TileData *td)
661 {
662  inter_recon(td, 2);
663 }