FFmpeg
vp9block.c
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/avassert.h"
25 #include "libavutil/frame.h"
26 
27 #include "progressframe.h"
28 #include "vp89_rac.h"
29 #include "vp9.h"
30 #include "vp9data.h"
31 #include "vp9dec.h"
32 #include "vpx_rac.h"
33 
34 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
35  ptrdiff_t stride, int v)
36 {
37  switch (w) {
38  case 1:
39  do {
40  *ptr = v;
41  ptr += stride;
42  } while (--h);
43  break;
44  case 2: {
45  int v16 = v * 0x0101;
46  do {
47  AV_WN16A(ptr, v16);
48  ptr += stride;
49  } while (--h);
50  break;
51  }
52  case 4: {
53  uint32_t v32 = v * 0x01010101;
54  do {
55  AV_WN32A(ptr, v32);
56  ptr += stride;
57  } while (--h);
58  break;
59  }
60  case 8: {
61 #if HAVE_FAST_64BIT
62  uint64_t v64 = v * 0x0101010101010101ULL;
63  do {
64  AV_WN64A(ptr, v64);
65  ptr += stride;
66  } while (--h);
67 #else
68  uint32_t v32 = v * 0x01010101;
69  do {
70  AV_WN32A(ptr, v32);
71  AV_WN32A(ptr + 4, v32);
72  ptr += stride;
73  } while (--h);
74 #endif
75  break;
76  }
77  }
78 }
79 
/**
 * Decode the mode information of the current block (td->b).
 *
 * In order, this derives/reads: the segment id, the skip flag, the
 * intra/inter flag, the transform size, the prediction modes, and (for
 * inter blocks) the compound flag, reference frame(s), interpolation
 * filter and motion vectors. Afterwards it splats the decoded values
 * into the above (s->above_*_ctx) and left (td->left_*_ctx) context
 * arrays and writes the block's references/motion vectors into the
 * current frame's mv buffer.
 *
 * NOTE(review): in this listing several statements have lost a line
 * (e.g. the initializer of max_tx_for_bl_bp and the first line of a
 * number of vp89_rac_get_tree() calls); the comments below describe
 * only what is still visible.
 *
 * @param td per-tile decoding state; the block being decoded is td->b,
 *           its position is td->row / td->col
 */
80 static void decode_mode(VP9TileData *td)
81 {
 /* Values written to the left/above partition contexts for each block
  * size (used by SET_CTXS below as dir##_ctx[b->bs]). */
82  static const uint8_t left_ctx[N_BS_SIZES] = {
83  0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
84  };
85  static const uint8_t above_ctx[N_BS_SIZES] = {
86  0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
87  };
 /* Largest transform size per block size; NOTE(review): the table
  * contents are missing from this listing. */
88  static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
91  };
92  const VP9Context *s = td->s;
93  VP9Block *b = td->b;
94  int row = td->row, col = td->col, row7 = td->row7;
95  enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
 /* bw4/bh4: block size taken from ff_vp9_bwh_tab[1]; w4/h4: the same,
  * clipped to the s->cols / s->rows frame limits. */
96  int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
97  int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
 /* Neighbour availability: above needs row > 0, left must lie inside
  * the current tile. */
98  int have_a = row > 0, have_l = col > td->tile_col_start;
99  int vref, filter_id;
100 
 /* --- segment id --- */
101  if (!s->s.h.segmentation.enabled) {
102  b->seg_id = 0;
103  } else if (s->s.h.keyframe || s->s.h.intraonly) {
 /* 0 unless the map is updated; the read call itself is cut off in
  * this listing. */
104  b->seg_id = !s->s.h.segmentation.update_map ? 0 :
106  s->s.h.segmentation.prob);
107  } else if (!s->s.h.segmentation.update_map ||
108  (s->s.h.segmentation.temporal &&
110  s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
111  td->left_segpred_ctx[row7]]))) {
 /* Predicted segment id: the minimum id found in the reference
  * frame's segmentation map over the (clipped) block area, or 0 when
  * error resilience is on or no reference map exists. */
112  if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
113  int pred = 8, x;
114  uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;
115 
 /* presumably waits until the reference frame has been decoded up
  * to this superblock row (row >> 3) - confirm against
  * ff_progress_frame_await() */
116  if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
117  ff_progress_frame_await(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3);
118  for (y = 0; y < h4; y++) {
119  int idx_base = (y + row) * 8 * s->sb_cols + col;
120  for (x = 0; x < w4; x++)
121  pred = FFMIN(pred, refsegmap[idx_base + x]);
122  }
123  av_assert1(pred < 8);
124  b->seg_id = pred;
125  } else {
126  b->seg_id = 0;
127  }
128 
 /* mark this block as "predicted" in the segpred contexts */
129  memset(&s->above_segpred_ctx[col], 1, w4);
130  memset(&td->left_segpred_ctx[row7], 1, h4);
131  } else {
 /* explicitly coded segment id (first line of the read is missing in
  * this listing); segpred contexts are cleared */
133  s->s.h.segmentation.prob);
134 
135  memset(&s->above_segpred_ctx[col], 0, w4);
136  memset(&td->left_segpred_ctx[row7], 0, h4);
137  }
 /* Record seg_id in the current frame's segmentation map over the full
  * (unclipped) bw4 x bh4 area; the row stride is 8 * sb_cols. */
138  if (s->s.h.segmentation.enabled &&
139  (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
140  setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
141  bw4, bh4, 8 * s->sb_cols, b->seg_id);
142  }
143 
 /* --- skip flag: forced by the segment feature, otherwise read with a
  * context equal to the sum of the left and above skip flags --- */
144  b->skip = s->s.h.segmentation.enabled &&
145  s->s.h.segmentation.feat[b->seg_id].skip_enabled;
146  if (!b->skip) {
147  int c = td->left_skip_ctx[row7] + s->above_skip_ctx[col];
148  b->skip = vpx_rac_get_prob(td->c, s->prob.p.skip[c]);
149  td->counts.skip[c][b->skip]++;
150  }
151 
 /* --- intra/inter flag --- */
152  if (s->s.h.keyframe || s->s.h.intraonly) {
153  b->intra = 1;
154  } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
 /* segment-level reference: ref_val == 0 means intra */
155  b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
156  } else {
157  int c, bit;
158 
 /* context from the neighbours' intra flags */
159  if (have_a && have_l) {
160  c = s->above_intra_ctx[col] + td->left_intra_ctx[row7];
161  c += (c == 2);
162  } else {
163  c = have_a ? 2 * s->above_intra_ctx[col] :
164  have_l ? 2 * td->left_intra_ctx[row7] : 0;
165  }
 /* the coded bit is "is inter", hence the inversion */
166  bit = vpx_rac_get_prob(td->c, s->prob.p.intra[c]);
167  td->counts.intra[c][bit]++;
168  b->intra = !bit;
169  }
170 
 /* --- transform size: coded only when the frame uses switchable
  * transform sizes and the block is intra or not skipped; otherwise
  * the smaller of max_tx and the frame's transform mode --- */
171  if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
172  int c;
 /* context: a skipped neighbour counts as max_tx */
173  if (have_a) {
174  if (have_l) {
175  c = (s->above_skip_ctx[col] ? max_tx :
176  s->above_txfm_ctx[col]) +
177  (td->left_skip_ctx[row7] ? max_tx :
178  td->left_txfm_ctx[row7]) > max_tx;
179  } else {
180  c = s->above_skip_ctx[col] ? 1 :
181  (s->above_txfm_ctx[col] * 2 > max_tx);
182  }
183  } else if (have_l) {
184  c = td->left_skip_ctx[row7] ? 1 :
185  (td->left_txfm_ctx[row7] * 2 > max_tx);
186  } else {
187  c = 1;
188  }
 /* the size is coded as a sequence of "go larger" bits, at most up to
  * max_tx */
189  switch (max_tx) {
190  case TX_32X32:
191  b->tx = vpx_rac_get_prob(td->c, s->prob.p.tx32p[c][0]);
192  if (b->tx) {
193  b->tx += vpx_rac_get_prob(td->c, s->prob.p.tx32p[c][1]);
194  if (b->tx == 2)
195  b->tx += vpx_rac_get_prob(td->c, s->prob.p.tx32p[c][2]);
196  }
197  td->counts.tx32p[c][b->tx]++;
198  break;
199  case TX_16X16:
200  b->tx = vpx_rac_get_prob(td->c, s->prob.p.tx16p[c][0]);
201  if (b->tx)
202  b->tx += vpx_rac_get_prob(td->c, s->prob.p.tx16p[c][1]);
203  td->counts.tx16p[c][b->tx]++;
204  break;
205  case TX_8X8:
206  b->tx = vpx_rac_get_prob(td->c, s->prob.p.tx8p[c]);
207  td->counts.tx8p[c][b->tx]++;
208  break;
209  case TX_4X4:
210  b->tx = TX_4X4;
211  break;
212  }
213  } else {
214  b->tx = FFMIN(max_tx, s->s.h.txfmmode);
215  }
216 
 /* --- prediction modes --- */
217  if (s->s.h.keyframe || s->s.h.intraonly) {
 /* Keyframe / intra-only: luma modes use the default keyframe
  * probabilities indexed by the above (a[]) and left (l[]) modes;
  * a[] and l[] are updated as each sub-block mode becomes known. */
218  uint8_t *a = &s->above_mode_ctx[col * 2];
219  uint8_t *l = &td->left_mode_ctx[(row7) << 1];
220 
221  b->comp = 0;
222  if (b->bs > BS_8x8) {
223  // FIXME the memory storage intermediates here aren't really
224  // necessary, they're just there to make the code slightly
225  // simpler for now
 /* sub-8x8 block: up to four modes; sub-blocks that do not exist
  * for this block shape copy the mode of their neighbour sub-block
  * (the first line of some reads is missing in this listing) */
226  b->mode[0] =
228  ff_vp9_default_kf_ymode_probs[a[0]][l[0]]);
229  if (b->bs != BS_8x4) {
230  b->mode[1] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
231  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
232  l[0] =
233  a[1] = b->mode[1];
234  } else {
235  l[0] =
236  a[1] =
237  b->mode[1] = b->mode[0];
238  }
239  if (b->bs != BS_4x8) {
240  b->mode[2] =
242  ff_vp9_default_kf_ymode_probs[a[0]][l[1]]);
243  if (b->bs != BS_8x4) {
244  b->mode[3] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
245  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
246  l[1] =
247  a[1] = b->mode[3];
248  } else {
249  l[1] =
250  a[1] =
251  b->mode[3] = b->mode[2];
252  }
253  } else {
254  b->mode[2] = b->mode[0];
255  l[1] =
256  a[1] =
257  b->mode[3] = b->mode[1];
258  }
259  } else {
 /* 8x8 or larger: a single mode shared by all four entries (the
  * remainder of the read statement is missing in this listing) */
260  b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
262  b->mode[3] =
263  b->mode[2] =
264  b->mode[1] = b->mode[0];
265  // FIXME this can probably be optimized
266  memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]);
267  memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]);
268  }
271  } else if (b->intra) {
 /* Intra block in an inter frame: modes are read with the adaptive
  * y_mode probabilities and counted for backward adaptation. */
272  b->comp = 0;
273  if (b->bs > BS_8x8) {
274  b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
275  s->prob.p.y_mode[0]);
276  td->counts.y_mode[0][b->mode[0]]++;
277  if (b->bs != BS_8x4) {
278  b->mode[1] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
279  s->prob.p.y_mode[0]);
280  td->counts.y_mode[0][b->mode[1]]++;
281  } else {
282  b->mode[1] = b->mode[0];
283  }
284  if (b->bs != BS_4x8) {
285  b->mode[2] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
286  s->prob.p.y_mode[0]);
287  td->counts.y_mode[0][b->mode[2]]++;
288  if (b->bs != BS_8x4) {
289  b->mode[3] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
290  s->prob.p.y_mode[0]);
291  td->counts.y_mode[0][b->mode[3]]++;
292  } else {
293  b->mode[3] = b->mode[2];
294  }
295  } else {
296  b->mode[2] = b->mode[0];
297  b->mode[3] = b->mode[1];
298  }
299  } else {
 /* probability set selected by block size */
300  static const uint8_t size_group[10] = {
301  3, 3, 3, 3, 2, 2, 2, 1, 1, 1
302  };
303  int sz = size_group[b->bs];
304 
305  b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
306  s->prob.p.y_mode[sz]);
307  b->mode[1] =
308  b->mode[2] =
309  b->mode[3] = b->mode[0];
310  td->counts.y_mode[sz][b->mode[3]]++;
311  }
 /* chroma mode, conditioned on the last luma mode (the first line of
  * the read is missing in this listing) */
313  s->prob.p.uv_mode[b->mode[3]]);
314  td->counts.uv_mode[b->mode[3]][b->uvmode]++;
315  } else {
 /* Inter block. inter_mode_ctx_lut maps the above/left mode contexts
  * to the context used for reading the inter (mv) mode. */
316  static const uint8_t inter_mode_ctx_lut[14][14] = {
317  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
318  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
319  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
320  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
321  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
322  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
323  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
324  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
325  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
326  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
327  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
328  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
329  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
330  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
331  };
332 
 /* --- reference frame(s) --- */
333  if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
 /* segment-level reference: single reference ref_val - 1 */
334  av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
335  b->comp = 0;
336  b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
337  } else {
338  // read comp_pred flag
339  if (s->s.h.comppredmode != PRED_SWITCHABLE) {
340  b->comp = s->s.h.comppredmode == PRED_COMPREF;
341  } else {
342  int c;
343 
344  // FIXME add intra as ref=0xff (or -1) to make these easier?
345  if (have_a) {
346  if (have_l) {
347  if (s->above_comp_ctx[col] && td->left_comp_ctx[row7]) {
348  c = 4;
349  } else if (s->above_comp_ctx[col]) {
350  c = 2 + (td->left_intra_ctx[row7] ||
351  td->left_ref_ctx[row7] == s->s.h.fixcompref);
352  } else if (td->left_comp_ctx[row7]) {
353  c = 2 + (s->above_intra_ctx[col] ||
354  s->above_ref_ctx[col] == s->s.h.fixcompref);
355  } else {
 /* NOTE(review): this is the only left-context access that uses
  * (row & 7) instead of row7; presumably the two are equal -
  * confirm where td->row7 is set */
356  c = (!s->above_intra_ctx[col] &&
357  s->above_ref_ctx[col] == s->s.h.fixcompref) ^
358  (!td->left_intra_ctx[row7] &&
359  td->left_ref_ctx[row & 7] == s->s.h.fixcompref);
360  }
361  } else {
362  c = s->above_comp_ctx[col] ? 3 :
363  (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
364  }
365  } else if (have_l) {
366  c = td->left_comp_ctx[row7] ? 3 :
367  (!td->left_intra_ctx[row7] && td->left_ref_ctx[row7] == s->s.h.fixcompref);
368  } else {
369  c = 1;
370  }
371  b->comp = vpx_rac_get_prob(td->c, s->prob.p.comp[c]);
372  td->counts.comp[c][b->comp]++;
373  }
374 
375  // read actual references
376  // FIXME probably cache a few variables here to prevent repetitive
377  // memory accesses below
378  if (b->comp) { /* two references */
 /* The fixed reference goes into the slot selected by its sign
  * bias; the other slot gets one of the two variable references,
  * chosen by a single coded bit. */
379  int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;
380 
381  b->ref[fix_idx] = s->s.h.fixcompref;
382  // FIXME can this codeblob be replaced by some sort of LUT?
383  if (have_a) {
384  if (have_l) {
385  if (s->above_intra_ctx[col]) {
386  if (td->left_intra_ctx[row7]) {
387  c = 2;
388  } else {
389  c = 1 + 2 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
390  }
391  } else if (td->left_intra_ctx[row7]) {
392  c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
393  } else {
394  int refl = td->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
395 
396  if (refl == refa && refa == s->s.h.varcompref[1]) {
397  c = 0;
398  } else if (!td->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
399  if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
400  (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
401  c = 4;
402  } else {
403  c = (refa == refl) ? 3 : 1;
404  }
405  } else if (!td->left_comp_ctx[row7]) {
406  if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
407  c = 1;
408  } else {
409  c = (refl == s->s.h.varcompref[1] &&
410  refa != s->s.h.varcompref[1]) ? 2 : 4;
411  }
412  } else if (!s->above_comp_ctx[col]) {
413  if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
414  c = 1;
415  } else {
416  c = (refa == s->s.h.varcompref[1] &&
417  refl != s->s.h.varcompref[1]) ? 2 : 4;
418  }
419  } else {
420  c = (refl == refa) ? 4 : 2;
421  }
422  }
423  } else {
424  if (s->above_intra_ctx[col]) {
425  c = 2;
426  } else if (s->above_comp_ctx[col]) {
427  c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
428  } else {
429  c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
430  }
431  }
432  } else if (have_l) {
433  if (td->left_intra_ctx[row7]) {
434  c = 2;
435  } else if (td->left_comp_ctx[row7]) {
436  c = 4 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
437  } else {
438  c = 3 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
439  }
440  } else {
441  c = 2;
442  }
443  bit = vpx_rac_get_prob(td->c, s->prob.p.comp_ref[c]);
444  b->ref[var_idx] = s->s.h.varcompref[bit];
445  td->counts.comp_ref[c][bit]++;
446  } else /* single reference */ {
 /* Up to two bits: the first selects reference 0 vs. the others,
  * the second (if needed) selects reference 1 vs. 2. */
447  int bit, c;
448 
449  if (have_a && !s->above_intra_ctx[col]) {
450  if (have_l && !td->left_intra_ctx[row7]) {
451  if (td->left_comp_ctx[row7]) {
452  if (s->above_comp_ctx[col]) {
453  c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7] ||
454  !s->above_ref_ctx[col]);
455  } else {
456  c = (3 * !s->above_ref_ctx[col]) +
457  (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
458  }
459  } else if (s->above_comp_ctx[col]) {
460  c = (3 * !td->left_ref_ctx[row7]) +
461  (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
462  } else {
463  c = 2 * !td->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
464  }
465  } else if (s->above_intra_ctx[col]) {
 /* NOTE(review): looks unreachable - the enclosing branch
  * already requires !s->above_intra_ctx[col] */
466  c = 2;
467  } else if (s->above_comp_ctx[col]) {
468  c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
469  } else {
470  c = 4 * (!s->above_ref_ctx[col]);
471  }
472  } else if (have_l && !td->left_intra_ctx[row7]) {
473  if (td->left_intra_ctx[row7]) {
 /* NOTE(review): looks unreachable - the enclosing branch
  * already requires !td->left_intra_ctx[row7] */
474  c = 2;
475  } else if (td->left_comp_ctx[row7]) {
476  c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
477  } else {
478  c = 4 * (!td->left_ref_ctx[row7]);
479  }
480  } else {
481  c = 2;
482  }
483  bit = vpx_rac_get_prob(td->c, s->prob.p.single_ref[c][0]);
484  td->counts.single_ref[c][0][bit]++;
485  if (!bit) {
486  b->ref[0] = 0;
487  } else {
488  // FIXME can this codeblob be replaced by some sort of LUT?
489  if (have_a) {
490  if (have_l) {
491  if (td->left_intra_ctx[row7]) {
492  if (s->above_intra_ctx[col]) {
493  c = 2;
494  } else if (s->above_comp_ctx[col]) {
495  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
496  s->above_ref_ctx[col] == 1);
497  } else if (!s->above_ref_ctx[col]) {
498  c = 3;
499  } else {
500  c = 4 * (s->above_ref_ctx[col] == 1);
501  }
502  } else if (s->above_intra_ctx[col]) {
503  if (td->left_intra_ctx[row7]) {
 /* NOTE(review): looks unreachable - this else-branch
  * is only entered when td->left_intra_ctx[row7] is 0 */
504  c = 2;
505  } else if (td->left_comp_ctx[row7]) {
506  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
507  td->left_ref_ctx[row7] == 1);
508  } else if (!td->left_ref_ctx[row7]) {
509  c = 3;
510  } else {
511  c = 4 * (td->left_ref_ctx[row7] == 1);
512  }
513  } else if (s->above_comp_ctx[col]) {
514  if (td->left_comp_ctx[row7]) {
515  if (td->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
516  c = 3 * (s->s.h.fixcompref == 1 ||
517  td->left_ref_ctx[row7] == 1);
518  } else {
519  c = 2;
520  }
521  } else if (!td->left_ref_ctx[row7]) {
522  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
523  s->above_ref_ctx[col] == 1);
524  } else {
525  c = 3 * (td->left_ref_ctx[row7] == 1) +
526  (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
527  }
528  } else if (td->left_comp_ctx[row7]) {
529  if (!s->above_ref_ctx[col]) {
530  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
531  td->left_ref_ctx[row7] == 1);
532  } else {
533  c = 3 * (s->above_ref_ctx[col] == 1) +
534  (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
535  }
536  } else if (!s->above_ref_ctx[col]) {
537  if (!td->left_ref_ctx[row7]) {
538  c = 3;
539  } else {
540  c = 4 * (td->left_ref_ctx[row7] == 1);
541  }
542  } else if (!td->left_ref_ctx[row7]) {
543  c = 4 * (s->above_ref_ctx[col] == 1);
544  } else {
545  c = 2 * (td->left_ref_ctx[row7] == 1) +
546  2 * (s->above_ref_ctx[col] == 1);
547  }
548  } else {
549  if (s->above_intra_ctx[col] ||
550  (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
551  c = 2;
552  } else if (s->above_comp_ctx[col]) {
553  c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
554  } else {
555  c = 4 * (s->above_ref_ctx[col] == 1);
556  }
557  }
558  } else if (have_l) {
559  if (td->left_intra_ctx[row7] ||
560  (!td->left_comp_ctx[row7] && !td->left_ref_ctx[row7])) {
561  c = 2;
562  } else if (td->left_comp_ctx[row7]) {
563  c = 3 * (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
564  } else {
565  c = 4 * (td->left_ref_ctx[row7] == 1);
566  }
567  } else {
568  c = 2;
569  }
570  bit = vpx_rac_get_prob(td->c, s->prob.p.single_ref[c][1]);
571  td->counts.single_ref[c][1][bit]++;
572  b->ref[0] = 1 + bit;
573  }
574  }
575  }
576 
 /* --- inter mode for blocks of 8x8 and larger: forced to ZEROMV by
  * the segment skip feature, otherwise read with a context from the
  * LUT (the first line of the read is missing in this listing) --- */
577  if (b->bs <= BS_8x8) {
578  if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
579  b->mode[0] =
580  b->mode[1] =
581  b->mode[2] =
582  b->mode[3] = ZEROMV;
583  } else {
584  static const uint8_t off[10] = {
585  3, 0, 0, 1, 0, 0, 0, 0, 0, 0
586  };
587 
588  // FIXME this needs to use the LUT tables from find_ref_mvs
589  // because not all are -1,0/0,-1
590  int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
591  [td->left_mode_ctx[row7 + off[b->bs]]];
592 
594  s->prob.p.mv_mode[c]);
595  b->mode[1] =
596  b->mode[2] =
597  b->mode[3] = b->mode[0];
 /* mode values are stored offset by 10 relative to the counter
  * index */
598  td->counts.mv_mode[c][b->mode[0] - 10]++;
599  }
600  }
601 
 /* --- interpolation filter: read only when the frame uses switchable
  * filters; the context comes from the filters of inter-coded
  * neighbours (mode context >= NEARESTMV), 3 meaning no usable or
  * disagreeing neighbours --- */
602  if (s->s.h.filtermode == FILTER_SWITCHABLE) {
603  int c;
604 
605  if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
606  if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
607  c = s->above_filter_ctx[col] == td->left_filter_ctx[row7] ?
608  td->left_filter_ctx[row7] : 3;
609  } else {
610  c = s->above_filter_ctx[col];
611  }
612  } else if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
613  c = td->left_filter_ctx[row7];
614  } else {
615  c = 3;
616  }
617 
618  filter_id = vp89_rac_get_tree(td->c, ff_vp9_filter_tree,
619  s->prob.p.filter[c]);
620  td->counts.filter[c][filter_id]++;
621  b->filter = ff_vp9_filter_lut[filter_id];
622  } else {
623  b->filter = s->s.h.filtermode;
624  }
625 
 /* --- motion vectors --- */
626  if (b->bs > BS_8x8) {
 /* sub-8x8: each existing sub-block gets its own mode and mv (the
  * first line of each mode read is missing in this listing);
  * sub-blocks that do not exist for this shape copy mode and both
  * mvs from their neighbour sub-block */
627  int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][td->left_mode_ctx[row7]];
628 
630  s->prob.p.mv_mode[c]);
631  td->counts.mv_mode[c][b->mode[0] - 10]++;
632  ff_vp9_fill_mv(td, b->mv[0], b->mode[0], 0);
633 
634  if (b->bs != BS_8x4) {
636  s->prob.p.mv_mode[c]);
637  td->counts.mv_mode[c][b->mode[1] - 10]++;
638  ff_vp9_fill_mv(td, b->mv[1], b->mode[1], 1);
639  } else {
640  b->mode[1] = b->mode[0];
641  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
642  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
643  }
644 
645  if (b->bs != BS_4x8) {
647  s->prob.p.mv_mode[c]);
648  td->counts.mv_mode[c][b->mode[2] - 10]++;
649  ff_vp9_fill_mv(td, b->mv[2], b->mode[2], 2);
650 
651  if (b->bs != BS_8x4) {
653  s->prob.p.mv_mode[c]);
654  td->counts.mv_mode[c][b->mode[3] - 10]++;
655  ff_vp9_fill_mv(td, b->mv[3], b->mode[3], 3);
656  } else {
657  b->mode[3] = b->mode[2];
658  AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
659  AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
660  }
661  } else {
662  b->mode[2] = b->mode[0];
663  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
664  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
665  b->mode[3] = b->mode[1];
666  AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
667  AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
668  }
669  } else {
 /* one mv pair for the whole block, replicated to all four slots */
670  ff_vp9_fill_mv(td, b->mv[0], b->mode[0], -1);
671  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
672  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
673  AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
674  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
675  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
676  AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
677  }
678 
 /* reference stored in the ref contexts: for compound blocks the
  * slot is picked via the sign bias of varcompref[0], otherwise it
  * is ref[0] */
679  vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
680  }
681 
 /* SPLAT_CTX(var, val, n): store n copies of the byte val starting at
  * var, using the widest stores available for the build. */
682 #if HAVE_FAST_64BIT
683 #define SPLAT_CTX(var, val, n) \
684  switch (n) { \
685  case 1: var = val; break; \
686  case 2: AV_WN16A(&var, val * 0x0101); break; \
687  case 4: AV_WN32A(&var, val * 0x01010101); break; \
688  case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
689  case 16: { \
690  uint64_t v64 = val * 0x0101010101010101ULL; \
691  AV_WN64A( &var, v64); \
692  AV_WN64A(&((uint8_t *) &var)[8], v64); \
693  break; \
694  } \
695  }
696 #else
697 #define SPLAT_CTX(var, val, n) \
698  switch (n) { \
699  case 1: var = val; break; \
700  case 2: AV_WN16A(&var, val * 0x0101); break; \
701  case 4: AV_WN32A(&var, val * 0x01010101); break; \
702  case 8: { \
703  uint32_t v32 = val * 0x01010101; \
704  AV_WN32A( &var, v32); \
705  AV_WN32A(&((uint8_t *) &var)[4], v32); \
706  break; \
707  } \
708  case 16: { \
709  uint32_t v32 = val * 0x01010101; \
710  AV_WN32A( &var, v32); \
711  AV_WN32A(&((uint8_t *) &var)[4], v32); \
712  AV_WN32A(&((uint8_t *) &var)[8], v32); \
713  AV_WN32A(&((uint8_t *) &var)[12], v32); \
714  break; \
715  } \
716  }
717 #endif
718 
 /* --- context update: SET_CTXS writes skip, tx size and partition
  * context always; intra, comp and mode only on inter frames; ref and
  * filter only for inter blocks. Applied once along the above row
  * (block width) and once along the left column (block height). --- */
719  switch (ff_vp9_bwh_tab[1][b->bs][0]) {
720 #define SET_CTXS(perf, dir, off, n) \
721  do { \
722  SPLAT_CTX(perf->dir##_skip_ctx[off], b->skip, n); \
723  SPLAT_CTX(perf->dir##_txfm_ctx[off], b->tx, n); \
724  SPLAT_CTX(perf->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
725  if (!s->s.h.keyframe && !s->s.h.intraonly) { \
726  SPLAT_CTX(perf->dir##_intra_ctx[off], b->intra, n); \
727  SPLAT_CTX(perf->dir##_comp_ctx[off], b->comp, n); \
728  SPLAT_CTX(perf->dir##_mode_ctx[off], b->mode[3], n); \
729  if (!b->intra) { \
730  SPLAT_CTX(perf->dir##_ref_ctx[off], vref, n); \
731  if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
732  SPLAT_CTX(perf->dir##_filter_ctx[off], filter_id, n); \
733  } \
734  } \
735  } \
736  } while (0)
737  case 1: SET_CTXS(s, above, col, 1); break;
738  case 2: SET_CTXS(s, above, col, 2); break;
739  case 4: SET_CTXS(s, above, col, 4); break;
740  case 8: SET_CTXS(s, above, col, 8); break;
741  }
742  switch (ff_vp9_bwh_tab[1][b->bs][1]) {
743  case 1: SET_CTXS(td, left, row7, 1); break;
744  case 2: SET_CTXS(td, left, row7, 2); break;
745  case 4: SET_CTXS(td, left, row7, 4); break;
746  case 8: SET_CTXS(td, left, row7, 8); break;
747  }
748 #undef SPLAT_CTX
749 #undef SET_CTXS
750 
 /* --- mv contexts (inter frames only): two entries per column/row
  * unit. Sub-8x8 blocks store the mvs of their bottom (mv[2], mv[3])
  * and right (mv[1], mv[3]) sub-blocks; larger blocks replicate mv[3]
  * over the clipped block extent. --- */
751  if (!s->s.h.keyframe && !s->s.h.intraonly) {
752  if (b->bs > BS_8x8) {
753  int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
754 
755  AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
756  AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
757  AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][0], mv0);
758  AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][1], mv1);
759  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
760  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
761  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
762  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
763  } else {
764  int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
765 
766  for (n = 0; n < w4 * 2; n++) {
767  AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
768  AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
769  }
770  for (n = 0; n < h4 * 2; n++) {
771  AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][0], mv0);
772  AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][1], mv1);
773  }
774  }
775  }
776 
 /* --- per-frame mv/ref buffer: one VP9mvrefPair per unit of the
  * clipped block area. Intra blocks store ref -1 in both slots;
  * single-reference blocks store -1 in the second slot and leave
  * mv[1] unwritten. --- */
777  // FIXME kinda ugly
778  for (y = 0; y < h4; y++) {
779  int x, o = (row + y) * s->sb_cols * 8 + col;
780  VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];
781 
782  if (b->intra) {
783  for (x = 0; x < w4; x++) {
784  mv[x].ref[0] =
785  mv[x].ref[1] = -1;
786  }
787  } else if (b->comp) {
788  for (x = 0; x < w4; x++) {
789  mv[x].ref[0] = b->ref[0];
790  mv[x].ref[1] = b->ref[1];
791  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
792  AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
793  }
794  } else {
795  for (x = 0; x < w4; x++) {
796  mv[x].ref[0] = b->ref[0];
797  mv[x].ref[1] = -1;
798  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
799  }
800  }
801  }
802 }
803 
804 // FIXME merge cnt/eob arguments?
/**
 * Decode the coefficient tokens of one transform block.
 *
 * Tokens are read from the range coder in scan order. Every non-zero
 * value is multiplied by qmul[0] (first scan position) or qmul[1] (all
 * later positions), halved for 32x32 transforms, and stored at its
 * scan[] position in coef. The cnt and eob counters are updated along
 * the way.
 *
 * @param c               range coder to read from
 * @param coef            output coefficients; written as 16-bit values
 *                        when is8bitsperpixel is set, otherwise as
 *                        32-bit values at index 2 * position
 * @param n_coeffs        maximum number of scan positions to decode
 * @param is_tx32x32      non-zero to halve the dequantized value
 * @param is8bitsperpixel selects the storage width and whether the
 *                        largest token category reads extra high bits
 * @param bpp             bit depth; only compared against 12, and only
 *                        when is8bitsperpixel is 0
 * @param cnt             token counters, indexed [band][ctx][0..2]
 * @param eob             end-of-block counters, indexed [band][ctx][0..1]
 * @param p               token probabilities, indexed [band][ctx][0..10]
 * @param nnz             context for the first coefficient
 * @param scan            scan position -> coefficient index
 * @param nb              for each scan position, the two cache indices
 *                        used to derive the following context
 * @param band_counts     number of scan positions in each band
 * @param qmul            dequantization factors
 * @return the scan index at which decoding stopped (0 if the block
 *         starts with end-of-block)
 */
805 static av_always_inline int
806 decode_coeffs_b_generic(VPXRangeCoder *c, int16_t *coef, int n_coeffs,
807  int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
808  unsigned (*eob)[6][2], const uint8_t (*p)[6][11],
809  int nnz, const int16_t *scan, const int16_t (*nb)[2],
810  const int16_t *band_counts, const int16_t *qmul)
811 {
812  int i = 0, band = 0, band_left = band_counts[band];
813  const uint8_t *tp = p[0][nnz];
 /* Magnitude class (0..5) of each decoded coefficient, indexed by
  * coefficient position. It is never initialized: presumably nb[] only
  * refers to positions that were already written - TODO confirm against
  * the scan/neighbour tables. */
814  uint8_t cache[1024];
815 
816  do {
817  int val, rc;
818 
819  val = vpx_rac_get_prob_branchy(c, tp[0]); // eob
820  eob[band][nnz][val]++;
821  if (!val)
822  break;
823 
 /* after a zero token the end-of-block check is not repeated, so
  * zero tokens jump back here instead of to the top of the loop */
824 skip_eob:
825  if (!vpx_rac_get_prob_branchy(c, tp[1])) { // zero
826  cnt[band][nnz][0]++;
827  if (!--band_left)
828  band_left = band_counts[++band];
829  cache[scan[i]] = 0;
 /* next context: rounded-up average of the two neighbour classes */
830  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
831  tp = p[band][nnz];
832  if (++i == n_coeffs)
833  break; //invalid input; blocks should end with EOB
834  goto skip_eob;
835  }
836 
837  rc = scan[i];
838  if (!vpx_rac_get_prob_branchy(c, tp[2])) { // one
839  cnt[band][nnz][1]++;
840  val = 1;
841  cache[rc] = 1;
842  } else {
 /* values >= 2: walk the rest of the token tree; the larger
  * categories read their extra bits with fixed probabilities */
843  cnt[band][nnz][2]++;
844  if (!vpx_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
845  if (!vpx_rac_get_prob_branchy(c, tp[4])) {
846  cache[rc] = val = 2;
847  } else {
848  val = 3 + vpx_rac_get_prob(c, tp[5]);
849  cache[rc] = 3;
850  }
851  } else if (!vpx_rac_get_prob_branchy(c, tp[6])) { // cat1/2
852  cache[rc] = 4;
853  if (!vpx_rac_get_prob_branchy(c, tp[7])) {
854  val = vpx_rac_get_prob(c, 159) + 5;
855  } else {
856  val = (vpx_rac_get_prob(c, 165) << 1) + 7;
857  val += vpx_rac_get_prob(c, 145);
858  }
859  } else { // cat 3-6
860  cache[rc] = 5;
861  if (!vpx_rac_get_prob_branchy(c, tp[8])) {
862  if (!vpx_rac_get_prob_branchy(c, tp[9])) {
863  val = 11 + (vpx_rac_get_prob(c, 173) << 2);
864  val += (vpx_rac_get_prob(c, 148) << 1);
865  val += vpx_rac_get_prob(c, 140);
866  } else {
867  val = 19 + (vpx_rac_get_prob(c, 176) << 3);
868  val += (vpx_rac_get_prob(c, 155) << 2);
869  val += (vpx_rac_get_prob(c, 140) << 1);
870  val += vpx_rac_get_prob(c, 135);
871  }
872  } else if (!vpx_rac_get_prob_branchy(c, tp[10])) {
873  val = (vpx_rac_get_prob(c, 180) << 4) + 35;
874  val += (vpx_rac_get_prob(c, 157) << 3);
875  val += (vpx_rac_get_prob(c, 141) << 2);
876  val += (vpx_rac_get_prob(c, 134) << 1);
877  val += vpx_rac_get_prob(c, 130);
878  } else {
 /* largest category: 14 extra bits, plus 2 more when not
  * 8 bits per pixel, plus another 2 when bpp == 12 */
879  val = 67;
880  if (!is8bitsperpixel) {
881  if (bpp == 12) {
882  val += vpx_rac_get_prob(c, 255) << 17;
883  val += vpx_rac_get_prob(c, 255) << 16;
884  }
885  val += (vpx_rac_get_prob(c, 255) << 15);
886  val += (vpx_rac_get_prob(c, 255) << 14);
887  }
888  val += (vpx_rac_get_prob(c, 254) << 13);
889  val += (vpx_rac_get_prob(c, 254) << 12);
890  val += (vpx_rac_get_prob(c, 254) << 11);
891  val += (vpx_rac_get_prob(c, 252) << 10);
892  val += (vpx_rac_get_prob(c, 249) << 9);
893  val += (vpx_rac_get_prob(c, 243) << 8);
894  val += (vpx_rac_get_prob(c, 230) << 7);
895  val += (vpx_rac_get_prob(c, 196) << 6);
896  val += (vpx_rac_get_prob(c, 177) << 5);
897  val += (vpx_rac_get_prob(c, 153) << 4);
898  val += (vpx_rac_get_prob(c, 140) << 3);
899  val += (vpx_rac_get_prob(c, 133) << 2);
900  val += (vpx_rac_get_prob(c, 130) << 1);
901  val += vpx_rac_get_prob(c, 129);
902  }
903  }
904  }
 /* STORE_COEF: 16-bit store for 8 bits per pixel, otherwise a 32-bit
  * store at element index 2 * i of the int16_t buffer */
905 #define STORE_COEF(c, i, v) do { \
906  if (is8bitsperpixel) { \
907  c[i] = v; \
908  } else { \
909  AV_WN32A(&c[i * 2], v); \
910  } \
911 } while (0)
912  if (!--band_left)
913  band_left = band_counts[++band];
 /* a sign bit is read, then the value is dequantized; the product
  * is computed in unsigned arithmetic and, for 32x32 transforms,
  * converted back to int and halved */
914  if (is_tx32x32)
915  STORE_COEF(coef, rc, (int)((vp89_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]) / 2);
916  else
917  STORE_COEF(coef, rc, (vp89_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]);
918  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
919  tp = p[band][nnz];
920  } while (++i < n_coeffs);
921 
922  return i;
923 }
924 
925 static int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
926  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
927  const uint8_t (*p)[6][11], int nnz, const int16_t *scan,
928  const int16_t (*nb)[2], const int16_t *band_counts,
929  const int16_t *qmul)
930 {
931  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
932  nnz, scan, nb, band_counts, qmul);
933 }
934 
935 static int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
936  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
937  const uint8_t (*p)[6][11], int nnz, const int16_t *scan,
938  const int16_t (*nb)[2], const int16_t *band_counts,
939  const int16_t *qmul)
940 {
941  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
942  nnz, scan, nb, band_counts, qmul);
943 }
944 
945 static int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
946  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
947  const uint8_t (*p)[6][11], int nnz, const int16_t *scan,
948  const int16_t (*nb)[2], const int16_t *band_counts,
949  const int16_t *qmul)
950 {
951  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 0, td->s->s.h.bpp, cnt, eob, p,
952  nnz, scan, nb, band_counts, qmul);
953 }
954 
955 static int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
956  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
957  const uint8_t (*p)[6][11], int nnz, const int16_t *scan,
958  const int16_t (*nb)[2], const int16_t *band_counts,
959  const int16_t *qmul)
960 {
961  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 0, td->s->s.h.bpp, cnt, eob, p,
962  nnz, scan, nb, band_counts, qmul);
963 }
964 
/*
 * Decode every luma and chroma coefficient block of the current block (td->b).
 *
 * For each transform block inside the part of the block that lies within
 * s->cols / s->rows, this calls the decode_coeffs_b{,32}_{8,16}bpp helper
 * chosen by is8bitsperpixel and the transform size, stores the helper's
 * return value in td->eob (luma) or td->uveob[pl] (chroma), and records in
 * the above/left nnz context arrays whether that value was non-zero.
 *
 * is8bitsperpixel selects the *_8bpp helpers (non-zero) or the *_16bpp
 * helpers (zero) and the byte size used to step through td->block /
 * td->uvblock.
 *
 * Returns 1 if any helper call returned non-zero, otherwise 0.
 */
965 static av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel)
966 {
967  const VP9Context *s = td->s;
968  VP9Block *b = td->b;
969  int row = td->row, col = td->col;
     /* luma (plane type 0) probabilities and counters for this transform size */
970  const uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
971  unsigned (*c)[6][3] = td->counts.coef[b->tx][0 /* y */][!b->intra];
972  unsigned (*e)[6][2] = td->counts.eob[b->tx][0 /* y */][!b->intra];
     /* block size table values doubled; presumably 4-pixel units - TODO confirm */
973  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1;
     /* clamp the loops to what remains before s->cols / s->rows */
974  int end_x = FFMIN(2 * (s->cols - col), w4);
975  int end_y = FFMIN(2 * (s->rows - row), h4);
976  int n, pl, x, y, ret;
     /* qmul[0] is passed for luma blocks, qmul[1] for chroma blocks */
977  const int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
     /* when s->s.h.lossless is set the scan tables are indexed at 4 + b->tx */
978  int tx = 4 * s->s.h.lossless + b->tx;
979  const int16_t * const *yscans = ff_vp9_scans[tx];
980  const int16_t (* const * ynbs)[2] = ff_vp9_scans_nb[tx];
     /* chroma always uses the DCT_DCT scan of its transform size */
981  const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT];
982  const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT];
     /* a/l: above and left non-zero context bytes, re-pointed per chroma plane below */
983  uint8_t *a = &s->above_y_nnz_ctx[col * 2];
984  uint8_t *l = &td->left_y_nnz_ctx[(row & 7) << 1];
     /* one row per transform size; each row sums to 16, 64, 256 or 1024 */
985  static const int16_t band_counts[4][8] = {
986  { 1, 2, 3, 4, 3, 16 - 13 },
987  { 1, 2, 3, 4, 11, 64 - 21 },
988  { 1, 2, 3, 4, 11, 256 - 21 },
989  { 1, 2, 3, 4, 11, 1024 - 21 },
990  };
991  const int16_t *y_band_counts = band_counts[b->tx];
992  const int16_t *uv_band_counts = band_counts[b->uvtx];
993  int bytesperpixel = is8bitsperpixel ? 1 : 2;
     /* becomes 1 as soon as any decoded block reports a non-zero result */
994  int total_coeff = 0;
995 
     /*
      * MERGE / MERGE_CTX: for every group of `step` context bytes, read the
      * group with `rd` and replace its first byte with 0/1 depending on
      * whether any byte of the group was non-zero.
      */
996 #define MERGE(la, end, step, rd) \
997  for (n = 0; n < end; n += step) \
998  la[n] = !!rd(&la[n])
999 #define MERGE_CTX(step, rd) \
1000  do { \
1001  MERGE(l, end_y, step, rd); \
1002  MERGE(a, end_x, step, rd); \
1003  } while (0)
1004 
     /*
      * DECODE_Y_COEF_LOOP: walk the luma transform blocks in raster order.
      * n advances by step * step per block and addresses both the coefficient
      * storage (td->block + 16 * n * bytesperpixel) and td->eob[n]. The
      * transform type comes from b->mode[mode_index]; `v` is empty or 32 and
      * selects the decode_coeffs_b / decode_coeffs_b32 helper. For step >= 4
      * the result is stored with a 16-bit write, otherwise as one td->eob
      * element.
      */
1005 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
1006  for (n = 0, y = 0; y < end_y; y += step) { \
1007  for (x = 0; x < end_x; x += step, n += step * step) { \
1008  enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[mode_index]]; \
1009  ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
1010  (td, td->block + 16 * n * bytesperpixel, 16 * step * step, \
1011  c, e, p, a[x] + l[y], yscans[txtp], \
1012  ynbs[txtp], y_band_counts, qmul[0]); \
1013  a[x] = l[y] = !!ret; \
1014  total_coeff |= !!ret; \
1015  if (step >= 4) { \
1016  AV_WN16A(&td->eob[n], ret); \
1017  } else { \
1018  td->eob[n] = ret; \
1019  } \
1020  } \
1021  }
1022 
     /*
      * SPLAT / SPLAT_CTX: inverse of MERGE - copy the first byte of every
      * group of `step` context bytes to the rest of the group. When `cond`
      * holds (the block is not clipped, end == w4 / h4) whole groups are
      * written with one aligned 32/64-bit store; otherwise memset() is used
      * and limited to the bytes before `end`.
      */
1023 #define SPLAT(la, end, step, cond) \
1024  if (step == 2) { \
1025  for (n = 1; n < end; n += step) \
1026  la[n] = la[n - 1]; \
1027  } else if (step == 4) { \
1028  if (cond) { \
1029  for (n = 0; n < end; n += step) \
1030  AV_WN32A(&la[n], la[n] * 0x01010101); \
1031  } else { \
1032  for (n = 0; n < end; n += step) \
1033  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
1034  } \
1035  } else /* step == 8 */ { \
1036  if (cond) { \
1037  if (HAVE_FAST_64BIT) { \
1038  for (n = 0; n < end; n += step) \
1039  AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
1040  } else { \
1041  for (n = 0; n < end; n += step) { \
1042  uint32_t v32 = la[n] * 0x01010101; \
1043  AV_WN32A(&la[n], v32); \
1044  AV_WN32A(&la[n + 4], v32); \
1045  } \
1046  } \
1047  } else { \
1048  for (n = 0; n < end; n += step) \
1049  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
1050  } \
1051  }
1052 #define SPLAT_CTX(step) \
1053  do { \
1054  SPLAT(a, end_x, step, end_x == w4); \
1055  SPLAT(l, end_y, step, end_y == h4); \
1056  } while (0)
1057 
1058  /* y tokens */
     /* TX_4X4 needs no merge/splat: one context byte per transform block.
      * Its mode index is n only when b->bs > BS_8x8, otherwise b->mode[0]. */
1059  switch (b->tx) {
1060  case TX_4X4:
1061  DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
1062  break;
1063  case TX_8X8:
1064  MERGE_CTX(2, AV_RN16A);
1065  DECODE_Y_COEF_LOOP(2, 0,);
1066  SPLAT_CTX(2);
1067  break;
1068  case TX_16X16:
1069  MERGE_CTX(4, AV_RN32A);
1070  DECODE_Y_COEF_LOOP(4, 0,);
1071  SPLAT_CTX(4);
1072  break;
1073  case TX_32X32:
1074  MERGE_CTX(8, AV_RN64A);
1075  DECODE_Y_COEF_LOOP(8, 0, 32);
1076  SPLAT_CTX(8);
1077  break;
1078  }
1079 
     /*
      * DECODE_UV_COEF_LOOP: same traversal as the luma loop, but for chroma
      * plane `pl`: fixed uvscan/uvnb tables, qmul[1], and results stored in
      * td->uvblock[pl] / td->uveob[pl].
      */
1080 #define DECODE_UV_COEF_LOOP(step, v) \
1081  for (n = 0, y = 0; y < end_y; y += step) { \
1082  for (x = 0; x < end_x; x += step, n += step * step) { \
1083  ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
1084  (td, td->uvblock[pl] + 16 * n * bytesperpixel, \
1085  16 * step * step, c, e, p, a[x] + l[y], \
1086  uvscan, uvnb, uv_band_counts, qmul[1]); \
1087  a[x] = l[y] = !!ret; \
1088  total_coeff |= !!ret; \
1089  if (step >= 4) { \
1090  AV_WN16A(&td->uveob[pl][n], ret); \
1091  } else { \
1092  td->uveob[pl][n] = ret; \
1093  } \
1094  } \
1095  }
1096 
     /* switch to the chroma (plane type 1) tables, indexed by b->uvtx */
1097  p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
1098  c = td->counts.coef[b->uvtx][1 /* uv */][!b->intra];
1099  e = td->counts.eob[b->uvtx][1 /* uv */][!b->intra];
     /* scale the block extents by the chroma subsampling shifts */
1100  w4 >>= s->ss_h;
1101  end_x >>= s->ss_h;
1102  h4 >>= s->ss_v;
1103  end_y >>= s->ss_v;
1104  for (pl = 0; pl < 2; pl++) {
     /* context offsets are doubled only when the direction is not subsampled */
1105  a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
1106  l = &td->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
1107  switch (b->uvtx) {
1108  case TX_4X4:
1109  DECODE_UV_COEF_LOOP(1,);
1110  break;
1111  case TX_8X8:
1112  MERGE_CTX(2, AV_RN16A);
1113  DECODE_UV_COEF_LOOP(2,);
1114  SPLAT_CTX(2);
1115  break;
1116  case TX_16X16:
1117  MERGE_CTX(4, AV_RN32A);
1118  DECODE_UV_COEF_LOOP(4,);
1119  SPLAT_CTX(4);
1120  break;
1121  case TX_32X32:
1122  MERGE_CTX(8, AV_RN64A);
1123  DECODE_UV_COEF_LOOP(8, 32);
1124  SPLAT_CTX(8);
1125  break;
1126  }
1127  }
1128 
1129  return total_coeff;
1130 }
1131 
1133 {
1134  return decode_coeffs(td, 1);
1135 }
1136 
1138 {
1139  return decode_coeffs(td, 0);
1140 }
1141 
/*
 * OR the loop-filter edge bits of one block into `mask`.
 *
 * mask       two [8][4] byte tables: mask[0] and mask[1], each indexed
 *            [y][i] with y in row_and_7 .. row_and_7 + h - 1 and i in 0..3.
 *            Bits within a byte are column positions (1 << col_and_7 and up).
 *            NOTE(review): from the local names (m_row_16, m_row_8, m_row_4)
 *            index i appears to select the filter width and mask[0]/mask[1]
 *            the two edge directions - confirm against VP9Filter in vp9dec.h.
 * ss_h, ss_v subsampling flags of the plane; also used to index the
 *            wide_filter_*_mask tables and to adjust the transform step.
 * row_and_7, col_and_7
 *            first row / column of the block inside the mask tables.
 * w, h       number of columns / rows to mark.
 * col_end, row_end
 *            non-zero when the block touches an odd right / bottom frame
 *            edge (ff_vp9_decode_block passes s->cols & 7 / s->rows & 7 in
 *            that case and 0 otherwise).
 * tx         transform size of the plane.
 * skip_inter non-zero for a skipped inter block: only the outer block edges
 *            are marked, not the inner transform edges.
 */
1142 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
1143  int row_and_7, int col_and_7,
1144  int w, int h, int col_end, int row_end,
1145  enum TxfmMode tx, int skip_inter)
1146 {
1147  static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
1148  static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
1149 
1150  // FIXME I'm pretty sure all loops can be replaced by a single LUT if
1151  // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
1152  // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
1153  // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
1154 
1155  // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
1156  // edges. This means that for UV, we work on two subsampled blocks at
1157  // a time, and we only use the topleft block's mode information to set
1158  // things like block strength. Thus, for any block size smaller than
1159  // 16x16, ignore the odd portion of the block.
1160  if (tx == TX_4X4 && (ss_v | ss_h)) {
     // an odd-positioned block is dropped entirely; an even one is
     // widened by one unless it sits on the odd frame edge
1161  if (h == ss_v) {
1162  if (row_and_7 & 1)
1163  return;
1164  if (!row_end)
1165  h += 1;
1166  }
1167  if (w == ss_h) {
1168  if (col_and_7 & 1)
1169  return;
1170  if (!col_end)
1171  w += 1;
1172  }
1173  }
1174 
1175  if (tx == TX_4X4 && !skip_inter) {
     // t: bit of the first column; m_col: w consecutive column bits from t
1176  int t = 1 << col_and_7, m_col = (t << w) - t, y;
1177  // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
1178  int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
1179 
1180  for (y = row_and_7; y < h + row_and_7; y++) {
     // index 1 on rows where (y & wide_filter_row_mask[ss_v]) == 0, else 2
1181  int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
1182 
1183  mask[0][y][1] |= m_row_8;
1184  mask[0][y][2] |= m_row_4;
1185  // for odd lines, if the odd col is not being filtered,
1186  // skip odd row also:
1187  // .---. <-- a
1188  // | |
1189  // |___| <-- b
1190  // ^ ^
1191  // c d
1192  //
1193  // if a/c are even row/col and b/d are odd, and d is skipped,
1194  // e.g. right edge of size-66x66.webm, then skip b also (bug)
1195  if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
1196  mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
1197  } else {
1198  mask[1][y][col_mask_id] |= m_col;
1199  }
     // index 3 is only written for a direction that is not subsampled
1200  if (!ss_h)
1201  mask[0][y][3] |= m_col;
1202  if (!ss_v) {
1203  if (ss_h && (col_end & 1))
1204  mask[1][y][3] |= (t << (w - 1)) - t;
1205  else
1206  mask[1][y][3] |= m_col;
1207  }
1208  }
1209  } else {
1210  int y, t = 1 << col_and_7, m_col = (t << w) - t;
1211 
1212  if (!skip_inter) {
     // mask index 1 for TX_8X8, index 0 for larger transforms
1213  int mask_id = (tx == TX_8X8);
     // l2: log2 of the transform step in mask units for this direction
1214  int l2 = tx + ss_h - 1, step1d;
1215  static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
1216  int m_row = m_col & masks[l2];
1217 
1218  // at odd UV col/row edges tx16/tx32 loopfilter edges, force
1219  // 8wd loopfilter to prevent going off the visible edge.
1220  if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
1221  int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
1222  int m_row_8 = m_row - m_row_16;
1223 
1224  for (y = row_and_7; y < h + row_and_7; y++) {
1225  mask[0][y][0] |= m_row_16;
1226  mask[0][y][1] |= m_row_8;
1227  }
1228  } else {
1229  for (y = row_and_7; y < h + row_and_7; y++)
1230  mask[0][y][mask_id] |= m_row;
1231  }
1232 
     // same again for the other direction, stepping one transform at a time
1233  l2 = tx + ss_v - 1;
1234  step1d = 1 << l2;
1235  if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
1236  for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
1237  mask[1][y][0] |= m_col;
     // the last row, if the loop stopped exactly on it, goes to index 1
1238  if (y - row_and_7 == h - 1)
1239  mask[1][y][1] |= m_col;
1240  } else {
1241  for (y = row_and_7; y < h + row_and_7; y += step1d)
1242  mask[1][y][mask_id] |= m_col;
1243  }
1244  } else if (tx != TX_4X4) {
     // skipped inter block, tx > 4x4: mark only the first row and first column
1245  int mask_id;
1246 
1247  mask_id = (tx == TX_8X8) || (h == ss_v);
1248  mask[1][row_and_7][mask_id] |= m_col;
1249  mask_id = (tx == TX_8X8) || (w == ss_h);
1250  for (y = row_and_7; y < h + row_and_7; y++)
1251  mask[0][y][mask_id] |= t;
1252  } else {
     // skipped inter block, tx == 4x4: first column split between
     // index 1 and index 2, first row placed as in the non-skip case
1253  int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
1254 
1255  for (y = row_and_7; y < h + row_and_7; y++) {
1256  mask[0][y][2] |= t4;
1257  mask[0][y][1] |= t8;
1258  }
1259  mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
1260  }
1261  }
1262 }
1263 
1264 void ff_vp9_decode_block(VP9TileData *td, int row, int col,
1265  VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
1266  enum BlockLevel bl, enum BlockPartition bp)
1267 {
1268  const VP9Context *s = td->s;
1269  VP9Block *b = td->b;
1270  enum BlockSize bs = bl * 3 + bp;
1271  int bytesperpixel = s->bytesperpixel;
1272  int w4 = ff_vp9_bwh_tab[1][bs][0], h4 = ff_vp9_bwh_tab[1][bs][1], lvl;
1273  int emu[2];
1274  AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1275 
1276  td->row = row;
1277  td->row7 = row & 7;
1278  td->col = col;
1279  td->col7 = col & 7;
1280 
1281  td->min_mv.x = -(128 + col * 64);
1282  td->min_mv.y = -(128 + row * 64);
1283  td->max_mv.x = 128 + (s->cols - col - w4) * 64;
1284  td->max_mv.y = 128 + (s->rows - row - h4) * 64;
1285 
1286  if (s->pass < 2) {
1287  b->bs = bs;
1288  b->bl = bl;
1289  b->bp = bp;
1290  decode_mode(td);
1291  b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
1292  (s->ss_v && h4 * 2 == (1 << b->tx)));
1293 
1294  if (td->block_structure) {
1295  td->block_structure[td->nb_block_structure].row = row;
1296  td->block_structure[td->nb_block_structure].col = col;
1299  td->nb_block_structure++;
1300  }
1301 
1302  if (!b->skip) {
1303  int has_coeffs;
1304 
1305  if (bytesperpixel == 1) {
1306  has_coeffs = decode_coeffs_8bpp(td);
1307  } else {
1308  has_coeffs = decode_coeffs_16bpp(td);
1309  }
1310  if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
1311  b->skip = 1;
1312  memset(&s->above_skip_ctx[col], 1, w4);
1313  memset(&td->left_skip_ctx[td->row7], 1, h4);
1314  }
1315  } else {
1316  int row7 = td->row7;
1317 
1318 #define SPLAT_ZERO_CTX(v, n) \
1319  switch (n) { \
1320  case 1: v = 0; break; \
1321  case 2: AV_ZERO16(&v); break; \
1322  case 4: AV_ZERO32(&v); break; \
1323  case 8: AV_ZERO64(&v); break; \
1324  case 16: AV_ZERO128(&v); break; \
1325  }
1326 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
1327  do { \
1328  SPLAT_ZERO_CTX(dir##_y_##var[off * 2], n * 2); \
1329  if (s->ss_##dir2) { \
1330  SPLAT_ZERO_CTX(dir##_uv_##var[0][off], n); \
1331  SPLAT_ZERO_CTX(dir##_uv_##var[1][off], n); \
1332  } else { \
1333  SPLAT_ZERO_CTX(dir##_uv_##var[0][off * 2], n * 2); \
1334  SPLAT_ZERO_CTX(dir##_uv_##var[1][off * 2], n * 2); \
1335  } \
1336  } while (0)
1337 
1338  switch (w4) {
1339  case 1: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 1, h); break;
1340  case 2: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 2, h); break;
1341  case 4: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 4, h); break;
1342  case 8: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 8, h); break;
1343  }
1344  switch (h4) {
1345  case 1: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 1, v); break;
1346  case 2: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 2, v); break;
1347  case 4: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 4, v); break;
1348  case 8: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 8, v); break;
1349  }
1350  }
1351 
1352  if (s->pass == 1) {
1353  s->td[0].b++;
1354  s->td[0].block += w4 * h4 * 64 * bytesperpixel;
1355  s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
1356  s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
1357  s->td[0].eob += 4 * w4 * h4;
1358  s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
1359  s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
1360 
1361  return;
1362  }
1363  }
1364 
1365  // emulated overhangs if the stride of the target buffer can't hold. This
1366  // makes it possible to support emu-edge and so on even if we have large block
1367  // overhangs
1368  emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
1369  (row + h4) > s->rows;
1370  emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
1371  (row + h4) > s->rows;
1372  if (emu[0]) {
1373  td->dst[0] = td->tmp_y;
1374  td->y_stride = 128;
1375  } else {
1376  td->dst[0] = f->data[0] + yoff;
1377  td->y_stride = f->linesize[0];
1378  }
1379  if (emu[1]) {
1380  td->dst[1] = td->tmp_uv[0];
1381  td->dst[2] = td->tmp_uv[1];
1382  td->uv_stride = 128;
1383  } else {
1384  td->dst[1] = f->data[1] + uvoff;
1385  td->dst[2] = f->data[2] + uvoff;
1386  td->uv_stride = f->linesize[1];
1387  }
1388  if (b->intra) {
1389  if (s->s.h.bpp > 8) {
1390  ff_vp9_intra_recon_16bpp(td, yoff, uvoff);
1391  } else {
1392  ff_vp9_intra_recon_8bpp(td, yoff, uvoff);
1393  }
1394  } else {
1395  if (s->s.h.bpp > 8) {
1397  } else {
1399  }
1400  }
1401  if (emu[0]) {
1402  int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
1403 
1404  for (n = 0; o < w; n++) {
1405  int bw = 64 >> n;
1406 
1407  av_assert2(n <= 4);
1408  if (w & bw) {
1409  s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
1410  td->tmp_y + o * bytesperpixel, 128, h, 0, 0);
1411  o += bw;
1412  }
1413  }
1414  }
1415  if (emu[1]) {
1416  int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
1417  int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
1418 
1419  for (n = s->ss_h; o < w; n++) {
1420  int bw = 64 >> n;
1421 
1422  av_assert2(n <= 4);
1423  if (w & bw) {
1424  s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
1425  td->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
1426  s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
1427  td->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
1428  o += bw;
1429  }
1430  }
1431  }
1432 
1433  // pick filter level and find edges to apply filter to
1434  if (s->s.h.filter.level &&
1435  (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
1436  [b->mode[3] != ZEROMV]) > 0) {
1437  int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
1438  int skip_inter = !b->intra && b->skip, col7 = td->col7, row7 = td->row7;
1439 
1440  setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
1441  mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
1442  if (s->ss_h || s->ss_v)
1443  mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
1444  s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
1445  s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
1446  b->uvtx, skip_inter);
1447  }
1448 
1449  if (s->pass == 2) {
1450  s->td[0].b++;
1451  s->td[0].block += w4 * h4 * 64 * bytesperpixel;
1452  s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
1453  s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
1454  s->td[0].eob += 4 * w4 * h4;
1455  s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
1456  s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
1457  }
1458 }
VP9TileData::left_y_nnz_ctx
uint8_t left_y_nnz_ctx[16]
Definition: vp9dec.h:210
ff_vp9_inter_recon_8bpp
void ff_vp9_inter_recon_8bpp(VP9TileData *td)
Definition: vp9recon.c:655
DECODE_Y_COEF_LOOP
#define DECODE_Y_COEF_LOOP(step, mode_index, v)
VP9TileData::single_ref
unsigned single_ref[5][2][2]
Definition: vp9dec.h:184
ff_vp9_default_kf_uvmode_probs
const uint8_t ff_vp9_default_kf_uvmode_probs[10][9]
Definition: vp9data.c:201
PRED_SWITCHABLE
@ PRED_SWITCHABLE
Definition: vp9shared.h:52
ff_vp9_fill_mv
void ff_vp9_fill_mv(VP9TileData *td, VP9mv *mv, int mode, int sb)
Definition: vp9mvs.c:291
ff_vp9_filter_tree
const int8_t ff_vp9_filter_tree[2][2]
Definition: vp9data.c:220
SET_CTXS
#define SET_CTXS(perf, dir, off, n)
decode_coeffs_b_8bpp
static int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], const uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, const int16_t *qmul)
Definition: vp9block.c:925
mv
static const int8_t mv[256][2]
Definition: 4xm.c:81
decode_coeffs
static av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel)
Definition: vp9block.c:965
VP9TileData::left_skip_ctx
uint8_t left_skip_ctx[8]
Definition: vp9dec.h:215
mask
int mask
Definition: mediacodecdec_common.c:154
VP9TileData::row
int row
Definition: vp9dec.h:171
PRED_COMPREF
@ PRED_COMPREF
Definition: vp9shared.h:51
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:403
w
uint8_t w
Definition: llviddspenc.c:38
BlockPartition
BlockPartition
Definition: vp9shared.h:35
VP9TileData::x
int x
Definition: vp9dec.h:225
b
#define b
Definition: input.c:41
REF_FRAME_SEGMAP
#define REF_FRAME_SEGMAP
Definition: vp9shared.h:170
AV_WN32A
#define AV_WN32A(p, v)
Definition: intreadwrite.h:534
VP9TileData::left_segpred_ctx
uint8_t left_segpred_ctx[8]
Definition: vp9dec.h:217
vp89_rac.h
VP9TileData::left_comp_ctx
uint8_t left_comp_ctx[8]
Definition: vp9dec.h:219
VP9Filter
Definition: vp9dec.h:78
BS_4x8
@ BS_4x8
Definition: vp9shared.h:94
VP9TileData::b
VP9Block * b
Definition: vp9dec.h:174
VPXRangeCoder
Definition: vpx_rac.h:35
FILTER_SWITCHABLE
@ FILTER_SWITCHABLE
Definition: vp9.h:70
VP9TileData::skip
unsigned skip[3][2]
Definition: vp9dec.h:189
VP9Block
Definition: vp9dec.h:84
decode_mode
static void decode_mode(VP9TileData *td)
Definition: vp9block.c:80
bit
#define bit(string, value)
Definition: cbs_mpeg2.c:56
TX_SWITCHABLE
@ TX_SWITCHABLE
Definition: vp9.h:33
VP9TileData::tmp_uv
uint8_t tmp_uv[2][64 *64 *2]
Definition: vp9dec.h:224
ff_vp9_intramode_tree
const int8_t ff_vp9_intramode_tree[9][2]
Definition: vp9data.c:75
decode_coeffs_b_16bpp
static int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], const uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, const int16_t *qmul)
Definition: vp9block.c:945
VP9TileData::left_filter_ctx
uint8_t left_filter_ctx[8]
Definition: vp9dec.h:221
val
static double val(void *priv, double ch)
Definition: aeval.c:77
mask_edges
static av_always_inline void mask_edges(uint8_t(*mask)[8][4], int ss_h, int ss_v, int row_and_7, int col_and_7, int w, int h, int col_end, int row_end, enum TxfmMode tx, int skip_inter)
Definition: vp9block.c:1142
progressframe.h
ZEROMV
@ ZEROMV
Definition: vp9shared.h:45
VP9TileData::col
int col
Definition: vp9dec.h:171
avassert.h
ff_vp9_default_kf_ymode_probs
const uint8_t ff_vp9_default_kf_ymode_probs[10][10][9]
Definition: vp9data.c:87
VP9TileData::left_txfm_ctx
uint8_t left_txfm_ctx[8]
Definition: vp9dec.h:216
decode_coeffs_16bpp
static int decode_coeffs_16bpp(VP9TileData *td)
Definition: vp9block.c:1137
VP9TileData::comp_ref
unsigned comp_ref[5][2]
Definition: vp9dec.h:185
s
#define s(width, name)
Definition: cbs_vp9.c:198
ff_vp9_inter_mode_tree
const int8_t ff_vp9_inter_mode_tree[3][2]
Definition: vp9data.c:214
VP9TileData::max_mv
struct VP9TileData::@274 max_mv
VP9Context::s
VP9SharedContext s
Definition: vp9dec.h:97
ff_vp9_scans
const int16_t *const ff_vp9_scans[5][4]
Definition: vp9data.c:600
vp9data.h
VP9TileData::intra
unsigned intra[4][2]
Definition: vp9dec.h:182
VP9TileData::filter
unsigned filter[4][3]
Definition: vp9dec.h:180
ff_progress_frame_await
the pkt_dts and pkt_pts fields in AVFrame will work as usual Restrictions on codec whose streams don t reset across will not work because their bitstreams cannot be decoded in parallel *The contents of buffers must not be read before ff_progress_frame_await() has been called on them. reget_buffer() and buffer age optimizations no longer work. *The contents of buffers must not be written to after ff_progress_frame_report() has been called on them. This includes draw_edges(). Porting codecs to frame threading
AV_WN16A
#define AV_WN16A(p, v)
Definition: intreadwrite.h:530
VP9TileData::block_size_idx_x
unsigned int block_size_idx_x
Definition: vp9dec.h:234
BS_8x4
@ BS_8x4
Definition: vp9shared.h:93
STORE_COEF
#define STORE_COEF(c, i, v)
VP9TileData::y
int y
Definition: vp9dec.h:225
if
if(ret)
Definition: filter_design.txt:179
decode_coeffs_b_generic
static av_always_inline int decode_coeffs_b_generic(VPXRangeCoder *c, int16_t *coef, int n_coeffs, int is_tx32x32, int is8bitsperpixel, int bpp, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], const uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, const int16_t *qmul)
Definition: vp9block.c:806
MERGE_CTX
#define MERGE_CTX(step, rd)
VP9Context
Definition: vp9dec.h:96
VP9TileData::uv_stride
ptrdiff_t uv_stride
Definition: vp9dec.h:173
TX_8X8
@ TX_8X8
Definition: vp9.h:29
VP9TileData::counts
struct VP9TileData::@273 counts
TX_16X16
@ TX_16X16
Definition: vp9.h:30
ff_vp9_filter_lut
enum FilterMode ff_vp9_filter_lut[3]
Definition: vp9data.c:225
DECODE_UV_COEF_LOOP
#define DECODE_UV_COEF_LOOP(step, v)
ff_vp9_segmentation_tree
const int8_t ff_vp9_segmentation_tree[7][2]
Definition: vp9data.c:65
SPLAT_ZERO_YUV
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2)
VP9TileData::eob
unsigned eob[4][2][2][6][6][2]
Definition: vp9dec.h:203
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_vp9_intra_recon_16bpp
void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:294
TxfmMode
TxfmMode
Definition: vp9.h:27
vp9.h
DCT_DCT
@ DCT_DCT
Definition: vp9.h:38
f
f
Definition: af_crystalizer.c:122
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
BS_8x8
@ BS_8x8
Definition: vp9shared.h:92
AV_RN64A
#define AV_RN64A(p)
Definition: intreadwrite.h:526
TX_4X4
@ TX_4X4
Definition: vp9.h:28
frame.h
N_BS_SIZES
@ N_BS_SIZES
Definition: vp9shared.h:96
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
VP9TileData::left_intra_ctx
uint8_t left_intra_ctx[8]
Definition: vp9dec.h:218
VP9TileData::uv_mode
unsigned uv_mode[10][10]
Definition: vp9dec.h:179
VP9TileData::left_uv_nnz_ctx
uint8_t left_uv_nnz_ctx[2][16]
Definition: vp9dec.h:213
SPLAT_CTX
#define SPLAT_CTX(var, val, n)
decode_coeffs_b32_16bpp
static int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], const uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, const int16_t *qmul)
Definition: vp9block.c:955
VP9TileData::tmp_y
uint8_t tmp_y[64 *64 *2]
Definition: vp9dec.h:223
av_assert2
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:67
VP9TileData::mv_mode
unsigned mv_mode[7][4]
Definition: vp9dec.h:181
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
VP9TileData::y_mode
unsigned y_mode[4][10]
Definition: vp9dec.h:178
vpx_rac.h
VP9TileData::block_size_idx_y
unsigned int block_size_idx_y
Definition: vp9dec.h:235
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:56
VP9TileData::block_structure
struct VP9TileData::@275 * block_structure
av_always_inline
#define av_always_inline
Definition: attributes.h:49
VP9TileData::left_mv_ctx
VP9mv left_mv_ctx[16][2]
Definition: vp9dec.h:212
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
vpx_rac_get_prob_branchy
static av_always_inline int vpx_rac_get_prob_branchy(VPXRangeCoder *c, int prob)
Definition: vpx_rac.h:99
BlockSize
BlockSize
Definition: vp9shared.h:82
AV_COPY32
#define AV_COPY32(d, s)
Definition: intreadwrite.h:634
VP9TileData::left_mode_ctx
uint8_t left_mode_ctx[16]
Definition: vp9dec.h:211
decode_coeffs_b32_8bpp
static int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], const uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, const int16_t *qmul)
Definition: vp9block.c:935
VP9TileData::coef
unsigned coef[4][2][2][6][6][3]
Definition: vp9dec.h:202
VP9TileData::c
VPXRangeCoder * c
Definition: vp9dec.h:170
AV_RN32A
#define AV_RN32A(p)
Definition: intreadwrite.h:522
stride
#define stride
Definition: h264pred_template.c:537
VP9TileData::comp
unsigned comp[5][2]
Definition: vp9dec.h:183
vp89_rac_get_tree
static av_always_inline int vp89_rac_get_tree(VPXRangeCoder *c, const int8_t(*tree)[2], const uint8_t *probs)
Definition: vp89_rac.h:54
VP9TileData::s
const VP9Context * s
Definition: vp9dec.h:168
ret
ret
Definition: filter_design.txt:187
pred
static const float pred[4]
Definition: siprdata.h:259
VP9mvrefPair
Definition: vp9shared.h:60
left
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left
Definition: snow.txt:386
VP9TileData
Definition: vp9dec.h:167
AV_WN64A
#define AV_WN64A(p, v)
Definition: intreadwrite.h:538
vp89_rac_get
static av_always_inline int vp89_rac_get(VPXRangeCoder *c)
Definition: vp89_rac.h:36
VP9Filter::mask
uint8_t mask[2][2][8][4]
Definition: vp9dec.h:81
ff_vp9_bwh_tab
const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2]
Definition: vp9data.c:25
VP9SharedContext::h
VP9BitstreamHeader h
Definition: vp9shared.h:165
AV_RN16A
#define AV_RN16A(p)
Definition: intreadwrite.h:518
setctx_2d
static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h, ptrdiff_t stride, int v)
Definition: vp9block.c:34
ff_vp9_decode_block
void ff_vp9_decode_block(VP9TileData *td, int row, int col, VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl, enum BlockPartition bp)
Definition: vp9block.c:1264
NEARESTMV
@ NEARESTMV
Definition: vp9shared.h:43
BlockLevel
BlockLevel
Definition: vp9shared.h:75
VP9TileData::dst
uint8_t * dst[3]
Definition: vp9dec.h:172
vp9dec.h
CUR_FRAME
#define CUR_FRAME
Definition: vp9shared.h:168
TX_32X32
@ TX_32X32
Definition: vp9.h:31
VP9TileData::row7
int row7
Definition: vp9dec.h:171
VP9TileData::col7
int col7
Definition: vp9dec.h:171
VP9BitstreamHeader::bpp
uint8_t bpp
Definition: vp9shared.h:102
ff_vp9_intra_recon_8bpp
void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:289
VP9TileData::y_stride
ptrdiff_t y_stride
Definition: vp9dec.h:173
h
h
Definition: vp9dsp_template.c:2070
VP9TileData::tx16p
unsigned tx16p[2][3]
Definition: vp9dec.h:187
decode_coeffs_8bpp
static int decode_coeffs_8bpp(VP9TileData *td)
Definition: vp9block.c:1132
VP9TileData::nb_block_structure
unsigned int nb_block_structure
Definition: vp9dec.h:237
VP9TileData::left_ref_ctx
uint8_t left_ref_ctx[8]
Definition: vp9dec.h:220
VP9TileData::tile_col_start
unsigned tile_col_start
Definition: vp9dec.h:175
vpx_rac_get_prob
#define vpx_rac_get_prob
Definition: vpx_rac.h:82
VP9Filter::level
uint8_t level[8 *8]
Definition: vp9dec.h:79
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
ff_vp9_scans_nb
const int16_t(*const [5][4] ff_vp9_scans_nb)[2]
Definition: vp9data.c:1157
VP9TileData::tx8p
unsigned tx8p[2][2]
Definition: vp9dec.h:188
VP9TileData::min_mv
struct VP9TileData::@274 min_mv
VP9TileData::tx32p
unsigned tx32p[2][4]
Definition: vp9dec.h:186
ff_vp9_inter_recon_16bpp
void ff_vp9_inter_recon_16bpp(VP9TileData *td)
Definition: vp9recon.c:660