24 #include "config_components.h"
/* Sync code used to validate VP9 frame headers.
 * NOTE(review): per the name this is the VP9 bitstream sync marker; the
 * reading site is not visible in this chunk -- confirm against the VP9 spec. */
46 #define VP9_SYNCCODE 0x498342
65 for (
i = 0;
i < n;
i++)
103 f->segmentation_map =
NULL;
104 f->hwaccel_picture_private =
NULL;
116 sz = 64 *
s->sb_cols *
s->sb_rows;
117 if (sz !=
s->frame_extradata_pool_size) {
120 if (!
s->frame_extradata_pool) {
121 s->frame_extradata_pool_size = 0;
124 s->frame_extradata_pool_size = sz;
130 memset(
f->extradata->data, 0,
f->extradata->size);
132 f->segmentation_map =
f->extradata->data;
140 if (!
f->hwaccel_priv_buf)
142 f->hwaccel_picture_private =
f->hwaccel_priv_buf->data;
169 if (
src->hwaccel_picture_private) {
/*
 * Upper bound on the number of hardware-accelerated pixel-format entries
 * that can be compiled in: a sum of the per-hwaccel CONFIG_* flags, with
 * D3D11VA counted twice (hence the "* 2").
 * NOTE(review): presumably used to size the candidate pix_fmt array that is
 * filled via "*fmtp++ = s->pix_fmt" further down -- the array declaration is
 * not visible in this chunk, confirm.
 */
185 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
186 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
187 CONFIG_VP9_NVDEC_HWACCEL + \
188 CONFIG_VP9_VAAPI_HWACCEL + \
189 CONFIG_VP9_VDPAU_HWACCEL + \
190 CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
194 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
199 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
203 switch (
s->pix_fmt) {
206 #if CONFIG_VP9_DXVA2_HWACCEL
209 #if CONFIG_VP9_D3D11VA_HWACCEL
213 #if CONFIG_VP9_NVDEC_HWACCEL
216 #if CONFIG_VP9_VAAPI_HWACCEL
219 #if CONFIG_VP9_VDPAU_HWACCEL
222 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
227 #if CONFIG_VP9_NVDEC_HWACCEL
230 #if CONFIG_VP9_VAAPI_HWACCEL
233 #if CONFIG_VP9_VDPAU_HWACCEL
240 #if CONFIG_VP9_VAAPI_HWACCEL
246 *fmtp++ =
s->pix_fmt;
254 s->gf_fmt =
s->pix_fmt;
262 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
265 s->last_fmt =
s->pix_fmt;
266 s->sb_cols = (
w + 63) >> 6;
267 s->sb_rows = (
h + 63) >> 6;
268 s->cols = (
w + 7) >> 3;
269 s->rows = (
h + 7) >> 3;
/*
 * Carve the next sub-buffer out of the flat allocation pointed to by `p`:
 * point `var` (cast to `type`) at the current position, then advance `p` by
 * s->sb_cols * (n) elements of *var.
 * Wrapped in do { } while (0) so both statements stay together when the
 * macro is expanded inside an unbraced if/else or loop body (the original
 * two-statement form would leave the `p +=` outside the conditional).
 * NOTE(review): relies on `s` and `p` being in scope at every expansion site;
 * all visible uses are standalone statements, so the do/while wrapper is
 * call-compatible.
 */
#define assign(var, type, n) \
    do { \
        var = (type) p; \
        p += s->sb_cols * (n) * sizeof(*var); \
    } while (0)
276 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
277 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
280 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
281 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
282 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
283 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
284 assign(
s->above_mode_ctx, uint8_t *, 16);
286 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
287 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
288 assign(
s->above_partition_ctx, uint8_t *, 8);
289 assign(
s->above_skip_ctx, uint8_t *, 8);
290 assign(
s->above_txfm_ctx, uint8_t *, 8);
291 assign(
s->above_segpred_ctx, uint8_t *, 8);
292 assign(
s->above_intra_ctx, uint8_t *, 8);
293 assign(
s->above_comp_ctx, uint8_t *, 8);
294 assign(
s->above_ref_ctx, uint8_t *, 8);
295 assign(
s->above_filter_ctx, uint8_t *, 8);
300 for (
i = 0;
i <
s->active_tile_cols;
i++)
304 if (
s->s.h.bpp !=
s->last_bpp) {
307 s->last_bpp =
s->s.h.bpp;
317 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
320 if (
td->b_base &&
td->block_base &&
s->block_alloc_using_2pass ==
s->s.frames[
CUR_FRAME].uses_2pass)
324 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
325 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
327 int sbs =
s->sb_cols *
s->sb_rows;
330 td->block_base =
av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
331 16 * 16 + 2 * chroma_eobs) * sbs);
332 if (!
td->b_base || !
td->block_base)
334 td->uvblock_base[0] =
td->block_base + sbs * 64 * 64 * bytesperpixel;
335 td->uvblock_base[1] =
td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
336 td->eob_base = (uint8_t *) (
td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
337 td->uveob_base[0] =
td->eob_base + 16 * 16 * sbs;
338 td->uveob_base[1] =
td->uveob_base[0] + chroma_eobs * sbs;
342 if (!
td->block_structure)
346 for (
i = 1;
i <
s->active_tile_cols;
i++)
349 for (
i = 0;
i <
s->active_tile_cols;
i++) {
351 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
352 16 * 16 + 2 * chroma_eobs);
353 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
355 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
356 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
357 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
358 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
359 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
363 if (!
s->td[
i].block_structure)
368 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
385 return m - ((v + 1) >> 1);
392 static const uint8_t inv_map_table[255] = {
393 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
394 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
395 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
396 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
397 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
398 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
399 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
400 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
401 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
402 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
403 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
404 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
405 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
406 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
407 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
408 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
409 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
410 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
458 s->s.h.bpp = 8 +
bits * 2;
459 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
465 s->ss_h =
s->ss_v = 0;
479 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
491 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
502 s->ss_h =
s->ss_v = 1;
503 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
514 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
516 const uint8_t *data2;
540 s->last_keyframe =
s->s.h.keyframe;
543 last_invisible =
s->s.h.invisible;
546 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
548 if (
s->s.h.keyframe) {
556 s->s.h.refreshrefmask = 0xff;
562 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
563 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
564 if (
s->s.h.intraonly) {
573 s->ss_h =
s->ss_v = 1;
576 s->bytesperpixel = 1;
589 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
591 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
593 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
594 if (!
s->s.refs[
s->s.h.refidx[0]].f->buf[0] ||
595 !
s->s.refs[
s->s.h.refidx[1]].f->buf[0] ||
596 !
s->s.refs[
s->s.h.refidx[2]].f->buf[0]) {
601 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
602 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
604 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
605 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
607 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
608 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
616 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f->width ==
w &&
623 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
624 s->s.h.signbias[0] !=
s->s.h.signbias[2];
625 if (
s->s.h.allowcompinter) {
626 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
627 s->s.h.fixcompref = 2;
628 s->s.h.varcompref[0] = 0;
629 s->s.h.varcompref[1] = 1;
630 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
631 s->s.h.fixcompref = 1;
632 s->s.h.varcompref[0] = 0;
633 s->s.h.varcompref[1] = 2;
635 s->s.h.fixcompref = 0;
636 s->s.h.varcompref[0] = 1;
637 s->s.h.varcompref[1] = 2;
642 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
643 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
645 if (
s->s.h.keyframe ||
s->s.h.intraonly)
646 s->s.h.framectxid = 0;
649 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
651 s->s.h.lf_delta.ref[0] = 1;
652 s->s.h.lf_delta.ref[1] = 0;
653 s->s.h.lf_delta.ref[2] = -1;
654 s->s.h.lf_delta.ref[3] = -1;
655 s->s.h.lf_delta.mode[0] = 0;
656 s->s.h.lf_delta.mode[1] = 0;
657 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
663 if (
s->s.h.filter.sharpness != sharp) {
664 for (
i = 1;
i <= 63;
i++) {
668 limit >>= (sharp + 3) >> 2;
673 s->filter_lut.lim_lut[
i] =
limit;
674 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
677 s->s.h.filter.sharpness = sharp;
678 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
679 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
680 for (
i = 0;
i < 4;
i++)
683 for (
i = 0;
i < 2;
i++)
694 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
695 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
700 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
701 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
702 for (
i = 0;
i < 7;
i++)
705 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
706 for (
i = 0;
i < 3;
i++)
712 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
713 for (
i = 0;
i < 8;
i++) {
714 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
716 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
718 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
719 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
720 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
726 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
727 int qyac, qydc, quvac, quvdc, lflvl, sh;
729 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
730 if (
s->s.h.segmentation.absolute_vals)
735 qyac =
s->s.h.yac_qi;
747 sh =
s->s.h.filter.level >= 32;
748 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
749 if (
s->s.h.segmentation.absolute_vals)
752 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
754 lflvl =
s->s.h.filter.level;
756 if (
s->s.h.lf_delta.enabled) {
757 s->s.h.segmentation.feat[
i].lflvl[0][0] =
758 s->s.h.segmentation.feat[
i].lflvl[0][1] =
760 for (j = 1; j < 4; j++) {
761 s->s.h.segmentation.feat[
i].lflvl[j][0] =
763 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
764 s->s.h.segmentation.feat[
i].lflvl[j][1] =
766 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
769 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
770 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
780 for (
s->s.h.tiling.log2_tile_cols = 0;
781 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
782 s->s.h.tiling.log2_tile_cols++) ;
783 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
785 while (
max >
s->s.h.tiling.log2_tile_cols) {
787 s->s.h.tiling.log2_tile_cols++;
792 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
793 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
798 for (
i = 0;
i <
s->active_tile_cols;
i++)
803 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
805 s->s.h.tiling.tile_cols : 1;
810 n_range_coders =
s->s.h.tiling.tile_cols;
817 for (
i = 0;
i <
s->active_tile_cols;
i++) {
820 rc += n_range_coders;
825 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
826 int valid_ref_frame = 0;
827 for (
i = 0;
i < 3;
i++) {
829 int refw =
ref->width, refh =
ref->height;
833 "Ref pixfmt (%s) did not match current frame (%s)",
837 }
else if (refw ==
w && refh ==
h) {
838 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
842 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
844 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
849 s->mvscale[
i][0] = (refw << 14) /
w;
850 s->mvscale[
i][1] = (refh << 14) /
h;
851 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
852 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
856 if (!valid_ref_frame) {
857 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
862 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
863 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
873 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
880 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
884 if (size2 >
size - (data2 -
data)) {
897 for (
i = 0;
i <
s->active_tile_cols;
i++) {
898 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
899 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
900 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
902 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
904 s->td[
i].nb_block_structure = 0;
910 s->prob.p =
s->prob_ctx[
c].p;
913 if (
s->s.h.lossless) {
917 if (
s->s.h.txfmmode == 3)
921 for (
i = 0;
i < 2;
i++)
924 for (
i = 0;
i < 2;
i++)
925 for (j = 0; j < 2; j++)
927 s->prob.p.tx16p[
i][j] =
929 for (
i = 0;
i < 2;
i++)
930 for (j = 0; j < 3; j++)
932 s->prob.p.tx32p[
i][j] =
938 for (
i = 0;
i < 4;
i++) {
939 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
941 for (j = 0; j < 2; j++)
942 for (k = 0; k < 2; k++)
943 for (l = 0; l < 6; l++)
944 for (m = 0; m < 6; m++) {
945 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
946 uint8_t *
r =
ref[j][k][l][m];
947 if (m >= 3 && l == 0)
949 for (n = 0; n < 3; n++) {
958 for (j = 0; j < 2; j++)
959 for (k = 0; k < 2; k++)
960 for (l = 0; l < 6; l++)
961 for (m = 0; m < 6; m++) {
962 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
963 uint8_t *
r =
ref[j][k][l][m];
970 if (
s->s.h.txfmmode ==
i)
975 for (
i = 0;
i < 3;
i++)
978 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
979 for (
i = 0;
i < 7;
i++)
980 for (j = 0; j < 3; j++)
982 s->prob.p.mv_mode[
i][j] =
986 for (
i = 0;
i < 4;
i++)
987 for (j = 0; j < 2; j++)
989 s->prob.p.filter[
i][j] =
992 for (
i = 0;
i < 4;
i++)
996 if (
s->s.h.allowcompinter) {
998 if (
s->s.h.comppredmode)
1001 for (
i = 0;
i < 5;
i++)
1010 for (
i = 0;
i < 5;
i++) {
1012 s->prob.p.single_ref[
i][0] =
1015 s->prob.p.single_ref[
i][1] =
1021 for (
i = 0;
i < 5;
i++)
1023 s->prob.p.comp_ref[
i] =
1027 for (
i = 0;
i < 4;
i++)
1028 for (j = 0; j < 9; j++)
1030 s->prob.p.y_mode[
i][j] =
1033 for (
i = 0;
i < 4;
i++)
1034 for (j = 0; j < 4; j++)
1035 for (k = 0; k < 3; k++)
1037 s->prob.p.partition[3 -
i][j][k] =
1039 s->prob.p.partition[3 -
i][j][k]);
1042 for (
i = 0;
i < 3;
i++)
1046 for (
i = 0;
i < 2;
i++) {
1048 s->prob.p.mv_comp[
i].sign =
1051 for (j = 0; j < 10; j++)
1053 s->prob.p.mv_comp[
i].classes[j] =
1057 s->prob.p.mv_comp[
i].class0 =
1060 for (j = 0; j < 10; j++)
1062 s->prob.p.mv_comp[
i].bits[j] =
1066 for (
i = 0;
i < 2;
i++) {
1067 for (j = 0; j < 2; j++)
1068 for (k = 0; k < 3; k++)
1070 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1073 for (j = 0; j < 3; j++)
1075 s->prob.p.mv_comp[
i].fp[j] =
1079 if (
s->s.h.highprecisionmvs) {
1080 for (
i = 0;
i < 2;
i++) {
1082 s->prob.p.mv_comp[
i].class0_hp =
1086 s->prob.p.mv_comp[
i].hp =
1092 return (data2 -
data) + size2;
1096 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1099 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1100 (((
td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1102 s->prob.p.partition[bl][
c];
1104 ptrdiff_t hbs = 4 >> bl;
1106 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1107 int bytesperpixel =
s->bytesperpixel;
1112 }
else if (col + hbs < s->cols) {
1113 if (row + hbs < s->rows) {
1121 yoff += hbs * 8 * y_stride;
1122 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1127 yoff += hbs * 8 * bytesperpixel;
1128 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1132 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1134 yoff + 8 * hbs * bytesperpixel,
1135 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1136 yoff += hbs * 8 * y_stride;
1137 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1138 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1140 yoff + 8 * hbs * bytesperpixel,
1141 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1148 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1150 yoff + 8 * hbs * bytesperpixel,
1151 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1156 }
else if (row + hbs < s->rows) {
1159 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1160 yoff += hbs * 8 * y_stride;
1161 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1162 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1169 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1171 td->counts.partition[bl][
c][bp]++;
1175 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1179 ptrdiff_t hbs = 4 >> bl;
1181 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1182 int bytesperpixel =
s->bytesperpixel;
1187 }
else if (
td->b->bl == bl) {
1190 yoff += hbs * 8 * y_stride;
1191 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1193 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1194 yoff += hbs * 8 * bytesperpixel;
1195 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1200 if (col + hbs < s->cols) {
1201 if (row + hbs < s->rows) {
1202 decode_sb_mem(
td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1203 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1204 yoff += hbs * 8 * y_stride;
1205 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1208 yoff + 8 * hbs * bytesperpixel,
1209 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1211 yoff += hbs * 8 * bytesperpixel;
1212 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1215 }
else if (row + hbs < s->rows) {
1216 yoff += hbs * 8 * y_stride;
1217 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1225 int sb_start = ( idx * n) >> log2_n;
1226 int sb_end = ((idx + 1) * n) >> log2_n;
1227 *start =
FFMIN(sb_start, n) << 3;
1228 *end =
FFMIN(sb_end, n) << 3;
1236 for (
i = 0;
i <
s->active_tile_cols;
i++)
1245 for (
i = 0;
i < 3;
i++) {
1250 for (
i = 0;
i < 8;
i++) {
1271 int row, col, tile_row, tile_col,
ret;
1273 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1275 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1278 ls_y =
f->linesize[0];
1279 ls_uv =
f->linesize[1];
1280 bytesperpixel =
s->bytesperpixel;
1283 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1285 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1287 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1290 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1291 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1298 if (tile_size >
size)
1309 for (row = tile_row_start; row < tile_row_end;
1310 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1312 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1314 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1316 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1317 td->tile_col_start = tile_col_start;
1319 memset(
td->left_partition_ctx, 0, 8);
1320 memset(
td->left_skip_ctx, 0, 8);
1321 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1326 memset(
td->left_y_nnz_ctx, 0, 16);
1327 memset(
td->left_uv_nnz_ctx, 0, 32);
1328 memset(
td->left_segpred_ctx, 0, 8);
1330 td->c = &
td->c_b[tile_col];
1333 for (col = tile_col_start;
1335 col += 8, yoff2 += 64 * bytesperpixel,
1336 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1340 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1361 if (row + 8 <
s->rows) {
1362 memcpy(
s->intra_pred_data[0],
1363 f->data[0] + yoff + 63 * ls_y,
1364 8 *
s->cols * bytesperpixel);
1365 memcpy(
s->intra_pred_data[1],
1366 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1367 8 *
s->cols * bytesperpixel >>
s->ss_h);
1368 memcpy(
s->intra_pred_data[2],
1369 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1370 8 *
s->cols * bytesperpixel >>
s->ss_h);
1374 if (
s->s.h.filter.level) {
1377 lflvl_ptr =
s->lflvl;
1378 for (col = 0; col <
s->cols;
1379 col += 8, yoff2 += 64 * bytesperpixel,
1380 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1397 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1402 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1403 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1404 unsigned tile_cols_len;
1405 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1410 ls_y =
f->linesize[0];
1411 ls_uv =
f->linesize[1];
1414 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1415 td->tile_col_start = tile_col_start;
1416 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1417 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1418 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1420 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1422 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1424 td->c = &
td->c_b[tile_row];
1425 for (row = tile_row_start; row < tile_row_end;
1426 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1427 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1428 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1430 memset(
td->left_partition_ctx, 0, 8);
1431 memset(
td->left_skip_ctx, 0, 8);
1432 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1437 memset(
td->left_y_nnz_ctx, 0, 16);
1438 memset(
td->left_uv_nnz_ctx, 0, 32);
1439 memset(
td->left_segpred_ctx, 0, 8);
1441 for (col = tile_col_start;
1443 col += 8, yoff2 += 64 * bytesperpixel,
1444 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1447 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1454 tile_cols_len = tile_col_end - tile_col_start;
1455 if (row + 8 <
s->rows) {
1456 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1457 f->data[0] + yoff + 63 * ls_y,
1458 8 * tile_cols_len * bytesperpixel);
1459 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1460 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1461 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1462 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1463 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1464 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1467 vp9_report_tile_progress(
s, row >> 3, 1);
1477 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1479 int bytesperpixel =
s->bytesperpixel, col,
i;
1483 ls_y =
f->linesize[0];
1484 ls_uv =
f->linesize[1];
1486 for (
i = 0;
i <
s->sb_rows;
i++) {
1487 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1489 if (
s->s.h.filter.level) {
1490 yoff = (ls_y * 64)*
i;
1491 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1492 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1493 for (col = 0; col <
s->cols;
1494 col += 8, yoff += 64 * bytesperpixel,
1495 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1508 unsigned int tile, nb_blocks = 0;
1510 if (
s->s.h.segmentation.enabled) {
1511 for (tile = 0; tile <
s->active_tile_cols; tile++)
1512 nb_blocks +=
s->td[tile].nb_block_structure;
1520 par->
qp =
s->s.h.yac_qi;
1521 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1522 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1523 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1524 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1525 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1528 unsigned int block = 0;
1529 unsigned int tile, block_tile;
1531 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1534 for (block_tile = 0; block_tile <
td->nb_block_structure; block_tile++) {
1536 unsigned int row =
td->block_structure[block_tile].row;
1537 unsigned int col =
td->block_structure[block_tile].col;
1538 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1542 b->w = 1 << (3 +
td->block_structure[block_tile].block_size_idx_x);
1543 b->h = 1 << (3 +
td->block_structure[block_tile].block_size_idx_y);
1545 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1546 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1547 if (
s->s.h.segmentation.absolute_vals)
1548 b->delta_qp -= par->
qp;
1565 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1570 }
else if (
ret == 0) {
1571 if (!
s->s.refs[
ref].f->buf[0]) {
1579 for (
i = 0;
i < 8;
i++) {
1580 if (
s->next_refs[
i].f->buf[0])
1582 if (
s->s.refs[
i].f->buf[0] &&
1592 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly) {
1595 if (!
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1601 if (!
s->s.h.intraonly && !
s->s.h.keyframe && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1609 f->key_frame =
s->s.h.keyframe;
1619 for (
i = 0;
i < 8;
i++) {
1620 if (
s->next_refs[
i].f->buf[0])
1622 if (
s->s.h.refreshrefmask & (1 <<
i)) {
1624 }
else if (
s->s.refs[
i].f->buf[0]) {
1645 memset(
s->above_partition_ctx, 0,
s->cols);
1646 memset(
s->above_skip_ctx, 0,
s->cols);
1647 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1648 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1652 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1653 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1654 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1655 memset(
s->above_segpred_ctx, 0,
s->cols);
1660 "Failed to allocate block buffers\n");
1663 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1666 for (
i = 0;
i < 4;
i++) {
1667 for (j = 0; j < 2; j++)
1668 for (k = 0; k < 2; k++)
1669 for (l = 0; l < 6; l++)
1670 for (m = 0; m < 6; m++)
1671 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1672 s->prob.coef[
i][j][k][l][m], 3);
1673 if (
s->s.h.txfmmode ==
i)
1676 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1678 }
else if (!
s->s.h.refreshctx) {
1684 for (
i = 0;
i <
s->sb_rows;
i++)
1690 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1691 s->td[
i].b =
s->td[
i].b_base;
1692 s->td[
i].block =
s->td[
i].block_base;
1693 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1694 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1695 s->td[
i].eob =
s->td[
i].eob_base;
1696 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1697 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1698 s->td[
i].error_info = 0;
1703 int tile_row, tile_col;
1707 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1708 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1711 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1712 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1719 if (tile_size >
size)
1744 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1745 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1746 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1748 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1752 }
while (
s->pass++ == 1);
1755 if (
s->td->error_info < 0) {
1757 s->td->error_info = 0;
1768 for (
i = 0;
i < 8;
i++) {
1769 if (
s->s.refs[
i].f->buf[0])
1771 if (
s->next_refs[
i].f->buf[0] &&
1776 if (!
s->s.h.invisible) {
1790 for (
i = 0;
i < 3;
i++)
1792 for (
i = 0;
i < 8;
i++)
1802 s->s.h.filter.sharpness = -1;
1812 for (
int i = 0;
i < 3;
i++) {
1814 if (!
s->s.frames[
i].tf.f)
1817 for (
int i = 0;
i < 8;
i++) {
1820 if (!
s->s.refs[
i].f || !
s->next_refs[
i].f)
1832 for (
i = 0;
i < 3;
i++) {
1833 if (
s->s.frames[
i].tf.f->buf[0])
1835 if (ssrc->s.frames[
i].tf.f->buf[0]) {
1840 for (
i = 0;
i < 8;
i++) {
1841 if (
s->s.refs[
i].f->buf[0])
1843 if (ssrc->next_refs[
i].f->buf[0]) {
1849 s->s.h.invisible = ssrc->s.h.invisible;
1850 s->s.h.keyframe = ssrc->s.h.keyframe;
1851 s->s.h.intraonly = ssrc->s.h.intraonly;
1852 s->ss_v = ssrc->ss_v;
1853 s->ss_h = ssrc->ss_h;
1854 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1855 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1856 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1857 s->bytesperpixel = ssrc->bytesperpixel;
1858 s->gf_fmt = ssrc->gf_fmt;
1861 s->s.h.bpp = ssrc->s.h.bpp;
1862 s->bpp_index = ssrc->bpp_index;
1863 s->pix_fmt = ssrc->pix_fmt;
1864 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1865 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1866 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1867 sizeof(
s->s.h.segmentation.feat));
1889 .bsfs =
"vp9_superframe_split",
1891 #if CONFIG_VP9_DXVA2_HWACCEL
1894 #if CONFIG_VP9_D3D11VA_HWACCEL
1897 #if CONFIG_VP9_D3D11VA2_HWACCEL
1900 #if CONFIG_VP9_NVDEC_HWACCEL
1903 #if CONFIG_VP9_VAAPI_HWACCEL
1906 #if CONFIG_VP9_VDPAU_HWACCEL
1909 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL