/* 24-bit VP9 frame sync code (0x49 0x83 0x42): every VP9 uncompressed
 * frame header starts with this marker, checked before parsing. */
41 #define VP9_SYNCCODE 0x498342
60 for (
i = 0;
i < n;
i++)
98 f->segmentation_map =
NULL;
99 f->hwaccel_picture_private =
NULL;
111 sz = 64 *
s->sb_cols *
s->sb_rows;
112 if (sz !=
s->frame_extradata_pool_size) {
115 if (!
s->frame_extradata_pool) {
116 s->frame_extradata_pool_size = 0;
119 s->frame_extradata_pool_size = sz;
125 memset(
f->extradata->data, 0,
f->extradata->size);
127 f->segmentation_map =
f->extradata->data;
135 if (!
f->hwaccel_priv_buf)
137 f->hwaccel_picture_private =
f->hwaccel_priv_buf->data;
164 if (
src->hwaccel_picture_private) {
/* Upper bound on how many hwaccel pixel formats can be appended to the
 * format list handed to ff_get_format(); each enabled hwaccel contributes
 * one slot.  D3D11VA is counted twice -- presumably because it exposes two
 * pixel-format variants (TODO confirm against the switch below). */
180 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
181 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
182 CONFIG_VP9_NVDEC_HWACCEL + \
183 CONFIG_VP9_VAAPI_HWACCEL + \
184 CONFIG_VP9_VDPAU_HWACCEL + \
185 CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
189 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
194 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
198 switch (
s->pix_fmt) {
201 #if CONFIG_VP9_DXVA2_HWACCEL
204 #if CONFIG_VP9_D3D11VA_HWACCEL
208 #if CONFIG_VP9_NVDEC_HWACCEL
211 #if CONFIG_VP9_VAAPI_HWACCEL
214 #if CONFIG_VP9_VDPAU_HWACCEL
217 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
222 #if CONFIG_VP9_NVDEC_HWACCEL
225 #if CONFIG_VP9_VAAPI_HWACCEL
228 #if CONFIG_VP9_VDPAU_HWACCEL
234 *fmtp++ =
s->pix_fmt;
242 s->gf_fmt =
s->pix_fmt;
250 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
253 s->last_fmt =
s->pix_fmt;
254 s->sb_cols = (
w + 63) >> 6;
255 s->sb_rows = (
h + 63) >> 6;
256 s->cols = (
w + 7) >> 3;
257 s->rows = (
h + 7) >> 3;
/* Carve the next s->sb_cols * (n) elements for `var` out of the single
 * buffer tracked by the local pointer `p`, then advance `p` past them.
 * NOTE: deliberately two statements with no do/while(0) wrapper and
 * unparenthesized use of `p`/`s` from the enclosing scope -- only safe
 * when invoked as a full statement inside the allocating function. */
260 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
264 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
265 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
268 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
269 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
270 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
271 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
272 assign(
s->above_mode_ctx, uint8_t *, 16);
274 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
275 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
276 assign(
s->above_partition_ctx, uint8_t *, 8);
277 assign(
s->above_skip_ctx, uint8_t *, 8);
278 assign(
s->above_txfm_ctx, uint8_t *, 8);
279 assign(
s->above_segpred_ctx, uint8_t *, 8);
280 assign(
s->above_intra_ctx, uint8_t *, 8);
281 assign(
s->above_comp_ctx, uint8_t *, 8);
282 assign(
s->above_ref_ctx, uint8_t *, 8);
283 assign(
s->above_filter_ctx, uint8_t *, 8);
288 for (
i = 0;
i <
s->active_tile_cols;
i++)
292 if (
s->s.h.bpp !=
s->last_bpp) {
295 s->last_bpp =
s->s.h.bpp;
305 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
308 if (
td->b_base &&
td->block_base &&
s->block_alloc_using_2pass ==
s->s.frames[
CUR_FRAME].uses_2pass)
312 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
313 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
315 int sbs =
s->sb_cols *
s->sb_rows;
318 td->block_base =
av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
319 16 * 16 + 2 * chroma_eobs) * sbs);
320 if (!
td->b_base || !
td->block_base)
322 td->uvblock_base[0] =
td->block_base + sbs * 64 * 64 * bytesperpixel;
323 td->uvblock_base[1] =
td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
324 td->eob_base = (uint8_t *) (
td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
325 td->uveob_base[0] =
td->eob_base + 16 * 16 * sbs;
326 td->uveob_base[1] =
td->uveob_base[0] + chroma_eobs * sbs;
330 if (!
td->block_structure)
334 for (
i = 1;
i <
s->active_tile_cols;
i++)
337 for (
i = 0;
i <
s->active_tile_cols;
i++) {
339 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
340 16 * 16 + 2 * chroma_eobs);
341 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
343 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
344 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
345 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
346 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
347 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
351 if (!
s->td[
i].block_structure)
356 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
373 return m - ((v + 1) >> 1);
380 static const uint8_t inv_map_table[255] = {
381 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
382 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
383 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
384 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
385 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
386 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
387 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
388 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
389 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
390 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
391 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
392 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
393 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
394 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
395 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
396 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
397 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
398 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
446 s->s.h.bpp = 8 +
bits * 2;
447 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
453 s->ss_h =
s->ss_v = 0;
467 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
479 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
490 s->ss_h =
s->ss_v = 1;
491 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
502 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
504 const uint8_t *data2;
528 s->last_keyframe =
s->s.h.keyframe;
531 last_invisible =
s->s.h.invisible;
534 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
536 if (
s->s.h.keyframe) {
544 s->s.h.refreshrefmask = 0xff;
550 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
551 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
552 if (
s->s.h.intraonly) {
561 s->ss_h =
s->ss_v = 1;
564 s->bytesperpixel = 1;
577 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
579 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
581 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
582 if (!
s->s.refs[
s->s.h.refidx[0]].f->buf[0] ||
583 !
s->s.refs[
s->s.h.refidx[1]].f->buf[0] ||
584 !
s->s.refs[
s->s.h.refidx[2]].f->buf[0]) {
589 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
590 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
592 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
593 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
595 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
596 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
604 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f->width ==
w &&
611 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
612 s->s.h.signbias[0] !=
s->s.h.signbias[2];
613 if (
s->s.h.allowcompinter) {
614 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
615 s->s.h.fixcompref = 2;
616 s->s.h.varcompref[0] = 0;
617 s->s.h.varcompref[1] = 1;
618 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
619 s->s.h.fixcompref = 1;
620 s->s.h.varcompref[0] = 0;
621 s->s.h.varcompref[1] = 2;
623 s->s.h.fixcompref = 0;
624 s->s.h.varcompref[0] = 1;
625 s->s.h.varcompref[1] = 2;
630 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
631 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
633 if (
s->s.h.keyframe ||
s->s.h.intraonly)
634 s->s.h.framectxid = 0;
637 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
639 s->s.h.lf_delta.ref[0] = 1;
640 s->s.h.lf_delta.ref[1] = 0;
641 s->s.h.lf_delta.ref[2] = -1;
642 s->s.h.lf_delta.ref[3] = -1;
643 s->s.h.lf_delta.mode[0] = 0;
644 s->s.h.lf_delta.mode[1] = 0;
645 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
651 if (
s->s.h.filter.sharpness != sharp) {
652 for (
i = 1;
i <= 63;
i++) {
656 limit >>= (sharp + 3) >> 2;
661 s->filter_lut.lim_lut[
i] =
limit;
662 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
665 s->s.h.filter.sharpness = sharp;
666 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
667 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
668 for (
i = 0;
i < 4;
i++)
671 for (
i = 0;
i < 2;
i++)
682 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
683 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
688 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
689 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
690 for (
i = 0;
i < 7;
i++)
693 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
694 for (
i = 0;
i < 3;
i++)
700 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
701 for (
i = 0;
i < 8;
i++) {
702 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
704 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
706 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
707 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
708 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
714 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
715 int qyac, qydc, quvac, quvdc, lflvl, sh;
717 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
718 if (
s->s.h.segmentation.absolute_vals)
723 qyac =
s->s.h.yac_qi;
735 sh =
s->s.h.filter.level >= 32;
736 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
737 if (
s->s.h.segmentation.absolute_vals)
740 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
742 lflvl =
s->s.h.filter.level;
744 if (
s->s.h.lf_delta.enabled) {
745 s->s.h.segmentation.feat[
i].lflvl[0][0] =
746 s->s.h.segmentation.feat[
i].lflvl[0][1] =
748 for (j = 1; j < 4; j++) {
749 s->s.h.segmentation.feat[
i].lflvl[j][0] =
751 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
752 s->s.h.segmentation.feat[
i].lflvl[j][1] =
754 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
757 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
758 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
768 for (
s->s.h.tiling.log2_tile_cols = 0;
769 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
770 s->s.h.tiling.log2_tile_cols++) ;
771 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
773 while (
max >
s->s.h.tiling.log2_tile_cols) {
775 s->s.h.tiling.log2_tile_cols++;
780 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
781 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
786 for (
i = 0;
i <
s->active_tile_cols;
i++)
791 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
793 s->s.h.tiling.tile_cols : 1;
798 n_range_coders =
s->s.h.tiling.tile_cols;
805 for (
i = 0;
i <
s->active_tile_cols;
i++) {
808 rc += n_range_coders;
813 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
814 int valid_ref_frame = 0;
815 for (
i = 0;
i < 3;
i++) {
817 int refw =
ref->width, refh =
ref->height;
821 "Ref pixfmt (%s) did not match current frame (%s)",
825 }
else if (refw ==
w && refh ==
h) {
826 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
830 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
832 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
837 s->mvscale[
i][0] = (refw << 14) /
w;
838 s->mvscale[
i][1] = (refh << 14) /
h;
839 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
840 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
844 if (!valid_ref_frame) {
845 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
850 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
851 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
861 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
868 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
872 if (size2 >
size - (data2 -
data)) {
885 for (
i = 0;
i <
s->active_tile_cols;
i++) {
886 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
887 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
888 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
890 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
892 s->td[
i].nb_block_structure = 0;
898 s->prob.p =
s->prob_ctx[
c].p;
901 if (
s->s.h.lossless) {
905 if (
s->s.h.txfmmode == 3)
909 for (
i = 0;
i < 2;
i++)
912 for (
i = 0;
i < 2;
i++)
913 for (j = 0; j < 2; j++)
915 s->prob.p.tx16p[
i][j] =
917 for (
i = 0;
i < 2;
i++)
918 for (j = 0; j < 3; j++)
920 s->prob.p.tx32p[
i][j] =
926 for (
i = 0;
i < 4;
i++) {
927 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
929 for (j = 0; j < 2; j++)
930 for (k = 0; k < 2; k++)
931 for (l = 0; l < 6; l++)
932 for (m = 0; m < 6; m++) {
933 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
934 uint8_t *
r =
ref[j][k][l][m];
935 if (m >= 3 && l == 0)
937 for (n = 0; n < 3; n++) {
946 for (j = 0; j < 2; j++)
947 for (k = 0; k < 2; k++)
948 for (l = 0; l < 6; l++)
949 for (m = 0; m < 6; m++) {
950 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
951 uint8_t *
r =
ref[j][k][l][m];
958 if (
s->s.h.txfmmode ==
i)
963 for (
i = 0;
i < 3;
i++)
966 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
967 for (
i = 0;
i < 7;
i++)
968 for (j = 0; j < 3; j++)
970 s->prob.p.mv_mode[
i][j] =
974 for (
i = 0;
i < 4;
i++)
975 for (j = 0; j < 2; j++)
977 s->prob.p.filter[
i][j] =
980 for (
i = 0;
i < 4;
i++)
984 if (
s->s.h.allowcompinter) {
986 if (
s->s.h.comppredmode)
989 for (
i = 0;
i < 5;
i++)
998 for (
i = 0;
i < 5;
i++) {
1000 s->prob.p.single_ref[
i][0] =
1003 s->prob.p.single_ref[
i][1] =
1009 for (
i = 0;
i < 5;
i++)
1011 s->prob.p.comp_ref[
i] =
1015 for (
i = 0;
i < 4;
i++)
1016 for (j = 0; j < 9; j++)
1018 s->prob.p.y_mode[
i][j] =
1021 for (
i = 0;
i < 4;
i++)
1022 for (j = 0; j < 4; j++)
1023 for (k = 0; k < 3; k++)
1025 s->prob.p.partition[3 -
i][j][k] =
1027 s->prob.p.partition[3 -
i][j][k]);
1030 for (
i = 0;
i < 3;
i++)
1034 for (
i = 0;
i < 2;
i++) {
1036 s->prob.p.mv_comp[
i].sign =
1039 for (j = 0; j < 10; j++)
1041 s->prob.p.mv_comp[
i].classes[j] =
1045 s->prob.p.mv_comp[
i].class0 =
1048 for (j = 0; j < 10; j++)
1050 s->prob.p.mv_comp[
i].bits[j] =
1054 for (
i = 0;
i < 2;
i++) {
1055 for (j = 0; j < 2; j++)
1056 for (k = 0; k < 3; k++)
1058 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1061 for (j = 0; j < 3; j++)
1063 s->prob.p.mv_comp[
i].fp[j] =
1067 if (
s->s.h.highprecisionmvs) {
1068 for (
i = 0;
i < 2;
i++) {
1070 s->prob.p.mv_comp[
i].class0_hp =
1074 s->prob.p.mv_comp[
i].hp =
1080 return (data2 -
data) + size2;
1084 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1087 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1088 (((
td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1090 s->prob.p.partition[bl][
c];
1092 ptrdiff_t hbs = 4 >> bl;
1094 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1095 int bytesperpixel =
s->bytesperpixel;
1100 }
else if (col + hbs < s->cols) {
1101 if (row + hbs < s->rows) {
1109 yoff += hbs * 8 * y_stride;
1110 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1115 yoff += hbs * 8 * bytesperpixel;
1116 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1120 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1122 yoff + 8 * hbs * bytesperpixel,
1123 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1124 yoff += hbs * 8 * y_stride;
1125 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1126 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1128 yoff + 8 * hbs * bytesperpixel,
1129 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1136 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1138 yoff + 8 * hbs * bytesperpixel,
1139 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1144 }
else if (row + hbs < s->rows) {
1147 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1148 yoff += hbs * 8 * y_stride;
1149 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1150 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1157 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1159 td->counts.partition[bl][
c][bp]++;
1163 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1167 ptrdiff_t hbs = 4 >> bl;
1169 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1170 int bytesperpixel =
s->bytesperpixel;
1175 }
else if (
td->b->bl == bl) {
1178 yoff += hbs * 8 * y_stride;
1179 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1181 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1182 yoff += hbs * 8 * bytesperpixel;
1183 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1188 if (col + hbs < s->cols) {
1189 if (row + hbs < s->rows) {
1190 decode_sb_mem(
td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1191 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1192 yoff += hbs * 8 * y_stride;
1193 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1196 yoff + 8 * hbs * bytesperpixel,
1197 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1199 yoff += hbs * 8 * bytesperpixel;
1200 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1203 }
else if (row + hbs < s->rows) {
1204 yoff += hbs * 8 * y_stride;
1205 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1213 int sb_start = ( idx * n) >> log2_n;
1214 int sb_end = ((idx + 1) * n) >> log2_n;
1215 *start =
FFMIN(sb_start, n) << 3;
1216 *end =
FFMIN(sb_end, n) << 3;
1224 for (
i = 0;
i <
s->active_tile_cols;
i++)
1233 for (
i = 0;
i < 3;
i++) {
1238 for (
i = 0;
i < 8;
i++) {
1259 int row, col, tile_row, tile_col,
ret;
1261 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1263 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1266 ls_y =
f->linesize[0];
1267 ls_uv =
f->linesize[1];
1268 bytesperpixel =
s->bytesperpixel;
1271 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1273 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1275 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1278 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1279 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1286 if (tile_size >
size) {
1301 for (row = tile_row_start; row < tile_row_end;
1302 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1304 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1306 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1308 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1309 td->tile_col_start = tile_col_start;
1311 memset(
td->left_partition_ctx, 0, 8);
1312 memset(
td->left_skip_ctx, 0, 8);
1313 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1318 memset(
td->left_y_nnz_ctx, 0, 16);
1319 memset(
td->left_uv_nnz_ctx, 0, 32);
1320 memset(
td->left_segpred_ctx, 0, 8);
1322 td->c = &
td->c_b[tile_col];
1325 for (col = tile_col_start;
1327 col += 8, yoff2 += 64 * bytesperpixel,
1328 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1332 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1353 if (row + 8 <
s->rows) {
1354 memcpy(
s->intra_pred_data[0],
1355 f->data[0] + yoff + 63 * ls_y,
1356 8 *
s->cols * bytesperpixel);
1357 memcpy(
s->intra_pred_data[1],
1358 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1359 8 *
s->cols * bytesperpixel >>
s->ss_h);
1360 memcpy(
s->intra_pred_data[2],
1361 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1362 8 *
s->cols * bytesperpixel >>
s->ss_h);
1366 if (
s->s.h.filter.level) {
1369 lflvl_ptr =
s->lflvl;
1370 for (col = 0; col <
s->cols;
1371 col += 8, yoff2 += 64 * bytesperpixel,
1372 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1389 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1394 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1395 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1396 unsigned tile_cols_len;
1397 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1402 ls_y =
f->linesize[0];
1403 ls_uv =
f->linesize[1];
1406 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1407 td->tile_col_start = tile_col_start;
1408 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1409 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1410 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1412 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1414 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1416 td->c = &
td->c_b[tile_row];
1417 for (row = tile_row_start; row < tile_row_end;
1418 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1419 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1420 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1422 memset(
td->left_partition_ctx, 0, 8);
1423 memset(
td->left_skip_ctx, 0, 8);
1424 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1429 memset(
td->left_y_nnz_ctx, 0, 16);
1430 memset(
td->left_uv_nnz_ctx, 0, 32);
1431 memset(
td->left_segpred_ctx, 0, 8);
1433 for (col = tile_col_start;
1435 col += 8, yoff2 += 64 * bytesperpixel,
1436 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1439 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1446 tile_cols_len = tile_col_end - tile_col_start;
1447 if (row + 8 <
s->rows) {
1448 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1449 f->data[0] + yoff + 63 * ls_y,
1450 8 * tile_cols_len * bytesperpixel);
1451 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1452 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1453 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1454 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1455 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1456 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1459 vp9_report_tile_progress(
s, row >> 3, 1);
1469 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1471 int bytesperpixel =
s->bytesperpixel, col,
i;
1475 ls_y =
f->linesize[0];
1476 ls_uv =
f->linesize[1];
1478 for (
i = 0;
i <
s->sb_rows;
i++) {
1479 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1481 if (
s->s.h.filter.level) {
1482 yoff = (ls_y * 64)*
i;
1483 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1484 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1485 for (col = 0; col <
s->cols;
1486 col += 8, yoff += 64 * bytesperpixel,
1487 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1500 unsigned int tile, nb_blocks = 0;
1502 if (
s->s.h.segmentation.enabled) {
1503 for (tile = 0; tile <
s->active_tile_cols; tile++)
1504 nb_blocks +=
s->td[tile].nb_block_structure;
1512 par->
qp =
s->s.h.yac_qi;
1513 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1514 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1515 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1516 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1517 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1520 unsigned int block = 0;
1521 unsigned int tile, block_tile;
1523 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1526 for (block_tile = 0; block_tile <
td->nb_block_structure; block_tile++) {
1528 unsigned int row =
td->block_structure[block_tile].row;
1529 unsigned int col =
td->block_structure[block_tile].col;
1530 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1534 b->w = 1 << (3 +
td->block_structure[block_tile].block_size_idx_x);
1535 b->h = 1 << (3 +
td->block_structure[block_tile].block_size_idx_y);
1537 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1538 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1539 if (
s->s.h.segmentation.absolute_vals)
1540 b->delta_qp -= par->
qp;
1557 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1562 }
else if (
ret == 0) {
1563 if (!
s->s.refs[
ref].f->buf[0]) {
1571 for (
i = 0;
i < 8;
i++) {
1572 if (
s->next_refs[
i].f->buf[0])
1574 if (
s->s.refs[
i].f->buf[0] &&
1584 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly) {
1587 if (!
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1593 if (!
s->s.h.intraonly && !
s->s.h.keyframe && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1601 f->key_frame =
s->s.h.keyframe;
1611 for (
i = 0;
i < 8;
i++) {
1612 if (
s->next_refs[
i].f->buf[0])
1614 if (
s->s.h.refreshrefmask & (1 <<
i)) {
1616 }
else if (
s->s.refs[
i].f->buf[0]) {
1637 memset(
s->above_partition_ctx, 0,
s->cols);
1638 memset(
s->above_skip_ctx, 0,
s->cols);
1639 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1640 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1644 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1645 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1646 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1647 memset(
s->above_segpred_ctx, 0,
s->cols);
1652 "Failed to allocate block buffers\n");
1655 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1658 for (
i = 0;
i < 4;
i++) {
1659 for (j = 0; j < 2; j++)
1660 for (k = 0; k < 2; k++)
1661 for (l = 0; l < 6; l++)
1662 for (m = 0; m < 6; m++)
1663 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1664 s->prob.coef[
i][j][k][l][m], 3);
1665 if (
s->s.h.txfmmode ==
i)
1668 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1670 }
else if (!
s->s.h.refreshctx) {
1676 for (
i = 0;
i <
s->sb_rows;
i++)
1682 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1683 s->td[
i].b =
s->td[
i].b_base;
1684 s->td[
i].block =
s->td[
i].block_base;
1685 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1686 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1687 s->td[
i].eob =
s->td[
i].eob_base;
1688 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1689 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1690 s->td[
i].error_info = 0;
1695 int tile_row, tile_col;
1699 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1700 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1703 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1704 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1711 if (tile_size >
size)
1736 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1737 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1738 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1740 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1744 }
while (
s->pass++ == 1);
1747 if (
s->td->error_info < 0) {
1749 s->td->error_info = 0;
1760 for (
i = 0;
i < 8;
i++) {
1761 if (
s->s.refs[
i].f->buf[0])
1763 if (
s->next_refs[
i].f->buf[0] &&
1768 if (!
s->s.h.invisible) {
1782 for (
i = 0;
i < 3;
i++)
1784 for (
i = 0;
i < 8;
i++)
1794 s->s.h.filter.sharpness = -1;
1804 for (
int i = 0;
i < 3;
i++) {
1806 if (!
s->s.frames[
i].tf.f)
1809 for (
int i = 0;
i < 8;
i++) {
1812 if (!
s->s.refs[
i].f || !
s->next_refs[
i].f)
1824 for (
i = 0;
i < 3;
i++) {
1825 if (
s->s.frames[
i].tf.f->buf[0])
1827 if (ssrc->s.frames[
i].tf.f->buf[0]) {
1832 for (
i = 0;
i < 8;
i++) {
1833 if (
s->s.refs[
i].f->buf[0])
1835 if (ssrc->next_refs[
i].f->buf[0]) {
1841 s->s.h.invisible = ssrc->s.h.invisible;
1842 s->s.h.keyframe = ssrc->s.h.keyframe;
1843 s->s.h.intraonly = ssrc->s.h.intraonly;
1844 s->ss_v = ssrc->ss_v;
1845 s->ss_h = ssrc->ss_h;
1846 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1847 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1848 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1849 s->bytesperpixel = ssrc->bytesperpixel;
1850 s->gf_fmt = ssrc->gf_fmt;
1853 s->s.h.bpp = ssrc->s.h.bpp;
1854 s->bpp_index = ssrc->bpp_index;
1855 s->pix_fmt = ssrc->pix_fmt;
1856 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1857 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1858 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1859 sizeof(
s->s.h.segmentation.feat));
1881 .bsfs =
"vp9_superframe_split",
1883 #if CONFIG_VP9_DXVA2_HWACCEL
1886 #if CONFIG_VP9_D3D11VA_HWACCEL
1889 #if CONFIG_VP9_D3D11VA2_HWACCEL
1892 #if CONFIG_VP9_NVDEC_HWACCEL
1895 #if CONFIG_VP9_VAAPI_HWACCEL
1898 #if CONFIG_VP9_VDPAU_HWACCEL
1901 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL