24 #include "config_components.h"
// 3-byte frame sync code (bytes 0x49 0x83 0x42) that marks a VP9 frame
// header in the bitstream, per the VP9 specification.
#define VP9_SYNCCODE 0x498342
64 for (
i = 0;
i < n;
i++)
102 f->segmentation_map =
NULL;
103 f->hwaccel_picture_private =
NULL;
115 sz = 64 *
s->sb_cols *
s->sb_rows;
116 if (sz !=
s->frame_extradata_pool_size) {
119 if (!
s->frame_extradata_pool) {
120 s->frame_extradata_pool_size = 0;
123 s->frame_extradata_pool_size = sz;
129 memset(
f->extradata->data, 0,
f->extradata->size);
131 f->segmentation_map =
f->extradata->data;
139 if (!
f->hwaccel_priv_buf)
141 f->hwaccel_picture_private =
f->hwaccel_priv_buf->data;
168 if (
src->hwaccel_picture_private) {
// Upper bound on the number of hardware-accel pixel formats that may be
// appended to the get_format candidate list; each CONFIG_* macro expands
// to 0 or 1 depending on build configuration.  D3D11VA is counted twice —
// presumably it contributes two pixel-format variants; verify against the
// (elided) fmtp fill site before relying on this.
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL + \
                     CONFIG_VP9_VDPAU_HWACCEL + \
                     CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
193 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
198 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
202 switch (
s->pix_fmt) {
205 #if CONFIG_VP9_DXVA2_HWACCEL
208 #if CONFIG_VP9_D3D11VA_HWACCEL
212 #if CONFIG_VP9_NVDEC_HWACCEL
215 #if CONFIG_VP9_VAAPI_HWACCEL
218 #if CONFIG_VP9_VDPAU_HWACCEL
221 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
226 #if CONFIG_VP9_NVDEC_HWACCEL
229 #if CONFIG_VP9_VAAPI_HWACCEL
232 #if CONFIG_VP9_VDPAU_HWACCEL
238 *fmtp++ =
s->pix_fmt;
246 s->gf_fmt =
s->pix_fmt;
254 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
257 s->last_fmt =
s->pix_fmt;
258 s->sb_cols = (
w + 63) >> 6;
259 s->sb_rows = (
h + 63) >> 6;
260 s->cols = (
w + 7) >> 3;
261 s->rows = (
h + 7) >> 3;
/* Carve a sub-array for `var` out of the arena addressed by the byte
 * pointer `p` in the enclosing scope: point `var` (cast to `type`) at the
 * current arena position, then advance `p` by n entries of *var per
 * superblock column (`s->sb_cols`, also from the enclosing scope).
 *
 * Wrapped in do { } while (0) so both statements travel together when the
 * macro is used as the body of an unbraced if/for — with the bare
 * two-statement form, only the assignment would be conditioned and the
 * arena pointer would silently desynchronize. */
#define assign(var, type, n) \
    do { \
        var = (type) p; \
        p += s->sb_cols * (n) * sizeof(*var); \
    } while (0)
268 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
269 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
272 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
273 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
274 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
275 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
276 assign(
s->above_mode_ctx, uint8_t *, 16);
278 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
279 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
280 assign(
s->above_partition_ctx, uint8_t *, 8);
281 assign(
s->above_skip_ctx, uint8_t *, 8);
282 assign(
s->above_txfm_ctx, uint8_t *, 8);
283 assign(
s->above_segpred_ctx, uint8_t *, 8);
284 assign(
s->above_intra_ctx, uint8_t *, 8);
285 assign(
s->above_comp_ctx, uint8_t *, 8);
286 assign(
s->above_ref_ctx, uint8_t *, 8);
287 assign(
s->above_filter_ctx, uint8_t *, 8);
292 for (
i = 0;
i <
s->active_tile_cols;
i++)
296 if (
s->s.h.bpp !=
s->last_bpp) {
299 s->last_bpp =
s->s.h.bpp;
309 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
312 if (
td->b_base &&
td->block_base &&
s->block_alloc_using_2pass ==
s->s.frames[
CUR_FRAME].uses_2pass)
316 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
317 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
319 int sbs =
s->sb_cols *
s->sb_rows;
322 td->block_base =
av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
323 16 * 16 + 2 * chroma_eobs) * sbs);
324 if (!
td->b_base || !
td->block_base)
326 td->uvblock_base[0] =
td->block_base + sbs * 64 * 64 * bytesperpixel;
327 td->uvblock_base[1] =
td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
328 td->eob_base = (uint8_t *) (
td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
329 td->uveob_base[0] =
td->eob_base + 16 * 16 * sbs;
330 td->uveob_base[1] =
td->uveob_base[0] + chroma_eobs * sbs;
334 if (!
td->block_structure)
338 for (
i = 1;
i <
s->active_tile_cols;
i++)
341 for (
i = 0;
i <
s->active_tile_cols;
i++) {
343 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
344 16 * 16 + 2 * chroma_eobs);
345 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
347 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
348 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
349 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
350 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
351 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
355 if (!
s->td[
i].block_structure)
360 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
377 return m - ((v + 1) >> 1);
384 static const uint8_t inv_map_table[255] = {
385 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
386 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
387 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
388 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
389 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
390 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
391 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
392 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
393 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
394 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
395 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
396 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
397 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
398 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
399 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
400 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
401 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
402 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
450 s->s.h.bpp = 8 +
bits * 2;
451 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
457 s->ss_h =
s->ss_v = 0;
471 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
483 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
494 s->ss_h =
s->ss_v = 1;
495 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
506 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
508 const uint8_t *data2;
532 s->last_keyframe =
s->s.h.keyframe;
535 last_invisible =
s->s.h.invisible;
538 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
540 if (
s->s.h.keyframe) {
548 s->s.h.refreshrefmask = 0xff;
554 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
555 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
556 if (
s->s.h.intraonly) {
565 s->ss_h =
s->ss_v = 1;
568 s->bytesperpixel = 1;
581 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
583 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
585 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
586 if (!
s->s.refs[
s->s.h.refidx[0]].f->buf[0] ||
587 !
s->s.refs[
s->s.h.refidx[1]].f->buf[0] ||
588 !
s->s.refs[
s->s.h.refidx[2]].f->buf[0]) {
593 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
594 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
596 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
597 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
599 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
600 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
608 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f->width ==
w &&
615 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
616 s->s.h.signbias[0] !=
s->s.h.signbias[2];
617 if (
s->s.h.allowcompinter) {
618 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
619 s->s.h.fixcompref = 2;
620 s->s.h.varcompref[0] = 0;
621 s->s.h.varcompref[1] = 1;
622 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
623 s->s.h.fixcompref = 1;
624 s->s.h.varcompref[0] = 0;
625 s->s.h.varcompref[1] = 2;
627 s->s.h.fixcompref = 0;
628 s->s.h.varcompref[0] = 1;
629 s->s.h.varcompref[1] = 2;
634 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
635 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
637 if (
s->s.h.keyframe ||
s->s.h.intraonly)
638 s->s.h.framectxid = 0;
641 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
643 s->s.h.lf_delta.ref[0] = 1;
644 s->s.h.lf_delta.ref[1] = 0;
645 s->s.h.lf_delta.ref[2] = -1;
646 s->s.h.lf_delta.ref[3] = -1;
647 s->s.h.lf_delta.mode[0] = 0;
648 s->s.h.lf_delta.mode[1] = 0;
649 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
655 if (
s->s.h.filter.sharpness != sharp) {
656 for (
i = 1;
i <= 63;
i++) {
660 limit >>= (sharp + 3) >> 2;
665 s->filter_lut.lim_lut[
i] =
limit;
666 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
669 s->s.h.filter.sharpness = sharp;
670 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
671 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
672 for (
i = 0;
i < 4;
i++)
675 for (
i = 0;
i < 2;
i++)
686 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
687 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
692 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
693 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
694 for (
i = 0;
i < 7;
i++)
697 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
698 for (
i = 0;
i < 3;
i++)
704 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
705 for (
i = 0;
i < 8;
i++) {
706 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
708 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
710 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
711 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
712 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
718 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
719 int qyac, qydc, quvac, quvdc, lflvl, sh;
721 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
722 if (
s->s.h.segmentation.absolute_vals)
727 qyac =
s->s.h.yac_qi;
739 sh =
s->s.h.filter.level >= 32;
740 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
741 if (
s->s.h.segmentation.absolute_vals)
744 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
746 lflvl =
s->s.h.filter.level;
748 if (
s->s.h.lf_delta.enabled) {
749 s->s.h.segmentation.feat[
i].lflvl[0][0] =
750 s->s.h.segmentation.feat[
i].lflvl[0][1] =
752 for (j = 1; j < 4; j++) {
753 s->s.h.segmentation.feat[
i].lflvl[j][0] =
755 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
756 s->s.h.segmentation.feat[
i].lflvl[j][1] =
758 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
761 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
762 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
772 for (
s->s.h.tiling.log2_tile_cols = 0;
773 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
774 s->s.h.tiling.log2_tile_cols++) ;
775 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
777 while (
max >
s->s.h.tiling.log2_tile_cols) {
779 s->s.h.tiling.log2_tile_cols++;
784 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
785 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
790 for (
i = 0;
i <
s->active_tile_cols;
i++)
795 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
797 s->s.h.tiling.tile_cols : 1;
802 n_range_coders =
s->s.h.tiling.tile_cols;
809 for (
i = 0;
i <
s->active_tile_cols;
i++) {
812 rc += n_range_coders;
817 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
818 int valid_ref_frame = 0;
819 for (
i = 0;
i < 3;
i++) {
821 int refw =
ref->width, refh =
ref->height;
825 "Ref pixfmt (%s) did not match current frame (%s)",
829 }
else if (refw ==
w && refh ==
h) {
830 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
834 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
836 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
841 s->mvscale[
i][0] = (refw << 14) /
w;
842 s->mvscale[
i][1] = (refh << 14) /
h;
843 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
844 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
848 if (!valid_ref_frame) {
849 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
854 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
855 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
865 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
872 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
876 if (size2 >
size - (data2 -
data)) {
889 for (
i = 0;
i <
s->active_tile_cols;
i++) {
890 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
891 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
892 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
894 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
896 s->td[
i].nb_block_structure = 0;
902 s->prob.p =
s->prob_ctx[
c].p;
905 if (
s->s.h.lossless) {
909 if (
s->s.h.txfmmode == 3)
913 for (
i = 0;
i < 2;
i++)
916 for (
i = 0;
i < 2;
i++)
917 for (j = 0; j < 2; j++)
919 s->prob.p.tx16p[
i][j] =
921 for (
i = 0;
i < 2;
i++)
922 for (j = 0; j < 3; j++)
924 s->prob.p.tx32p[
i][j] =
930 for (
i = 0;
i < 4;
i++) {
931 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
933 for (j = 0; j < 2; j++)
934 for (k = 0; k < 2; k++)
935 for (l = 0; l < 6; l++)
936 for (m = 0; m < 6; m++) {
937 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
938 uint8_t *
r =
ref[j][k][l][m];
939 if (m >= 3 && l == 0)
941 for (n = 0; n < 3; n++) {
950 for (j = 0; j < 2; j++)
951 for (k = 0; k < 2; k++)
952 for (l = 0; l < 6; l++)
953 for (m = 0; m < 6; m++) {
954 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
955 uint8_t *
r =
ref[j][k][l][m];
962 if (
s->s.h.txfmmode ==
i)
967 for (
i = 0;
i < 3;
i++)
970 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
971 for (
i = 0;
i < 7;
i++)
972 for (j = 0; j < 3; j++)
974 s->prob.p.mv_mode[
i][j] =
978 for (
i = 0;
i < 4;
i++)
979 for (j = 0; j < 2; j++)
981 s->prob.p.filter[
i][j] =
984 for (
i = 0;
i < 4;
i++)
988 if (
s->s.h.allowcompinter) {
990 if (
s->s.h.comppredmode)
993 for (
i = 0;
i < 5;
i++)
1002 for (
i = 0;
i < 5;
i++) {
1004 s->prob.p.single_ref[
i][0] =
1007 s->prob.p.single_ref[
i][1] =
1013 for (
i = 0;
i < 5;
i++)
1015 s->prob.p.comp_ref[
i] =
1019 for (
i = 0;
i < 4;
i++)
1020 for (j = 0; j < 9; j++)
1022 s->prob.p.y_mode[
i][j] =
1025 for (
i = 0;
i < 4;
i++)
1026 for (j = 0; j < 4; j++)
1027 for (k = 0; k < 3; k++)
1029 s->prob.p.partition[3 -
i][j][k] =
1031 s->prob.p.partition[3 -
i][j][k]);
1034 for (
i = 0;
i < 3;
i++)
1038 for (
i = 0;
i < 2;
i++) {
1040 s->prob.p.mv_comp[
i].sign =
1043 for (j = 0; j < 10; j++)
1045 s->prob.p.mv_comp[
i].classes[j] =
1049 s->prob.p.mv_comp[
i].class0 =
1052 for (j = 0; j < 10; j++)
1054 s->prob.p.mv_comp[
i].bits[j] =
1058 for (
i = 0;
i < 2;
i++) {
1059 for (j = 0; j < 2; j++)
1060 for (k = 0; k < 3; k++)
1062 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1065 for (j = 0; j < 3; j++)
1067 s->prob.p.mv_comp[
i].fp[j] =
1071 if (
s->s.h.highprecisionmvs) {
1072 for (
i = 0;
i < 2;
i++) {
1074 s->prob.p.mv_comp[
i].class0_hp =
1078 s->prob.p.mv_comp[
i].hp =
1084 return (data2 -
data) + size2;
1088 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1091 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1092 (((
td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1094 s->prob.p.partition[bl][
c];
1096 ptrdiff_t hbs = 4 >> bl;
1098 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1099 int bytesperpixel =
s->bytesperpixel;
1104 }
else if (col + hbs < s->cols) {
1105 if (row + hbs < s->rows) {
1113 yoff += hbs * 8 * y_stride;
1114 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1119 yoff += hbs * 8 * bytesperpixel;
1120 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1124 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1126 yoff + 8 * hbs * bytesperpixel,
1127 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1128 yoff += hbs * 8 * y_stride;
1129 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1130 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1132 yoff + 8 * hbs * bytesperpixel,
1133 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1140 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1142 yoff + 8 * hbs * bytesperpixel,
1143 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1148 }
else if (row + hbs < s->rows) {
1151 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1152 yoff += hbs * 8 * y_stride;
1153 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1154 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1161 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1163 td->counts.partition[bl][
c][bp]++;
1167 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1171 ptrdiff_t hbs = 4 >> bl;
1173 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1174 int bytesperpixel =
s->bytesperpixel;
1179 }
else if (
td->b->bl == bl) {
1182 yoff += hbs * 8 * y_stride;
1183 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1185 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1186 yoff += hbs * 8 * bytesperpixel;
1187 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1192 if (col + hbs < s->cols) {
1193 if (row + hbs < s->rows) {
1194 decode_sb_mem(
td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1195 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1196 yoff += hbs * 8 * y_stride;
1197 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1200 yoff + 8 * hbs * bytesperpixel,
1201 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1203 yoff += hbs * 8 * bytesperpixel;
1204 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1207 }
else if (row + hbs < s->rows) {
1208 yoff += hbs * 8 * y_stride;
1209 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1217 int sb_start = ( idx * n) >> log2_n;
1218 int sb_end = ((idx + 1) * n) >> log2_n;
1219 *start =
FFMIN(sb_start, n) << 3;
1220 *end =
FFMIN(sb_end, n) << 3;
1228 for (
i = 0;
i <
s->active_tile_cols;
i++)
1237 for (
i = 0;
i < 3;
i++) {
1242 for (
i = 0;
i < 8;
i++) {
1263 int row, col, tile_row, tile_col,
ret;
1265 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1267 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1270 ls_y =
f->linesize[0];
1271 ls_uv =
f->linesize[1];
1272 bytesperpixel =
s->bytesperpixel;
1275 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1277 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1279 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1282 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1283 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1290 if (tile_size >
size) {
1305 for (row = tile_row_start; row < tile_row_end;
1306 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1308 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1310 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1312 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1313 td->tile_col_start = tile_col_start;
1315 memset(
td->left_partition_ctx, 0, 8);
1316 memset(
td->left_skip_ctx, 0, 8);
1317 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1322 memset(
td->left_y_nnz_ctx, 0, 16);
1323 memset(
td->left_uv_nnz_ctx, 0, 32);
1324 memset(
td->left_segpred_ctx, 0, 8);
1326 td->c = &
td->c_b[tile_col];
1329 for (col = tile_col_start;
1331 col += 8, yoff2 += 64 * bytesperpixel,
1332 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1336 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1357 if (row + 8 <
s->rows) {
1358 memcpy(
s->intra_pred_data[0],
1359 f->data[0] + yoff + 63 * ls_y,
1360 8 *
s->cols * bytesperpixel);
1361 memcpy(
s->intra_pred_data[1],
1362 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1363 8 *
s->cols * bytesperpixel >>
s->ss_h);
1364 memcpy(
s->intra_pred_data[2],
1365 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1366 8 *
s->cols * bytesperpixel >>
s->ss_h);
1370 if (
s->s.h.filter.level) {
1373 lflvl_ptr =
s->lflvl;
1374 for (col = 0; col <
s->cols;
1375 col += 8, yoff2 += 64 * bytesperpixel,
1376 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1393 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1398 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1399 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1400 unsigned tile_cols_len;
1401 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1406 ls_y =
f->linesize[0];
1407 ls_uv =
f->linesize[1];
1410 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1411 td->tile_col_start = tile_col_start;
1412 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1413 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1414 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1416 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1418 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1420 td->c = &
td->c_b[tile_row];
1421 for (row = tile_row_start; row < tile_row_end;
1422 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1423 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1424 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1426 memset(
td->left_partition_ctx, 0, 8);
1427 memset(
td->left_skip_ctx, 0, 8);
1428 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1433 memset(
td->left_y_nnz_ctx, 0, 16);
1434 memset(
td->left_uv_nnz_ctx, 0, 32);
1435 memset(
td->left_segpred_ctx, 0, 8);
1437 for (col = tile_col_start;
1439 col += 8, yoff2 += 64 * bytesperpixel,
1440 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1443 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1450 tile_cols_len = tile_col_end - tile_col_start;
1451 if (row + 8 <
s->rows) {
1452 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1453 f->data[0] + yoff + 63 * ls_y,
1454 8 * tile_cols_len * bytesperpixel);
1455 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1456 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1457 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1458 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1459 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1460 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1463 vp9_report_tile_progress(
s, row >> 3, 1);
1473 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1475 int bytesperpixel =
s->bytesperpixel, col,
i;
1479 ls_y =
f->linesize[0];
1480 ls_uv =
f->linesize[1];
1482 for (
i = 0;
i <
s->sb_rows;
i++) {
1483 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1485 if (
s->s.h.filter.level) {
1486 yoff = (ls_y * 64)*
i;
1487 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1488 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1489 for (col = 0; col <
s->cols;
1490 col += 8, yoff += 64 * bytesperpixel,
1491 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1504 unsigned int tile, nb_blocks = 0;
1506 if (
s->s.h.segmentation.enabled) {
1507 for (tile = 0; tile <
s->active_tile_cols; tile++)
1508 nb_blocks +=
s->td[tile].nb_block_structure;
1516 par->
qp =
s->s.h.yac_qi;
1517 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1518 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1519 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1520 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1521 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1524 unsigned int block = 0;
1525 unsigned int tile, block_tile;
1527 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1530 for (block_tile = 0; block_tile <
td->nb_block_structure; block_tile++) {
1532 unsigned int row =
td->block_structure[block_tile].row;
1533 unsigned int col =
td->block_structure[block_tile].col;
1534 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1538 b->w = 1 << (3 +
td->block_structure[block_tile].block_size_idx_x);
1539 b->h = 1 << (3 +
td->block_structure[block_tile].block_size_idx_y);
1541 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1542 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1543 if (
s->s.h.segmentation.absolute_vals)
1544 b->delta_qp -= par->
qp;
1561 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1566 }
else if (
ret == 0) {
1567 if (!
s->s.refs[
ref].f->buf[0]) {
1575 for (
i = 0;
i < 8;
i++) {
1576 if (
s->next_refs[
i].f->buf[0])
1578 if (
s->s.refs[
i].f->buf[0] &&
1588 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly) {
1591 if (!
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1597 if (!
s->s.h.intraonly && !
s->s.h.keyframe && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1605 f->key_frame =
s->s.h.keyframe;
1615 for (
i = 0;
i < 8;
i++) {
1616 if (
s->next_refs[
i].f->buf[0])
1618 if (
s->s.h.refreshrefmask & (1 <<
i)) {
1620 }
else if (
s->s.refs[
i].f->buf[0]) {
1641 memset(
s->above_partition_ctx, 0,
s->cols);
1642 memset(
s->above_skip_ctx, 0,
s->cols);
1643 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1644 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1648 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1649 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1650 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1651 memset(
s->above_segpred_ctx, 0,
s->cols);
1656 "Failed to allocate block buffers\n");
1659 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1662 for (
i = 0;
i < 4;
i++) {
1663 for (j = 0; j < 2; j++)
1664 for (k = 0; k < 2; k++)
1665 for (l = 0; l < 6; l++)
1666 for (m = 0; m < 6; m++)
1667 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1668 s->prob.coef[
i][j][k][l][m], 3);
1669 if (
s->s.h.txfmmode ==
i)
1672 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1674 }
else if (!
s->s.h.refreshctx) {
1680 for (
i = 0;
i <
s->sb_rows;
i++)
1686 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1687 s->td[
i].b =
s->td[
i].b_base;
1688 s->td[
i].block =
s->td[
i].block_base;
1689 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1690 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1691 s->td[
i].eob =
s->td[
i].eob_base;
1692 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1693 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1694 s->td[
i].error_info = 0;
1699 int tile_row, tile_col;
1703 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1704 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1707 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1708 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1715 if (tile_size >
size)
1740 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1741 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1742 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1744 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1748 }
while (
s->pass++ == 1);
1751 if (
s->td->error_info < 0) {
1753 s->td->error_info = 0;
1764 for (
i = 0;
i < 8;
i++) {
1765 if (
s->s.refs[
i].f->buf[0])
1767 if (
s->next_refs[
i].f->buf[0] &&
1772 if (!
s->s.h.invisible) {
1786 for (
i = 0;
i < 3;
i++)
1788 for (
i = 0;
i < 8;
i++)
1798 s->s.h.filter.sharpness = -1;
1808 for (
int i = 0;
i < 3;
i++) {
1810 if (!
s->s.frames[
i].tf.f)
1813 for (
int i = 0;
i < 8;
i++) {
1816 if (!
s->s.refs[
i].f || !
s->next_refs[
i].f)
1828 for (
i = 0;
i < 3;
i++) {
1829 if (
s->s.frames[
i].tf.f->buf[0])
1831 if (ssrc->s.frames[
i].tf.f->buf[0]) {
1836 for (
i = 0;
i < 8;
i++) {
1837 if (
s->s.refs[
i].f->buf[0])
1839 if (ssrc->next_refs[
i].f->buf[0]) {
1845 s->s.h.invisible = ssrc->s.h.invisible;
1846 s->s.h.keyframe = ssrc->s.h.keyframe;
1847 s->s.h.intraonly = ssrc->s.h.intraonly;
1848 s->ss_v = ssrc->ss_v;
1849 s->ss_h = ssrc->ss_h;
1850 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1851 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1852 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1853 s->bytesperpixel = ssrc->bytesperpixel;
1854 s->gf_fmt = ssrc->gf_fmt;
1857 s->s.h.bpp = ssrc->s.h.bpp;
1858 s->bpp_index = ssrc->bpp_index;
1859 s->pix_fmt = ssrc->pix_fmt;
1860 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1861 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1862 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1863 sizeof(
s->s.h.segmentation.feat));
1885 .bsfs =
"vp9_superframe_split",
1887 #if CONFIG_VP9_DXVA2_HWACCEL
1890 #if CONFIG_VP9_D3D11VA_HWACCEL
1893 #if CONFIG_VP9_D3D11VA2_HWACCEL
1896 #if CONFIG_VP9_NVDEC_HWACCEL
1899 #if CONFIG_VP9_VAAPI_HWACCEL
1902 #if CONFIG_VP9_VDPAU_HWACCEL
1905 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL