/*
 * Constant equal to MAX_PB_SIZE + 32.
 * NOTE(review): presumably an element offset into the temporary sample buffer
 * used by the PROF code paths -- confirm against the places where it is used.
 */
30 #define PROF_TEMP_OFFSET (MAX_PB_SIZE + 32)
34 const int x_off,
const int y_off,
const int block_w,
const int block_h,
const int is_luma)
39 const int pic_width = is_luma ?
fc->ps.pps->width : (
fc->ps.pps->width >>
fc->ps.sps->hshift[1]);
40 const int pic_height = is_luma ?
fc->ps.pps->height : (
fc->ps.pps->height >>
fc->ps.sps->vshift[1]);
42 if (x_off < extra_before || y_off < extra_before ||
43 x_off >= pic_width - block_w - extra_after ||
44 y_off >= pic_height - block_h - extra_after) {
46 int offset = extra_before * *src_stride + (extra_before <<
fc->ps.sps->pixel_shift);
47 int buf_offset = extra_before * edge_emu_stride + (extra_before <<
fc->ps.sps->pixel_shift);
49 fc->vdsp.emulated_edge_mc(dst, *
src -
offset, edge_emu_stride, *src_stride,
50 block_w + extra, block_h + extra, x_off - extra_before, y_off - extra_before,
51 pic_width, pic_height);
53 *
src = dst + buf_offset;
54 *src_stride = edge_emu_stride;
61 const int x_sb,
const int y_sb,
const int x_off,
const int y_off,
const int block_w,
const int block_h,
const int is_luma)
66 const int pic_width = is_luma ?
fc->ps.pps->width : (
fc->ps.pps->width >>
fc->ps.sps->hshift[1]);
67 const int pic_height = is_luma ?
fc->ps.pps->height : (
fc->ps.pps->height >>
fc->ps.sps->vshift[1]);
69 if (x_off < extra_before || y_off < extra_before ||
70 x_off >= pic_width - block_w - extra_after ||
71 y_off >= pic_height - block_h - extra_after||
72 (x_off != x_sb || y_off != y_sb)) {
73 const int ps =
fc->ps.sps->pixel_shift;
75 const int offset = extra_before * *src_stride + (extra_before << ps);
76 const int buf_offset = extra_before * edge_emu_stride + (extra_before << ps);
78 const int start_x =
FFMIN(
FFMAX(x_sb - extra_before, 0), pic_width - 1);
79 const int start_y =
FFMIN(
FFMAX(y_sb - extra_before, 0), pic_height - 1);
80 const int width =
FFMAX(
FFMIN(pic_width, x_sb + block_w + extra_after) - start_x, 1);
81 const int height =
FFMAX(
FFMIN(pic_height, y_sb + block_h + extra_after) - start_y, 1);
83 fc->vdsp.emulated_edge_mc(dst, *
src -
offset, edge_emu_stride, *src_stride, block_w + extra, block_h + extra,
84 x_off - start_x - extra_before, y_off - start_y - extra_before,
width,
height);
86 *
src = dst + buf_offset;
87 *src_stride = edge_emu_stride;
92 const int x_off,
const int y_off,
const int block_w,
const int block_h)
94 int pic_width =
fc->ps.pps->width;
95 int pic_height =
fc->ps.pps->height;
107 *
src = dst + buf_offset;
108 *src_stride = edge_emu_stride;
/*
 * Convenience wrapper around emulated_edge() that passes 1 as the last
 * argument.  dst, src, src_stride, x_off and y_off are forwarded unchanged.
 * fc, block_w and block_h are NOT macro parameters: they must exist under
 * exactly those names in the scope where the macro is expanded.
 * NOTE(review): the trailing 1 looks like the is_luma flag -- confirm against
 * the emulated_edge() signature.
 */
113 #define EMULATED_EDGE_LUMA(dst, src, src_stride, x_off, y_off) \
114 emulated_edge(fc, dst, src, src_stride, x_off, y_off, block_w, block_h, 1)
/*
 * Convenience wrapper around emulated_edge() that passes 0 as the last
 * argument.  dst, src, src_stride, x_off and y_off are forwarded unchanged.
 * fc, block_w and block_h are NOT macro parameters: they must exist under
 * exactly those names in the scope where the macro is expanded.
 * NOTE(review): the trailing 0 looks like is_luma == false, i.e. chroma
 * picture dimensions -- confirm against the emulated_edge() signature.
 */
116 #define EMULATED_EDGE_CHROMA(dst, src, src_stride, x_off, y_off) \
117 emulated_edge(fc, dst, src, src_stride, x_off, y_off, block_w, block_h, 0)
/*
 * Convenience wrapper around emulated_edge_dmvr() that passes 1 as the last
 * argument.  In addition to the usual x_off/y_off it forwards x_sb/y_sb.
 * fc, block_w and block_h are NOT macro parameters: they must exist under
 * exactly those names in the scope where the macro is expanded.
 * NOTE(review): the trailing 1 looks like the is_luma flag, and x_sb/y_sb look
 * like the position derived from the unrefined motion vector -- confirm
 * against emulated_edge_dmvr() and its callers.
 */
119 #define EMULATED_EDGE_DMVR_LUMA(dst, src, src_stride, x_sb, y_sb, x_off, y_off) \
120 emulated_edge_dmvr(fc, dst, src, src_stride, x_sb, y_sb, x_off, y_off, block_w, block_h, 1)
/*
 * Convenience wrapper around emulated_edge_dmvr() that passes 0 as the last
 * argument.  In addition to the usual x_off/y_off it forwards x_sb/y_sb.
 * fc, block_w and block_h are NOT macro parameters: they must exist under
 * exactly those names in the scope where the macro is expanded.
 * NOTE(review): the trailing 0 looks like is_luma == false (chroma), and
 * x_sb/y_sb look like the position derived from the unrefined motion vector --
 * confirm against emulated_edge_dmvr() and its callers.
 */
122 #define EMULATED_EDGE_DMVR_CHROMA(dst, src, src_stride, x_sb, y_sb, x_off, y_off) \
123 emulated_edge_dmvr(fc, dst, src, src_stride, x_sb, y_sb, x_off, y_off, block_w, block_h, 0)
/*
 * Convenience wrapper around emulated_edge_bilinear().  dst, src, src_stride,
 * x_off and y_off are forwarded unchanged.
 * fc, pred_w and pred_h are NOT macro parameters: they must exist under
 * exactly those names in the scope where the macro is expanded.  Note that the
 * block size comes from pred_w/pred_h here, not block_w/block_h, and that no
 * luma/chroma flag is passed.
 */
125 #define EMULATED_EDGE_BILINEAR(dst, src, src_stride, x_off, y_off) \
126 emulated_edge_bilinear(fc, dst, src, src_stride, x_off, y_off, pred_w, pred_h)
135 const int weight_flag = (
IS_P(sh->
r) &&
pps->r->pps_weighted_pred_flag) ||
136 (
IS_B(sh->
r) &&
pps->r->pps_weighted_bipred_flag);
141 *denom =
w->log2_denom[c_idx > 0];
142 *wx =
w->weight[lx][c_idx][mvf->
ref_idx[lx]];
143 *ox =
w->offset[lx][c_idx][mvf->
ref_idx[lx]];
155 const int bcw_idx = mvf->
bcw_idx;
156 const int weight_flag = (
IS_P(sh->
r) &&
pps->r->pps_weighted_pred_flag) ||
157 (
IS_B(sh->
r) &&
pps->r->pps_weighted_bipred_flag && !dmvr_flag);
158 if ((!weight_flag && !bcw_idx) || (bcw_idx && lc->
cu->
ciip_flag))
170 *denom =
w->log2_denom[c_idx > 0];
180 int x_off,
int y_off,
const int block_w,
const int block_h)
183 const uint8_t *
src =
ref->data[0];
184 ptrdiff_t src_stride =
ref->linesize[0];
185 const int idx =
av_log2(block_w) - 1;
186 const int mx =
mv->x & 0xf;
187 const int my =
mv->y & 0xf;
193 src += y_off * src_stride + (x_off * (1 <<
fc->ps.sps->pixel_shift));
197 fc->vvcdsp.inter.put[
LUMA][idx][!!my][!!mx](dst,
src, src_stride, block_h, hf, vf, block_w);
201 int x_off,
int y_off,
const int block_w,
const int block_h,
const int c_idx)
204 const uint8_t *
src =
ref->data[c_idx];
205 ptrdiff_t src_stride =
ref->linesize[c_idx];
206 int hs =
fc->ps.sps->hshift[c_idx];
207 int vs =
fc->ps.sps->vshift[c_idx];
208 const int idx =
av_log2(block_w) - 1;
214 x_off +=
mv->x >> (4 + hs);
215 y_off +=
mv->y >> (4 + vs);
216 src += y_off * src_stride + (x_off * (1 <<
fc->ps.sps->pixel_shift));
219 fc->vvcdsp.inter.put[
CHROMA][idx][!!my][!!mx](dst,
src, src_stride, block_h, hf, vf, block_w);
223 const AVFrame *
ref,
const MvField *mvf,
int x_off,
int y_off,
const int block_w,
const int block_h,
224 const int hf_idx,
const int vf_idx)
228 const Mv *
mv = mvf->
mv + lx;
229 const uint8_t *
src =
ref->data[0];
230 ptrdiff_t src_stride =
ref->linesize[0];
231 const int idx =
av_log2(block_w) - 1;
232 const int mx =
mv->x & 0xf;
233 const int my =
mv->y & 0xf;
240 src += y_off * src_stride + (x_off * (1 <<
fc->ps.sps->pixel_shift));
245 fc->vvcdsp.inter.put_uni_w[
LUMA][idx][!!my][!!mx](dst, dst_stride,
src, src_stride,
246 block_h, denom, wx, ox, hf, vf, block_w);
248 fc->vvcdsp.inter.put_uni[
LUMA][idx][!!my][!!mx](dst, dst_stride,
src, src_stride,
249 block_h, hf, vf, block_w);
254 const AVFrame *ref0,
const Mv *mv0,
const int x_off,
const int y_off,
const int block_w,
const int block_h,
255 const AVFrame *ref1,
const Mv *mv1,
const MvField *mvf,
const int hf_idx,
const int vf_idx,
256 const MvField *orig_mv,
const int sb_bdof_flag)
260 const int idx =
av_log2(block_w) - 1;
263 int denom, w0, w1, o0, o1;
266 for (
int i =
L0;
i <=
L1;
i++) {
268 const int mx =
mv->x & 0xf;
269 const int my =
mv->y & 0xf;
270 const int ox = x_off + (
mv->x >> 4);
271 const int oy = y_off + (
mv->y >> 4);
272 ptrdiff_t src_stride =
ref[
i]->linesize[0];
273 const uint8_t *
src =
ref[
i]->data[0] + oy * src_stride + (ox * (1 <<
fc->ps.sps->pixel_shift));
278 const int x_sb = x_off + (orig_mv->
mv[
i].
x >> 4);
279 const int y_sb = y_off + (orig_mv->
mv[
i].
y >> 4);
285 fc->vvcdsp.inter.put[
LUMA][idx][!!my][!!mx](
tmp[
i],
src, src_stride, block_h, hf, vf, block_w);
287 fc->vvcdsp.inter.bdof_fetch_samples(
tmp[
i],
src, src_stride, mx, my, block_w, block_h);
291 fc->vvcdsp.inter.apply_bdof(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h);
292 else if (weight_flag)
293 fc->vvcdsp.inter.w_avg(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h, denom, w0, w1, o0, o1);
295 fc->vvcdsp.inter.avg(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h);
299 const uint8_t *
src, ptrdiff_t src_stride,
int x_off,
int y_off,
300 const int block_w,
const int block_h,
const MvField *mvf,
const int c_idx,
301 const int hf_idx,
const int vf_idx)
305 const int hs =
fc->ps.sps->hshift[1];
306 const int vs =
fc->ps.sps->vshift[1];
307 const int idx =
av_log2(block_w) - 1;
308 const Mv *
mv = &mvf->
mv[lx];
315 x_off +=
mv->x >> (4 + hs);
316 y_off +=
mv->y >> (4 + vs);
317 src += y_off * src_stride + (x_off * (1 <<
fc->ps.sps->pixel_shift));
322 fc->vvcdsp.inter.put_uni_w[
CHROMA][idx][!!my][!!mx](dst, dst_stride,
src, src_stride,
323 block_h, denom, wx, ox, hf, vf, block_w);
325 fc->vvcdsp.inter.put_uni[
CHROMA][idx][!!my][!!mx](dst, dst_stride,
src, src_stride,
326 block_h, hf, vf, block_w);
331 const AVFrame *ref0,
const AVFrame *ref1,
const int x_off,
const int y_off,
332 const int block_w,
const int block_h,
const MvField *mvf,
const int c_idx,
333 const int hf_idx,
const int vf_idx,
const MvField *orig_mv,
const int dmvr_flag,
const int ciip_flag)
336 const int hs =
fc->ps.sps->hshift[1];
337 const int vs =
fc->ps.sps->vshift[1];
338 const int idx =
av_log2(block_w) - 1;
341 int denom, w0, w1, o0, o1;
342 const int weight_flag =
derive_weight(&denom, &w0, &w1, &o0, &o1, lc, mvf, c_idx, dmvr_flag);
344 for (
int i =
L0;
i <=
L1;
i++) {
348 const int ox = x_off + (
mv->x >> (4 + hs));
349 const int oy = y_off + (
mv->y >> (4 + vs));
350 ptrdiff_t src_stride =
ref[
i]->linesize[c_idx];
351 const uint8_t *
src =
ref[
i]->data[c_idx] + oy * src_stride + (ox * (1 <<
fc->ps.sps->pixel_shift));
355 const int x_sb = x_off + (orig_mv->
mv[
i].
x >> (4 + hs));
356 const int y_sb = y_off + (orig_mv->
mv[
i].
y >> (4 + vs));
361 fc->vvcdsp.inter.put[
CHROMA][idx][!!my][!!mx](
tmp[
i],
src, src_stride, block_h, hf, vf, block_w);
364 fc->vvcdsp.inter.w_avg(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h, denom, w0, w1, o0, o1);
366 fc->vvcdsp.inter.avg(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h);
370 const AVFrame *
ref,
const MvField *mvf,
int x_off,
int y_off,
const int block_w,
const int block_h,
371 const int cb_prof_flag,
const int16_t *diff_mv_x,
const int16_t *diff_mv_y)
374 const uint8_t *
src =
ref->data[0];
375 ptrdiff_t src_stride =
ref->linesize[0];
377 const int idx =
av_log2(block_w) - 1;
379 const Mv *
mv = mvf->
mv + lx;
380 const int mx =
mv->x & 0xf;
381 const int my =
mv->y & 0xf;
389 src += y_off * src_stride + (x_off * (1 <<
fc->ps.sps->pixel_shift));
394 fc->vvcdsp.inter.fetch_samples(prof_tmp,
src, src_stride, mx, my);
396 fc->vvcdsp.inter.apply_prof_uni(dst, dst_stride, prof_tmp, diff_mv_x, diff_mv_y);
398 fc->vvcdsp.inter.apply_prof_uni_w(dst, dst_stride, prof_tmp, diff_mv_x, diff_mv_y, denom, wx, ox);
401 fc->vvcdsp.inter.put_uni[
LUMA][idx][!!my][!!mx](dst, dst_stride,
src, src_stride, block_h, hf, vf, block_w);
403 fc->vvcdsp.inter.put_uni_w[
LUMA][idx][!!my][!!mx](dst, dst_stride,
src, src_stride, block_h, denom, wx, ox, hf, vf, block_w);
409 const int block_w,
const int block_h)
416 const int idx =
av_log2(block_w) - 1;
417 int denom, w0, w1, o0, o1;
418 const int weight_flag =
derive_weight(&denom, &w0, &w1, &o0, &o1, lc, mvf,
LUMA, 0);
420 for (
int i =
L0;
i <=
L1;
i++) {
422 const int mx =
mv->x & 0xf;
423 const int my =
mv->y & 0xf;
424 const int ox = x_off + (
mv->x >> 4);
425 const int oy = y_off + (
mv->y >> 4);
426 ptrdiff_t src_stride =
ref[
i]->linesize[0];
427 const uint8_t *
src =
ref[
i]->data[0] + oy * src_stride + (ox * (1 <<
fc->ps.sps->pixel_shift));
433 fc->vvcdsp.inter.put[
LUMA][idx][!!my][!!mx](
tmp[
i],
src, src_stride, block_h, hf, vf, block_w);
436 fc->vvcdsp.inter.fetch_samples(prof_tmp,
src, src_stride, mx, my);
442 fc->vvcdsp.inter.w_avg(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h, denom, w0, w1, o0, o1);
444 fc->vvcdsp.inter.avg(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h);
452 if (
mv->pred_flag &
mask) {
454 ref[lx] = rpl[lx].
ref[
mv->ref_idx[lx]];
/*
 * Address of a sample inside plane c_idx of the current frame (fc->frame).
 *
 * x and y are first shifted right by the plane's hshift/vshift taken from
 * fc->ps.sps.  The row offset is then the shifted y times the plane's
 * linesize, and the column offset is the shifted x shifted left by
 * pixel_shift.
 * c_idx is evaluated several times, so pass an expression without side effects.
 * fc is NOT a macro parameter: it must be in scope at the expansion site.
 * NOTE(review): this suggests x/y are in luma sample units and pixel_shift
 * converts samples to bytes -- confirm.
 */
462 #define POS(c_idx, x, y) \
463 &fc->frame->data[c_idx][((y) >> fc->ps.sps->vshift[c_idx]) * fc->frame->linesize[c_idx] + \
464 (((x) >> fc->ps.sps->hshift[c_idx]) << fc->ps.sps->pixel_shift)]
481 const int c_end =
fc->ps.sps->r->sps_chroma_format_idc ? 3 : 1;
485 for (
int c_idx = 0; c_idx < c_end; c_idx++) {
486 const int hs =
fc->ps.sps->hshift[c_idx];
487 const int vs =
fc->ps.sps->vshift[c_idx];
488 const int x = lc->
cu->
x0 >> hs;
489 const int y = lc->
cu->
y0 >> vs;
493 ptrdiff_t dst_stride =
fc->frame->linesize[c_idx];
495 int step_x = 1 << hs;
499 }
else if (mirror_type == 1) {
507 for (
int i = 0;
i < 2;
i++) {
509 const int lx =
mv->pred_flag -
PF_L0;
532 const int min_pu_width =
fc->ps.pps->min_pu_width;
546 const int x0,
const int y0,
const int sbw,
const int sbh,
const MvField *orig_mv,
const int sb_bdof_flag)
551 uint8_t *dst =
POS(0, x0, y0);
552 const ptrdiff_t dst_stride =
fc->frame->linesize[0];
553 uint8_t *inter = ciip_flag ? (uint8_t *)lc->
ciip_tmp1 : dst;
554 const ptrdiff_t inter_stride = ciip_flag ? (
MAX_PB_SIZE *
sizeof(uint16_t)) : dst_stride;
561 const int lx =
mv->pred_flag -
PF_L0;
563 mv, x0, y0, sbw, sbh, hf_idx, vf_idx);
566 &
mv->mv[0], x0, y0, sbw, sbh,
ref[1]->frame, &
mv->mv[1],
mv,
567 hf_idx, vf_idx, orig_mv, sb_bdof_flag);
572 fc->vvcdsp.intra.intra_pred(lc, x0, y0, sbw, sbh, 0);
574 fc->vvcdsp.lmcs.filter(inter, inter_stride, sbw, sbh, &
fc->ps.lmcs.fwd_lut);
575 fc->vvcdsp.inter.put_ciip(dst, dst_stride, sbw, sbh, inter, inter_stride, intra_weight);
581 const int x0,
const int y0,
const int sbw,
const int sbh,
const MvField *orig_mv,
const int dmvr_flag)
584 const int hs =
fc->ps.sps->hshift[1];
585 const int vs =
fc->ps.sps->vshift[1];
586 const int x0_c = x0 >> hs;
587 const int y0_c = y0 >> vs;
588 const int w_c = sbw >> hs;
589 const int h_c = sbh >> vs;
592 uint8_t* dst1 =
POS(1, x0, y0);
593 uint8_t* dst2 =
POS(2, x0, y0);
594 const ptrdiff_t dst1_stride =
fc->frame->linesize[1];
595 const ptrdiff_t dst2_stride =
fc->frame->linesize[2];
597 uint8_t *inter1 = do_ciip ? (uint8_t *)lc->
ciip_tmp1 : dst1;
598 const ptrdiff_t inter1_stride = do_ciip ? (
MAX_PB_SIZE *
sizeof(uint16_t)) : dst1_stride;
600 uint8_t *inter2 = do_ciip ? (uint8_t *)lc->
ciip_tmp2 : dst2;
601 const ptrdiff_t inter2_stride = do_ciip ? (
MAX_PB_SIZE *
sizeof(uint16_t)) : dst2_stride;
604 const int hf_idx = 0;
605 const int vf_idx = 0;
612 const int lx =
mv->pred_flag -
PF_L0;
617 x0_c, y0_c,
w_c, h_c,
mv,
CB, hf_idx, vf_idx);
619 x0_c, y0_c,
w_c, h_c,
mv,
CR, hf_idx, vf_idx);
625 x0_c, y0_c,
w_c, h_c,
mv,
CB, hf_idx, vf_idx, orig_mv, dmvr_flag, lc->
cu->
ciip_flag);
628 x0_c, y0_c,
w_c, h_c,
mv,
CR, hf_idx, vf_idx, orig_mv, dmvr_flag, lc->
cu->
ciip_flag);
633 fc->vvcdsp.intra.intra_pred(lc, x0, y0, sbw, sbh, 1);
634 fc->vvcdsp.intra.intra_pred(lc, x0, y0, sbw, sbh, 2);
635 fc->vvcdsp.inter.put_ciip(dst1, dst1_stride,
w_c, h_c, inter1, inter1_stride, intra_weight);
636 fc->vvcdsp.inter.put_ciip(dst2, dst2_stride,
w_c, h_c, inter2, inter2_stride, intra_weight);
644 const int sad_minus = sad[-
stride];
645 const int sad_center = sad[0];
646 const int sad_plus = sad[
stride];
648 int denom = (( sad_minus + sad_plus) - (sad_center << 1 ) ) << 3;
652 if (sad_minus == sad_center)
654 else if (sad_plus == sad_center)
657 int num = ( sad_minus - sad_plus ) * (1 << 4);
665 while (counter > 0) {
666 counter = counter - 1;
667 quotient = quotient << 1;
668 if ( num >= denom ) {
670 quotient = quotient + 1;
672 denom = (denom >> 1);
/*
 * NOTE(review): presumably the per-dimension size of the sad[dy][dx] table
 * filled by the DMVR search, i.e. 2 * sr_range + 1 with sr_range == 2 --
 * confirm against the array declaration.
 */
683 #define SAD_ARRAY_SIZE 5
686 const AVFrame *ref0,
const AVFrame *ref1,
const int x_off,
const int y_off,
const int block_w,
const int block_h)
689 const int sr_range = 2;
693 int min_dx, min_dy, min_sad, dx, dy;
696 min_dx = min_dy = dx = dy = 2;
698 for (
int i =
L0;
i <=
L1;
i++) {
699 const int pred_w = block_w + 2 * sr_range;
700 const int pred_h = block_h + 2 * sr_range;
702 const int mx =
mv->x & 0xf;
703 const int my =
mv->y & 0xf;
704 const int ox = x_off + (
mv->x >> 4) - sr_range;
705 const int oy = y_off + (
mv->y >> 4) - sr_range;
706 ptrdiff_t src_stride =
ref[
i]->linesize[
LUMA];
707 const uint8_t *
src =
ref[
i]->data[
LUMA] + oy * src_stride + (ox * (1 <<
fc->ps.sps->pixel_shift));
709 fc->vvcdsp.inter.dmvr[!!my][!!mx](
tmp[
i],
src, src_stride,
pred_h, mx, my, pred_w);
712 min_sad =
fc->vvcdsp.inter.sad(
tmp[
L0],
tmp[
L1], dx, dy, block_w, block_h);
713 min_sad -= min_sad >> 2;
714 sad[dy][dx] = min_sad;
716 if (min_sad >= block_w * block_h) {
721 if (dx != sr_range || dy != sr_range) {
722 sad[dy][dx] =
fc->vvcdsp.inter.sad(lc->
tmp, lc->
tmp1, dx, dy, block_w, block_h);
723 if (sad[dy][dx] < min_sad) {
724 min_sad = sad[dy][dx];
731 dmv[0] = (min_dx - sr_range) * (1 << 4);
732 dmv[1] = (min_dy - sr_range) * (1 << 4);
733 if (min_dx != 0 && min_dx != 4 && min_dy != 0 && min_dy != 4) {
738 for (
int i =
L0;
i <=
L1;
i++) {
740 mv->x += (1 - 2 *
i) * dmv[0];
741 mv->y += (1 - 2 *
i) * dmv[1];
745 if (min_sad < 2 * block_w * block_h) {
759 fc->ref->tab_dmvr_mvf[idx] = *mvf;
765 const int x0,
const int y0,
const int sbw,
const int sbh)
789 int sbw, sbh, sb_bdof_flag = 0;
797 for (
int sby = 0; sby <
mi->num_sb_y; sby++) {
798 for (
int sbx = 0; sbx <
mi->num_sb_x; sbx++) {
799 const int x0 = cu->
x0 + sbx * sbw;
800 const int y0 = cu->
y0 + sby * sbh;
807 if (
fc->ps.sps->r->sps_chroma_format_idc)
814 const int x0,
const int y0,
const int sbw,
const int sbh)
816 const int hs =
fc->ps.sps->hshift[1];
817 const int vs =
fc->ps.sps->vshift[1];
823 mvc->
mv[0].
x += (
unsigned int)mv2->
mv[0].
x;
824 mvc->
mv[0].
y += (
unsigned int)mv2->
mv[0].
y;
825 mvc->
mv[1].
x += (
unsigned int)mv2->
mv[1].
x;
826 mvc->
mv[1].
y += (
unsigned int)mv2->
mv[1].
y;
837 const int x0 = cu->
x0;
838 const int y0 = cu->
y0;
841 const int hs =
fc->ps.sps->hshift[1];
842 const int vs =
fc->ps.sps->vshift[1];
844 for (
int sby = 0; sby <
mi->num_sb_y; sby++) {
845 for (
int sbx = 0; sbx <
mi->num_sb_x; sbx++) {
846 const int x = x0 + sbx * sbw;
847 const int y = y0 + sby * sbh;
849 uint8_t *dst0 =
POS(0, x, y);
857 const int lx =
mi->pred_flag -
PF_L0;
865 if (
fc->ps.sps->r->sps_chroma_format_idc) {
892 uint8_t* dst0 =
POS(0, cu->
x0, cu->
y0);
905 const CTU *ctu =
fc->tab.ctus + rs;