43 int filter_height_down = (raw_my & 3) ? 3 : 0;
44 int full_my = (raw_my >> 2) + y_offset;
45 int bottom = full_my + filter_height_down +
height;
49 return FFMAX(0, bottom);
53 int16_t refs[2][48],
int n,
54 int height,
int y_offset,
int list0,
55 int list1,
int *nrefs)
68 if (
ref->parent->tf.progress->data !=
h->cur_pic.tf.progress->data ||
69 (
ref->reference & 3) !=
h->picture_structure) {
71 if (refs[0][ref_n] < 0)
73 refs[0][ref_n] =
FFMAX(refs[0][ref_n], my);
81 if (
ref->parent->tf.progress->data !=
h->cur_pic.tf.progress->data ||
82 (
ref->reference & 3) !=
h->picture_structure) {
84 if (refs[1][ref_n] < 0)
86 refs[1][ref_n] =
FFMAX(refs[1][ref_n], my);
98 const int mb_xy = sl->
mb_xy;
99 const int mb_type =
h->cur_pic.mb_type[mb_xy];
101 int nrefs[2] = { 0 };
104 memset(refs, -1,
sizeof(refs));
124 for (
i = 0;
i < 4;
i++) {
127 int y_offset = (
i & 2) << 2;
131 IS_DIR(sub_mb_type, 0, 0),
132 IS_DIR(sub_mb_type, 0, 1),
136 IS_DIR(sub_mb_type, 0, 0),
137 IS_DIR(sub_mb_type, 0, 1),
140 IS_DIR(sub_mb_type, 0, 0),
141 IS_DIR(sub_mb_type, 0, 1),
145 IS_DIR(sub_mb_type, 0, 0),
146 IS_DIR(sub_mb_type, 0, 1),
149 IS_DIR(sub_mb_type, 0, 0),
150 IS_DIR(sub_mb_type, 0, 1),
155 for (j = 0; j < 4; j++) {
156 int sub_y_offset = y_offset + 2 * (j & 2);
158 IS_DIR(sub_mb_type, 0, 0),
159 IS_DIR(sub_mb_type, 0, 1),
173 int pic_height = 16 *
h->mb_height >> ref_field_picture;
181 FFMIN((row >> 1) - !(row & 1),
185 FFMIN((row >> 1), pic_height - 1),
189 FFMIN(row * 2 + ref_field,
194 FFMIN(row, pic_height - 1),
198 FFMIN(row, pic_height - 1),
211 int src_x_offset,
int src_y_offset,
214 int pixel_shift,
int chroma_idc)
218 const int luma_xy = (mx & 3) + ((my & 3) << 2);
223 int extra_height = 0;
225 const int full_mx = mx >> 2;
226 const int full_my = my >> 2;
227 const int pic_width = 16 *
h->mb_width;
228 const int pic_height = 16 *
h->mb_height >>
MB_FIELD(sl);
236 if (full_mx < 0 - extra_width ||
237 full_my < 0 - extra_height ||
238 full_mx + 16 > pic_width + extra_width ||
239 full_my + 16 > pic_height + extra_height) {
243 16 + 5, 16 + 5 , full_mx - 2,
244 full_my - 2, pic_width, pic_height);
256 if (chroma_idc == 3 ) {
263 full_mx - 2, full_my - 2,
264 pic_width, pic_height);
267 qpix_op[luma_xy](dest_cb, src_cb, sl->
mb_linesize);
277 full_mx - 2, full_my - 2,
278 pic_width, pic_height);
281 qpix_op[luma_xy](dest_cr, src_cr, sl->
mb_linesize);
287 ysh = 3 - (chroma_idc == 2 );
288 if (chroma_idc == 1 &&
MB_FIELD(sl)) {
291 emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
294 src_cb = pic->
data[1] + ((mx >> 3) * (1 << pixel_shift)) +
296 src_cr = pic->
data[2] + ((mx >> 3) * (1 << pixel_shift)) +
302 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
303 pic_width >> 1, pic_height >> (chroma_idc == 1 ));
307 height >> (chroma_idc == 1 ),
308 mx & 7, ((
unsigned)my << (chroma_idc == 2 )) & 7);
313 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
314 pic_width >> 1, pic_height >> (chroma_idc == 1 ));
318 mx & 7, ((
unsigned)my << (chroma_idc == 2 )) & 7);
326 int x_offset,
int y_offset,
331 int list0,
int list1,
332 int pixel_shift,
int chroma_idc)
337 dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
338 if (chroma_idc == 3 ) {
339 dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
340 dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
341 }
else if (chroma_idc == 2 ) {
342 dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
343 dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
345 dest_cb += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
346 dest_cr += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
348 x_offset += 8 * sl->
mb_x;
354 dest_y, dest_cb, dest_cr, x_offset, y_offset,
355 qpix_op, chroma_op, pixel_shift, chroma_idc);
358 chroma_op = chroma_avg;
364 dest_y, dest_cb, dest_cr, x_offset, y_offset,
365 qpix_op, chroma_op, pixel_shift, chroma_idc);
374 int x_offset,
int y_offset,
381 int list0,
int list1,
382 int pixel_shift,
int chroma_idc)
386 dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
387 if (chroma_idc == 3 ) {
389 chroma_weight_avg = luma_weight_avg;
390 chroma_weight_op = luma_weight_op;
391 dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
392 dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
393 }
else if (chroma_idc == 2 ) {
395 dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
396 dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
398 chroma_height =
height >> 1;
399 dest_cb += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
400 dest_cr += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
402 x_offset += 8 * sl->
mb_x;
405 if (list0 && list1) {
415 dest_y, dest_cb, dest_cr,
416 x_offset, y_offset, qpix_put, chroma_put,
417 pixel_shift, chroma_idc);
419 tmp_y, tmp_cb, tmp_cr,
420 x_offset, y_offset, qpix_put, chroma_put,
421 pixel_shift, chroma_idc);
425 int weight1 = 64 - weight0;
427 height, 5, weight0, weight1, 0);
430 chroma_height, 5, weight0, weight1, 0);
432 chroma_height, 5, weight0, weight1, 0);
442 chroma_weight_avg(dest_cb, tmp_cb, sl->
mb_uvlinesize, chroma_height,
448 chroma_weight_avg(dest_cr, tmp_cr, sl->
mb_uvlinesize, chroma_height,
457 int list = list1 ? 1 : 0;
461 dest_y, dest_cb, dest_cr, x_offset, y_offset,
462 qpix_put, chroma_put, pixel_shift, chroma_idc);
484 int list,
int pixel_shift,
494 int off = mx * (1<< pixel_shift) +
498 if (chroma_idc == 3 ) {
502 off= ((mx>>1)+64) * (1<<pixel_shift) + ((my>>1) + (sl->
mb_x&7))*sl->
uvlinesize;
503 h->vdsp.prefetch(
src[1] + off,
src[2] -
src[1], 2);
511 int linesize,
int uvlinesize,
512 int xchg,
int chroma444,
513 int simple,
int pixel_shift)
531 deblock_topleft =
h->slice_table[sl->
mb_xy - 1 -
h->mb_stride] == sl->
slice_num;
534 deblock_topleft = (sl->
mb_x > 0);
538 src_y -= linesize + 1 + pixel_shift;
539 src_cb -= uvlinesize + 1 + pixel_shift;
540 src_cr -= uvlinesize + 1 + pixel_shift;
545 #define XCHG(a, b, xchg) \
548 AV_SWAP64(b + 0, a + 0); \
549 AV_SWAP64(b + 8, a + 8); \
559 if (deblock_topleft) {
560 XCHG(top_border_m1 + (8 << pixel_shift),
561 src_y - (7 << pixel_shift), 1);
563 XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
564 XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
565 if (sl->
mb_x + 1 <
h->mb_width) {
567 src_y + (17 << pixel_shift), 1);
571 if (deblock_topleft) {
572 XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
573 XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
575 XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
576 XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
577 XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
578 XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
579 if (sl->
mb_x + 1 <
h->mb_width) {
580 XCHG(sl->
top_borders[top_idx][sl->
mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
581 XCHG(sl->
top_borders[top_idx][sl->
mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
584 if (deblock_topleft) {
585 XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
586 XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
588 XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
589 XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
598 if (high_bit_depth) {
607 if (high_bit_depth) {
615 int mb_type,
int simple,
616 int transform_bypass,
618 const int *block_offset,
626 block_offset += 16 * p;
629 if (transform_bypass) {
631 idct_add =
h->h264dsp.h264_add_pixels8_clear;
633 idct_dc_add =
h->h264dsp.h264_idct8_dc_add;
634 idct_add =
h->h264dsp.h264_idct8_add;
636 for (
i = 0;
i < 16;
i += 4) {
637 uint8_t *
const ptr = dest_y + block_offset[
i];
639 if (transform_bypass &&
h->ps.sps->profile_idc == 244 && dir <= 1) {
640 if (
h->x264_build < 151
U) {
641 h->hpc.pred8x8l_add[dir](ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
643 h->hpc.pred8x8l_filter_add[dir](ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift),
644 (sl-> topleft_samples_available <<
i) & 0x8000,
651 if (nnz == 1 &&
dctcoef_get(sl->
mb, pixel_shift,
i * 16 + p * 256))
652 idct_dc_add(ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
654 idct_add(ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
659 if (transform_bypass) {
661 idct_add =
h->h264dsp.h264_add_pixels4_clear;
663 idct_dc_add =
h->h264dsp.h264_idct_dc_add;
664 idct_add =
h->h264dsp.h264_idct_add;
666 for (
i = 0;
i < 16;
i++) {
667 uint8_t *
const ptr = dest_y + block_offset[
i];
670 if (transform_bypass &&
h->ps.sps->profile_idc == 244 && dir <= 1) {
671 h->hpc.pred4x4_add[dir](ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
679 if (!topright_avail) {
681 tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
682 topright = (
uint8_t *)&tr_high;
684 tr = ptr[3 - linesize] * 0x01010101
u;
688 topright = ptr + (4 << pixel_shift) - linesize;
692 h->hpc.pred4x4[dir](ptr, topright, linesize);
695 if (nnz == 1 &&
dctcoef_get(sl->
mb, pixel_shift,
i * 16 + p * 256))
696 idct_dc_add(ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
698 idct_add(ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
706 if (!transform_bypass)
707 h->h264dsp.h264_luma_dc_dequant_idct(sl->
mb + (p * 256 << pixel_shift),
709 h->ps.pps->dequant4_coeff[p][qscale][0]);
711 static const uint8_t dc_mapping[16] = {
712 0 * 16, 1 * 16, 4 * 16, 5 * 16,
713 2 * 16, 3 * 16, 6 * 16, 7 * 16,
714 8 * 16, 9 * 16, 12 * 16, 13 * 16,
715 10 * 16, 11 * 16, 14 * 16, 15 * 16
717 for (
i = 0;
i < 16;
i++)
719 pixel_shift, dc_mapping[
i],
728 int mb_type,
int simple,
729 int transform_bypass,
731 const int *block_offset,
737 block_offset += 16 * p;
740 if (transform_bypass) {
741 if (
h->ps.sps->profile_idc == 244 &&
745 sl->
mb + (p * 256 << pixel_shift),
748 for (
i = 0;
i < 16;
i++)
751 h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[
i],
752 sl->
mb + (
i * 16 + p * 256 << pixel_shift),
756 h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
757 sl->
mb + (p * 256 << pixel_shift),
761 }
else if (sl->
cbp & 15) {
762 if (transform_bypass) {
763 const int di =
IS_8x8DCT(mb_type) ? 4 : 1;
764 idct_add =
IS_8x8DCT(mb_type) ?
h->h264dsp.h264_add_pixels8_clear
765 :
h->h264dsp.h264_add_pixels4_clear;
766 for (
i = 0;
i < 16;
i += di)
768 idct_add(dest_y + block_offset[
i],
769 sl->
mb + (
i * 16 + p * 256 << pixel_shift),
773 h->h264dsp.h264_idct8_add4(dest_y, block_offset,
774 sl->
mb + (p * 256 << pixel_shift),
778 h->h264dsp.h264_idct_add16(dest_y, block_offset,
779 sl->
mb + (p * 256 << pixel_shift),
801 const int mb_xy = sl->
mb_xy;
802 const int mb_type =
h->cur_pic.mb_type[mb_xy];
803 int is_complex = CONFIG_SMALL || sl->
is_complex ||
807 if (is_complex ||
h->pixel_shift)
808 hl_decode_mb_444_complex(
h, sl);
810 hl_decode_mb_444_simple_8(
h, sl);
811 }
else if (is_complex) {
812 hl_decode_mb_complex(
h, sl);
813 }
else if (
h->pixel_shift) {
814 hl_decode_mb_simple_16(
h, sl);
816 hl_decode_mb_simple_8(
h, sl);