40 int n,
int height,
int y_offset,
int list)
43 int filter_height_down = (raw_my & 3) ? 3 : 0;
44 int full_my = (raw_my >> 2) + y_offset;
45 int bottom = full_my + filter_height_down +
height;
49 return FFMAX(0, bottom);
53 int16_t refs[2][48],
int n,
54 int height,
int y_offset,
int list0,
55 int list1,
int *nrefs)
71 if (refs[0][ref_n] < 0)
73 refs[0][ref_n] =
FFMAX(refs[0][ref_n], my);
84 if (refs[1][ref_n] < 0)
86 refs[1][ref_n] =
FFMAX(refs[1][ref_n], my);
98 const int mb_xy = sl->
mb_xy;
101 int nrefs[2] = { 0 };
104 memset(refs, -1,
sizeof(refs));
124 for (i = 0; i < 4; i++) {
127 int y_offset = (i & 2) << 2;
131 IS_DIR(sub_mb_type, 0, 0),
132 IS_DIR(sub_mb_type, 0, 1),
136 IS_DIR(sub_mb_type, 0, 0),
137 IS_DIR(sub_mb_type, 0, 1),
140 IS_DIR(sub_mb_type, 0, 0),
141 IS_DIR(sub_mb_type, 0, 1),
145 IS_DIR(sub_mb_type, 0, 0),
146 IS_DIR(sub_mb_type, 0, 1),
149 IS_DIR(sub_mb_type, 0, 0),
150 IS_DIR(sub_mb_type, 0, 1),
155 for (j = 0; j < 4; j++) {
156 int sub_y_offset = y_offset + 2 * (j & 2);
158 IS_DIR(sub_mb_type, 0, 0),
159 IS_DIR(sub_mb_type, 0, 1),
166 for (list = sl->
list_count - 1; list >= 0; list--)
167 for (ref = 0; ref < 48 && nrefs[list]; ref++) {
168 int row = refs[list][
ref];
173 int pic_height = 16 * h->
mb_height >> ref_field_picture;
181 FFMIN((row >> 1) - !(row & 1),
185 FFMIN((row >> 1), pic_height - 1),
189 FFMIN(row * 2 + ref_field,
194 FFMIN(row, pic_height - 1),
198 FFMIN(row, pic_height - 1),
211 int src_x_offset,
int src_y_offset,
214 int pixel_shift,
int chroma_idc)
216 const int mx = sl->
mv_cache[list][
scan8[
n]][0] + src_x_offset * 8;
218 const int luma_xy = (mx & 3) + ((my & 3) << 2);
223 int extra_height = 0;
225 const int full_mx = mx >> 2;
226 const int full_my = my >> 2;
227 const int pic_width = 16 * h->
mb_width;
236 if (full_mx < 0 - extra_width ||
237 full_my < 0 - extra_height ||
238 full_mx + 16 > pic_width + extra_width ||
239 full_my + 16 > pic_height + extra_height) {
243 16 + 5, 16 + 5 , full_mx - 2,
244 full_my - 2, pic_width, pic_height);
256 if (chroma_idc == 3 ) {
263 full_mx - 2, full_my - 2,
264 pic_width, pic_height);
267 qpix_op[luma_xy](dest_cb, src_cb, sl->
mb_linesize);
277 full_mx - 2, full_my - 2,
278 pic_width, pic_height);
281 qpix_op[luma_xy](dest_cr, src_cr, sl->
mb_linesize);
287 ysh = 3 - (chroma_idc == 2 );
288 if (chroma_idc == 1 &&
MB_FIELD(sl)) {
291 emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
294 src_cb = pic->
data[1] + ((mx >> 3) * (1 << pixel_shift)) +
296 src_cr = pic->
data[2] + ((mx >> 3) * (1 << pixel_shift)) +
302 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
303 pic_width >> 1, pic_height >> (chroma_idc == 1 ));
307 height >> (chroma_idc == 1 ),
308 mx & 7, ((
unsigned)my << (chroma_idc == 2 )) & 7);
313 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
314 pic_width >> 1, pic_height >> (chroma_idc == 1 ));
317 chroma_op(dest_cr, src_cr, sl->
mb_uvlinesize, height >> (chroma_idc == 1 ),
318 mx & 7, ((
unsigned)my << (chroma_idc == 2 )) & 7);
326 int x_offset,
int y_offset,
331 int list0,
int list1,
332 int pixel_shift,
int chroma_idc)
337 dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
338 if (chroma_idc == 3 ) {
339 dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
340 dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
341 }
else if (chroma_idc == 2 ) {
342 dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
343 dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
345 dest_cb += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
346 dest_cr += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
348 x_offset += 8 * sl->
mb_x;
353 mc_dir_part(h, sl, ref, n, square, height, delta, 0,
354 dest_y, dest_cb, dest_cr, x_offset, y_offset,
355 qpix_op, chroma_op, pixel_shift, chroma_idc);
358 chroma_op = chroma_avg;
363 mc_dir_part(h, sl, ref, n, square, height, delta, 1,
364 dest_y, dest_cb, dest_cr, x_offset, y_offset,
365 qpix_op, chroma_op, pixel_shift, chroma_idc);
374 int x_offset,
int y_offset,
381 int list0,
int list1,
382 int pixel_shift,
int chroma_idc)
386 dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
387 if (chroma_idc == 3 ) {
389 chroma_weight_avg = luma_weight_avg;
390 chroma_weight_op = luma_weight_op;
391 dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
392 dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
393 }
else if (chroma_idc == 2 ) {
395 dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
396 dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
398 chroma_height = height >> 1;
399 dest_cb += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
400 dest_cr += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
402 x_offset += 8 * sl->
mb_x;
405 if (list0 && list1) {
415 dest_y, dest_cb, dest_cr,
416 x_offset, y_offset, qpix_put, chroma_put,
417 pixel_shift, chroma_idc);
419 tmp_y, tmp_cb, tmp_cr,
420 x_offset, y_offset, qpix_put, chroma_put,
421 pixel_shift, chroma_idc);
425 int weight1 = 64 - weight0;
427 height, 5, weight0, weight1, 0);
430 chroma_height, 5, weight0, weight1, 0);
432 chroma_height, 5, weight0, weight1, 0);
435 luma_weight_avg(dest_y, tmp_y, sl->
mb_linesize, height,
442 chroma_weight_avg(dest_cb, tmp_cb, sl->
mb_uvlinesize, chroma_height,
448 chroma_weight_avg(dest_cr, tmp_cr, sl->
mb_uvlinesize, chroma_height,
457 int list = list1 ? 1 : 0;
460 mc_dir_part(h, sl, ref, n, square, height, delta, list,
461 dest_y, dest_cb, dest_cr, x_offset, y_offset,
462 qpix_put, chroma_put, pixel_shift, chroma_idc);
484 int list,
int pixel_shift,
494 int off = mx * (1<< pixel_shift) +
498 if (chroma_idc == 3 ) {
502 off= ((mx>>1)+64) * (1<<pixel_shift) + ((my>>1) + (sl->
mb_x&7))*sl->
uvlinesize;
511 int linesize,
int uvlinesize,
512 int xchg,
int chroma444,
513 int simple,
int pixel_shift)
534 deblock_topleft = (sl->
mb_x > 0);
538 src_y -= linesize + 1 + pixel_shift;
539 src_cb -= uvlinesize + 1 + pixel_shift;
540 src_cr -= uvlinesize + 1 + pixel_shift;
545 #define XCHG(a, b, xchg) \
548 AV_SWAP64(b + 0, a + 0); \
549 AV_SWAP64(b + 8, a + 8); \
559 if (deblock_topleft) {
560 XCHG(top_border_m1 + (8 << pixel_shift),
561 src_y - (7 << pixel_shift), 1);
563 XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
564 XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
567 src_y + (17 << pixel_shift), 1);
571 if (deblock_topleft) {
572 XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
573 XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
575 XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
576 XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
577 XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
578 XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
580 XCHG(sl->
top_borders[top_idx][sl->
mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
581 XCHG(sl->
top_borders[top_idx][sl->
mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
584 if (deblock_topleft) {
585 XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
586 XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
588 XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
589 XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
598 if (high_bit_depth) {
607 if (high_bit_depth) {
615 int mb_type,
int simple,
616 int transform_bypass,
618 const int *block_offset,
626 block_offset += 16 * p;
629 if (transform_bypass) {
636 for (i = 0; i < 16; i += 4) {
637 uint8_t *
const ptr = dest_y + block_offset[i];
641 h->
hpc.
pred8x8l_add[dir](ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
644 (sl-> topleft_samples_available << i) & 0x8000,
651 if (nnz == 1 &&
dctcoef_get(sl->
mb, pixel_shift, i * 16 + p * 256))
652 idct_dc_add(ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
654 idct_add(ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
659 if (transform_bypass) {
666 for (i = 0; i < 16; i++) {
667 uint8_t *
const ptr = dest_y + block_offset[i];
671 h->
hpc.
pred4x4_add[dir](ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
679 if (!topright_avail) {
681 tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
682 topright = (
uint8_t *)&tr_high;
684 tr = ptr[3 - linesize] * 0x01010101
u;
688 topright = ptr + (4 << pixel_shift) - linesize;
695 if (nnz == 1 &&
dctcoef_get(sl->
mb, pixel_shift, i * 16 + p * 256))
696 idct_dc_add(ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
698 idct_add(ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
706 if (!transform_bypass)
711 static const uint8_t dc_mapping[16] = {
712 0 * 16, 1 * 16, 4 * 16, 5 * 16,
713 2 * 16, 3 * 16, 6 * 16, 7 * 16,
714 8 * 16, 9 * 16, 12 * 16, 13 * 16,
715 10 * 16, 11 * 16, 14 * 16, 15 * 16
717 for (i = 0; i < 16; i++)
719 pixel_shift, dc_mapping[i],
728 int mb_type,
int simple,
729 int transform_bypass,
731 const int *block_offset,
737 block_offset += 16 * p;
740 if (transform_bypass) {
745 sl->
mb + (p * 256 << pixel_shift),
748 for (i = 0; i < 16; i++)
752 sl->
mb + (i * 16 + p * 256 << pixel_shift),
757 sl->
mb + (p * 256 << pixel_shift),
761 }
else if (sl->
cbp & 15) {
762 if (transform_bypass) {
763 const int di =
IS_8x8DCT(mb_type) ? 4 : 1;
766 for (i = 0; i < 16; i += di)
768 idct_add(dest_y + block_offset[i],
769 sl->
mb + (i * 16 + p * 256 << pixel_shift),
774 sl->
mb + (p * 256 << pixel_shift),
779 sl->
mb + (p * 256 << pixel_shift),
801 const int mb_xy = sl->
mb_xy;
803 int is_complex = CONFIG_SMALL || sl->
is_complex ||
808 hl_decode_mb_444_complex(h, sl);
810 hl_decode_mb_444_simple_8(h, sl);
811 }
else if (is_complex) {
812 hl_decode_mb_complex(h, sl);
814 hl_decode_mb_simple_16(h, sl);
816 hl_decode_mb_simple_8(h, sl);
static void await_references(const H264Context *h, H264SliceContext *sl)
Wait until all reference frames are available for MC operations.
void(* h264_idct_add)(uint8_t *dst, int16_t *block, int stride)
void(* prefetch)(uint8_t *buf, ptrdiff_t stride, int h)
Prefetch memory into cache (if supported by hardware).
void(* pred8x8l_add[2])(uint8_t *pix, int16_t *block, ptrdiff_t stride)
int16_t mv_cache[2][5 *8][2]
Motion vector cache.
static av_always_inline void prefetch_motion(const H264Context *h, H264SliceContext *sl, int list, int pixel_shift, int chroma_idc)
void(* h264_chroma_mc_func)(uint8_t *dst, uint8_t *src, ptrdiff_t srcStride, int h, int x, int y)
unsigned int topleft_samples_available
int chroma_weight[48][2][2][2]
void(* qpel_mc_func)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void(* pred8x8l_filter_add[2])(uint8_t *pix, int16_t *block, int topleft, int topright, ptrdiff_t stride)
void ff_thread_await_progress(ThreadFrame *f, int n, int field)
Wait for earlier decoding threads to finish reference pictures.
void(* h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride)
void(* pred16x16_add[3])(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
void(* h264_idct_add16)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
void(* emulated_edge_mc)(uint8_t *dst, const uint8_t *src, ptrdiff_t dst_linesize, ptrdiff_t src_linesize, int block_w, int block_h, int src_x, int src_y, int w, int h)
Copy a rectangular area of samples to a temporary buffer and replicate the border samples...
uint16_t sub_mb_type[4]
As a DCT coefficient is int32_t in high bit depth, we need to reserve twice the space.
#define av_assert2(cond)
assert() equivalent for checks that lie in speed-critical code.
int field_picture
whether or not picture was encoded in separate fields
void ff_h264_hl_decode_mb(const H264Context *h, H264SliceContext *sl)
Multithreading support functions.
uint8_t(*[2] top_borders)[(16 *3)*2]
#define IS_DIR(a, part, list)
uint32_t(*[6] dequant4_coeff)[16]
void(* h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride)
int luma_weight[48][2][2]
void(* pred4x4[9+3+3])(uint8_t *src, const uint8_t *topright, ptrdiff_t stride)
static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceContext *sl, int n, int square, int height, int delta, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int x_offset, int y_offset, const qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, int list0, int list1, int pixel_shift, int chroma_idc)
#define AV_CODEC_FLAG_GRAY
Only decode/encode grayscale.
unsigned int topright_samples_available
H.264 parameter set handling.
int chroma_log2_weight_denom
int8_t intra4x4_pred_mode_cache[5 *8]
void(* h264_idct8_add)(uint8_t *dst, int16_t *block, int stride)
int deblocking_filter
disable_deblocking_filter_idc with the values 0 and 1 swapped
void(* h264_luma_dc_dequant_idct)(int16_t *output, int16_t *input, int qmul)
H264SEIUnregistered unregistered
static const uint8_t offset[127][2]
int16_t mb_luma_dc[3][16 *2]
as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too lar...
static av_always_inline void mc_part_std(const H264Context *h, H264SliceContext *sl, int n, int square, int height, int delta, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int x_offset, int y_offset, const qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, const qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, int list0, int list1, int pixel_shift, int chroma_idc)
uint16_t * slice_table
slice_table_base + 2*mb_stride + 1
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
GLsizei GLboolean const GLfloat * value
void(* h264_idct_dc_add)(uint8_t *dst, int16_t *block, int stride)
int luma_log2_weight_denom
static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl, int mb_type, int simple, int transform_bypass, int pixel_shift, const int *block_offset, int linesize, uint8_t *dest_y, int p)
H.264 / AVC / MPEG-4 part10 codec.
void(* pred4x4_add[2])(uint8_t *pix, int16_t *block, ptrdiff_t stride)
#define LUMA_DC_BLOCK_INDEX
uint8_t * edge_emu_buffer
static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth, int index, int value)
Libavcodec external API header.
void(* h264_weight_func)(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
uint8_t * data
The data buffer.
int implicit_weight[48][48][2]
static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth, int index)
static const uint8_t scan8[16 *3+3]
void(* pred16x16[4+3+2])(uint8_t *src, ptrdiff_t stride)
void(* h264_idct8_add4)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
uint8_t non_zero_count_cache[15 *8]
non zero coeff count cache.
int pixel_shift
0 for 8-bit H.264, 1 for high-bit-depth H.264
ptrdiff_t mb_linesize
may be equal to s->linesize or s->linesize * 2, for mbaff
void(* h264_idct8_dc_add)(uint8_t *dst, int16_t *block, int stride)
static int get_lowest_part_list_y(H264SliceContext *sl, int n, int height, int y_offset, int list)
static av_always_inline void xchg_mb_border(const H264Context *h, H264SliceContext *sl, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int chroma444, int simple, int pixel_shift)
static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h, H264SliceContext *sl, int mb_type, int simple, int transform_bypass, int pixel_shift, const int *block_offset, int linesize, uint8_t *dest_y, int p)
common internal and external API header
static int ref[MAX_W *MAX_W]
static void get_lowest_part_y(const H264Context *h, H264SliceContext *sl, int16_t refs[2][48], int n, int height, int y_offset, int list0, int list1, int *nrefs)
int8_t ref_cache[2][5 *8]
void(* h264_idct_add16intra)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext *sl, H264Ref *pic, int n, int square, int height, int delta, int list, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int src_x_offset, int src_y_offset, const qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op, int pixel_shift, int chroma_idc)
H264Ref ref_list[2][48]
0..15: frame refs, 16..47: mbaff field refs.
uint8_t * bipred_scratchpad
void(* pred8x8l[9+3])(uint8_t *src, int topleft, int topright, ptrdiff_t stride)
void(* h264_biweight_func)(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)