/*
 * Chroma-factor constants.
 * NOTE(review): judging by the names these tag the 4:2:2 and 4:4:4 chroma
 * layouts; their use sites are not visible in this excerpt, so confirm
 * before relying on that reading.
 */
35 #define CFACTOR_Y422 2
36 #define CFACTOR_Y444 3
/*
 * Largest number of macroblocks a single slice may hold (per the name).
 * NOTE(review): presumably the upper bound of the "mbs_per_slice" option
 * and the sizing constant for per-slice scratch buffers -- confirm at the
 * use sites, which are not visible here.
 */
38 #define MAX_MBS_PER_SLICE 8
61 4, 7, 9, 11, 13, 14, 15, 63,
62 7, 7, 11, 12, 14, 15, 63, 63,
63 9, 11, 13, 14, 15, 63, 63, 63,
64 11, 11, 13, 14, 63, 63, 63, 63,
65 11, 13, 14, 63, 63, 63, 63, 63,
66 13, 14, 63, 63, 63, 63, 63, 63,
67 13, 63, 63, 63, 63, 63, 63, 63,
68 63, 63, 63, 63, 63, 63, 63, 63,
71 4, 5, 6, 7, 9, 11, 13, 15,
72 5, 5, 7, 8, 11, 13, 15, 17,
73 6, 7, 9, 11, 13, 15, 15, 17,
74 7, 7, 9, 11, 13, 15, 17, 19,
75 7, 9, 11, 13, 14, 16, 19, 23,
76 9, 11, 13, 14, 16, 19, 23, 29,
77 9, 11, 13, 15, 17, 21, 28, 35,
78 11, 13, 16, 17, 21, 28, 35, 41,
81 4, 4, 5, 5, 6, 7, 7, 9,
82 4, 4, 5, 6, 7, 7, 9, 9,
83 5, 5, 6, 7, 7, 9, 9, 10,
84 5, 5, 6, 7, 7, 9, 9, 10,
85 5, 6, 7, 7, 8, 9, 10, 12,
86 6, 7, 7, 8, 9, 10, 12, 15,
87 6, 7, 7, 9, 10, 11, 14, 17,
88 7, 7, 9, 10, 11, 14, 17, 21,
91 4, 4, 4, 4, 4, 4, 4, 4,
92 4, 4, 4, 4, 4, 4, 4, 4,
93 4, 4, 4, 4, 4, 4, 4, 4,
94 4, 4, 4, 4, 4, 4, 4, 5,
95 4, 4, 4, 4, 4, 4, 5, 5,
96 4, 4, 4, 4, 4, 5, 5, 6,
97 4, 4, 4, 4, 5, 5, 6, 7,
98 4, 4, 4, 4, 5, 6, 7, 7,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
106 4, 4, 4, 4, 4, 4, 4, 4,
107 4, 4, 4, 4, 4, 4, 4, 4,
108 4, 4, 4, 4, 4, 4, 4, 4,
/*
 * Number of macroblock-count limits.
 * Every profile's .br_tab initialiser in this file lists exactly four
 * values; presumably one per limit -- TODO confirm against the limits table.
 */
112 #define NUM_MB_LIMITS 4
130 .tag =
MKTAG(
'a',
'p',
'c',
'o'),
133 .br_tab = { 300, 242, 220, 194 },
138 .tag =
MKTAG(
'a',
'p',
'c',
's'),
141 .br_tab = { 720, 560, 490, 440 },
145 .full_name =
"standard",
146 .tag =
MKTAG(
'a',
'p',
'c',
'n'),
149 .br_tab = { 1050, 808, 710, 632 },
153 .full_name =
"high quality",
154 .tag =
MKTAG(
'a',
'p',
'c',
'h'),
157 .br_tab = { 1566, 1216, 1070, 950 },
162 .tag =
MKTAG(
'a',
'p',
'4',
'h'),
165 .br_tab = { 2350, 1828, 1600, 1425 },
/*
 * Width of the quantiser-search trellis.
 * NOTE(review): presumably the number of trellis nodes reserved per slice
 * position; the use sites are not visible in this excerpt -- confirm.
 */
170 #define TRELLIS_WIDTH 16
/*
 * Score sentinel: half of INT_MAX.
 *
 * The replacement list is parenthesised so the macro always behaves as a
 * single operand. Without the parentheses an expression such as
 * `a % SCORE_LIMIT` would expand to `(a % INT_MAX) / 2`.
 *
 * NOTE(review): the halving presumably leaves headroom so that adding a
 * per-slice error to a limit-valued score cannot overflow -- confirm at
 * the use sites.
 */
#define SCORE_LIMIT (INT_MAX / 2)
/*
 * Number of quantiser values with stored state.
 * NOTE(review): presumably the count of quantisers for which scaled
 * matrices are precomputed; use sites are not visible here -- confirm.
 */
180 #define MAX_STORED_Q 16
199 int linesize, int16_t *
block);
229 int linesize,
int x,
int y,
int w,
int h,
230 int16_t *blocks, uint16_t *emu_buf,
231 int mbs_per_slice,
int blocks_per_mb,
int is_chroma)
233 const uint16_t *esrc;
234 const int mb_width = 4 * blocks_per_mb;
238 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
240 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
244 if (x + mb_width <= w && y + 16 <= h) {
246 elinesize = linesize;
251 elinesize = 16 *
sizeof(*emu_buf);
253 bw =
FFMIN(w - x, mb_width);
254 bh =
FFMIN(h - y, 16);
256 for (j = 0; j < bh; j++) {
257 memcpy(emu_buf + j * 16,
258 (
const uint8_t*)src + j * linesize,
260 pix = emu_buf[j * 16 + bw - 1];
261 for (k = bw; k < mb_width; k++)
262 emu_buf[j * 16 + k] = pix;
265 memcpy(emu_buf + j * 16,
266 emu_buf + (bh - 1) * 16,
267 mb_width *
sizeof(*emu_buf));
270 ctx->
fdct(&ctx->
fdsp, esrc, elinesize, blocks);
272 if (blocks_per_mb > 2) {
273 ctx->
fdct(&ctx->
fdsp, esrc + 8, elinesize, blocks);
276 ctx->
fdct(&ctx->
fdsp, esrc + elinesize * 4, elinesize, blocks);
278 if (blocks_per_mb > 2) {
279 ctx->
fdct(&ctx->
fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
283 ctx->
fdct(&ctx->
fdsp, esrc, elinesize, blocks);
285 ctx->
fdct(&ctx->
fdsp, esrc + elinesize * 4, elinesize, blocks);
287 if (blocks_per_mb > 2) {
288 ctx->
fdct(&ctx->
fdsp, esrc + 8, elinesize, blocks);
290 ctx->
fdct(&ctx->
fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
300 int linesize,
int x,
int y,
int w,
int h,
301 int16_t *blocks,
int mbs_per_slice,
int abits)
303 const int slice_width = 16 * mbs_per_slice;
304 int i, j, copy_w, copy_h;
306 copy_w =
FFMIN(w - x, slice_width);
307 copy_h =
FFMIN(h - y, 16);
308 for (i = 0; i < copy_h; i++) {
309 memcpy(blocks, src, copy_w *
sizeof(*src));
311 for (j = 0; j < copy_w; j++)
314 for (j = 0; j < copy_w; j++)
315 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
316 for (j = copy_w; j < slice_width; j++)
317 blocks[j] = blocks[copy_w - 1];
318 blocks += slice_width;
319 src += linesize >> 1;
321 for (; i < 16; i++) {
322 memcpy(blocks, blocks - slice_width, slice_width *
sizeof(*blocks));
323 blocks += slice_width;
332 unsigned int rice_order, exp_order, switch_bits, switch_val;
336 switch_bits = (codebook & 3) + 1;
337 rice_order = codebook >> 5;
338 exp_order = (codebook >> 2) & 7;
340 switch_val = switch_bits << rice_order;
342 if (val >= switch_val) {
343 val -= switch_val - (1 << exp_order);
346 put_bits(pb, exponent - exp_order + switch_bits, 0);
349 exponent = val >> rice_order;
/*
 * GET_SIGN(x): sign mask of a 32-bit signed value -- 0 when x >= 0 and -1
 * (all bits set) when x < 0. Relies on arithmetic right shift of negative
 * values, which is implementation-defined in C but is what the supported
 * compilers provide.
 */
#define GET_SIGN(x) ((x) >> 31)

/*
 * MAKE_CODE(x): fold a signed value into a non-negative code so that small
 * magnitudes get small codes: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, 2 -> 4, ...
 *
 * The doubling is written as a multiplication instead of `<< 1` because
 * left-shifting a negative value is undefined behaviour in C; the result
 * is the same for every input whose doubled value fits in an int.
 *
 * The argument is evaluated twice, so do not pass an expression with side
 * effects.
 */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
363 int blocks_per_slice,
int scale)
366 int codebook = 3, code,
dc, prev_dc,
delta, sign, new_sign;
368 prev_dc = (blocks[0] - 0x4000) / scale;
374 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
375 dc = (blocks[0] - 0x4000) / scale;
376 delta = dc - prev_dc;
378 delta = (delta ^ sign) - sign;
381 codebook = (code + (code & 1)) >> 1;
382 codebook =
FFMIN(codebook, 3);
389 int blocks_per_slice,
390 int plane_size_factor,
391 const uint8_t *scan,
const int16_t *qmat)
395 int max_coeffs, abs_level;
397 max_coeffs = blocks_per_slice << 6;
402 for (i = 1; i < 64; i++) {
403 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
404 level = blocks[idx] / qmat[scan[i]];
406 abs_level =
FFABS(level);
423 const uint16_t *
src,
int linesize,
424 int mbs_per_slice, int16_t *blocks,
425 int blocks_per_mb,
int plane_size_factor,
428 int blocks_per_slice, saved_pos;
431 blocks_per_slice = mbs_per_slice * blocks_per_mb;
433 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
434 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
443 const int mask = (1 << abits) - 1;
444 const int dbits = (abits == 8) ? 4 : 7;
445 const int dsize = 1 << dbits - 1;
446 int diff = cur - prev;
449 if (diff >= (1 << abits) - dsize)
451 if (diff < -dsize || diff > dsize || !diff) {
476 int mbs_per_slice, uint16_t *blocks,
480 const int mask = (1 << abits) - 1;
481 const int num_coeffs = mbs_per_slice * 256;
483 int prev =
mask, cur;
500 }
while (idx < num_coeffs);
516 int slice_width_factor =
av_log2(mbs_per_slice);
517 int num_cblocks, pwidth, linesize, line_add;
518 int plane_factor, is_chroma;
532 for (i = 0; i < 64; i++)
537 is_chroma = (i == 1 || i == 2);
538 plane_factor = slice_width_factor + 2;
545 pwidth = avctx->
width;
550 pwidth = avctx->
width >> 1;
554 src = (
const uint16_t*)(pic->
data[i] + yp * linesize +
561 mbs_per_slice, num_cblocks, is_chroma);
563 mbs_per_slice, ctx->
blocks[0],
564 num_cblocks, plane_factor,
573 total_size += sizes[i];
576 "Underestimated required buffer size.\n");
585 unsigned int rice_order, exp_order, switch_bits, switch_val;
589 switch_bits = (codebook & 3) + 1;
590 rice_order = codebook >> 5;
591 exp_order = (codebook >> 2) & 7;
593 switch_val = switch_bits << rice_order;
595 if (val >= switch_val) {
596 val -= switch_val - (1 << exp_order);
599 return exponent * 2 - exp_order + switch_bits + 1;
601 return (val >> rice_order) + rice_order + 1;
605 static int estimate_dcs(
int *error, int16_t *blocks,
int blocks_per_slice,
609 int codebook = 3, code,
dc, prev_dc,
delta, sign, new_sign;
612 prev_dc = (blocks[0] - 0x4000) / scale;
617 *error +=
FFABS(blocks[0] - 0x4000) % scale;
619 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
620 dc = (blocks[0] - 0x4000) / scale;
621 *error +=
FFABS(blocks[0] - 0x4000) % scale;
622 delta = dc - prev_dc;
624 delta = (delta ^ sign) - sign;
627 codebook = (code + (code & 1)) >> 1;
628 codebook =
FFMIN(codebook, 3);
636 static int estimate_acs(
int *error, int16_t *blocks,
int blocks_per_slice,
637 int plane_size_factor,
638 const uint8_t *scan,
const int16_t *qmat)
642 int max_coeffs, abs_level;
645 max_coeffs = blocks_per_slice << 6;
650 for (i = 1; i < 64; i++) {
651 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
652 level = blocks[idx] / qmat[scan[i]];
653 *error +=
FFABS(blocks[idx]) % qmat[scan[i]];
655 abs_level =
FFABS(level);
673 const uint16_t *
src,
int linesize,
675 int blocks_per_mb,
int plane_size_factor,
678 int blocks_per_slice;
681 blocks_per_slice = mbs_per_slice * blocks_per_mb;
685 plane_size_factor, ctx->
scantable, qmat);
692 const int mask = (1 << abits) - 1;
693 const int dbits = (abits == 8) ? 4 : 7;
694 const int dsize = 1 << dbits - 1;
695 int diff = cur - prev;
698 if (diff >= (1 << abits) - dsize)
700 if (diff < -dsize || diff > dsize || !diff)
707 const uint16_t *
src,
int linesize,
708 int mbs_per_slice,
int quant,
712 const int mask = (1 << abits) - 1;
713 const int num_coeffs = mbs_per_slice * 256;
714 int prev =
mask, cur;
738 }
while (idx < num_coeffs);
751 int trellis_node,
int x,
int y,
int mbs_per_slice,
755 int i, q, pq, xp, yp;
757 int slice_width_factor =
av_log2(mbs_per_slice);
762 int error,
bits, bits_limit;
763 int mbs, prev, cur, new_score;
767 int linesize[4], line_add;
773 mbs = x + mbs_per_slice;
776 is_chroma[i] = (i == 1 || i == 2);
777 plane_factor[i] = slice_width_factor + 2;
784 pwidth = avctx->
width;
789 pwidth = avctx->
width >> 1;
793 src = (
const uint16_t*)(pic->
data[i] + yp * linesize[i] +
800 mbs_per_slice, num_cblocks[i], is_chroma[i]);
808 for (q = min_quant; q < max_quant + 2; q++) {
814 for (q = min_quant; q <=
max_quant; q++) {
821 num_cblocks[i], plane_factor[i],
826 mbs_per_slice, q, td->
blocks[3]);
827 if (bits > 65000 * 8)
830 slice_bits[q] =
bits;
831 slice_score[q] = error;
833 if (slice_bits[max_quant] <= ctx->
bits_per_mb * mbs_per_slice) {
834 slice_bits[max_quant + 1] = slice_bits[
max_quant];
835 slice_score[max_quant + 1] = slice_score[
max_quant] + 1;
838 for (q = max_quant + 1; q < 128; q++) {
845 for (i = 0; i < 64; i++)
852 num_cblocks[i], plane_factor[i],
857 mbs_per_slice, q, td->
blocks[3]);
858 if (bits <= ctx->bits_per_mb * mbs_per_slice)
862 slice_bits[max_quant + 1] =
bits;
863 slice_score[max_quant + 1] = error;
866 td->
nodes[trellis_node + max_quant + 1].
quant = overquant;
869 for (pq = min_quant; pq < max_quant + 2; pq++) {
872 for (q = min_quant; q < max_quant + 2; q++) {
873 cur = trellis_node + q;
875 bits = td->
nodes[prev].
bits + slice_bits[q];
876 error = slice_score[q];
877 if (bits > bits_limit)
896 for (q = min_quant + 1; q < max_quant + 2; q++) {
897 if (td->
nodes[trellis_node + q].
score <= error) {
899 pq = trellis_node + q;
907 int jobnr,
int threadnr)
912 int x,
y = jobnr,
mb, q = 0;
914 for (x = mb = 0; x < ctx->
mb_width; x += mbs_per_slice, mb++) {
915 while (ctx->
mb_width - x < mbs_per_slice)
931 const AVFrame *pic,
int *got_packet)
934 uint8_t *orig_buf, *
buf, *slice_hdr, *slice_sizes, *tmp;
937 int x,
y, i,
mb, q = 0;
938 int sizes[4] = { 0 };
939 int slice_hdr_size = 2 + 2 * (ctx->
num_planes - 1);
950 orig_buf = pkt->
data;
954 bytestream_put_be32 (&orig_buf,
FRAME_ID);
960 bytestream_put_be16 (&buf, 0);
962 bytestream_put_be16 (&buf, avctx->
width);
963 bytestream_put_be16 (&buf, avctx->
height);
968 bytestream_put_byte (&buf, frame_flags);
970 bytestream_put_byte (&buf, 0);
972 bytestream_put_byte (&buf, avctx->
color_trc);
973 bytestream_put_byte (&buf, avctx->
colorspace);
974 bytestream_put_byte (&buf, 0x40 | (ctx->
alpha_bits >> 3));
975 bytestream_put_byte (&buf, 0);
977 bytestream_put_byte (&buf, 0x03);
979 for (i = 0; i < 64; i++)
980 bytestream_put_byte(&buf, ctx->
quant_mat[i]);
982 for (i = 0; i < 64; i++)
983 bytestream_put_byte(&buf, ctx->
quant_mat[i]);
985 bytestream_put_byte (&buf, 0x00);
987 bytestream_put_be16 (&tmp, buf - orig_buf);
993 picture_size_pos = buf + 1;
994 bytestream_put_byte (&buf, 0x40);
1013 for (x = mb = 0; x < ctx->
mb_width; x += mbs_per_slice, mb++) {
1017 while (ctx->
mb_width - x < mbs_per_slice)
1018 mbs_per_slice >>= 1;
1020 bytestream_put_byte(&buf, slice_hdr_size << 3);
1022 buf += slice_hdr_size - 1;
1023 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1029 max_slice_size - pkt_size;
1031 delta =
FFMAX(delta, 2 * max_slice_size);
1036 "Packet too small: is %i,"
1037 " needs %i (slice: %i). "
1038 "Correct allocation",
1039 pkt_size, delta, max_slice_size);
1049 orig_buf = pkt->
data + (orig_buf -
start);
1051 picture_size_pos = pkt->
data + (picture_size_pos -
start);
1052 slice_sizes = pkt->
data + (slice_sizes -
start);
1053 slice_hdr = pkt->
data + (slice_hdr -
start);
1062 bytestream_put_byte(&slice_hdr, q);
1063 slice_size = slice_hdr_size + sizes[ctx->
num_planes - 1];
1065 bytestream_put_be16(&slice_hdr, sizes[i]);
1066 slice_size += sizes[i];
1068 bytestream_put_be16(&slice_sizes, slice_size);
1069 buf += slice_size - slice_hdr_size;
1070 if (max_slice_size < slice_size)
1071 max_slice_size = slice_size;
1075 picture_size = buf - (picture_size_pos - 1);
1076 bytestream_put_be32(&picture_size_pos, picture_size);
1080 frame_size = buf - orig_buf;
1081 bytestream_put_be32(&orig_buf, frame_size);
1108 int linesize, int16_t *
block)
1111 const uint16_t *tsrc =
src;
1113 for (y = 0; y < 8; y++) {
1114 for (x = 0; x < 8; x++)
1115 block[y * 8 + x] = tsrc[x];
1116 tsrc += linesize >> 1;
1142 if (mps & (mps - 1)) {
1144 "there should be an integer power of two MBs per slice\n");
1154 ?
"4:4:4:4 profile because of the used input colorspace"
1155 :
"HQ profile to keep best quality");
1161 "encode alpha. Override with -profile if needed.\n");
1195 if (strlen(ctx->
vendor) != 4) {
1216 for (j = 0; j < 64; j++)
1240 for (i = min_quant; i < max_quant + 2; i++) {
1254 for (j = 0; j < 64; j++) {
1281 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
/*
 * Byte offset of member x inside ProresContext; used by the option table
 * entries in this file (e.g. OFFSET(mbs_per_slice), OFFSET(vendor)).
 */
1290 #define OFFSET(x) offsetof(ProresContext, x)
/*
 * Flag set shared by the option table entries in this file: the option is
 * both a video parameter and an encoding parameter.
 *
 * The replacement list is parenthesised so the macro acts as a single
 * operand. Unparenthesised, `VE & mask` would parse as
 * `AV_OPT_FLAG_VIDEO_PARAM | (AV_OPT_FLAG_ENCODING_PARAM & mask)`.
 */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1294 {
"mbs_per_slice",
"macroblocks per slice",
OFFSET(mbs_per_slice),
1300 0, 0,
VE,
"profile" },
1302 0, 0,
VE,
"profile" },
1304 0, 0,
VE,
"profile" },
1306 0, 0,
VE,
"profile" },
1308 0, 0,
VE,
"profile" },
1310 0, 0,
VE,
"profile" },
1311 {
"vendor",
"vendor ID",
OFFSET(vendor),
1313 {
"bits_per_mb",
"desired bits per macroblock",
OFFSET(bits_per_mb),
1318 0, 0,
VE,
"quant_mat" },
1320 0, 0,
VE,
"quant_mat" },
1322 0, 0,
VE,
"quant_mat" },
1324 0, 0,
VE,
"quant_mat" },
1326 0, 0,
VE,
"quant_mat" },
1328 0, 0,
VE,
"quant_mat" },
1330 { .i64 = 16 }, 0, 16,
VE },
1342 .
name =
"prores_ks",