FFmpeg
aacenc_quantization.h
Go to the documentation of this file.
1 /*
2  * AAC encoder quantizer
3  * Copyright (C) 2015 Rostislav Pehlivanov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder quantizer
25  * @author Rostislav Pehlivanov ( atomnuker gmail com )
26  */
27 
28 #ifndef AVCODEC_AACENC_QUANTIZATION_H
29 #define AVCODEC_AACENC_QUANTIZATION_H
30 
31 #include "aactab.h"
32 #include "aacenc.h"
33 #include "aacenctab.h"
34 #include "aacenc_utils.h"
35 
36 /**
37  * Calculate rate distortion cost for quantizing with given codebook
38  *
39  * @return quantization distortion
40  */
42  struct AACEncContext *s,
43  PutBitContext *pb, const float *in, float *out,
44  const float *scaled, int size, int scale_idx,
45  int cb, const float lambda, const float uplim,
46  int *bits, float *energy, int BT_ZERO, int BT_UNSIGNED,
47  int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO,
48  const float ROUNDING)
49 {
50  const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512;
51  const float Q = ff_aac_pow2sf_tab [q_idx];
52  const float Q34 = ff_aac_pow34sf_tab[q_idx];
53  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
54  const float CLIPPED_ESCAPE = 165140.0f*IQ;
55  int i, j;
56  float cost = 0;
57  float qenergy = 0;
58  const int dim = BT_PAIR ? 2 : 4;
59  int resbits = 0;
60  int off;
61 
62  if (BT_ZERO || BT_NOISE || BT_STEREO) {
63  for (i = 0; i < size; i++)
64  cost += in[i]*in[i];
65  if (bits)
66  *bits = 0;
67  if (energy)
68  *energy = qenergy;
69  if (out) {
70  for (i = 0; i < size; i += dim)
71  for (j = 0; j < dim; j++)
72  out[i+j] = 0.0f;
73  }
74  return cost * lambda;
75  }
76  if (!scaled) {
77  s->abs_pow34(s->scoefs, in, size);
78  scaled = s->scoefs;
79  }
80  s->quant_bands(s->qcoefs, in, scaled, size, !BT_UNSIGNED, aac_cb_maxval[cb], Q34, ROUNDING);
81  if (BT_UNSIGNED) {
82  off = 0;
83  } else {
84  off = aac_cb_maxval[cb];
85  }
86  for (i = 0; i < size; i += dim) {
87  const float *vec;
88  int *quants = s->qcoefs + i;
89  int curidx = 0;
90  int curbits;
91  float quantized, rd = 0.0f;
92  for (j = 0; j < dim; j++) {
93  curidx *= aac_cb_range[cb];
94  curidx += quants[j] + off;
95  }
96  curbits = ff_aac_spectral_bits[cb-1][curidx];
97  vec = &ff_aac_codebook_vectors[cb-1][curidx*dim];
98  if (BT_UNSIGNED) {
99  for (j = 0; j < dim; j++) {
100  float t = fabsf(in[i+j]);
101  float di;
102  if (BT_ESC && vec[j] == 64.0f) { //FIXME: slow
103  if (t >= CLIPPED_ESCAPE) {
104  quantized = CLIPPED_ESCAPE;
105  curbits += 21;
106  } else {
107  int c = av_clip_uintp2(quant(t, Q, ROUNDING), 13);
108  quantized = c*cbrtf(c)*IQ;
109  curbits += av_log2(c)*2 - 4 + 1;
110  }
111  } else {
112  quantized = vec[j]*IQ;
113  }
114  di = t - quantized;
115  if (out)
116  out[i+j] = in[i+j] >= 0 ? quantized : -quantized;
117  if (vec[j] != 0.0f)
118  curbits++;
119  qenergy += quantized*quantized;
120  rd += di*di;
121  }
122  } else {
123  for (j = 0; j < dim; j++) {
124  quantized = vec[j]*IQ;
125  qenergy += quantized*quantized;
126  if (out)
127  out[i+j] = quantized;
128  rd += (in[i+j] - quantized)*(in[i+j] - quantized);
129  }
130  }
131  cost += rd * lambda + curbits;
132  resbits += curbits;
133  if (cost >= uplim)
134  return uplim;
135  if (pb) {
136  put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
137  if (BT_UNSIGNED)
138  for (j = 0; j < dim; j++)
139  if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
140  put_bits(pb, 1, in[i+j] < 0.0f);
141  if (BT_ESC) {
142  for (j = 0; j < 2; j++) {
143  if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
144  int coef = av_clip_uintp2(quant(fabsf(in[i+j]), Q, ROUNDING), 13);
145  int len = av_log2(coef);
146 
147  put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
148  put_sbits(pb, len, coef);
149  }
150  }
151  }
152  }
153  }
154 
155  if (bits)
156  *bits = resbits;
157  if (energy)
158  *energy = qenergy;
159  return cost;
160 }
161 
163  const float *in, float *quant, const float *scaled,
164  int size, int scale_idx, int cb,
165  const float lambda, const float uplim,
166  int *bits, float *energy) {
167  av_assert0(0);
168  return 0.0f;
169 }
170 
/*
 * Generate quantize_and_encode_band_cost_<NAME>(), a wrapper around
 * quantize_and_encode_band_cost_template() with the band-type flags and the
 * rounding constant fixed to the values given here.
 *
 * The generated function ignores the caller's cb when BT_ESC is set and
 * passes ESC_BT to the template instead.
 */
#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, ROUNDING) \
static float quantize_and_encode_band_cost_ ## NAME(                                         \
                                struct AACEncContext *s,                                     \
                                PutBitContext *pb,                                           \
                                const float *in, float *quant, const float *scaled,          \
                                int size, int scale_idx, int cb,                             \
                                const float lambda, const float uplim,                       \
                                int *bits, float *energy) {                                  \
    return quantize_and_encode_band_cost_template(                                           \
                                s, pb, in, quant, scaled, size, scale_idx,                   \
                                BT_ESC ? ESC_BT : cb,                                        \
                                lambda, uplim, bits, energy,                                 \
                                BT_ZERO, BT_UNSIGNED, BT_PAIR,                               \
                                BT_ESC, BT_NOISE, BT_STEREO,                                 \
                                ROUNDING);                                                   \
}
184 
185 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0, 0, 0, ROUND_STANDARD)
186 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0, 0, ROUND_STANDARD)
187 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0, 0, ROUND_STANDARD)
188 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0, 0, ROUND_STANDARD)
189 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0, 0, ROUND_STANDARD)
191 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC_RTZ, 0, 1, 1, 1, 0, 0, ROUND_TO_ZERO)
192 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1, 0, ROUND_STANDARD)
194 
195 static float (*const quantize_and_encode_band_cost_arr[])(
196  struct AACEncContext *s,
197  PutBitContext *pb, const float *in, float *quant,
198  const float *scaled, int size, int scale_idx,
199  int cb, const float lambda, const float uplim,
200  int *bits, float *energy) = {
201  quantize_and_encode_band_cost_ZERO,
202  quantize_and_encode_band_cost_SQUAD,
203  quantize_and_encode_band_cost_SQUAD,
204  quantize_and_encode_band_cost_UQUAD,
205  quantize_and_encode_band_cost_UQUAD,
206  quantize_and_encode_band_cost_SPAIR,
207  quantize_and_encode_band_cost_SPAIR,
208  quantize_and_encode_band_cost_UPAIR,
209  quantize_and_encode_band_cost_UPAIR,
210  quantize_and_encode_band_cost_UPAIR,
211  quantize_and_encode_band_cost_UPAIR,
212  quantize_and_encode_band_cost_ESC,
213  quantize_and_encode_band_cost_NONE, /* CB 12 doesn't exist */
214  quantize_and_encode_band_cost_NOISE,
215  quantize_and_encode_band_cost_STEREO,
216  quantize_and_encode_band_cost_STEREO,
217 };
218 
220  struct AACEncContext *s,
221  PutBitContext *pb, const float *in, float *quant,
222  const float *scaled, int size, int scale_idx,
223  int cb, const float lambda, const float uplim,
224  int *bits, float *energy) = {
225  quantize_and_encode_band_cost_ZERO,
226  quantize_and_encode_band_cost_SQUAD,
227  quantize_and_encode_band_cost_SQUAD,
228  quantize_and_encode_band_cost_UQUAD,
229  quantize_and_encode_band_cost_UQUAD,
230  quantize_and_encode_band_cost_SPAIR,
231  quantize_and_encode_band_cost_SPAIR,
232  quantize_and_encode_band_cost_UPAIR,
233  quantize_and_encode_band_cost_UPAIR,
234  quantize_and_encode_band_cost_UPAIR,
235  quantize_and_encode_band_cost_UPAIR,
236  quantize_and_encode_band_cost_ESC_RTZ,
237  quantize_and_encode_band_cost_NONE, /* CB 12 doesn't exist */
238  quantize_and_encode_band_cost_NOISE,
239  quantize_and_encode_band_cost_STEREO,
240  quantize_and_encode_band_cost_STEREO,
241 };
242 
/*
 * Call the per-codebook cost function for cb. A non-zero rtz picks the
 * round-to-zero table, otherwise the standard table is used.
 * Note that cb appears twice in the expansion (as table index and as
 * argument), so it must not have side effects.
 */
#define quantize_and_encode_band_cost(                                  \
                                s, pb, in, quant, scaled, size,         \
                                scale_idx, cb, lambda, uplim,           \
                                bits, energy, rtz)                      \
    ((rtz) ? quantize_and_encode_band_cost_rtz_arr[cb]                  \
           : quantize_and_encode_band_cost_arr[cb])(                    \
                                s, pb, in, quant, scaled, size,         \
                                scale_idx, cb, lambda, uplim,           \
                                bits, energy)
249 
250 static inline float quantize_band_cost(struct AACEncContext *s, const float *in,
251  const float *scaled, int size, int scale_idx,
252  int cb, const float lambda, const float uplim,
253  int *bits, float *energy, int rtz)
254 {
255  return quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx,
256  cb, lambda, uplim, bits, energy, rtz);
257 }
258 
/**
 * Count the bits needed to code a band with the given codebook.
 *
 * Nothing is written to a bitstream and no reconstruction is produced. The
 * lambda argument is ignored: the cost function is always called with a
 * distortion weight of 0.0f.
 *
 * @param bits   if non-NULL, also receives the returned bit count
 * @param energy forwarded to the cost function (may be NULL)
 * @return number of bits reported by the cost function. When the cost
 *         function stops early at uplim it does not report a bit count, and
 *         0 is returned; callers passing a finite uplim must not treat that
 *         0 as a real size.
 */
static inline int quantize_band_cost_bits(struct AACEncContext *s, const float *in,
                                          const float *scaled, int size, int scale_idx,
                                          int cb, const float lambda, const float uplim,
                                          int *bits, float *energy, int rtz)
{
    /* Must be initialized: the early-exit path of the cost function leaves
     * the bits output untouched, and reading it uninitialized is undefined
     * behavior. */
    int auxbits = 0;
    quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx,
                                  cb, 0.0f, uplim, &auxbits, energy, rtz);
    if (bits) {
        *bits = auxbits;
    }
    return auxbits;
}
272 
274  const float *in, float *out, int size, int scale_idx,
275  int cb, const float lambda, int rtz)
276 {
278  INFINITY, NULL, NULL, rtz);
279 }
280 
282 
283 #endif /* AVCODEC_AACENC_QUANTIZATION_H */
INFINITY
#define INFINITY
Definition: mathematics.h:67
quantize_and_encode_band_cost_rtz_arr
static float(*const quantize_and_encode_band_cost_rtz_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
Definition: aacenc_quantization.h:219
out
FILE * out
Definition: movenc.c:54
cb
static double cb(void *priv, double x, double y)
Definition: vf_geq.c:112
aacenctab.h
put_sbits
static void put_sbits(PutBitContext *pb, int n, int32_t value)
Definition: put_bits.h:240
put_bits
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:208
ROUND_TO_ZERO
#define ROUND_TO_ZERO
Definition: aacenc_utils.h:37
STEREO
#define STEREO
Definition: cook.c:61
aac_cb_maxval
static const uint8_t aac_cb_maxval[12]
Definition: aacenctab.h:126
SCALE_DIV_512
#define SCALE_DIV_512
scalefactor difference that corresponds to scale difference in 512 times
Definition: aac.h:148
POW_SF2_ZERO
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
Definition: aac.h:154
quantize_band_cost_bits
static int quantize_band_cost_bits(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
Definition: aacenc_quantization.h:259
s
#define s(width, name)
Definition: cbs_vp9.c:257
bits
uint8_t bits
Definition: vp3data.h:202
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
quantize_and_encode_band_cost_NONE
static float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
Definition: aacenc_quantization.h:162
f
#define f(width, name)
Definition: cbs_vp9.c:255
PutBitContext
Definition: put_bits.h:35
NULL
#define NULL
Definition: coverity.c:32
aac_cb_range
static const uint8_t aac_cb_range[12]
Definition: aacenctab.h:125
aactab.h
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
QUANTIZE_AND_ENCODE_BAND_COST_FUNC
#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, ROUNDING)
Definition: aacenc_quantization.h:171
size
int size
Definition: twinvq_data.h:11134
aacenc_quantization_misc.h
ff_aac_spectral_codes
const uint16_t *const ff_aac_spectral_codes[11]
Definition: aactab.c:417
ROUND_STANDARD
#define ROUND_STANDARD
Definition: aacenc_utils.h:36
quantize_and_encode_band_cost
#define quantize_and_encode_band_cost( s, pb, in, quant, scaled, size, scale_idx, cb, lambda, uplim, bits, energy, rtz)
Definition: aacenc_quantization.h:243
in
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
Definition: audio_convert.c:326
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
SCALE_ONE_POS
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
Definition: aac.h:149
av_always_inline
#define av_always_inline
Definition: attributes.h:43
cbrtf
static av_always_inline float cbrtf(float x)
Definition: libm.h:61
ff_aac_codebook_vectors
const float *const ff_aac_codebook_vectors[]
Definition: aactab.c:918
len
int len
Definition: vorbis_enc_data.h:452
dim
int dim
Definition: vorbis_enc_data.h:451
ff_aac_spectral_bits
const uint8_t *const ff_aac_spectral_bits[11]
Definition: aactab.c:422
quantize_and_encode_band_cost_template
static av_always_inline float quantize_and_encode_band_cost_template(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int BT_ZERO, int BT_UNSIGNED, int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO, const float ROUNDING)
Calculate rate distortion cost for quantizing with given codebook.
Definition: aacenc_quantization.h:41
AACEncContext
AAC encoder context.
Definition: aacenc.h:376
ff_aac_pow2sf_tab
float ff_aac_pow2sf_tab[428]
Definition: aactab.c:35
quantize_and_encode_band
static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size, int scale_idx, int cb, const float lambda, int rtz)
Definition: aacenc_quantization.h:273
ff_aac_pow34sf_tab
float ff_aac_pow34sf_tab[428]
Definition: aactab.c:36
quant
const uint8_t * quant
Definition: vorbis_enc_data.h:458
AACEncContext::pb
PutBitContext pb
Definition: aacenc.h:379
quantize_band_cost
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
Definition: aacenc_quantization.h:250
aacenc_utils.h
AACEncContext::lambda
float lambda
Definition: aacenc.h:400
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
quantize_and_encode_band_cost_arr
static float(*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
Definition: aacenc_quantization.h:195
aacenc.h