00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00038 #include "libavutil/mathematics.h"
00039 #include "nellymoser.h"
00040 #include "avcodec.h"
00041 #include "dsputil.h"
00042 #include "fft.h"
00043 #include "sinewin.h"
00044
00045 #define BITSTREAM_WRITER_LE
00046 #include "put_bits.h"
00047
/** Number of entries in pow_table[]; the low 11 bits of a power index select an entry. */
#define POW_TABLE_SIZE   (1 << 11)
/** Bias added to the exponent when pow_table[] is built and to the shift when it is used. */
#define POW_TABLE_OFFSET 3
/** Number of power-index slots per band examined by the trellis exponent search. */
#define OPT_SIZE         ((1 << 15) + 3000)
00051
00052 typedef struct NellyMoserEncodeContext {
00053 AVCodecContext *avctx;
00054 int last_frame;
00055 int bufsel;
00056 int have_saved;
00057 DSPContext dsp;
00058 FFTContext mdct_ctx;
00059 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
00060 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
00061 DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN];
00062 float (*opt )[NELLY_BANDS];
00063 uint8_t (*path)[NELLY_BANDS];
00064 } NellyMoserEncodeContext;
00065
00066 static float pow_table[POW_TABLE_SIZE];
00067
/**
 * First-guess lookup for the initial (band 0) exponent.
 *
 * get_exponent_greedy() indexes it with clip((lrintf(cand) >> 8) - 20, 0, 95)
 * and uses the result as a starting index into ff_nelly_init_table, which
 * find_best() then refines by comparing against the next entry.
 */
static const uint8_t sf_lut[96] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
};
00076
/**
 * First-guess lookup for the delta-coded exponents of bands 1 and up.
 *
 * get_exponent_greedy() indexes it with clip((lrintf(delta) >> 8) + 37, 0, 77)
 * and uses the result as a starting index into ff_nelly_delta_table.
 */
static const uint8_t sf_delta_lut[78] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
};
00084
/**
 * First-guess quantizer index lookup used by encode_block().
 *
 * The table is a concatenation of one segment per bit count; the segment
 * for `bits` spans [quant_lut_offset[bits], quant_lut_offset[bits + 1]).
 * Blank lines below separate the segments.
 */
static const uint8_t quant_lut[230] = {
     0,

     0,  1,  2,

     0,  1,  2,  3,  4,  5,  6,

     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
00111
/*
 * Per-bit-count parameters for the quant_lut[] first guess, all indexed by
 * the number of bits assigned to a coefficient:
 * encode_block() evaluates coeff * quant_lut_mul[bits] + quant_lut_add[bits]
 * and clips it to [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1].
 */
static const float   quant_lut_mul[7]    = { 0.0, 0.0, 2.0, 2.0,  5.0, 12.0,  36.6 };
static const float   quant_lut_add[7]    = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
00115
00116 static void apply_mdct(NellyMoserEncodeContext *s)
00117 {
00118 s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN);
00119 s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
00120 NELLY_BUF_LEN);
00121 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
00122
00123 s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN,
00124 ff_sine_128, NELLY_BUF_LEN);
00125 s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
00126 NELLY_BUF_LEN);
00127 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
00128 }
00129
00130 static av_cold int encode_init(AVCodecContext *avctx)
00131 {
00132 NellyMoserEncodeContext *s = avctx->priv_data;
00133 int i;
00134
00135 if (avctx->channels != 1) {
00136 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
00137 return -1;
00138 }
00139
00140 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
00141 avctx->sample_rate != 11025 &&
00142 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
00143 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
00144 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
00145 return -1;
00146 }
00147
00148 avctx->frame_size = NELLY_SAMPLES;
00149 s->avctx = avctx;
00150 ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0);
00151 dsputil_init(&s->dsp, avctx);
00152
00153
00154 ff_sine_window_init(ff_sine_128, 128);
00155 for (i = 0; i < POW_TABLE_SIZE; i++)
00156 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
00157
00158 if (s->avctx->trellis) {
00159 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
00160 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
00161 }
00162
00163 return 0;
00164 }
00165
00166 static av_cold int encode_end(AVCodecContext *avctx)
00167 {
00168 NellyMoserEncodeContext *s = avctx->priv_data;
00169
00170 ff_mdct_end(&s->mdct_ctx);
00171
00172 if (s->avctx->trellis) {
00173 av_free(s->opt);
00174 av_free(s->path);
00175 }
00176
00177 return 0;
00178 }
00179
/**
 * Pick the index of the entry in `table` closest to `val`.
 *
 * A first guess is read from LUT at clip((lrintf(val) >> 8) + LUT_add,
 * 0, LUT_size - 1); the guess is then bumped by one if the following table
 * entry is closer to `val`.
 *
 * The result is stored in a variable named `best_idx` that must exist in the
 * calling scope. `val` is evaluated more than once, so it must not have side
 * effects. The expansion is a single statement and is safe inside unbraced
 * if/else bodies.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size)                          \
    do {                                                                       \
        best_idx =                                                             \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) >                                  \
            fabs((val) - (table)[best_idx + 1]))                               \
            best_idx++;                                                        \
    } while (0)
00185
00186 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00187 {
00188 int band, best_idx, power_idx = 0;
00189 float power_candidate;
00190
00191
00192 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
00193 idx_table[0] = best_idx;
00194 power_idx = ff_nelly_init_table[best_idx];
00195
00196 for (band = 1; band < NELLY_BANDS; band++) {
00197 power_candidate = cand[band] - power_idx;
00198 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
00199 idx_table[band] = best_idx;
00200 power_idx += ff_nelly_delta_table[best_idx];
00201 }
00202 }
00203
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number; accepted but not used in the computation
 * @return (x - y) * (x - y)
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
00210
00211 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00212 {
00213 int i, j, band, best_idx;
00214 float power_candidate, best_val;
00215
00216 float (*opt )[NELLY_BANDS] = s->opt ;
00217 uint8_t(*path)[NELLY_BANDS] = s->path;
00218
00219 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
00220 opt[0][i] = INFINITY;
00221 }
00222
00223 for (i = 0; i < 64; i++) {
00224 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
00225 path[0][ff_nelly_init_table[i]] = i;
00226 }
00227
00228 for (band = 1; band < NELLY_BANDS; band++) {
00229 int q, c = 0;
00230 float tmp;
00231 int idx_min, idx_max, idx;
00232 power_candidate = cand[band];
00233 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
00234 idx_min = FFMAX(0, cand[band] - q);
00235 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
00236 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
00237 if ( isinf(opt[band - 1][i]) )
00238 continue;
00239 for (j = 0; j < 32; j++) {
00240 idx = i + ff_nelly_delta_table[j];
00241 if (idx > idx_max)
00242 break;
00243 if (idx >= idx_min) {
00244 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
00245 if (opt[band][idx] > tmp) {
00246 opt[band][idx] = tmp;
00247 path[band][idx] = j;
00248 c = 1;
00249 }
00250 }
00251 }
00252 }
00253 }
00254 assert(c);
00255 }
00256
00257 best_val = INFINITY;
00258 best_idx = -1;
00259 band = NELLY_BANDS - 1;
00260 for (i = 0; i < OPT_SIZE; i++) {
00261 if (best_val > opt[band][i]) {
00262 best_val = opt[band][i];
00263 best_idx = i;
00264 }
00265 }
00266 for (band = NELLY_BANDS - 1; band >= 0; band--) {
00267 idx_table[band] = path[band][best_idx];
00268 if (band) {
00269 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
00270 }
00271 }
00272 }
00273
00280 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
00281 {
00282 PutBitContext pb;
00283 int i, j, band, block, best_idx, power_idx = 0;
00284 float power_val, coeff, coeff_sum;
00285 float pows[NELLY_FILL_LEN];
00286 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
00287 float cand[NELLY_BANDS];
00288
00289 apply_mdct(s);
00290
00291 init_put_bits(&pb, output, output_size * 8);
00292
00293 i = 0;
00294 for (band = 0; band < NELLY_BANDS; band++) {
00295 coeff_sum = 0;
00296 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00297 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
00298 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
00299 }
00300 cand[band] =
00301 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
00302 }
00303
00304 if (s->avctx->trellis) {
00305 get_exponent_dynamic(s, cand, idx_table);
00306 } else {
00307 get_exponent_greedy(s, cand, idx_table);
00308 }
00309
00310 i = 0;
00311 for (band = 0; band < NELLY_BANDS; band++) {
00312 if (band) {
00313 power_idx += ff_nelly_delta_table[idx_table[band]];
00314 put_bits(&pb, 5, idx_table[band]);
00315 } else {
00316 power_idx = ff_nelly_init_table[idx_table[0]];
00317 put_bits(&pb, 6, idx_table[0]);
00318 }
00319 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
00320 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00321 s->mdct_out[i] *= power_val;
00322 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
00323 pows[i] = power_idx;
00324 }
00325 }
00326
00327 ff_nelly_get_sample_bits(pows, bits);
00328
00329 for (block = 0; block < 2; block++) {
00330 for (i = 0; i < NELLY_FILL_LEN; i++) {
00331 if (bits[i] > 0) {
00332 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
00333 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
00334 best_idx =
00335 quant_lut[av_clip (
00336 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
00337 quant_lut_offset[bits[i]],
00338 quant_lut_offset[bits[i]+1] - 1
00339 )];
00340 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
00341 best_idx++;
00342
00343 put_bits(&pb, bits[i], best_idx);
00344 }
00345 }
00346 if (!block)
00347 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
00348 }
00349
00350 flush_put_bits(&pb);
00351 }
00352
00353 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
00354 {
00355 NellyMoserEncodeContext *s = avctx->priv_data;
00356 const float *samples = data;
00357 int i;
00358
00359 if (s->last_frame)
00360 return 0;
00361
00362 if (data) {
00363 memcpy(s->buf[s->bufsel], samples, avctx->frame_size * sizeof(*samples));
00364 for (i = avctx->frame_size; i < NELLY_SAMPLES; i++) {
00365 s->buf[s->bufsel][i] = 0;
00366 }
00367 s->bufsel = 1 - s->bufsel;
00368 if (!s->have_saved) {
00369 s->have_saved = 1;
00370 return 0;
00371 }
00372 } else {
00373 memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
00374 s->bufsel = 1 - s->bufsel;
00375 s->last_frame = 1;
00376 }
00377
00378 if (s->have_saved) {
00379 encode_block(s, frame, buf_size);
00380 return NELLY_BLOCK_LEN;
00381 }
00382 return 0;
00383 }
00384
00385 AVCodec ff_nellymoser_encoder = {
00386 .name = "nellymoser",
00387 .type = AVMEDIA_TYPE_AUDIO,
00388 .id = CODEC_ID_NELLYMOSER,
00389 .priv_data_size = sizeof(NellyMoserEncodeContext),
00390 .init = encode_init,
00391 .encode = encode_frame,
00392 .close = encode_end,
00393 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
00394 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
00395 .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
00396 };