FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
nellymoserenc.c
Go to the documentation of this file.
1 /*
2  * Nellymoser encoder
3  * This code is developed as part of Google Summer of Code 2008 Program.
4  *
5  * Copyright (c) 2008 Bartlomiej Wolowiec
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file
26  * Nellymoser encoder
27  * by Bartlomiej Wolowiec
28  *
29  * Generic codec information: libavcodec/nellymoserdec.c
30  *
31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32  * (Copyright Joseph Artsimovich and UAB "DKD")
33  *
34  * for more information about nellymoser format, visit:
35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
36  */
37 
38 #include "libavutil/common.h"
39 #include "libavutil/float_dsp.h"
40 #include "libavutil/mathematics.h"
41 
42 #include "audio_frame_queue.h"
43 #include "avcodec.h"
44 #include "fft.h"
45 #include "internal.h"
46 #include "nellymoser.h"
47 #include "sinewin.h"
48 
49 #define BITSTREAM_WRITER_LE
50 #include "put_bits.h"
51 
/** Number of entries in pow_table. */
#define POW_TABLE_SIZE   (1 << 11)
/** Exponent bias added when pow_table is filled and divided out again when it is read. */
#define POW_TABLE_OFFSET 3
/** Row length of the opt/path tables used by the trellis (dynamic) exponent search. */
#define OPT_SIZE         ((1 << 15) + 3000)
55 
56 typedef struct NellyMoserEncodeContext {
64  DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
65  float (*opt )[OPT_SIZE];
68 
69 static float pow_table[POW_TABLE_SIZE]; ///< pow(2, -i / 2048.0 - 3.0);
70 
/**
 * Starting-index lookup for the base (band 0) exponent search.
 * Indexed by (lrintf(val) >> 8) - 20, clipped to [0, 95]; the value is the
 * index into ff_nelly_init_table from which find_best() starts.
 */
static const uint8_t sf_lut[96] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
};
79 
/**
 * Starting-index lookup for the per-band delta exponent search.
 * Indexed by (lrintf(val) >> 8) + 37, clipped to [0, 77]; the value is the
 * index into ff_nelly_delta_table from which find_best() starts.
 */
static const uint8_t sf_delta_lut[78] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
};
87 
/**
 * Starting-index lookup for coefficient quantization.
 * The table is a concatenation of one segment per bit count; segment b spans
 * quant_lut[quant_lut_offset[b]] .. quant_lut[quant_lut_offset[b + 1] - 1].
 * Each value is the dequantization-table index from which the encoder starts
 * its nearest-entry comparison.
 */
static const uint8_t quant_lut[230] = {
     0,

     0,  1,  2,

     0,  1,  2,  3,  4,  5,  6,

     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
114 
/*
 * Per-bit-count parameters for indexing quant_lut. For a coefficient coded
 * with b bits the lookup position is coeff * quant_lut_mul[b] + quant_lut_add[b],
 * clipped to [quant_lut_offset[b], quant_lut_offset[b + 1] - 1].
 */
static const float   quant_lut_mul[7]    = { 0.0, 0.0, 2.0, 2.0,  5.0, 12.0,  36.6 };
static const float   quant_lut_add[7]    = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
118 
120 {
121  float *in0 = s->buf;
122  float *in1 = s->buf + NELLY_BUF_LEN;
123  float *in2 = s->buf + 2 * NELLY_BUF_LEN;
124 
125  s->fdsp->vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
126  s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
127  s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
128 
129  s->fdsp->vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
130  s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
132 }
133 
135 {
137 
138  ff_mdct_end(&s->mdct_ctx);
139 
140  if (s->avctx->trellis) {
141  av_freep(&s->opt);
142  av_freep(&s->path);
143  }
144  ff_af_queue_close(&s->afq);
145  av_freep(&s->fdsp);
146 
147  return 0;
148 }
149 
151 {
153  int i, ret;
154 
155  if (avctx->channels != 1) {
156  av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
157  return AVERROR(EINVAL);
158  }
159 
160  if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
161  avctx->sample_rate != 11025 &&
162  avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
164  av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
165  return AVERROR(EINVAL);
166  }
167 
168  avctx->frame_size = NELLY_SAMPLES;
170  ff_af_queue_init(avctx, &s->afq);
171  s->avctx = avctx;
172  if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
173  goto error;
175  if (!s->fdsp) {
176  ret = AVERROR(ENOMEM);
177  goto error;
178  }
179 
180  /* Generate overlap window */
182  for (i = 0; i < POW_TABLE_SIZE; i++)
183  pow_table[i] = pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
184 
185  if (s->avctx->trellis) {
186  s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
187  s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
188  if (!s->opt || !s->path) {
189  ret = AVERROR(ENOMEM);
190  goto error;
191  }
192  }
193 
194  return 0;
195 error:
196  encode_end(avctx);
197  return ret;
198 }
199 
/**
 * Pick the entry of `table` closest to `val` and store its index in the
 * caller's `best_idx` variable (which must be in scope).
 *
 * LUT maps (lrintf(val) >> 8) + LUT_add, clipped to [0, LUT_size - 1], to a
 * starting index; that index is then advanced by one if the following table
 * entry is nearer to `val`.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement, and every argument is parenthesized. `val` is evaluated more
 * than once, so it must not have side effects.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size)                          \
    do {                                                                       \
        best_idx =                                                             \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) >                                  \
            fabs((val) - (table)[best_idx + 1]))                               \
            best_idx++;                                                        \
    } while (0)
205 
206 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
207 {
208  int band, best_idx, power_idx = 0;
209  float power_candidate;
210 
211  //base exponent
212  find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
213  idx_table[0] = best_idx;
214  power_idx = ff_nelly_init_table[best_idx];
215 
216  for (band = 1; band < NELLY_BANDS; band++) {
217  power_candidate = cand[band] - power_idx;
218  find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
219  idx_table[band] = best_idx;
220  power_idx += ff_nelly_delta_table[best_idx];
221  }
222 }
223 
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number; not used by the computation, kept so that callers
 *             need not change
 * @return (x - y) * (x - y)
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
230 
231 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
232 {
233  int i, j, band, best_idx;
234  float power_candidate, best_val;
235 
236  float (*opt )[OPT_SIZE] = s->opt ;
237  uint8_t(*path)[OPT_SIZE] = s->path;
238 
239  for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
240  opt[0][i] = INFINITY;
241  }
242 
243  for (i = 0; i < 64; i++) {
244  opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
245  path[0][ff_nelly_init_table[i]] = i;
246  }
247 
248  for (band = 1; band < NELLY_BANDS; band++) {
249  int q, c = 0;
250  float tmp;
251  int idx_min, idx_max, idx;
252  power_candidate = cand[band];
253  for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
254  idx_min = FFMAX(0, cand[band] - q);
255  idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
256  for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
257  if ( isinf(opt[band - 1][i]) )
258  continue;
259  for (j = 0; j < 32; j++) {
260  idx = i + ff_nelly_delta_table[j];
261  if (idx > idx_max)
262  break;
263  if (idx >= idx_min) {
264  tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
265  if (opt[band][idx] > tmp) {
266  opt[band][idx] = tmp;
267  path[band][idx] = j;
268  c = 1;
269  }
270  }
271  }
272  }
273  }
274  av_assert1(c); //FIXME
275  }
276 
277  best_val = INFINITY;
278  best_idx = -1;
279  band = NELLY_BANDS - 1;
280  for (i = 0; i < OPT_SIZE; i++) {
281  if (best_val > opt[band][i]) {
282  best_val = opt[band][i];
283  best_idx = i;
284  }
285  }
286  for (band = NELLY_BANDS - 1; band >= 0; band--) {
287  idx_table[band] = path[band][best_idx];
288  if (band) {
289  best_idx -= ff_nelly_delta_table[path[band][best_idx]];
290  }
291  }
292 }
293 
294 /**
295  * Encode NELLY_SAMPLES samples. Assumes that s->buf holds 3 * NELLY_BUF_LEN samples.
296  * @param s encoder context
297  * @param output output buffer
298  * @param output_size size of output buffer
299  */
300 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
301 {
302  PutBitContext pb;
303  int i, j, band, block, best_idx, power_idx = 0;
304  float power_val, coeff, coeff_sum;
305  float pows[NELLY_FILL_LEN];
306  int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
307  float cand[NELLY_BANDS];
308 
309  apply_mdct(s);
310 
311  init_put_bits(&pb, output, output_size);
312 
313  i = 0;
314  for (band = 0; band < NELLY_BANDS; band++) {
315  coeff_sum = 0;
316  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
317  coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
318  + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
319  }
320  cand[band] =
321  log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
322  }
323 
324  if (s->avctx->trellis) {
325  get_exponent_dynamic(s, cand, idx_table);
326  } else {
327  get_exponent_greedy(s, cand, idx_table);
328  }
329 
330  i = 0;
331  for (band = 0; band < NELLY_BANDS; band++) {
332  if (band) {
333  power_idx += ff_nelly_delta_table[idx_table[band]];
334  put_bits(&pb, 5, idx_table[band]);
335  } else {
336  power_idx = ff_nelly_init_table[idx_table[0]];
337  put_bits(&pb, 6, idx_table[0]);
338  }
339  power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
340  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
341  s->mdct_out[i] *= power_val;
342  s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
343  pows[i] = power_idx;
344  }
345  }
346 
347  ff_nelly_get_sample_bits(pows, bits);
348 
349  for (block = 0; block < 2; block++) {
350  for (i = 0; i < NELLY_FILL_LEN; i++) {
351  if (bits[i] > 0) {
352  const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
353  coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
354  best_idx =
355  quant_lut[av_clip (
356  coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
357  quant_lut_offset[bits[i]],
358  quant_lut_offset[bits[i]+1] - 1
359  )];
360  if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
361  best_idx++;
362 
363  put_bits(&pb, bits[i], best_idx);
364  }
365  }
366  if (!block)
368  }
369 
370  flush_put_bits(&pb);
371  memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
372 }
373 
374 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
375  const AVFrame *frame, int *got_packet_ptr)
376 {
378  int ret;
379 
380  if (s->last_frame)
381  return 0;
382 
383  memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
384  if (frame) {
385  memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
386  frame->nb_samples * sizeof(*s->buf));
387  if (frame->nb_samples < NELLY_SAMPLES) {
388  memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
389  (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
390  if (frame->nb_samples >= NELLY_BUF_LEN)
391  s->last_frame = 1;
392  }
393  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
394  return ret;
395  } else {
396  memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
397  s->last_frame = 1;
398  }
399 
400  if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0)
401  return ret;
402  encode_block(s, avpkt->data, avpkt->size);
403 
404  /* Get the next frame pts/duration */
405  ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
406  &avpkt->duration);
407 
408  *got_packet_ptr = 1;
409  return 0;
410 }
411 
413  .name = "nellymoser",
414  .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
415  .type = AVMEDIA_TYPE_AUDIO,
417  .priv_data_size = sizeof(NellyMoserEncodeContext),
418  .init = encode_init,
419  .encode2 = encode_frame,
420  .close = encode_end,
422  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
424 };