FFmpeg: libavcodec/nellymoserenc.c Source File

00001 /*
00002  * Nellymoser encoder
00003  * This code is developed as part of Google Summer of Code 2008 Program.
00004  *
00005  * Copyright (c) 2008 Bartlomiej Wolowiec
00006  *
00007  * This file is part of FFmpeg.
00008  *
00009  * FFmpeg is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU Lesser General Public
00011  * License as published by the Free Software Foundation; either
00012  * version 2.1 of the License, or (at your option) any later version.
00013  *
00014  * FFmpeg is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017  * Lesser General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU Lesser General Public
00020  * License along with FFmpeg; if not, write to the Free Software
00021  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00022  */
00023 
00038 #include "nellymoser.h"
00039 #include "avcodec.h"
00040 #include "dsputil.h"
00041 
00042 #define BITSTREAM_WRITER_LE
00043 #include "bitstream.h"
00044 
/* Number of entries in pow_table. */
#define POW_TABLE_SIZE   (1 << 11)
/* Exponent bias added when pow_table is filled and compensated for by the
 * shift applied when the table is read in encode_block(). */
#define POW_TABLE_OFFSET 3
/* Number of power-index slots the trellis search keeps per band. */
#define OPT_SIZE         ((1 << 15) + 3000)
00048 
00049 typedef struct NellyMoserEncodeContext {
00050     AVCodecContext  *avctx;
00051     int             last_frame;
00052     int             bufsel;
00053     int             have_saved;
00054     DSPContext      dsp;
00055     MDCTContext     mdct_ctx;
00056     DECLARE_ALIGNED_16(float, mdct_out[NELLY_SAMPLES]);
00057     DECLARE_ALIGNED_16(float, buf[2][3 * NELLY_BUF_LEN]);     
00058     float           (*opt )[NELLY_BANDS];
00059     uint8_t         (*path)[NELLY_BANDS];
00060 } NellyMoserEncodeContext;
00061 
00062 static float pow_table[POW_TABLE_SIZE];     
00063 
/*
 * First guess for the index into ff_nelly_init_table.
 * get_exponent_greedy() indexes this table with (lrintf(value) >> 8) - 20,
 * clipped to 0..95, and then compares the guess with its upper neighbour.
 */
static const uint8_t sf_lut[96] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
};
00072 
/*
 * First guess for the index into ff_nelly_delta_table.
 * get_exponent_greedy() indexes this table with (lrintf(value) >> 8) + 37,
 * clipped to 0..77, and then compares the guess with its upper neighbour.
 */
static const uint8_t sf_delta_lut[78] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
};
00080 
/*
 * First guess for the quantizer index of a scaled coefficient.
 * The table is a concatenation of one section per bit count; section
 * boundaries are given by quant_lut_offset[bits] .. quant_lut_offset[bits + 1] - 1.
 */
static const uint8_t quant_lut[230] = {
    /* 1 bit: index 0 */
     0,

    /* 2 bits: indices 1..3 */
     0,  1,  2,

    /* 3 bits: indices 4..10 */
     0,  1,  2,  3,  4,  5,  6,

    /* 4 bits: indices 11..31 */
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

    /* 5 bits: indices 32..80 */
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

    /* 6 bits: indices 81..229 */
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
00107 
/*
 * Per-bit-count parameters used by encode_block() to index quant_lut:
 * the lookup position is coeff * quant_lut_mul[bits] + quant_lut_add[bits],
 * clipped to quant_lut_offset[bits] .. quant_lut_offset[bits + 1] - 1.
 */
static const float quant_lut_mul[7] = {
    0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6
};
static const float quant_lut_add[7] = {
    0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0
};
static const uint8_t quant_lut_offset[8] = {
    0, 0, 1, 4, 11, 32, 81, 230
};
00111 
00112 void apply_mdct(NellyMoserEncodeContext *s)
00113 {
00114     DECLARE_ALIGNED_16(float, in_buff[NELLY_SAMPLES]);
00115 
00116     memcpy(in_buff, s->buf[s->bufsel], NELLY_BUF_LEN * sizeof(float));
00117     s->dsp.vector_fmul(in_buff, ff_sine_128, NELLY_BUF_LEN);
00118     s->dsp.vector_fmul_reverse(in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
00119                                NELLY_BUF_LEN);
00120     ff_mdct_calc(&s->mdct_ctx, s->mdct_out, in_buff);
00121 
00122     s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN);
00123     s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
00124                                NELLY_BUF_LEN);
00125     ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
00126 }
00127 
00128 static av_cold int encode_init(AVCodecContext *avctx)
00129 {
00130     NellyMoserEncodeContext *s = avctx->priv_data;
00131     int i;
00132 
00133     if (avctx->channels != 1) {
00134         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
00135         return -1;
00136     }
00137 
00138     if (avctx->sample_rate != 8000 && avctx->sample_rate != 11025 &&
00139         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
00140         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
00141         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 11025, 22050 and 44100 sample rate\n");
00142         return -1;
00143     }
00144 
00145     avctx->frame_size = NELLY_SAMPLES;
00146     s->avctx = avctx;
00147     ff_mdct_init(&s->mdct_ctx, 8, 0);
00148     dsputil_init(&s->dsp, avctx);
00149 
00150     /* Generate overlap window */
00151     ff_sine_window_init(ff_sine_128, 128);
00152     for (i = 0; i < POW_TABLE_SIZE; i++)
00153         pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
00154 
00155     if (s->avctx->trellis) {
00156         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
00157         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
00158     }
00159 
00160     return 0;
00161 }
00162 
00163 static av_cold int encode_end(AVCodecContext *avctx)
00164 {
00165     NellyMoserEncodeContext *s = avctx->priv_data;
00166 
00167     ff_mdct_end(&s->mdct_ctx);
00168 
00169     if (s->avctx->trellis) {
00170         av_free(s->opt);
00171         av_free(s->path);
00172     }
00173 
00174     return 0;
00175 }
00176 
/*
 * Store in the caller's variable best_idx the index of the entry of table
 * that is closest to val.
 *
 * LUT gives a first guess from (lrintf(val) >> 8) + LUT_add, clipped to
 * 0 .. LUT_size - 1; the guess is then compared with its upper neighbour
 * table[best_idx + 1] and moved up by one if that entry is closer.
 *
 * An int variable named best_idx must be in scope at the point of use.
 * val is evaluated more than once, so it must not have side effects.
 * The expansion is a single statement and is safe inside unbraced
 * if/else bodies.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size) \
    do { \
        best_idx = \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) > fabs((val) - (table)[best_idx + 1])) \
            best_idx++; \
    } while (0)
00182 
00183 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00184 {
00185     int band, best_idx, power_idx = 0;
00186     float power_candidate;
00187 
00188     //base exponent
00189     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
00190     idx_table[0] = best_idx;
00191     power_idx = ff_nelly_init_table[best_idx];
00192 
00193     for (band = 1; band < NELLY_BANDS; band++) {
00194         power_candidate = cand[band] - power_idx;
00195         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
00196         idx_table[band] = best_idx;
00197         power_idx += ff_nelly_delta_table[best_idx];
00198     }
00199 }
00200 
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number (not used)
 * @return (x - y) squared
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
00207 
00208 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00209 {
00210     int i, j, band, best_idx;
00211     float power_candidate, best_val;
00212 
00213     float  (*opt )[NELLY_BANDS] = s->opt ;
00214     uint8_t(*path)[NELLY_BANDS] = s->path;
00215 
00216     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
00217         opt[0][i] = INFINITY;
00218     }
00219 
00220     for (i = 0; i < 64; i++) {
00221         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
00222         path[0][ff_nelly_init_table[i]] = i;
00223     }
00224 
00225     for (band = 1; band < NELLY_BANDS; band++) {
00226         int q, c = 0;
00227         float tmp;
00228         int idx_min, idx_max, idx;
00229         power_candidate = cand[band];
00230         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
00231             idx_min = FFMAX(0, cand[band] - q);
00232             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
00233             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
00234                 if ( isinf(opt[band - 1][i]) )
00235                     continue;
00236                 for (j = 0; j < 32; j++) {
00237                     idx = i + ff_nelly_delta_table[j];
00238                     if (idx > idx_max)
00239                         break;
00240                     if (idx >= idx_min) {
00241                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
00242                         if (opt[band][idx] > tmp) {
00243                             opt[band][idx] = tmp;
00244                             path[band][idx] = j;
00245                             c = 1;
00246                         }
00247                     }
00248                 }
00249             }
00250         }
00251         assert(c); //FIXME
00252     }
00253 
00254     best_val = INFINITY;
00255     best_idx = -1;
00256     band = NELLY_BANDS - 1;
00257     for (i = 0; i < OPT_SIZE; i++) {
00258         if (best_val > opt[band][i]) {
00259             best_val = opt[band][i];
00260             best_idx = i;
00261         }
00262     }
00263     for (band = NELLY_BANDS - 1; band >= 0; band--) {
00264         idx_table[band] = path[band][best_idx];
00265         if (band) {
00266             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
00267         }
00268     }
00269 }
00270 
00277 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
00278 {
00279     PutBitContext pb;
00280     int i, j, band, block, best_idx, power_idx = 0;
00281     float power_val, coeff, coeff_sum;
00282     float pows[NELLY_FILL_LEN];
00283     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
00284     float cand[NELLY_BANDS];
00285 
00286     apply_mdct(s);
00287 
00288     init_put_bits(&pb, output, output_size * 8);
00289 
00290     i = 0;
00291     for (band = 0; band < NELLY_BANDS; band++) {
00292         coeff_sum = 0;
00293         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00294             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
00295                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
00296         }
00297         cand[band] =
00298             log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
00299     }
00300 
00301     if (s->avctx->trellis) {
00302         get_exponent_dynamic(s, cand, idx_table);
00303     } else {
00304         get_exponent_greedy(s, cand, idx_table);
00305     }
00306 
00307     i = 0;
00308     for (band = 0; band < NELLY_BANDS; band++) {
00309         if (band) {
00310             power_idx += ff_nelly_delta_table[idx_table[band]];
00311             put_bits(&pb, 5, idx_table[band]);
00312         } else {
00313             power_idx = ff_nelly_init_table[idx_table[0]];
00314             put_bits(&pb, 6, idx_table[0]);
00315         }
00316         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
00317         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00318             s->mdct_out[i] *= power_val;
00319             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
00320             pows[i] = power_idx;
00321         }
00322     }
00323 
00324     ff_nelly_get_sample_bits(pows, bits);
00325 
00326     for (block = 0; block < 2; block++) {
00327         for (i = 0; i < NELLY_FILL_LEN; i++) {
00328             if (bits[i] > 0) {
00329                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
00330                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
00331                 best_idx =
00332                     quant_lut[av_clip (
00333                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
00334                             quant_lut_offset[bits[i]],
00335                             quant_lut_offset[bits[i]+1] - 1
00336                             )];
00337                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
00338                     best_idx++;
00339 
00340                 put_bits(&pb, bits[i], best_idx);
00341             }
00342         }
00343         if (!block)
00344             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
00345     }
00346 
00347     flush_put_bits(&pb);
00348 }
00349 
00350 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
00351 {
00352     NellyMoserEncodeContext *s = avctx->priv_data;
00353     int16_t *samples = data;
00354     int i;
00355 
00356     if (s->last_frame)
00357         return 0;
00358 
00359     if (data) {
00360         for (i = 0; i < avctx->frame_size; i++) {
00361             s->buf[s->bufsel][i] = samples[i];
00362         }
00363         for (; i < NELLY_SAMPLES; i++) {
00364             s->buf[s->bufsel][i] = 0;
00365         }
00366         s->bufsel = 1 - s->bufsel;
00367         if (!s->have_saved) {
00368             s->have_saved = 1;
00369             return 0;
00370         }
00371     } else {
00372         memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
00373         s->bufsel = 1 - s->bufsel;
00374         s->last_frame = 1;
00375     }
00376 
00377     if (s->have_saved) {
00378         encode_block(s, frame, buf_size);
00379         return NELLY_BLOCK_LEN;
00380     }
00381     return 0;
00382 }
00383 
00384 AVCodec nellymoser_encoder = {
00385     .name = "nellymoser",
00386     .type = CODEC_TYPE_AUDIO,
00387     .id = CODEC_ID_NELLYMOSER,
00388     .priv_data_size = sizeof(NellyMoserEncodeContext),
00389     .init = encode_init,
00390     .encode = encode_frame,
00391     .close = encode_end,
00392     .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
00393     .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
00394 };