FFmpeg: libavcodec/aacenc.c Source File

00001 /*
00002  * AAC encoder
00003  * Copyright (C) 2008 Konstantin Shishkov
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00027 /***********************************
00028  *              TODOs:
00029  * add sane pulse detection
00030  * add temporal noise shaping
00031  ***********************************/
00032 
00033 #include "libavutil/opt.h"
00034 #include "avcodec.h"
00035 #include "put_bits.h"
00036 #include "dsputil.h"
00037 #include "mpeg4audio.h"
00038 #include "kbdwin.h"
00039 #include "sinewin.h"
00040 
00041 #include "aac.h"
00042 #include "aactab.h"
00043 #include "aacenc.h"
00044 
00045 #include "psymodel.h"
00046 
/** Largest channel count the encoder accepts. */
#define AAC_MAX_CHANNELS 6

/**
 * Log an error and make the enclosing function return AVERROR(EINVAL)
 * when @p cond is true.
 *
 * The macro expects a variable named avctx to be visible at the point of use.
 * It is wrapped in do { } while (0) so that it acts as a single statement and
 * can be used safely as the body of an unbraced if/else.
 */
#define ERROR_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
            return AVERROR(EINVAL); \
        } \
    } while (0)

/**
 * Lookup table with 428 entries; aac_encode_init() fills entry i with
 * ff_aac_pow2sf_tab[i] raised to the power 3/4.
 */
float ff_aac_pow34sf_tab[428];
00056 
/*
 * Scalefactor band widths (in spectral coefficients) for long windows.
 * Each table sums to 1024. The numeric suffix presumably names the sample
 * rate in kHz the layout was designed for.
 */
static const uint8_t swb_size_1024_96[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_64[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
};

static const uint8_t swb_size_1024_48[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

static const uint8_t swb_size_1024_32[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

static const uint8_t swb_size_1024_24[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_16[] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
};

static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

/**
 * Long-window band layout for each sample rate index.
 *
 * aac_encode_init() indexes this table with the position of the sample rate
 * in avpriv_mpeg4audio_sample_rates. The 13th entry (index 12) reuses the
 * layout of index 11 so that the last defined MPEG-4 sample rate index no
 * longer reads past the end of the table.
 */
static const uint8_t *const swb_size_1024[] = {
    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
    swb_size_1024_8
};

/*
 * Scalefactor band widths (in spectral coefficients) for short windows.
 * Each table sums to 128.
 */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
};

static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
};

static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
};

static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
};

static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
};

/**
 * Short-window band layout for each sample rate index; indexed the same way
 * as swb_size_1024 and extended to 13 entries for the same reason.
 */
static const uint8_t *const swb_size_128[] = {
    /* the last entry on the following row is swb_size_128_64 but is a
       duplicate of swb_size_128_96 */
    swb_size_128_96, swb_size_128_96, swb_size_128_96,
    swb_size_128_48, swb_size_128_48, swb_size_128_48,
    swb_size_128_24, swb_size_128_24, swb_size_128_16,
    swb_size_128_16, swb_size_128_16, swb_size_128_8,
    swb_size_128_8
};
00135 
00137 static const uint8_t aac_chan_configs[6][5] = {
00138  {1, TYPE_SCE},                               // 1 channel  - single channel element
00139  {1, TYPE_CPE},                               // 2 channels - channel pair
00140  {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
00141  {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
00142  {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
00143  {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
00144 };
00145 
00149 static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
00150     { 0 },
00151     { 0, 1 },
00152     { 2, 0, 1 },
00153     { 2, 0, 1, 3 },
00154     { 2, 0, 1, 3, 4 },
00155     { 2, 0, 1, 4, 5, 3 },
00156 };
00157 
00162 static void put_audio_specific_config(AVCodecContext *avctx)
00163 {
00164     PutBitContext pb;
00165     AACEncContext *s = avctx->priv_data;
00166 
00167     init_put_bits(&pb, avctx->extradata, avctx->extradata_size*8);
00168     put_bits(&pb, 5, 2); //object type - AAC-LC
00169     put_bits(&pb, 4, s->samplerate_index); //sample rate index
00170     put_bits(&pb, 4, s->channels);
00171     //GASpecificConfig
00172     put_bits(&pb, 1, 0); //frame length - 1024 samples
00173     put_bits(&pb, 1, 0); //does not depend on core coder
00174     put_bits(&pb, 1, 0); //is not extension
00175 
00176     //Explicitly Mark SBR absent
00177     put_bits(&pb, 11, 0x2b7); //sync extension
00178     put_bits(&pb, 5,  AOT_SBR);
00179     put_bits(&pb, 1,  0);
00180     flush_put_bits(&pb);
00181 }
00182 
00183 #define WINDOW_FUNC(type) \
00184 static void apply_ ##type ##_window(DSPContext *dsp, SingleChannelElement *sce, const float *audio)
00185 
00186 WINDOW_FUNC(only_long)
00187 {
00188     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
00189     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
00190     float *out = sce->ret;
00191 
00192     dsp->vector_fmul        (out,        audio,        lwindow, 1024);
00193     dsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
00194 }
00195 
00196 WINDOW_FUNC(long_start)
00197 {
00198     const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
00199     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
00200     float *out = sce->ret;
00201 
00202     dsp->vector_fmul(out, audio, lwindow, 1024);
00203     memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
00204     dsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
00205     memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
00206 }
00207 
00208 WINDOW_FUNC(long_stop)
00209 {
00210     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
00211     const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
00212     float *out = sce->ret;
00213 
00214     memset(out, 0, sizeof(out[0]) * 448);
00215     dsp->vector_fmul(out + 448, audio + 448, swindow, 128);
00216     memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
00217     dsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
00218 }
00219 
00220 WINDOW_FUNC(eight_short)
00221 {
00222     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
00223     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
00224     const float *in = audio + 448;
00225     float *out = sce->ret;
00226 
00227     for (int w = 0; w < 8; w++) {
00228         dsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
00229         out += 128;
00230         in  += 128;
00231         dsp->vector_fmul_reverse(out, in, swindow, 128);
00232         out += 128;
00233     }
00234 }
00235 
00236 static void (*const apply_window[4])(DSPContext *dsp, SingleChannelElement *sce, const float *audio) = {
00237     [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
00238     [LONG_START_SEQUENCE]  = apply_long_start_window,
00239     [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
00240     [LONG_STOP_SEQUENCE]   = apply_long_stop_window
00241 };
00242 
00243 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
00244                                   float *audio)
00245 {
00246     int i;
00247     float *output = sce->ret;
00248 
00249     apply_window[sce->ics.window_sequence[0]](&s->dsp, sce, audio);
00250 
00251     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
00252         s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
00253     else
00254         for (i = 0; i < 1024; i += 128)
00255             s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
00256     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
00257 }
00258 
00263 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
00264 {
00265     int w;
00266 
00267     put_bits(&s->pb, 1, 0);                // ics_reserved bit
00268     put_bits(&s->pb, 2, info->window_sequence[0]);
00269     put_bits(&s->pb, 1, info->use_kb_window[0]);
00270     if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
00271         put_bits(&s->pb, 6, info->max_sfb);
00272         put_bits(&s->pb, 1, 0);            // no prediction
00273     } else {
00274         put_bits(&s->pb, 4, info->max_sfb);
00275         for (w = 1; w < 8; w++)
00276             put_bits(&s->pb, 1, !info->group_len[w]);
00277     }
00278 }
00279 
00284 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
00285 {
00286     int i, w;
00287 
00288     put_bits(pb, 2, cpe->ms_mode);
00289     if (cpe->ms_mode == 1)
00290         for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
00291             for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
00292                 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
00293 }
00294 
00298 static void adjust_frame_information(AACEncContext *apc, ChannelElement *cpe, int chans)
00299 {
00300     int i, w, w2, g, ch;
00301     int start, maxsfb, cmaxsfb;
00302 
00303     for (ch = 0; ch < chans; ch++) {
00304         IndividualChannelStream *ics = &cpe->ch[ch].ics;
00305         start = 0;
00306         maxsfb = 0;
00307         cpe->ch[ch].pulse.num_pulse = 0;
00308         for (w = 0; w < ics->num_windows*16; w += 16) {
00309             for (g = 0; g < ics->num_swb; g++) {
00310                 //apply M/S
00311                 if (cpe->common_window && !ch && cpe->ms_mask[w + g]) {
00312                     for (i = 0; i < ics->swb_sizes[g]; i++) {
00313                         cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0;
00314                         cpe->ch[1].coeffs[start+i] =  cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
00315                     }
00316                 }
00317                 start += ics->swb_sizes[g];
00318             }
00319             for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--)
00320                 ;
00321             maxsfb = FFMAX(maxsfb, cmaxsfb);
00322         }
00323         ics->max_sfb = maxsfb;
00324 
00325         //adjust zero bands for window groups
00326         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
00327             for (g = 0; g < ics->max_sfb; g++) {
00328                 i = 1;
00329                 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
00330                     if (!cpe->ch[ch].zeroes[w2*16 + g]) {
00331                         i = 0;
00332                         break;
00333                     }
00334                 }
00335                 cpe->ch[ch].zeroes[w*16 + g] = i;
00336             }
00337         }
00338     }
00339 
00340     if (chans > 1 && cpe->common_window) {
00341         IndividualChannelStream *ics0 = &cpe->ch[0].ics;
00342         IndividualChannelStream *ics1 = &cpe->ch[1].ics;
00343         int msc = 0;
00344         ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
00345         ics1->max_sfb = ics0->max_sfb;
00346         for (w = 0; w < ics0->num_windows*16; w += 16)
00347             for (i = 0; i < ics0->max_sfb; i++)
00348                 if (cpe->ms_mask[w+i])
00349                     msc++;
00350         if (msc == 0 || ics0->max_sfb == 0)
00351             cpe->ms_mode = 0;
00352         else
00353             cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
00354     }
00355 }
00356 
00360 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
00361 {
00362     int w;
00363 
00364     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
00365         s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
00366 }
00367 
00371 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
00372                                  SingleChannelElement *sce)
00373 {
00374     int off = sce->sf_idx[0], diff;
00375     int i, w;
00376 
00377     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
00378         for (i = 0; i < sce->ics.max_sfb; i++) {
00379             if (!sce->zeroes[w*16 + i]) {
00380                 diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
00381                 if (diff < 0 || diff > 120)
00382                     av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n");
00383                 off = sce->sf_idx[w*16 + i];
00384                 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
00385             }
00386         }
00387     }
00388 }
00389 
00393 static void encode_pulses(AACEncContext *s, Pulse *pulse)
00394 {
00395     int i;
00396 
00397     put_bits(&s->pb, 1, !!pulse->num_pulse);
00398     if (!pulse->num_pulse)
00399         return;
00400 
00401     put_bits(&s->pb, 2, pulse->num_pulse - 1);
00402     put_bits(&s->pb, 6, pulse->start);
00403     for (i = 0; i < pulse->num_pulse; i++) {
00404         put_bits(&s->pb, 5, pulse->pos[i]);
00405         put_bits(&s->pb, 4, pulse->amp[i]);
00406     }
00407 }
00408 
00412 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
00413 {
00414     int start, i, w, w2;
00415 
00416     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
00417         start = 0;
00418         for (i = 0; i < sce->ics.max_sfb; i++) {
00419             if (sce->zeroes[w*16 + i]) {
00420                 start += sce->ics.swb_sizes[i];
00421                 continue;
00422             }
00423             for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
00424                 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
00425                                                    sce->ics.swb_sizes[i],
00426                                                    sce->sf_idx[w*16 + i],
00427                                                    sce->band_type[w*16 + i],
00428                                                    s->lambda);
00429             start += sce->ics.swb_sizes[i];
00430         }
00431     }
00432 }
00433 
00437 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
00438                                      SingleChannelElement *sce,
00439                                      int common_window)
00440 {
00441     put_bits(&s->pb, 8, sce->sf_idx[0]);
00442     if (!common_window)
00443         put_ics_info(s, &sce->ics);
00444     encode_band_info(s, sce);
00445     encode_scale_factors(avctx, s, sce);
00446     encode_pulses(s, &sce->pulse);
00447     put_bits(&s->pb, 1, 0); //tns
00448     put_bits(&s->pb, 1, 0); //ssr
00449     encode_spectral_coeffs(s, sce);
00450     return 0;
00451 }
00452 
00456 static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s,
00457                                const char *name)
00458 {
00459     int i, namelen, padbits;
00460 
00461     namelen = strlen(name) + 2;
00462     put_bits(&s->pb, 3, TYPE_FIL);
00463     put_bits(&s->pb, 4, FFMIN(namelen, 15));
00464     if (namelen >= 15)
00465         put_bits(&s->pb, 8, namelen - 14);
00466     put_bits(&s->pb, 4, 0); //extension type - filler
00467     padbits = -put_bits_count(&s->pb) & 7;
00468     avpriv_align_put_bits(&s->pb);
00469     for (i = 0; i < namelen - 2; i++)
00470         put_bits(&s->pb, 8, name[i]);
00471     put_bits(&s->pb, 12 - padbits, 0);
00472 }
00473 
00474 /*
00475  * Deinterleave input samples.
00476  * Channels are reordered from Libav's default order to AAC order.
00477  */
00478 static void deinterleave_input_samples(AACEncContext *s,
00479                                        const float *samples)
00480 {
00481     int ch, i;
00482     const int sinc = s->channels;
00483     const uint8_t *channel_map = aac_chan_maps[sinc - 1];
00484 
00485     /* deinterleave and remap input samples */
00486     for (ch = 0; ch < sinc; ch++) {
00487         const float *sptr = samples + channel_map[ch];
00488 
00489         /* copy last 1024 samples of previous frame to the start of the current frame */
00490         memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
00491 
00492         /* deinterleave */
00493         for (i = 2048; i < 3072; i++) {
00494             s->planar_samples[ch][i] = *sptr;
00495             sptr += sinc;
00496         }
00497     }
00498 }
00499 
00500 static int aac_encode_frame(AVCodecContext *avctx,
00501                             uint8_t *frame, int buf_size, void *data)
00502 {
00503     AACEncContext *s = avctx->priv_data;
00504     float **samples = s->planar_samples, *samples2, *la, *overlap;
00505     ChannelElement *cpe;
00506     int i, ch, w, g, chans, tag, start_ch;
00507     int chan_el_counter[4];
00508     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
00509 
00510     if (s->last_frame)
00511         return 0;
00512 
00513     if (data) {
00514         deinterleave_input_samples(s, data);
00515         if (s->psypp)
00516             ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
00517     }
00518 
00519     if (!avctx->frame_number)
00520         return 0;
00521 
00522     start_ch = 0;
00523     for (i = 0; i < s->chan_map[0]; i++) {
00524         FFPsyWindowInfo* wi = windows + start_ch;
00525         tag      = s->chan_map[i+1];
00526         chans    = tag == TYPE_CPE ? 2 : 1;
00527         cpe      = &s->cpe[i];
00528         for (ch = 0; ch < chans; ch++) {
00529             IndividualChannelStream *ics = &cpe->ch[ch].ics;
00530             int cur_channel = start_ch + ch;
00531             overlap  = &samples[cur_channel][0];
00532             samples2 = overlap + 1024;
00533             la       = samples2 + (448+64);
00534             if (!data)
00535                 la = NULL;
00536             if (tag == TYPE_LFE) {
00537                 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
00538                 wi[ch].window_shape   = 0;
00539                 wi[ch].num_windows    = 1;
00540                 wi[ch].grouping[0]    = 1;
00541 
00542                 /* Only the lowest 12 coefficients are used in a LFE channel.
00543                  * The expression below results in only the bottom 8 coefficients
00544                  * being used for 11.025kHz to 16kHz sample rates.
00545                  */
00546                 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
00547             } else {
00548                 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
00549                                               ics->window_sequence[0]);
00550             }
00551             ics->window_sequence[1] = ics->window_sequence[0];
00552             ics->window_sequence[0] = wi[ch].window_type[0];
00553             ics->use_kb_window[1]   = ics->use_kb_window[0];
00554             ics->use_kb_window[0]   = wi[ch].window_shape;
00555             ics->num_windows        = wi[ch].num_windows;
00556             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
00557             ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
00558             for (w = 0; w < ics->num_windows; w++)
00559                 ics->group_len[w] = wi[ch].grouping[w];
00560 
00561             apply_window_and_mdct(s, &cpe->ch[ch], overlap);
00562         }
00563         start_ch += chans;
00564     }
00565     do {
00566         int frame_bits;
00567         init_put_bits(&s->pb, frame, buf_size*8);
00568         if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
00569             put_bitstream_info(avctx, s, LIBAVCODEC_IDENT);
00570         start_ch = 0;
00571         memset(chan_el_counter, 0, sizeof(chan_el_counter));
00572         for (i = 0; i < s->chan_map[0]; i++) {
00573             FFPsyWindowInfo* wi = windows + start_ch;
00574             const float *coeffs[2];
00575             tag      = s->chan_map[i+1];
00576             chans    = tag == TYPE_CPE ? 2 : 1;
00577             cpe      = &s->cpe[i];
00578             put_bits(&s->pb, 3, tag);
00579             put_bits(&s->pb, 4, chan_el_counter[tag]++);
00580             for (ch = 0; ch < chans; ch++)
00581                 coeffs[ch] = cpe->ch[ch].coeffs;
00582             s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
00583             for (ch = 0; ch < chans; ch++) {
00584                 s->cur_channel = start_ch * 2 + ch;
00585                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
00586             }
00587             cpe->common_window = 0;
00588             if (chans > 1
00589                 && wi[0].window_type[0] == wi[1].window_type[0]
00590                 && wi[0].window_shape   == wi[1].window_shape) {
00591 
00592                 cpe->common_window = 1;
00593                 for (w = 0; w < wi[0].num_windows; w++) {
00594                     if (wi[0].grouping[w] != wi[1].grouping[w]) {
00595                         cpe->common_window = 0;
00596                         break;
00597                     }
00598                 }
00599             }
00600             s->cur_channel = start_ch * 2;
00601             if (s->options.stereo_mode && cpe->common_window) {
00602                 if (s->options.stereo_mode > 0) {
00603                     IndividualChannelStream *ics = &cpe->ch[0].ics;
00604                     for (w = 0; w < ics->num_windows; w += ics->group_len[w])
00605                         for (g = 0;  g < ics->num_swb; g++)
00606                             cpe->ms_mask[w*16+g] = 1;
00607                 } else if (s->coder->search_for_ms) {
00608                     s->coder->search_for_ms(s, cpe, s->lambda);
00609                 }
00610             }
00611             adjust_frame_information(s, cpe, chans);
00612             if (chans == 2) {
00613                 put_bits(&s->pb, 1, cpe->common_window);
00614                 if (cpe->common_window) {
00615                     put_ics_info(s, &cpe->ch[0].ics);
00616                     encode_ms_info(&s->pb, cpe);
00617                 }
00618             }
00619             for (ch = 0; ch < chans; ch++) {
00620                 s->cur_channel = start_ch + ch;
00621                 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
00622             }
00623             start_ch += chans;
00624         }
00625 
00626         frame_bits = put_bits_count(&s->pb);
00627         if (frame_bits <= 6144 * s->channels - 3) {
00628             s->psy.bitres.bits = frame_bits / s->channels;
00629             break;
00630         }
00631 
00632         s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
00633 
00634     } while (1);
00635 
00636     put_bits(&s->pb, 3, TYPE_END);
00637     flush_put_bits(&s->pb);
00638     avctx->frame_bits = put_bits_count(&s->pb);
00639 
00640     // rate control stuff
00641     if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
00642         float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
00643         s->lambda *= ratio;
00644         s->lambda = FFMIN(s->lambda, 65536.f);
00645     }
00646 
00647     if (!data)
00648         s->last_frame = 1;
00649 
00650     return put_bits_count(&s->pb)>>3;
00651 }
00652 
00653 static av_cold int aac_encode_end(AVCodecContext *avctx)
00654 {
00655     AACEncContext *s = avctx->priv_data;
00656 
00657     ff_mdct_end(&s->mdct1024);
00658     ff_mdct_end(&s->mdct128);
00659     ff_psy_end(&s->psy);
00660     if (s->psypp)
00661         ff_psy_preprocess_end(s->psypp);
00662     av_freep(&s->buffer.samples);
00663     av_freep(&s->cpe);
00664     return 0;
00665 }
00666 
00667 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
00668 {
00669     int ret = 0;
00670 
00671     dsputil_init(&s->dsp, avctx);
00672 
00673     // window init
00674     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
00675     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
00676     ff_init_ff_sine_windows(10);
00677     ff_init_ff_sine_windows(7);
00678 
00679     if (ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0))
00680         return ret;
00681     if (ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0))
00682         return ret;
00683 
00684     return 0;
00685 }
00686 
00687 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
00688 {
00689     FF_ALLOCZ_OR_GOTO(avctx, s->buffer.samples, 3 * 1024 * s->channels * sizeof(s->buffer.samples[0]), alloc_fail);
00690     FF_ALLOCZ_OR_GOTO(avctx, s->cpe, sizeof(ChannelElement) * s->chan_map[0], alloc_fail);
00691     FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
00692 
00693     for(int ch = 0; ch < s->channels; ch++)
00694         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
00695 
00696     return 0;
00697 alloc_fail:
00698     return AVERROR(ENOMEM);
00699 }
00700 
00701 static av_cold int aac_encode_init(AVCodecContext *avctx)
00702 {
00703     AACEncContext *s = avctx->priv_data;
00704     int i, ret = 0;
00705     const uint8_t *sizes[2];
00706     uint8_t grouping[AAC_MAX_CHANNELS];
00707     int lengths[2];
00708 
00709     avctx->frame_size = 1024;
00710 
00711     for (i = 0; i < 16; i++)
00712         if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
00713             break;
00714 
00715     s->channels = avctx->channels;
00716 
00717     ERROR_IF(i == 16,
00718              "Unsupported sample rate %d\n", avctx->sample_rate);
00719     ERROR_IF(s->channels > AAC_MAX_CHANNELS,
00720              "Unsupported number of channels: %d\n", s->channels);
00721     ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
00722              "Unsupported profile %d\n", avctx->profile);
00723     ERROR_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
00724              "Too many bits per frame requested\n");
00725 
00726     s->samplerate_index = i;
00727 
00728     s->chan_map = aac_chan_configs[s->channels-1];
00729 
00730     if (ret = dsp_init(avctx, s))
00731         goto fail;
00732 
00733     if (ret = alloc_buffers(avctx, s))
00734         goto fail;
00735 
00736     avctx->extradata_size = 5;
00737     put_audio_specific_config(avctx);
00738 
00739     sizes[0]   = swb_size_1024[i];
00740     sizes[1]   = swb_size_128[i];
00741     lengths[0] = ff_aac_num_swb_1024[i];
00742     lengths[1] = ff_aac_num_swb_128[i];
00743     for (i = 0; i < s->chan_map[0]; i++)
00744         grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
00745     if (ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths, s->chan_map[0], grouping))
00746         goto fail;
00747     s->psypp = ff_psy_preprocess_init(avctx);
00748     s->coder = &ff_aac_coders[s->options.aac_coder];
00749 
00750     s->lambda = avctx->global_quality ? avctx->global_quality : 120;
00751 
00752     ff_aac_tableinit();
00753 
00754     for (i = 0; i < 428; i++)
00755         ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
00756 
00757     return 0;
00758 fail:
00759     aac_encode_end(avctx);
00760     return ret;
00761 }
00762 
00763 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
00764 static const AVOption aacenc_options[] = {
00765     {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.dbl = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
00766         {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
00767         {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.dbl =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
00768         {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.dbl =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
00769     {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.dbl = 2}, 0, AAC_CODER_NB-1, AACENC_FLAGS},
00770     {NULL}
00771 };
00772 
00773 static const AVClass aacenc_class = {
00774     "AAC encoder",
00775     av_default_item_name,
00776     aacenc_options,
00777     LIBAVUTIL_VERSION_INT,
00778 };
00779 
00780 AVCodec ff_aac_encoder = {
00781     .name           = "aac",
00782     .type           = AVMEDIA_TYPE_AUDIO,
00783     .id             = CODEC_ID_AAC,
00784     .priv_data_size = sizeof(AACEncContext),
00785     .init           = aac_encode_init,
00786     .encode         = aac_encode_frame,
00787     .close          = aac_encode_end,
00788     .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL,
00789     .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
00790     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
00791     .priv_class = &aacenc_class,
00792 };