FFmpeg: libavcodec/aacenc.c Source File

00001 /*
00002  * AAC encoder
00003  * Copyright (C) 2008 Konstantin Shishkov
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00027 /***********************************
00028  *              TODOs:
00029  * add sane pulse detection
00030  * add temporal noise shaping
00031  ***********************************/
00032 
00033 #include "libavutil/opt.h"
00034 #include "avcodec.h"
00035 #include "put_bits.h"
00036 #include "dsputil.h"
00037 #include "internal.h"
00038 #include "mpeg4audio.h"
00039 #include "kbdwin.h"
00040 #include "sinewin.h"
00041 
00042 #include "aac.h"
00043 #include "aactab.h"
00044 #include "aacenc.h"
00045 
00046 #include "psymodel.h"
00047 
/** Largest channel count accepted by aac_encode_init(). */
#define AAC_MAX_CHANNELS 6

/**
 * Log an error with av_log() and return AVERROR(EINVAL) from the enclosing
 * function when cond is true.
 *
 * A variable named avctx must be in scope where the macro is used.
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.
 */
#define ERROR_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
            return AVERROR(EINVAL); \
        } \
    } while (0)
00055 
/**
 * Lookup table filled in by aac_encode_init() with ff_aac_pow2sf_tab[i]
 * raised to the power 3/4.
 */
float ff_aac_pow34sf_tab[428];

/*
 * Scalefactor band widths, in spectral coefficients, for 1024-coefficient
 * (long) windows. The widths in each table add up to 1024.
 */

/* 41 bands */
static const uint8_t swb_size_1024_96[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,
    12, 12, 12, 12, 12,
    16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

/* 47 bands */
static const uint8_t swb_size_1024_64[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,
    12, 12, 12,
    16, 16, 16,
    20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40,
    40, 40, 40, 40, 40, 40, 40, 40, 40
};

/* 49 bands */
static const uint8_t swb_size_1024_48[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12,
    16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

/* 51 bands */
static const uint8_t swb_size_1024_32[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12,
    16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

/* 47 bands */
static const uint8_t swb_size_1024_24[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12,
    16, 16, 16,
    20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52,
    64, 64, 64, 64, 64
};

/* 43 bands */
static const uint8_t swb_size_1024_16[] = {
     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16,
    20, 20, 20,
    24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60,
    64, 64, 64
};

/* 40 bands */
static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16,
    20, 20, 20, 20,
    24, 24, 24,
    28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};
00100 
00101 static const uint8_t *swb_size_1024[] = {
00102     swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
00103     swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
00104     swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
00105     swb_size_1024_16, swb_size_1024_16, swb_size_1024_8
00106 };
00107 
/*
 * Scalefactor band widths, in spectral coefficients, for 128-coefficient
 * (short) windows. The widths in each table add up to 128.
 */

/* 12 bands */
static const uint8_t swb_size_128_96[] = {
     4,  4,  4,  4,  4,  4,
     8,  8,  8,
    16, 28, 36
};

/* 14 bands */
static const uint8_t swb_size_128_48[] = {
     4,  4,  4,  4,  4,
     8,  8,  8,
    12, 12, 12,
    16, 16, 16
};

/* 15 bands */
static const uint8_t swb_size_128_24[] = {
     4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,
    12, 12,
    16, 16,
    20
};

/* 15 bands */
static const uint8_t swb_size_128_16[] = {
     4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,
    12, 12,
    16,
    20, 20
};

/* 15 bands */
static const uint8_t swb_size_128_8[] = {
     4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,
    12,
    16,
    20, 20
};
00127 
00128 static const uint8_t *swb_size_128[] = {
00129     /* the last entry on the following row is swb_size_128_64 but is a
00130        duplicate of swb_size_128_96 */
00131     swb_size_128_96, swb_size_128_96, swb_size_128_96,
00132     swb_size_128_48, swb_size_128_48, swb_size_128_48,
00133     swb_size_128_24, swb_size_128_24, swb_size_128_16,
00134     swb_size_128_16, swb_size_128_16, swb_size_128_8
00135 };
00136 
00138 static const uint8_t aac_chan_configs[6][5] = {
00139  {1, TYPE_SCE},                               // 1 channel  - single channel element
00140  {1, TYPE_CPE},                               // 2 channels - channel pair
00141  {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
00142  {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
00143  {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
00144  {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
00145 };
00146 
00150 static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
00151     { 0 },
00152     { 0, 1 },
00153     { 2, 0, 1 },
00154     { 2, 0, 1, 3 },
00155     { 2, 0, 1, 3, 4 },
00156     { 2, 0, 1, 4, 5, 3 },
00157 };
00158 
00163 static void put_audio_specific_config(AVCodecContext *avctx)
00164 {
00165     PutBitContext pb;
00166     AACEncContext *s = avctx->priv_data;
00167 
00168     init_put_bits(&pb, avctx->extradata, avctx->extradata_size*8);
00169     put_bits(&pb, 5, 2); //object type - AAC-LC
00170     put_bits(&pb, 4, s->samplerate_index); //sample rate index
00171     put_bits(&pb, 4, s->channels);
00172     //GASpecificConfig
00173     put_bits(&pb, 1, 0); //frame length - 1024 samples
00174     put_bits(&pb, 1, 0); //does not depend on core coder
00175     put_bits(&pb, 1, 0); //is not extension
00176 
00177     //Explicitly Mark SBR absent
00178     put_bits(&pb, 11, 0x2b7); //sync extension
00179     put_bits(&pb, 5,  AOT_SBR);
00180     put_bits(&pb, 1,  0);
00181     flush_put_bits(&pb);
00182 }
00183 
00184 #define WINDOW_FUNC(type) \
00185 static void apply_ ##type ##_window(DSPContext *dsp, SingleChannelElement *sce, const float *audio)
00186 
00187 WINDOW_FUNC(only_long)
00188 {
00189     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
00190     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
00191     float *out = sce->ret;
00192 
00193     dsp->vector_fmul        (out,        audio,        lwindow, 1024);
00194     dsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
00195 }
00196 
00197 WINDOW_FUNC(long_start)
00198 {
00199     const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
00200     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
00201     float *out = sce->ret;
00202 
00203     dsp->vector_fmul(out, audio, lwindow, 1024);
00204     memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
00205     dsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
00206     memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
00207 }
00208 
00209 WINDOW_FUNC(long_stop)
00210 {
00211     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
00212     const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
00213     float *out = sce->ret;
00214 
00215     memset(out, 0, sizeof(out[0]) * 448);
00216     dsp->vector_fmul(out + 448, audio + 448, swindow, 128);
00217     memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
00218     dsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
00219 }
00220 
00221 WINDOW_FUNC(eight_short)
00222 {
00223     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
00224     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
00225     const float *in = audio + 448;
00226     float *out = sce->ret;
00227     int w;
00228 
00229     for (w = 0; w < 8; w++) {
00230         dsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
00231         out += 128;
00232         in  += 128;
00233         dsp->vector_fmul_reverse(out, in, swindow, 128);
00234         out += 128;
00235     }
00236 }
00237 
00238 static void (*const apply_window[4])(DSPContext *dsp, SingleChannelElement *sce, const float *audio) = {
00239     [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
00240     [LONG_START_SEQUENCE]  = apply_long_start_window,
00241     [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
00242     [LONG_STOP_SEQUENCE]   = apply_long_stop_window
00243 };
00244 
00245 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
00246                                   float *audio)
00247 {
00248     int i;
00249     float *output = sce->ret;
00250 
00251     apply_window[sce->ics.window_sequence[0]](&s->dsp, sce, audio);
00252 
00253     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
00254         s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
00255     else
00256         for (i = 0; i < 1024; i += 128)
00257             s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
00258     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
00259 }
00260 
00265 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
00266 {
00267     int w;
00268 
00269     put_bits(&s->pb, 1, 0);                // ics_reserved bit
00270     put_bits(&s->pb, 2, info->window_sequence[0]);
00271     put_bits(&s->pb, 1, info->use_kb_window[0]);
00272     if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
00273         put_bits(&s->pb, 6, info->max_sfb);
00274         put_bits(&s->pb, 1, 0);            // no prediction
00275     } else {
00276         put_bits(&s->pb, 4, info->max_sfb);
00277         for (w = 1; w < 8; w++)
00278             put_bits(&s->pb, 1, !info->group_len[w]);
00279     }
00280 }
00281 
00286 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
00287 {
00288     int i, w;
00289 
00290     put_bits(pb, 2, cpe->ms_mode);
00291     if (cpe->ms_mode == 1)
00292         for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
00293             for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
00294                 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
00295 }
00296 
00300 static void adjust_frame_information(AACEncContext *apc, ChannelElement *cpe, int chans)
00301 {
00302     int i, w, w2, g, ch;
00303     int start, maxsfb, cmaxsfb;
00304 
00305     for (ch = 0; ch < chans; ch++) {
00306         IndividualChannelStream *ics = &cpe->ch[ch].ics;
00307         start = 0;
00308         maxsfb = 0;
00309         cpe->ch[ch].pulse.num_pulse = 0;
00310         for (w = 0; w < ics->num_windows*16; w += 16) {
00311             for (g = 0; g < ics->num_swb; g++) {
00312                 //apply M/S
00313                 if (cpe->common_window && !ch && cpe->ms_mask[w + g]) {
00314                     for (i = 0; i < ics->swb_sizes[g]; i++) {
00315                         cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0;
00316                         cpe->ch[1].coeffs[start+i] =  cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
00317                     }
00318                 }
00319                 start += ics->swb_sizes[g];
00320             }
00321             for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--)
00322                 ;
00323             maxsfb = FFMAX(maxsfb, cmaxsfb);
00324         }
00325         ics->max_sfb = maxsfb;
00326 
00327         //adjust zero bands for window groups
00328         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
00329             for (g = 0; g < ics->max_sfb; g++) {
00330                 i = 1;
00331                 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
00332                     if (!cpe->ch[ch].zeroes[w2*16 + g]) {
00333                         i = 0;
00334                         break;
00335                     }
00336                 }
00337                 cpe->ch[ch].zeroes[w*16 + g] = i;
00338             }
00339         }
00340     }
00341 
00342     if (chans > 1 && cpe->common_window) {
00343         IndividualChannelStream *ics0 = &cpe->ch[0].ics;
00344         IndividualChannelStream *ics1 = &cpe->ch[1].ics;
00345         int msc = 0;
00346         ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
00347         ics1->max_sfb = ics0->max_sfb;
00348         for (w = 0; w < ics0->num_windows*16; w += 16)
00349             for (i = 0; i < ics0->max_sfb; i++)
00350                 if (cpe->ms_mask[w+i])
00351                     msc++;
00352         if (msc == 0 || ics0->max_sfb == 0)
00353             cpe->ms_mode = 0;
00354         else
00355             cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
00356     }
00357 }
00358 
00362 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
00363 {
00364     int w;
00365 
00366     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
00367         s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
00368 }
00369 
00373 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
00374                                  SingleChannelElement *sce)
00375 {
00376     int off = sce->sf_idx[0], diff;
00377     int i, w;
00378 
00379     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
00380         for (i = 0; i < sce->ics.max_sfb; i++) {
00381             if (!sce->zeroes[w*16 + i]) {
00382                 diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
00383                 if (diff < 0 || diff > 120)
00384                     av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n");
00385                 off = sce->sf_idx[w*16 + i];
00386                 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
00387             }
00388         }
00389     }
00390 }
00391 
00395 static void encode_pulses(AACEncContext *s, Pulse *pulse)
00396 {
00397     int i;
00398 
00399     put_bits(&s->pb, 1, !!pulse->num_pulse);
00400     if (!pulse->num_pulse)
00401         return;
00402 
00403     put_bits(&s->pb, 2, pulse->num_pulse - 1);
00404     put_bits(&s->pb, 6, pulse->start);
00405     for (i = 0; i < pulse->num_pulse; i++) {
00406         put_bits(&s->pb, 5, pulse->pos[i]);
00407         put_bits(&s->pb, 4, pulse->amp[i]);
00408     }
00409 }
00410 
00414 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
00415 {
00416     int start, i, w, w2;
00417 
00418     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
00419         start = 0;
00420         for (i = 0; i < sce->ics.max_sfb; i++) {
00421             if (sce->zeroes[w*16 + i]) {
00422                 start += sce->ics.swb_sizes[i];
00423                 continue;
00424             }
00425             for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
00426                 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
00427                                                    sce->ics.swb_sizes[i],
00428                                                    sce->sf_idx[w*16 + i],
00429                                                    sce->band_type[w*16 + i],
00430                                                    s->lambda);
00431             start += sce->ics.swb_sizes[i];
00432         }
00433     }
00434 }
00435 
00439 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
00440                                      SingleChannelElement *sce,
00441                                      int common_window)
00442 {
00443     put_bits(&s->pb, 8, sce->sf_idx[0]);
00444     if (!common_window)
00445         put_ics_info(s, &sce->ics);
00446     encode_band_info(s, sce);
00447     encode_scale_factors(avctx, s, sce);
00448     encode_pulses(s, &sce->pulse);
00449     put_bits(&s->pb, 1, 0); //tns
00450     put_bits(&s->pb, 1, 0); //ssr
00451     encode_spectral_coeffs(s, sce);
00452     return 0;
00453 }
00454 
00458 static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s,
00459                                const char *name)
00460 {
00461     int i, namelen, padbits;
00462 
00463     namelen = strlen(name) + 2;
00464     put_bits(&s->pb, 3, TYPE_FIL);
00465     put_bits(&s->pb, 4, FFMIN(namelen, 15));
00466     if (namelen >= 15)
00467         put_bits(&s->pb, 8, namelen - 14);
00468     put_bits(&s->pb, 4, 0); //extension type - filler
00469     padbits = -put_bits_count(&s->pb) & 7;
00470     avpriv_align_put_bits(&s->pb);
00471     for (i = 0; i < namelen - 2; i++)
00472         put_bits(&s->pb, 8, name[i]);
00473     put_bits(&s->pb, 12 - padbits, 0);
00474 }
00475 
00476 /*
00477  * Deinterleave input samples.
00478  * Channels are reordered from libavcodec's default order to AAC order.
00479  */
00480 static void deinterleave_input_samples(AACEncContext *s, const AVFrame *frame)
00481 {
00482     int ch, i;
00483     const int sinc = s->channels;
00484     const uint8_t *channel_map = aac_chan_maps[sinc - 1];
00485 
00486     /* deinterleave and remap input samples */
00487     for (ch = 0; ch < sinc; ch++) {
00488         /* copy last 1024 samples of previous frame to the start of the current frame */
00489         memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
00490 
00491         /* deinterleave */
00492         i = 2048;
00493         if (frame) {
00494             const float *sptr = ((const float *)frame->data[0]) + channel_map[ch];
00495             for (; i < 2048 + frame->nb_samples; i++) {
00496                 s->planar_samples[ch][i] = *sptr;
00497                 sptr += sinc;
00498             }
00499         }
00500         memset(&s->planar_samples[ch][i], 0,
00501                (3072 - i) * sizeof(s->planar_samples[0][0]));
00502     }
00503 }
00504 
00505 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
00506                             const AVFrame *frame, int *got_packet_ptr)
00507 {
00508     AACEncContext *s = avctx->priv_data;
00509     float **samples = s->planar_samples, *samples2, *la, *overlap;
00510     ChannelElement *cpe;
00511     int i, ch, w, g, chans, tag, start_ch, ret;
00512     int chan_el_counter[4];
00513     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
00514 
00515     if (s->last_frame == 2)
00516         return 0;
00517 
00518     /* add current frame to queue */
00519     if (frame) {
00520         if ((ret = ff_af_queue_add(&s->afq, frame) < 0))
00521             return ret;
00522     }
00523 
00524     deinterleave_input_samples(s, frame);
00525     if (s->psypp)
00526         ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
00527 
00528     if (!avctx->frame_number)
00529         return 0;
00530 
00531     start_ch = 0;
00532     for (i = 0; i < s->chan_map[0]; i++) {
00533         FFPsyWindowInfo* wi = windows + start_ch;
00534         tag      = s->chan_map[i+1];
00535         chans    = tag == TYPE_CPE ? 2 : 1;
00536         cpe      = &s->cpe[i];
00537         for (ch = 0; ch < chans; ch++) {
00538             IndividualChannelStream *ics = &cpe->ch[ch].ics;
00539             int cur_channel = start_ch + ch;
00540             overlap  = &samples[cur_channel][0];
00541             samples2 = overlap + 1024;
00542             la       = samples2 + (448+64);
00543             if (!frame)
00544                 la = NULL;
00545             if (tag == TYPE_LFE) {
00546                 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
00547                 wi[ch].window_shape   = 0;
00548                 wi[ch].num_windows    = 1;
00549                 wi[ch].grouping[0]    = 1;
00550 
00551                 /* Only the lowest 12 coefficients are used in a LFE channel.
00552                  * The expression below results in only the bottom 8 coefficients
00553                  * being used for 11.025kHz to 16kHz sample rates.
00554                  */
00555                 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
00556             } else {
00557                 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
00558                                               ics->window_sequence[0]);
00559             }
00560             ics->window_sequence[1] = ics->window_sequence[0];
00561             ics->window_sequence[0] = wi[ch].window_type[0];
00562             ics->use_kb_window[1]   = ics->use_kb_window[0];
00563             ics->use_kb_window[0]   = wi[ch].window_shape;
00564             ics->num_windows        = wi[ch].num_windows;
00565             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
00566             ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
00567             for (w = 0; w < ics->num_windows; w++)
00568                 ics->group_len[w] = wi[ch].grouping[w];
00569 
00570             apply_window_and_mdct(s, &cpe->ch[ch], overlap);
00571         }
00572         start_ch += chans;
00573     }
00574     if ((ret = ff_alloc_packet2(avctx, avpkt, 768 * s->channels))) {
00575         av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
00576         return ret;
00577     }
00578     do {
00579         int frame_bits;
00580 
00581         init_put_bits(&s->pb, avpkt->data, avpkt->size);
00582 
00583         if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
00584             put_bitstream_info(avctx, s, LIBAVCODEC_IDENT);
00585         start_ch = 0;
00586         memset(chan_el_counter, 0, sizeof(chan_el_counter));
00587         for (i = 0; i < s->chan_map[0]; i++) {
00588             FFPsyWindowInfo* wi = windows + start_ch;
00589             const float *coeffs[2];
00590             tag      = s->chan_map[i+1];
00591             chans    = tag == TYPE_CPE ? 2 : 1;
00592             cpe      = &s->cpe[i];
00593             put_bits(&s->pb, 3, tag);
00594             put_bits(&s->pb, 4, chan_el_counter[tag]++);
00595             for (ch = 0; ch < chans; ch++)
00596                 coeffs[ch] = cpe->ch[ch].coeffs;
00597             s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
00598             for (ch = 0; ch < chans; ch++) {
00599                 s->cur_channel = start_ch * 2 + ch;
00600                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
00601             }
00602             cpe->common_window = 0;
00603             if (chans > 1
00604                 && wi[0].window_type[0] == wi[1].window_type[0]
00605                 && wi[0].window_shape   == wi[1].window_shape) {
00606 
00607                 cpe->common_window = 1;
00608                 for (w = 0; w < wi[0].num_windows; w++) {
00609                     if (wi[0].grouping[w] != wi[1].grouping[w]) {
00610                         cpe->common_window = 0;
00611                         break;
00612                     }
00613                 }
00614             }
00615             s->cur_channel = start_ch * 2;
00616             if (s->options.stereo_mode && cpe->common_window) {
00617                 if (s->options.stereo_mode > 0) {
00618                     IndividualChannelStream *ics = &cpe->ch[0].ics;
00619                     for (w = 0; w < ics->num_windows; w += ics->group_len[w])
00620                         for (g = 0;  g < ics->num_swb; g++)
00621                             cpe->ms_mask[w*16+g] = 1;
00622                 } else if (s->coder->search_for_ms) {
00623                     s->coder->search_for_ms(s, cpe, s->lambda);
00624                 }
00625             }
00626             adjust_frame_information(s, cpe, chans);
00627             if (chans == 2) {
00628                 put_bits(&s->pb, 1, cpe->common_window);
00629                 if (cpe->common_window) {
00630                     put_ics_info(s, &cpe->ch[0].ics);
00631                     encode_ms_info(&s->pb, cpe);
00632                 }
00633             }
00634             for (ch = 0; ch < chans; ch++) {
00635                 s->cur_channel = start_ch + ch;
00636                 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
00637             }
00638             start_ch += chans;
00639         }
00640 
00641         frame_bits = put_bits_count(&s->pb);
00642         if (frame_bits <= 6144 * s->channels - 3) {
00643             s->psy.bitres.bits = frame_bits / s->channels;
00644             break;
00645         }
00646 
00647         s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
00648 
00649     } while (1);
00650 
00651     put_bits(&s->pb, 3, TYPE_END);
00652     flush_put_bits(&s->pb);
00653     avctx->frame_bits = put_bits_count(&s->pb);
00654 
00655     // rate control stuff
00656     if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
00657         float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
00658         s->lambda *= ratio;
00659         s->lambda = FFMIN(s->lambda, 65536.f);
00660     }
00661 
00662     if (!frame)
00663         s->last_frame++;
00664 
00665     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
00666                        &avpkt->duration);
00667 
00668     avpkt->size = put_bits_count(&s->pb) >> 3;
00669     *got_packet_ptr = 1;
00670     return 0;
00671 }
00672 
00673 static av_cold int aac_encode_end(AVCodecContext *avctx)
00674 {
00675     AACEncContext *s = avctx->priv_data;
00676 
00677     ff_mdct_end(&s->mdct1024);
00678     ff_mdct_end(&s->mdct128);
00679     ff_psy_end(&s->psy);
00680     if (s->psypp)
00681         ff_psy_preprocess_end(s->psypp);
00682     av_freep(&s->buffer.samples);
00683     av_freep(&s->cpe);
00684     ff_af_queue_close(&s->afq);
00685 #if FF_API_OLD_ENCODE_AUDIO
00686     av_freep(&avctx->coded_frame);
00687 #endif
00688     return 0;
00689 }
00690 
00691 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
00692 {
00693     int ret = 0;
00694 
00695     ff_dsputil_init(&s->dsp, avctx);
00696 
00697     // window init
00698     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
00699     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
00700     ff_init_ff_sine_windows(10);
00701     ff_init_ff_sine_windows(7);
00702 
00703     if (ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0))
00704         return ret;
00705     if (ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0))
00706         return ret;
00707 
00708     return 0;
00709 }
00710 
00711 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
00712 {
00713     int ch;
00714     FF_ALLOCZ_OR_GOTO(avctx, s->buffer.samples, 3 * 1024 * s->channels * sizeof(s->buffer.samples[0]), alloc_fail);
00715     FF_ALLOCZ_OR_GOTO(avctx, s->cpe, sizeof(ChannelElement) * s->chan_map[0], alloc_fail);
00716     FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
00717 
00718     for(ch = 0; ch < s->channels; ch++)
00719         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
00720 
00721 #if FF_API_OLD_ENCODE_AUDIO
00722     if (!(avctx->coded_frame = avcodec_alloc_frame()))
00723         goto alloc_fail;
00724 #endif
00725 
00726     return 0;
00727 alloc_fail:
00728     return AVERROR(ENOMEM);
00729 }
00730 
00731 static av_cold int aac_encode_init(AVCodecContext *avctx)
00732 {
00733     AACEncContext *s = avctx->priv_data;
00734     int i, ret = 0;
00735     const uint8_t *sizes[2];
00736     uint8_t grouping[AAC_MAX_CHANNELS];
00737     int lengths[2];
00738 
00739     avctx->frame_size = 1024;
00740 
00741     for (i = 0; i < 16; i++)
00742         if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
00743             break;
00744 
00745     s->channels = avctx->channels;
00746 
00747     ERROR_IF(i == 16,
00748              "Unsupported sample rate %d\n", avctx->sample_rate);
00749     ERROR_IF(s->channels > AAC_MAX_CHANNELS,
00750              "Unsupported number of channels: %d\n", s->channels);
00751     ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
00752              "Unsupported profile %d\n", avctx->profile);
00753     ERROR_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
00754              "Too many bits per frame requested\n");
00755 
00756     s->samplerate_index = i;
00757 
00758     s->chan_map = aac_chan_configs[s->channels-1];
00759 
00760     if (ret = dsp_init(avctx, s))
00761         goto fail;
00762 
00763     if (ret = alloc_buffers(avctx, s))
00764         goto fail;
00765 
00766     avctx->extradata_size = 5;
00767     put_audio_specific_config(avctx);
00768 
00769     sizes[0]   = swb_size_1024[i];
00770     sizes[1]   = swb_size_128[i];
00771     lengths[0] = ff_aac_num_swb_1024[i];
00772     lengths[1] = ff_aac_num_swb_128[i];
00773     for (i = 0; i < s->chan_map[0]; i++)
00774         grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
00775     if (ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths, s->chan_map[0], grouping))
00776         goto fail;
00777     s->psypp = ff_psy_preprocess_init(avctx);
00778     s->coder = &ff_aac_coders[s->options.aac_coder];
00779 
00780     s->lambda = avctx->global_quality ? avctx->global_quality : 120;
00781 
00782     ff_aac_tableinit();
00783 
00784     for (i = 0; i < 428; i++)
00785         ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
00786 
00787     avctx->delay = 1024;
00788     ff_af_queue_init(avctx, &s->afq);
00789 
00790     return 0;
00791 fail:
00792     aac_encode_end(avctx);
00793     return ret;
00794 }
00795 
00796 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
00797 static const AVOption aacenc_options[] = {
00798     {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.dbl = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
00799         {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
00800         {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.dbl =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
00801         {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.dbl =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
00802     {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.dbl = 2}, 0, AAC_CODER_NB-1, AACENC_FLAGS},
00803     {NULL}
00804 };
00805 
00806 static const AVClass aacenc_class = {
00807     "AAC encoder",
00808     av_default_item_name,
00809     aacenc_options,
00810     LIBAVUTIL_VERSION_INT,
00811 };
00812 
00813 AVCodec ff_aac_encoder = {
00814     .name           = "aac",
00815     .type           = AVMEDIA_TYPE_AUDIO,
00816     .id             = CODEC_ID_AAC,
00817     .priv_data_size = sizeof(AACEncContext),
00818     .init           = aac_encode_init,
00819     .encode2        = aac_encode_frame,
00820     .close          = aac_encode_end,
00821     .supported_samplerates = avpriv_mpeg4audio_sample_rates,
00822     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
00823                       CODEC_CAP_EXPERIMENTAL,
00824     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
00825                                                      AV_SAMPLE_FMT_NONE },
00826     .long_name      = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
00827     .priv_class     = &aacenc_class,
00828 };