FFmpeg: libavcodec/sonic.c Source File

00001 /*
00002  * Simple free lossless/lossy audio codec
00003  * Copyright (c) 2004 Alex Beregszaszi
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 #include "avcodec.h"
00022 #include "get_bits.h"
00023 #include "golomb.h"
00024 #include "internal.h"
00025 
/* Largest number of interleaved channels the codec handles. */
#define MAX_CHANNELS 2

/* Inter-channel decorrelation modes stored in SonicContext.decorrelation. */
enum {
    MID_SIDE   = 0,
    LEFT_SIDE  = 1,
    RIGHT_SIDE = 2
};
00046 
00047 typedef struct SonicContext {
00048     AVFrame frame;
00049     int lossless, decorrelation;
00050 
00051     int num_taps, downsampling;
00052     double quantization;
00053 
00054     int channels, samplerate, block_align, frame_size;
00055 
00056     int *tap_quant;
00057     int *int_samples;
00058     int *coded_samples[MAX_CHANNELS];
00059 
00060     // for encoding
00061     int *tail;
00062     int tail_size;
00063     int *window;
00064     int window_size;
00065 
00066     // for decoding
00067     int *predictor_k;
00068     int *predictor_state[MAX_CHANNELS];
00069 } SonicContext;
00070 
/* Fixed-point precision of the lattice coefficients and of lossy samples. */
enum {
    LATTICE_SHIFT  = 10,
    SAMPLE_SHIFT   = 4,
    LATTICE_FACTOR = 1 << LATTICE_SHIFT,
    SAMPLE_FACTOR  = 1 << SAMPLE_SHIFT
};

/* Tuning constants of the lossy encoder's rate control. */
#define BASE_QUANT      0.6
#define RATE_VARIATION  3.0
00078 
/*
 * Divide a by b after adding b/2 to the magnitude of a, so that the
 * truncating division rounds symmetrically around zero.
 */
static inline int divide(int a, int b)
{
    const int negative  = a < 0;
    const int magnitude = negative ? -a : a;
    const int quotient  = (magnitude + b / 2) / b;

    return negative ? -quotient : quotient;
}
00086 
/* Right-shift a by b bits after adding half of the divisor (1 << (b-1)). */
static inline int shift(int a,int b)
{
    const int half = 1 << (b - 1);
    const int biased = a + half;

    return biased >> b;
}
00091 
/* Right-shift a by b bits and add one to the result when a is negative. */
static inline int shift_down(int a,int b)
{
    int result = a >> b;

    if (a < 0)
        result++;

    return result;
}
00096 
00097 #if 1
00098 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
00099 {
00100     int i;
00101 
00102     for (i = 0; i < entries; i++)
00103         set_se_golomb(pb, buf[i]);
00104 
00105     return 1;
00106 }
00107 
00108 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
00109 {
00110     int i;
00111 
00112     for (i = 0; i < entries; i++)
00113         buf[i] = get_se_golomb(gb);
00114 
00115     return 1;
00116 }
00117 
00118 #else
00119 
/* The adaptive run-length step changes by step / ADAPT_LEVEL after every symbol. */
#define ADAPT_LEVEL 8

/* Number of bits needed to represent x; 0 for x == 0. */
static int bits_to_store(uint64_t x)
{
    int width;

    for (width = 0; x != 0; x >>= 1)
        width++;

    return width;
}
00133 
00134 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
00135 {
00136     int i, bits;
00137 
00138     if (!max)
00139         return;
00140 
00141     bits = bits_to_store(max);
00142 
00143     for (i = 0; i < bits-1; i++)
00144         put_bits(pb, 1, value & (1 << i));
00145 
00146     if ( (value | (1 << (bits-1))) <= max)
00147         put_bits(pb, 1, value & (1 << (bits-1)));
00148 }
00149 
00150 static unsigned int read_uint_max(GetBitContext *gb, int max)
00151 {
00152     int i, bits, value = 0;
00153 
00154     if (!max)
00155         return 0;
00156 
00157     bits = bits_to_store(max);
00158 
00159     for (i = 0; i < bits-1; i++)
00160         if (get_bits1(gb))
00161             value += 1 << i;
00162 
00163     if ( (value | (1<<(bits-1))) <= max)
00164         if (get_bits1(gb))
00165             value += 1 << (bits-1);
00166 
00167     return value;
00168 }
00169 
00170 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
00171 {
00172     int i, j, x = 0, low_bits = 0, max = 0;
00173     int step = 256, pos = 0, dominant = 0, any = 0;
00174     int *copy, *bits;
00175 
00176     copy = av_mallocz(4* entries);
00177     if (!copy)
00178         return -1;
00179 
00180     if (base_2_part)
00181     {
00182         int energy = 0;
00183 
00184         for (i = 0; i < entries; i++)
00185             energy += abs(buf[i]);
00186 
00187         low_bits = bits_to_store(energy / (entries * 2));
00188         if (low_bits > 15)
00189             low_bits = 15;
00190 
00191         put_bits(pb, 4, low_bits);
00192     }
00193 
00194     for (i = 0; i < entries; i++)
00195     {
00196         put_bits(pb, low_bits, abs(buf[i]));
00197         copy[i] = abs(buf[i]) >> low_bits;
00198         if (copy[i] > max)
00199             max = abs(copy[i]);
00200     }
00201 
00202     bits = av_mallocz(4* entries*max);
00203     if (!bits)
00204     {
00205 //        av_free(copy);
00206         return -1;
00207     }
00208 
00209     for (i = 0; i <= max; i++)
00210     {
00211         for (j = 0; j < entries; j++)
00212             if (copy[j] >= i)
00213                 bits[x++] = copy[j] > i;
00214     }
00215 
00216     // store bitstream
00217     while (pos < x)
00218     {
00219         int steplet = step >> 8;
00220 
00221         if (pos + steplet > x)
00222             steplet = x - pos;
00223 
00224         for (i = 0; i < steplet; i++)
00225             if (bits[i+pos] != dominant)
00226                 any = 1;
00227 
00228         put_bits(pb, 1, any);
00229 
00230         if (!any)
00231         {
00232             pos += steplet;
00233             step += step / ADAPT_LEVEL;
00234         }
00235         else
00236         {
00237             int interloper = 0;
00238 
00239             while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
00240                 interloper++;
00241 
00242             // note change
00243             write_uint_max(pb, interloper, (step >> 8) - 1);
00244 
00245             pos += interloper + 1;
00246             step -= step / ADAPT_LEVEL;
00247         }
00248 
00249         if (step < 256)
00250         {
00251             step = 65536 / step;
00252             dominant = !dominant;
00253         }
00254     }
00255 
00256     // store signs
00257     for (i = 0; i < entries; i++)
00258         if (buf[i])
00259             put_bits(pb, 1, buf[i] < 0);
00260 
00261 //    av_free(bits);
00262 //    av_free(copy);
00263 
00264     return 0;
00265 }
00266 
00267 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
00268 {
00269     int i, low_bits = 0, x = 0;
00270     int n_zeros = 0, step = 256, dominant = 0;
00271     int pos = 0, level = 0;
00272     int *bits = av_mallocz(4* entries);
00273 
00274     if (!bits)
00275         return -1;
00276 
00277     if (base_2_part)
00278     {
00279         low_bits = get_bits(gb, 4);
00280 
00281         if (low_bits)
00282             for (i = 0; i < entries; i++)
00283                 buf[i] = get_bits(gb, low_bits);
00284     }
00285 
00286 //    av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
00287 
00288     while (n_zeros < entries)
00289     {
00290         int steplet = step >> 8;
00291 
00292         if (!get_bits1(gb))
00293         {
00294             for (i = 0; i < steplet; i++)
00295                 bits[x++] = dominant;
00296 
00297             if (!dominant)
00298                 n_zeros += steplet;
00299 
00300             step += step / ADAPT_LEVEL;
00301         }
00302         else
00303         {
00304             int actual_run = read_uint_max(gb, steplet-1);
00305 
00306 //            av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
00307 
00308             for (i = 0; i < actual_run; i++)
00309                 bits[x++] = dominant;
00310 
00311             bits[x++] = !dominant;
00312 
00313             if (!dominant)
00314                 n_zeros += actual_run;
00315             else
00316                 n_zeros++;
00317 
00318             step -= step / ADAPT_LEVEL;
00319         }
00320 
00321         if (step < 256)
00322         {
00323             step = 65536 / step;
00324             dominant = !dominant;
00325         }
00326     }
00327 
00328     // reconstruct unsigned values
00329     n_zeros = 0;
00330     for (i = 0; n_zeros < entries; i++)
00331     {
00332         while(1)
00333         {
00334             if (pos >= entries)
00335             {
00336                 pos = 0;
00337                 level += 1 << low_bits;
00338             }
00339 
00340             if (buf[pos] >= level)
00341                 break;
00342 
00343             pos++;
00344         }
00345 
00346         if (bits[i])
00347             buf[pos] += 1 << low_bits;
00348         else
00349             n_zeros++;
00350 
00351         pos++;
00352     }
00353 //    av_free(bits);
00354 
00355     // read signs
00356     for (i = 0; i < entries; i++)
00357         if (buf[i] && get_bits1(gb))
00358             buf[i] = -buf[i];
00359 
00360 //    av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
00361 
00362     return 0;
00363 }
00364 #endif
00365 
00366 static void predictor_init_state(int *k, int *state, int order)
00367 {
00368     int i;
00369 
00370     for (i = order-2; i >= 0; i--)
00371     {
00372         int j, p, x = state[i];
00373 
00374         for (j = 0, p = i+1; p < order; j++,p++)
00375             {
00376             int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
00377             state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
00378             x = tmp;
00379         }
00380     }
00381 }
00382 
00383 static int predictor_calc_error(int *k, int *state, int order, int error)
00384 {
00385     int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
00386 
00387 #if 1
00388     int *k_ptr = &(k[order-2]),
00389         *state_ptr = &(state[order-2]);
00390     for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
00391     {
00392         int k_value = *k_ptr, state_value = *state_ptr;
00393         x -= shift_down(k_value * state_value, LATTICE_SHIFT);
00394         state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
00395     }
00396 #else
00397     for (i = order-2; i >= 0; i--)
00398     {
00399         x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
00400         state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
00401     }
00402 #endif
00403 
00404     // don't drift too far, to avoid overflows
00405     if (x >  (SAMPLE_FACTOR<<16)) x =  (SAMPLE_FACTOR<<16);
00406     if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
00407 
00408     state[0] = x;
00409 
00410     return x;
00411 }
00412 
00413 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
00414 // Heavily modified Levinson-Durbin algorithm which
00415 // copes better with quantization, and calculates the
00416 // actual whitened result as it goes.
00417 
00418 static void modified_levinson_durbin(int *window, int window_entries,
00419         int *out, int out_entries, int channels, int *tap_quant)
00420 {
00421     int i;
00422     int *state = av_mallocz(4* window_entries);
00423 
00424     memcpy(state, window, 4* window_entries);
00425 
00426     for (i = 0; i < out_entries; i++)
00427     {
00428         int step = (i+1)*channels, k, j;
00429         double xx = 0.0, xy = 0.0;
00430 #if 1
00431         int *x_ptr = &(window[step]), *state_ptr = &(state[0]);
00432         j = window_entries - step;
00433         for (;j>=0;j--,x_ptr++,state_ptr++)
00434         {
00435             double x_value = *x_ptr, state_value = *state_ptr;
00436             xx += state_value*state_value;
00437             xy += x_value*state_value;
00438         }
00439 #else
00440         for (j = 0; j <= (window_entries - step); j++);
00441         {
00442             double stepval = window[step+j], stateval = window[j];
00443 //            xx += (double)window[j]*(double)window[j];
00444 //            xy += (double)window[step+j]*(double)window[j];
00445             xx += stateval*stateval;
00446             xy += stepval*stateval;
00447         }
00448 #endif
00449         if (xx == 0.0)
00450             k = 0;
00451         else
00452             k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
00453 
00454         if (k > (LATTICE_FACTOR/tap_quant[i]))
00455             k = LATTICE_FACTOR/tap_quant[i];
00456         if (-k > (LATTICE_FACTOR/tap_quant[i]))
00457             k = -(LATTICE_FACTOR/tap_quant[i]);
00458 
00459         out[i] = k;
00460         k *= tap_quant[i];
00461 
00462 #if 1
00463         x_ptr = &(window[step]);
00464         state_ptr = &(state[0]);
00465         j = window_entries - step;
00466         for (;j>=0;j--,x_ptr++,state_ptr++)
00467         {
00468             int x_value = *x_ptr, state_value = *state_ptr;
00469             *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
00470             *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
00471         }
00472 #else
00473         for (j=0; j <= (window_entries - step); j++)
00474         {
00475             int stepval = window[step+j], stateval=state[j];
00476             window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
00477             state[j] += shift_down(k * stepval, LATTICE_SHIFT);
00478         }
00479 #endif
00480     }
00481 
00482     av_free(state);
00483 }
00484 
/* Map a sample rate in Hz to its header code, or -1 if it has none. */
static inline int code_samplerate(int samplerate)
{
    static const int rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    const int count = (int)(sizeof(rates) / sizeof(rates[0]));
    int code;

    for (code = 0; code < count; code++)
    {
        if (rates[code] == samplerate)
            return code;
    }

    return -1;
}
00501 
00502 static av_cold int sonic_encode_init(AVCodecContext *avctx)
00503 {
00504     SonicContext *s = avctx->priv_data;
00505     PutBitContext pb;
00506     int i, version = 0;
00507 
00508     if (avctx->channels > MAX_CHANNELS)
00509     {
00510         av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
00511         return -1; /* only stereo or mono for now */
00512     }
00513 
00514     if (avctx->channels == 2)
00515         s->decorrelation = MID_SIDE;
00516 
00517     if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
00518     {
00519         s->lossless = 1;
00520         s->num_taps = 32;
00521         s->downsampling = 1;
00522         s->quantization = 0.0;
00523     }
00524     else
00525     {
00526         s->num_taps = 128;
00527         s->downsampling = 2;
00528         s->quantization = 1.0;
00529     }
00530 
00531     // max tap 2048
00532     if ((s->num_taps < 32) || (s->num_taps > 1024) ||
00533         ((s->num_taps>>5)<<5 != s->num_taps))
00534     {
00535         av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
00536         return -1;
00537     }
00538 
00539     // generate taps
00540     s->tap_quant = av_mallocz(4* s->num_taps);
00541     for (i = 0; i < s->num_taps; i++)
00542         s->tap_quant[i] = (int)(sqrt(i+1));
00543 
00544     s->channels = avctx->channels;
00545     s->samplerate = avctx->sample_rate;
00546 
00547     s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
00548     s->frame_size = s->channels*s->block_align*s->downsampling;
00549 
00550     s->tail_size = s->num_taps*s->channels;
00551     s->tail = av_mallocz(4 * s->tail_size);
00552     if (!s->tail)
00553         return -1;
00554 
00555     s->predictor_k = av_mallocz(4 * s->num_taps);
00556     if (!s->predictor_k)
00557         return -1;
00558 
00559     for (i = 0; i < s->channels; i++)
00560     {
00561         s->coded_samples[i] = av_mallocz(4* s->block_align);
00562         if (!s->coded_samples[i])
00563             return -1;
00564     }
00565 
00566     s->int_samples = av_mallocz(4* s->frame_size);
00567 
00568     s->window_size = ((2*s->tail_size)+s->frame_size);
00569     s->window = av_mallocz(4* s->window_size);
00570     if (!s->window)
00571         return -1;
00572 
00573     avctx->extradata = av_mallocz(16);
00574     if (!avctx->extradata)
00575         return -1;
00576     init_put_bits(&pb, avctx->extradata, 16*8);
00577 
00578     put_bits(&pb, 2, version); // version
00579     if (version == 1)
00580     {
00581         put_bits(&pb, 2, s->channels);
00582         put_bits(&pb, 4, code_samplerate(s->samplerate));
00583     }
00584     put_bits(&pb, 1, s->lossless);
00585     if (!s->lossless)
00586         put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
00587     put_bits(&pb, 2, s->decorrelation);
00588     put_bits(&pb, 2, s->downsampling);
00589     put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
00590     put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
00591 
00592     flush_put_bits(&pb);
00593     avctx->extradata_size = put_bits_count(&pb)/8;
00594 
00595     av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
00596         version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
00597 
00598     avctx->coded_frame = avcodec_alloc_frame();
00599     if (!avctx->coded_frame)
00600         return AVERROR(ENOMEM);
00601     avctx->coded_frame->key_frame = 1;
00602     avctx->frame_size = s->block_align*s->downsampling;
00603 
00604     return 0;
00605 }
00606 
00607 static av_cold int sonic_encode_close(AVCodecContext *avctx)
00608 {
00609     SonicContext *s = avctx->priv_data;
00610     int i;
00611 
00612     av_freep(&avctx->coded_frame);
00613 
00614     for (i = 0; i < s->channels; i++)
00615         av_free(s->coded_samples[i]);
00616 
00617     av_free(s->predictor_k);
00618     av_free(s->tail);
00619     av_free(s->tap_quant);
00620     av_free(s->window);
00621     av_free(s->int_samples);
00622 
00623     return 0;
00624 }
00625 
00626 static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
00627                               const AVFrame *frame, int *got_packet_ptr)
00628 {
00629     SonicContext *s = avctx->priv_data;
00630     PutBitContext pb;
00631     int i, j, ch, quant = 0, x = 0;
00632     int ret;
00633     const short *samples = (const int16_t*)frame->data[0];
00634 
00635     if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000)))
00636         return ret;
00637 
00638     init_put_bits(&pb, avpkt->data, avpkt->size);
00639 
00640     // short -> internal
00641     for (i = 0; i < s->frame_size; i++)
00642         s->int_samples[i] = samples[i];
00643 
00644     if (!s->lossless)
00645         for (i = 0; i < s->frame_size; i++)
00646             s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
00647 
00648     switch(s->decorrelation)
00649     {
00650         case MID_SIDE:
00651             for (i = 0; i < s->frame_size; i += s->channels)
00652             {
00653                 s->int_samples[i] += s->int_samples[i+1];
00654                 s->int_samples[i+1] -= shift(s->int_samples[i], 1);
00655             }
00656             break;
00657         case LEFT_SIDE:
00658             for (i = 0; i < s->frame_size; i += s->channels)
00659                 s->int_samples[i+1] -= s->int_samples[i];
00660             break;
00661         case RIGHT_SIDE:
00662             for (i = 0; i < s->frame_size; i += s->channels)
00663                 s->int_samples[i] -= s->int_samples[i+1];
00664             break;
00665     }
00666 
00667     memset(s->window, 0, 4* s->window_size);
00668 
00669     for (i = 0; i < s->tail_size; i++)
00670         s->window[x++] = s->tail[i];
00671 
00672     for (i = 0; i < s->frame_size; i++)
00673         s->window[x++] = s->int_samples[i];
00674 
00675     for (i = 0; i < s->tail_size; i++)
00676         s->window[x++] = 0;
00677 
00678     for (i = 0; i < s->tail_size; i++)
00679         s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
00680 
00681     // generate taps
00682     modified_levinson_durbin(s->window, s->window_size,
00683                 s->predictor_k, s->num_taps, s->channels, s->tap_quant);
00684     if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
00685         return -1;
00686 
00687     for (ch = 0; ch < s->channels; ch++)
00688     {
00689         x = s->tail_size+ch;
00690         for (i = 0; i < s->block_align; i++)
00691         {
00692             int sum = 0;
00693             for (j = 0; j < s->downsampling; j++, x += s->channels)
00694                 sum += s->window[x];
00695             s->coded_samples[ch][i] = sum;
00696         }
00697     }
00698 
00699     // simple rate control code
00700     if (!s->lossless)
00701     {
00702         double energy1 = 0.0, energy2 = 0.0;
00703         for (ch = 0; ch < s->channels; ch++)
00704         {
00705             for (i = 0; i < s->block_align; i++)
00706             {
00707                 double sample = s->coded_samples[ch][i];
00708                 energy2 += sample*sample;
00709                 energy1 += fabs(sample);
00710             }
00711         }
00712 
00713         energy2 = sqrt(energy2/(s->channels*s->block_align));
00714         energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);
00715 
00716         // increase bitrate when samples are like a gaussian distribution
00717         // reduce bitrate when samples are like a two-tailed exponential distribution
00718 
00719         if (energy2 > energy1)
00720             energy2 += (energy2-energy1)*RATE_VARIATION;
00721 
00722         quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
00723 //        av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
00724 
00725         if (quant < 1)
00726             quant = 1;
00727         if (quant > 65534)
00728             quant = 65534;
00729 
00730         set_ue_golomb(&pb, quant);
00731 
00732         quant *= SAMPLE_FACTOR;
00733     }
00734 
00735     // write out coded samples
00736     for (ch = 0; ch < s->channels; ch++)
00737     {
00738         if (!s->lossless)
00739             for (i = 0; i < s->block_align; i++)
00740                 s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);
00741 
00742         if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
00743             return -1;
00744     }
00745 
00746 //    av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
00747 
00748     flush_put_bits(&pb);
00749     avpkt->size = (put_bits_count(&pb)+7)/8;
00750     *got_packet_ptr = 1;
00751     return 0;
00752 }
00753 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
00754 
00755 #if CONFIG_SONIC_DECODER
/* Sample rates selected by the 4-bit index of a version 1 header. */
static const int samplerate_table[] = {
    44100, 22050, 11025,
    96000, 48000, 32000,
    24000, 16000,  8000,
};
00758 
00759 static av_cold int sonic_decode_init(AVCodecContext *avctx)
00760 {
00761     SonicContext *s = avctx->priv_data;
00762     GetBitContext gb;
00763     int i, version;
00764 
00765     s->channels = avctx->channels;
00766     s->samplerate = avctx->sample_rate;
00767 
00768     avcodec_get_frame_defaults(&s->frame);
00769     avctx->coded_frame = &s->frame;
00770 
00771     if (!avctx->extradata)
00772     {
00773         av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
00774         return -1;
00775     }
00776 
00777     init_get_bits(&gb, avctx->extradata, avctx->extradata_size);
00778 
00779     version = get_bits(&gb, 2);
00780     if (version > 1)
00781     {
00782         av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
00783         return -1;
00784     }
00785 
00786     if (version == 1)
00787     {
00788         s->channels = get_bits(&gb, 2);
00789         s->samplerate = samplerate_table[get_bits(&gb, 4)];
00790         av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
00791             s->channels, s->samplerate);
00792     }
00793 
00794     if (s->channels > MAX_CHANNELS)
00795     {
00796         av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
00797         return -1;
00798     }
00799 
00800     s->lossless = get_bits1(&gb);
00801     if (!s->lossless)
00802         skip_bits(&gb, 3); // XXX FIXME
00803     s->decorrelation = get_bits(&gb, 2);
00804 
00805     s->downsampling = get_bits(&gb, 2);
00806     if (!s->downsampling) {
00807         av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
00808         return AVERROR_INVALIDDATA;
00809     }
00810 
00811     s->num_taps = (get_bits(&gb, 5)+1)<<5;
00812     if (get_bits1(&gb)) // XXX FIXME
00813         av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
00814 
00815     s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
00816     s->frame_size = s->channels*s->block_align*s->downsampling;
00817 //    avctx->frame_size = s->block_align;
00818 
00819     av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
00820         version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
00821 
00822     // generate taps
00823     s->tap_quant = av_mallocz(4* s->num_taps);
00824     for (i = 0; i < s->num_taps; i++)
00825         s->tap_quant[i] = (int)(sqrt(i+1));
00826 
00827     s->predictor_k = av_mallocz(4* s->num_taps);
00828 
00829     for (i = 0; i < s->channels; i++)
00830     {
00831         s->predictor_state[i] = av_mallocz(4* s->num_taps);
00832         if (!s->predictor_state[i])
00833             return -1;
00834     }
00835 
00836     for (i = 0; i < s->channels; i++)
00837     {
00838         s->coded_samples[i] = av_mallocz(4* s->block_align);
00839         if (!s->coded_samples[i])
00840             return -1;
00841     }
00842     s->int_samples = av_mallocz(4* s->frame_size);
00843 
00844     avctx->sample_fmt = AV_SAMPLE_FMT_S16;
00845     return 0;
00846 }
00847 
00848 static av_cold int sonic_decode_close(AVCodecContext *avctx)
00849 {
00850     SonicContext *s = avctx->priv_data;
00851     int i;
00852 
00853     av_free(s->int_samples);
00854     av_free(s->tap_quant);
00855     av_free(s->predictor_k);
00856 
00857     for (i = 0; i < s->channels; i++)
00858     {
00859         av_free(s->predictor_state[i]);
00860         av_free(s->coded_samples[i]);
00861     }
00862 
00863     return 0;
00864 }
00865 
00866 static int sonic_decode_frame(AVCodecContext *avctx,
00867                             void *data, int *got_frame_ptr,
00868                             AVPacket *avpkt)
00869 {
00870     const uint8_t *buf = avpkt->data;
00871     int buf_size = avpkt->size;
00872     SonicContext *s = avctx->priv_data;
00873     GetBitContext gb;
00874     int i, quant, ch, j, ret;
00875     int16_t *samples;
00876 
00877     if (buf_size == 0) return 0;
00878 
00879     s->frame.nb_samples = s->frame_size;
00880     if ((ret = avctx->get_buffer(avctx, &s->frame)) < 0) {
00881         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
00882         return ret;
00883     }
00884     samples = (int16_t *)s->frame.data[0];
00885 
00886 //    av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
00887 
00888     init_get_bits(&gb, buf, buf_size*8);
00889 
00890     intlist_read(&gb, s->predictor_k, s->num_taps, 0);
00891 
00892     // dequantize
00893     for (i = 0; i < s->num_taps; i++)
00894         s->predictor_k[i] *= s->tap_quant[i];
00895 
00896     if (s->lossless)
00897         quant = 1;
00898     else
00899         quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
00900 
00901 //    av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
00902 
00903     for (ch = 0; ch < s->channels; ch++)
00904     {
00905         int x = ch;
00906 
00907         predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
00908 
00909         intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
00910 
00911         for (i = 0; i < s->block_align; i++)
00912         {
00913             for (j = 0; j < s->downsampling - 1; j++)
00914             {
00915                 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
00916                 x += s->channels;
00917             }
00918 
00919             s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
00920             x += s->channels;
00921         }
00922 
00923         for (i = 0; i < s->num_taps; i++)
00924             s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
00925     }
00926 
00927     switch(s->decorrelation)
00928     {
00929         case MID_SIDE:
00930             for (i = 0; i < s->frame_size; i += s->channels)
00931             {
00932                 s->int_samples[i+1] += shift(s->int_samples[i], 1);
00933                 s->int_samples[i] -= s->int_samples[i+1];
00934             }
00935             break;
00936         case LEFT_SIDE:
00937             for (i = 0; i < s->frame_size; i += s->channels)
00938                 s->int_samples[i+1] += s->int_samples[i];
00939             break;
00940         case RIGHT_SIDE:
00941             for (i = 0; i < s->frame_size; i += s->channels)
00942                 s->int_samples[i] += s->int_samples[i+1];
00943             break;
00944     }
00945 
00946     if (!s->lossless)
00947         for (i = 0; i < s->frame_size; i++)
00948             s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
00949 
00950     // internal -> short
00951     for (i = 0; i < s->frame_size; i++)
00952         samples[i] = av_clip_int16(s->int_samples[i]);
00953 
00954     align_get_bits(&gb);
00955 
00956     *got_frame_ptr = 1;
00957     *(AVFrame*)data = s->frame;
00958 
00959     return (get_bits_count(&gb)+7)/8;
00960 }
00961 
00962 AVCodec ff_sonic_decoder = {
00963     .name           = "sonic",
00964     .type           = AVMEDIA_TYPE_AUDIO,
00965     .id             = AV_CODEC_ID_SONIC,
00966     .priv_data_size = sizeof(SonicContext),
00967     .init           = sonic_decode_init,
00968     .close          = sonic_decode_close,
00969     .decode         = sonic_decode_frame,
00970     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_EXPERIMENTAL,
00971     .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
00972 };
00973 #endif /* CONFIG_SONIC_DECODER */
00974 
00975 #if CONFIG_SONIC_ENCODER
00976 AVCodec ff_sonic_encoder = {
00977     .name           = "sonic",
00978     .type           = AVMEDIA_TYPE_AUDIO,
00979     .id             = AV_CODEC_ID_SONIC,
00980     .priv_data_size = sizeof(SonicContext),
00981     .init           = sonic_encode_init,
00982     .encode2        = sonic_encode_frame,
00983     .capabilities   = CODEC_CAP_EXPERIMENTAL,
00984     .close          = sonic_encode_close,
00985     .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
00986 };
00987 #endif
00988 
00989 #if CONFIG_SONIC_LS_ENCODER
00990 AVCodec ff_sonic_ls_encoder = {
00991     .name           = "sonicls",
00992     .type           = AVMEDIA_TYPE_AUDIO,
00993     .id             = AV_CODEC_ID_SONIC_LS,
00994     .priv_data_size = sizeof(SonicContext),
00995     .init           = sonic_encode_init,
00996     .encode2        = sonic_encode_frame,
00997     .capabilities   = CODEC_CAP_EXPERIMENTAL,
00998     .close          = sonic_encode_close,
00999     .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
01000 };
01001 #endif