FFmpeg: libavcodec/truespeech.c Source File

00001 /*
00002  * DSP Group TrueSpeech compatible decoder
00003  * Copyright (c) 2005 Konstantin Shishkov
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00022 #include "libavutil/intreadwrite.h"
00023 #include "avcodec.h"
00024 #include "dsputil.h"
00025 #include "get_bits.h"
00026 
00027 #include "truespeech_data.h"
00036 typedef struct {
00037     AVFrame frame;
00038     DSPContext dsp;
00039     /* input data */
00040     DECLARE_ALIGNED(16, uint8_t, buffer)[32];
00041     int16_t vector[8];  
00042     int offset1[2];     
00043     int offset2[4];     
00044     int pulseoff[4];    
00045     int pulsepos[4];    
00046     int pulseval[4];    
00047     int flag;           
00048     /* temporary data */
00049     int filtbuf[146];   // some big vector used for storing filters
00050     int prevfilt[8];    // filter from previous frame
00051     int16_t tmp1[8];    // coefficients for adding to out
00052     int16_t tmp2[8];    // coefficients for adding to out
00053     int16_t tmp3[8];    // coefficients for adding to out
00054     int16_t cvector[8]; // correlated input vector
00055     int filtval;        // gain value for one function
00056     int16_t newvec[60]; // tmp vector
00057     int16_t filters[32]; // filters for every subframe
00058 } TSContext;
00059 
00060 static av_cold int truespeech_decode_init(AVCodecContext * avctx)
00061 {
00062     TSContext *c = avctx->priv_data;
00063 
00064     if (avctx->channels != 1) {
00065         av_log_ask_for_sample(avctx, "Unsupported channel count: %d\n", avctx->channels);
00066         return AVERROR(EINVAL);
00067     }
00068 
00069     avctx->sample_fmt = AV_SAMPLE_FMT_S16;
00070 
00071     ff_dsputil_init(&c->dsp, avctx);
00072 
00073     avcodec_get_frame_defaults(&c->frame);
00074     avctx->coded_frame = &c->frame;
00075 
00076     return 0;
00077 }
00078 
00079 static void truespeech_read_frame(TSContext *dec, const uint8_t *input)
00080 {
00081     GetBitContext gb;
00082 
00083     dec->dsp.bswap_buf((uint32_t *)dec->buffer, (const uint32_t *)input, 8);
00084     init_get_bits(&gb, dec->buffer, 32 * 8);
00085 
00086     dec->vector[7] = ts_codebook[7][get_bits(&gb, 3)];
00087     dec->vector[6] = ts_codebook[6][get_bits(&gb, 3)];
00088     dec->vector[5] = ts_codebook[5][get_bits(&gb, 3)];
00089     dec->vector[4] = ts_codebook[4][get_bits(&gb, 4)];
00090     dec->vector[3] = ts_codebook[3][get_bits(&gb, 4)];
00091     dec->vector[2] = ts_codebook[2][get_bits(&gb, 4)];
00092     dec->vector[1] = ts_codebook[1][get_bits(&gb, 5)];
00093     dec->vector[0] = ts_codebook[0][get_bits(&gb, 5)];
00094     dec->flag      = get_bits1(&gb);
00095 
00096     dec->offset1[0] = get_bits(&gb, 4) << 4;
00097     dec->offset2[3] = get_bits(&gb, 7);
00098     dec->offset2[2] = get_bits(&gb, 7);
00099     dec->offset2[1] = get_bits(&gb, 7);
00100     dec->offset2[0] = get_bits(&gb, 7);
00101 
00102     dec->offset1[1]  = get_bits(&gb, 4);
00103     dec->pulseval[1] = get_bits(&gb, 14);
00104     dec->pulseval[0] = get_bits(&gb, 14);
00105 
00106     dec->offset1[1] |= get_bits(&gb, 4) << 4;
00107     dec->pulseval[3] = get_bits(&gb, 14);
00108     dec->pulseval[2] = get_bits(&gb, 14);
00109 
00110     dec->offset1[0] |= get_bits1(&gb);
00111     dec->pulsepos[0] = get_bits_long(&gb, 27);
00112     dec->pulseoff[0] = get_bits(&gb, 4);
00113 
00114     dec->offset1[0] |= get_bits1(&gb) << 1;
00115     dec->pulsepos[1] = get_bits_long(&gb, 27);
00116     dec->pulseoff[1] = get_bits(&gb, 4);
00117 
00118     dec->offset1[0] |= get_bits1(&gb) << 2;
00119     dec->pulsepos[2] = get_bits_long(&gb, 27);
00120     dec->pulseoff[2] = get_bits(&gb, 4);
00121 
00122     dec->offset1[0] |= get_bits1(&gb) << 3;
00123     dec->pulsepos[3] = get_bits_long(&gb, 27);
00124     dec->pulseoff[3] = get_bits(&gb, 4);
00125 }
00126 
00127 static void truespeech_correlate_filter(TSContext *dec)
00128 {
00129     int16_t tmp[8];
00130     int i, j;
00131 
00132     for(i = 0; i < 8; i++){
00133         if(i > 0){
00134             memcpy(tmp, dec->cvector, i * sizeof(*tmp));
00135             for(j = 0; j < i; j++)
00136                 dec->cvector[j] = ((tmp[i - j - 1] * dec->vector[i]) +
00137                                    (dec->cvector[j] << 15) + 0x4000) >> 15;
00138         }
00139         dec->cvector[i] = (8 - dec->vector[i]) >> 3;
00140     }
00141     for(i = 0; i < 8; i++)
00142         dec->cvector[i] = (dec->cvector[i] * ts_decay_994_1000[i]) >> 15;
00143 
00144     dec->filtval = dec->vector[0];
00145 }
00146 
00147 static void truespeech_filters_merge(TSContext *dec)
00148 {
00149     int i;
00150 
00151     if(!dec->flag){
00152         for(i = 0; i < 8; i++){
00153             dec->filters[i + 0] = dec->prevfilt[i];
00154             dec->filters[i + 8] = dec->prevfilt[i];
00155         }
00156     }else{
00157         for(i = 0; i < 8; i++){
00158             dec->filters[i + 0]=(dec->cvector[i] * 21846 + dec->prevfilt[i] * 10923 + 16384) >> 15;
00159             dec->filters[i + 8]=(dec->cvector[i] * 10923 + dec->prevfilt[i] * 21846 + 16384) >> 15;
00160         }
00161     }
00162     for(i = 0; i < 8; i++){
00163         dec->filters[i + 16] = dec->cvector[i];
00164         dec->filters[i + 24] = dec->cvector[i];
00165     }
00166 }
00167 
00168 static void truespeech_apply_twopoint_filter(TSContext *dec, int quart)
00169 {
00170     int16_t tmp[146 + 60], *ptr0, *ptr1;
00171     const int16_t *filter;
00172     int i, t, off;
00173 
00174     t = dec->offset2[quart];
00175     if(t == 127){
00176         memset(dec->newvec, 0, 60 * sizeof(*dec->newvec));
00177         return;
00178     }
00179     for(i = 0; i < 146; i++)
00180         tmp[i] = dec->filtbuf[i];
00181     off = (t / 25) + dec->offset1[quart >> 1] + 18;
00182     off = av_clip(off, 0, 145);
00183     ptr0 = tmp + 145 - off;
00184     ptr1 = tmp + 146;
00185     filter = (const int16_t*)ts_order2_coeffs + (t % 25) * 2;
00186     for(i = 0; i < 60; i++){
00187         t = (ptr0[0] * filter[0] + ptr0[1] * filter[1] + 0x2000) >> 14;
00188         ptr0++;
00189         dec->newvec[i] = t;
00190         ptr1[i] = t;
00191     }
00192 }
00193 
00194 static void truespeech_place_pulses(TSContext *dec, int16_t *out, int quart)
00195 {
00196     int16_t tmp[7];
00197     int i, j, t;
00198     const int16_t *ptr1;
00199     int16_t *ptr2;
00200     int coef;
00201 
00202     memset(out, 0, 60 * sizeof(*out));
00203     for(i = 0; i < 7; i++) {
00204         t = dec->pulseval[quart] & 3;
00205         dec->pulseval[quart] >>= 2;
00206         tmp[6 - i] = ts_pulse_scales[dec->pulseoff[quart] * 4 + t];
00207     }
00208 
00209     coef = dec->pulsepos[quart] >> 15;
00210     ptr1 = (const int16_t*)ts_pulse_values + 30;
00211     ptr2 = tmp;
00212     for(i = 0, j = 3; (i < 30) && (j > 0); i++){
00213         t = *ptr1++;
00214         if(coef >= t)
00215             coef -= t;
00216         else{
00217             out[i] = *ptr2++;
00218             ptr1 += 30;
00219             j--;
00220         }
00221     }
00222     coef = dec->pulsepos[quart] & 0x7FFF;
00223     ptr1 = (const int16_t*)ts_pulse_values;
00224     for(i = 30, j = 4; (i < 60) && (j > 0); i++){
00225         t = *ptr1++;
00226         if(coef >= t)
00227             coef -= t;
00228         else{
00229             out[i] = *ptr2++;
00230             ptr1 += 30;
00231             j--;
00232         }
00233     }
00234 
00235 }
00236 
00237 static void truespeech_update_filters(TSContext *dec, int16_t *out, int quart)
00238 {
00239     int i;
00240 
00241     memmove(dec->filtbuf, &dec->filtbuf[60], 86 * sizeof(*dec->filtbuf));
00242     for(i = 0; i < 60; i++){
00243         dec->filtbuf[i + 86] = out[i] + dec->newvec[i] - (dec->newvec[i] >> 3);
00244         out[i] += dec->newvec[i];
00245     }
00246 }
00247 
00248 static void truespeech_synth(TSContext *dec, int16_t *out, int quart)
00249 {
00250     int i,k;
00251     int t[8];
00252     int16_t *ptr0, *ptr1;
00253 
00254     ptr0 = dec->tmp1;
00255     ptr1 = dec->filters + quart * 8;
00256     for(i = 0; i < 60; i++){
00257         int sum = 0;
00258         for(k = 0; k < 8; k++)
00259             sum += ptr0[k] * ptr1[k];
00260         sum = (sum + (out[i] << 12) + 0x800) >> 12;
00261         out[i] = av_clip(sum, -0x7FFE, 0x7FFE);
00262         for(k = 7; k > 0; k--)
00263             ptr0[k] = ptr0[k - 1];
00264         ptr0[0] = out[i];
00265     }
00266 
00267     for(i = 0; i < 8; i++)
00268         t[i] = (ts_decay_35_64[i] * ptr1[i]) >> 15;
00269 
00270     ptr0 = dec->tmp2;
00271     for(i = 0; i < 60; i++){
00272         int sum = 0;
00273         for(k = 0; k < 8; k++)
00274             sum += ptr0[k] * t[k];
00275         for(k = 7; k > 0; k--)
00276             ptr0[k] = ptr0[k - 1];
00277         ptr0[0] = out[i];
00278         out[i] = ((out[i] << 12) - sum) >> 12;
00279     }
00280 
00281     for(i = 0; i < 8; i++)
00282         t[i] = (ts_decay_3_4[i] * ptr1[i]) >> 15;
00283 
00284     ptr0 = dec->tmp3;
00285     for(i = 0; i < 60; i++){
00286         int sum = out[i] << 12;
00287         for(k = 0; k < 8; k++)
00288             sum += ptr0[k] * t[k];
00289         for(k = 7; k > 0; k--)
00290             ptr0[k] = ptr0[k - 1];
00291         ptr0[0] = av_clip((sum + 0x800) >> 12, -0x7FFE, 0x7FFE);
00292 
00293         sum = ((ptr0[1] * (dec->filtval - (dec->filtval >> 2))) >> 4) + sum;
00294         sum = sum - (sum >> 3);
00295         out[i] = av_clip((sum + 0x800) >> 12, -0x7FFE, 0x7FFE);
00296     }
00297 }
00298 
00299 static void truespeech_save_prevvec(TSContext *c)
00300 {
00301     int i;
00302 
00303     for(i = 0; i < 8; i++)
00304         c->prevfilt[i] = c->cvector[i];
00305 }
00306 
00307 static int truespeech_decode_frame(AVCodecContext *avctx, void *data,
00308                                    int *got_frame_ptr, AVPacket *avpkt)
00309 {
00310     const uint8_t *buf = avpkt->data;
00311     int buf_size = avpkt->size;
00312     TSContext *c = avctx->priv_data;
00313 
00314     int i, j;
00315     int16_t *samples;
00316     int iterations, ret;
00317 
00318     iterations = buf_size / 32;
00319 
00320     if (!iterations) {
00321         av_log(avctx, AV_LOG_ERROR,
00322                "Too small input buffer (%d bytes), need at least 32 bytes\n", buf_size);
00323         return -1;
00324     }
00325 
00326     /* get output buffer */
00327     c->frame.nb_samples = iterations * 240;
00328     if ((ret = avctx->get_buffer(avctx, &c->frame)) < 0) {
00329         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
00330         return ret;
00331     }
00332     samples = (int16_t *)c->frame.data[0];
00333 
00334     memset(samples, 0, iterations * 240 * sizeof(*samples));
00335 
00336     for(j = 0; j < iterations; j++) {
00337         truespeech_read_frame(c, buf);
00338         buf += 32;
00339 
00340         truespeech_correlate_filter(c);
00341         truespeech_filters_merge(c);
00342 
00343         for(i = 0; i < 4; i++) {
00344             truespeech_apply_twopoint_filter(c, i);
00345             truespeech_place_pulses  (c, samples, i);
00346             truespeech_update_filters(c, samples, i);
00347             truespeech_synth         (c, samples, i);
00348             samples += 60;
00349         }
00350 
00351         truespeech_save_prevvec(c);
00352     }
00353 
00354     *got_frame_ptr   = 1;
00355     *(AVFrame *)data = c->frame;
00356 
00357     return buf_size;
00358 }
00359 
00360 AVCodec ff_truespeech_decoder = {
00361     .name           = "truespeech",
00362     .type           = AVMEDIA_TYPE_AUDIO,
00363     .id             = CODEC_ID_TRUESPEECH,
00364     .priv_data_size = sizeof(TSContext),
00365     .init           = truespeech_decode_init,
00366     .decode         = truespeech_decode_frame,
00367     .capabilities   = CODEC_CAP_DR1,
00368     .long_name      = NULL_IF_CONFIG_SMALL("DSP Group TrueSpeech"),
00369 };