FFmpeg: libavcodec/wmavoice.c Source File

00001 /*
00002  * Windows Media Audio Voice decoder.
00003  * Copyright (c) 2009 Ronald S. Bultje
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #define UNCHECKED_BITSTREAM_READER 1
00029 
00030 #include <math.h>
00031 #include "avcodec.h"
00032 #include "get_bits.h"
00033 #include "put_bits.h"
00034 #include "wmavoice_data.h"
00035 #include "celp_math.h"
00036 #include "celp_filters.h"
00037 #include "acelp_vectors.h"
00038 #include "acelp_filters.h"
00039 #include "lsp.h"
00040 #include "libavutil/lzo.h"
00041 #include "dct.h"
00042 #include "rdft.h"
00043 #include "sinewin.h"
00044 
00045 #define MAX_BLOCKS           8   ///< largest n_blocks value listed in frame_descs[]
00046 #define MAX_LSPS             16  ///< largest LSP count configured by wmavoice_decode_init() (10 or 16)
00047 #define MAX_LSPS_ALIGN16     16  ///< lead-in kept in front of the sample area of synth_filter_out_buf[]; postfilter() stores LSP-order history there
00048 
00049 #define MAX_FRAMES           3   ///< multiplier used for MAX_SFRAMESIZE; presumably frames per superframe - TODO confirm
00050 #define MAX_FRAMESIZE        160 ///< sizes denoise_filter_cache[]; postfilter() accepts at most half of this per call
00051 #define MAX_SIGNAL_HISTORY   416 ///< upper limit for history_nsamples, enforced in wmavoice_decode_init()
00052 #define MAX_SFRAMESIZE       (MAX_FRAMESIZE * MAX_FRAMES) ///< MAX_FRAMES frames of MAX_FRAMESIZE samples each
00054 #define SFRAME_CACHE_MAXSIZE 256 ///< size of sframe_cache[] in bytes, not counting the input padding
00055 
00056 #define VLC_NBITS            6   ///< lookup bits handed to INIT_VLC_STATIC() for frame_type_vlc
00057 
00058 
/**
 * Static VLC table (22 codes of 2 to 14 bits, VLC_NBITS lookup bits) that is
 * filled in by decode_vbmtree() through INIT_VLC_STATIC().
 */
00061 static VLC frame_type_vlc;
00062 
/**
 * Values stored in the acb_type column of frame_descs[].
 * NOTE(review): "ACB" presumably abbreviates "adaptive codebook" - confirm.
 */
00066 enum {
00067     ACB_TYPE_NONE       = 0, ///< frame_descs[0..1]; also the initial value of last_acb_type
00068     ACB_TYPE_ASYMMETRIC = 1, ///< frame_descs[2..7]
00069 
00070 
00071 
00072 
00073     ACB_TYPE_HAMMING    = 2  ///< frame_descs[8..16]
00074 
00075 
00076 };
00077 
/**
 * Values stored in the fcb_type column of frame_descs[]. The numeric order
 * matters: postfilter() tests fcb_type >= FCB_TYPE_AW_PULSES.
 * NOTE(review): "FCB" presumably abbreviates "fixed codebook" - confirm.
 */
00081 enum {
00082     FCB_TYPE_SILENCE    = 0, ///< frame_descs[0]; wiener_denoise() skips the filter computation for it
00083 
00084 
00085     FCB_TYPE_HARDCODED  = 1, ///< frame_descs[1]; selects the 5/13 gain multiplier in calc_input_response()
00086 
00087     FCB_TYPE_AW_PULSES  = 2, ///< frame_descs[2]; lowest type for which postfilter() tries kalman_smoothen()
00088 
00089     FCB_TYPE_EXC_PULSES = 3, ///< frame_descs[3..16]
00090 
00091 
00092 };
00093 
/**
 * Table describing the 17 frame types; the position in the table is the
 * frame type number (the same 0..16 range that decode_vbmtree() stores in
 * vbm_tree[]).
 */
00097 static const struct frame_type_desc {
00098     uint8_t n_blocks;     ///< number of blocks (1, 2, 4 or 8)
00099 
00100     uint8_t log_n_blocks; ///< log2(n_blocks)
00101     uint8_t acb_type;     ///< one of the ACB_TYPE_* values
00102     uint8_t fcb_type;     ///< one of the FCB_TYPE_* values
00103     uint8_t dbl_pulses;   ///< 0, 2 or 5 in this table; meaning not visible here - TODO confirm
00104 
00105 
00106     uint16_t frame_size;  ///< presumably the size of the frame payload in bits - TODO confirm
00107 
00108 } frame_descs[17] = {
00109     { 1, 0, ACB_TYPE_NONE,       FCB_TYPE_SILENCE,    0,   0 },
00110     { 2, 1, ACB_TYPE_NONE,       FCB_TYPE_HARDCODED,  0,  28 },
00111     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
00112     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
00113     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00114     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00115     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00116     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00117     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0,  64 },
00118     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2,  80 },
00119     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 104 },
00120     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 108 },
00121     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 132 },
00122     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 168 },
00123     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 176 },
00124     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 208 },
00125     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 256 }
00126 };
00127 
/**
 * Private decoder state (reached through AVCodecContext.priv_data).
 * Field notes describe only what the setup, postfilter and pulse-parsing
 * code establishes; fields without a note are not set or read there.
 */
00131 typedef struct {
00136     AVFrame frame;                ///< reset in wmavoice_decode_init() and exposed as ctx->coded_frame
00137     GetBitContext gb;             ///< bit reader; wmavoice_decode_init() points it at extradata byte 22 to read the VBM tree
00138 
00139 
00140 
00141     int8_t vbm_tree[25];          ///< filled by decode_vbmtree(): frame types 0..16 in slots of three per 3-bit code, unused slots hold -1 (0xff)
00142 
00143     int spillover_bitsize;        ///< 3 + ceil(log2(block_align))
00144 
00145 
00146     int history_nsamples;         ///< max_pitch_val + 8; must not exceed MAX_SIGNAL_HISTORY
00147 
00148 
00149     /* postfilter specific values */
00150     int do_apf;                   ///< extradata flag bit 0; when set, the RDFT/DCT contexts and sin/cos tables are initialized
00151 
00152     int denoise_strength;         ///< extradata flag bits 2-5, valid range 0..11; row index into wmavoice_denoise_power_table
00153 
00154     int denoise_tilt_corr;        ///< extradata flag 0x40; enables the tilt compensation step in calc_input_response()
00155 
00156     int dc_level;                 ///< extradata flag bits 7-10; values above 8 enable the DC-removal highpass in postfilter()
00157 
00158 
00159     int lsps;                     ///< number of LSPs: 16 if extradata flag 0x1000 is set, otherwise 10
00160     int lsp_q_mode;               ///< extradata flag 0x2000
00161     int lsp_def_mode;             ///< extradata flag 0x4000
00162 
00163     int frame_lsp_bitsize;        ///< 34 for 16 LSPs, 24 for 10 LSPs
00164 
00165     int sframe_lsp_bitsize;       ///< 60 for 16 LSPs, 48 for 10 LSPs
00166 
00167 
00168     int min_pitch_val;            ///< ((sample_rate << 8) / 400 + 50) >> 8
00169     int max_pitch_val;            ///< ((sample_rate << 8) * 37 / 2000 + 50) >> 8
00170     int pitch_nbits;              ///< ceil(log2(max_pitch_val - min_pitch_val))
00171 
00172     int block_pitch_nbits;        ///< ceil(log2(block_pitch_range))
00173 
00174     int block_pitch_range;        ///< derived from block_conv_table[] and min_pitch_val in wmavoice_decode_init()
00175     int block_delta_pitch_nbits;  ///< 1 + ceil(log2(block_delta_pitch_hrange))
00176 
00177 
00178 
00179     int block_delta_pitch_hrange; ///< (pitch range >> 3) rounded down to a multiple of 16; must be positive
00180 
00181     uint16_t block_conv_table[4]; ///< { min_pitch_val, range * 25 >> 6, range * 44 >> 6, max_pitch_val - 1 }
00182 
00183 
00193     int spillover_nbits;          
00194 
00195 
00196 
00197     int has_residual_lsps;        
00198 
00199 
00200 
00201 
00202     int skip_bits_next;           
00203 
00204 
00205 
00206     uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00209     int sframe_cache_size;        
00210 
00211 
00212 
00213 
00214     PutBitContext pb;             
00215 
00225     double prev_lsps[MAX_LSPS];   ///< initialized to pi * (n + 1) / (lsps + 1), i.e. evenly spaced inside (0, pi)
00226 
00227     int last_pitch_val;           ///< initialized to 40
00228     int last_acb_type;            ///< initialized to ACB_TYPE_NONE
00229     int pitch_diff_sh16;          
00230 
00231     float silence_gain;           
00232 
00233     int aw_idx_is_ext;            ///< set by aw_parse_coords() when the 6-bit pulse index is >= 54 and two extra bits were read
00234 
00235     int aw_pulse_range;           ///< 24 if both pitch values passed to aw_parse_coords() exceed 32, otherwise 16
00236 
00237 
00238 
00239 
00240 
00241     int aw_n_pulses[2];           ///< pulse counts computed by aw_parse_coords(), one per half of the frame
00242 
00243 
00244     int aw_first_pulse_off[2];    ///< first pulse offsets computed by aw_parse_coords(), one per half of the frame
00245 
00246     int aw_next_pulse_off_cache;  ///< read by aw_pulse_set2() for block 1; presumably written while handling block 0 - TODO confirm
00247 
00248 
00249 
00250 
00251 
00252     int frame_cntr;               
00253 
00254     float gain_pred_err[6];       
00255     float excitation_history[MAX_SIGNAL_HISTORY];
00259     float synth_history[MAX_LSPS]; 
00260 
00269     RDFTContext rdft, irdft;      ///< 7-bit forward (DFT_R2C) and inverse (IDFT_C2R) real FFTs, set up only if do_apf
00270 
00271     DCTContext dct, dst;          ///< 6-bit DCT-I and DST-I, set up only if do_apf
00272 
00273     float sin[511], cos[511];     ///< lookup tables built from a 256-point sine window; indexed with 255 + clipped value in calc_input_response()
00274 
00275     float postfilter_agc;         ///< gain memory carried between adaptive_gain_control() calls
00276 
00277     float dcf_mem[2];             ///< filter state of the DC-removal highpass in postfilter()
00278     float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
00281     float denoise_filter_cache[MAX_FRAMESIZE]; ///< filter output tail that wiener_denoise() adds to following calls
00282     int   denoise_filter_cache_size; ///< number of valid entries in denoise_filter_cache[]
00283     DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80]; ///< scratch for the tilt-compensated LPCs in wiener_denoise()
00285     DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80]; ///< scratch for the denoise filter coefficients in wiener_denoise()
00287     DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; ///< postfilter() synthesis output; samples start at index MAX_LSPS_ALIGN16, history sits just before
00290 
00293 } WMAVoiceContext;
00294 
00304 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00305 {
00306     static const uint8_t bits[] = {
00307          2,  2,  2,  4,  4,  4,
00308          6,  6,  6,  8,  8,  8,
00309         10, 10, 10, 12, 12, 12,
00310         14, 14, 14, 14
00311     };
00312     static const uint16_t codes[] = {
00313           0x0000, 0x0001, 0x0002,        //              00/01/10
00314           0x000c, 0x000d, 0x000e,        //           11+00/01/10
00315           0x003c, 0x003d, 0x003e,        //         1111+00/01/10
00316           0x00fc, 0x00fd, 0x00fe,        //       111111+00/01/10
00317           0x03fc, 0x03fd, 0x03fe,        //     11111111+00/01/10
00318           0x0ffc, 0x0ffd, 0x0ffe,        //   1111111111+00/01/10
00319           0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
00320     };
00321     int cntr[8], n, res;
00322 
00323     memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00324     memset(cntr,     0,    sizeof(cntr));
00325     for (n = 0; n < 17; n++) {
00326         res = get_bits(gb, 3);
00327         if (cntr[res] > 3) // should be >= 3 + (res == 7))
00328             return -1;
00329         vbm_tree[res * 3 + cntr[res]++] = n;
00330     }
00331     INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00332                     bits, 1, 1, codes, 2, 2, 132);
00333     return 0;
00334 }
00335 
00339 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00340 {
00341     int n, flags, pitch_range, lsp16_flag;
00342     WMAVoiceContext *s = ctx->priv_data;
00343 
00352     if (ctx->extradata_size != 46) {
00353         av_log(ctx, AV_LOG_ERROR,
00354                "Invalid extradata size %d (should be 46)\n",
00355                ctx->extradata_size);
00356         return -1;
00357     }
00358     flags                = AV_RL32(ctx->extradata + 18);
00359     s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00360     s->do_apf            =    flags & 0x1;
00361     if (s->do_apf) {
00362         ff_rdft_init(&s->rdft,  7, DFT_R2C);
00363         ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00364         ff_dct_init(&s->dct,  6, DCT_I);
00365         ff_dct_init(&s->dst,  6, DST_I);
00366 
00367         ff_sine_window_init(s->cos, 256);
00368         memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00369         for (n = 0; n < 255; n++) {
00370             s->sin[n]       = -s->sin[510 - n];
00371             s->cos[510 - n] =  s->cos[n];
00372         }
00373     }
00374     s->denoise_strength  =   (flags >> 2) & 0xF;
00375     if (s->denoise_strength >= 12) {
00376         av_log(ctx, AV_LOG_ERROR,
00377                "Invalid denoise filter strength %d (max=11)\n",
00378                s->denoise_strength);
00379         return -1;
00380     }
00381     s->denoise_tilt_corr = !!(flags & 0x40);
00382     s->dc_level          =   (flags >> 7) & 0xF;
00383     s->lsp_q_mode        = !!(flags & 0x2000);
00384     s->lsp_def_mode      = !!(flags & 0x4000);
00385     lsp16_flag           =    flags & 0x1000;
00386     if (lsp16_flag) {
00387         s->lsps               = 16;
00388         s->frame_lsp_bitsize  = 34;
00389         s->sframe_lsp_bitsize = 60;
00390     } else {
00391         s->lsps               = 10;
00392         s->frame_lsp_bitsize  = 24;
00393         s->sframe_lsp_bitsize = 48;
00394     }
00395     for (n = 0; n < s->lsps; n++)
00396         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00397 
00398     init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00399     if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00400         av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00401         return -1;
00402     }
00403 
00404     s->min_pitch_val    = ((ctx->sample_rate << 8)      /  400 + 50) >> 8;
00405     s->max_pitch_val    = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00406     pitch_range         = s->max_pitch_val - s->min_pitch_val;
00407     if (pitch_range <= 0) {
00408         av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00409         return -1;
00410     }
00411     s->pitch_nbits      = av_ceil_log2(pitch_range);
00412     s->last_pitch_val   = 40;
00413     s->last_acb_type    = ACB_TYPE_NONE;
00414     s->history_nsamples = s->max_pitch_val + 8;
00415 
00416     if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00417         int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00418             max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00419 
00420         av_log(ctx, AV_LOG_ERROR,
00421                "Unsupported samplerate %d (min=%d, max=%d)\n",
00422                ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
00423 
00424         return -1;
00425     }
00426 
00427     s->block_conv_table[0]      = s->min_pitch_val;
00428     s->block_conv_table[1]      = (pitch_range * 25) >> 6;
00429     s->block_conv_table[2]      = (pitch_range * 44) >> 6;
00430     s->block_conv_table[3]      = s->max_pitch_val - 1;
00431     s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00432     if (s->block_delta_pitch_hrange <= 0) {
00433         av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00434         return -1;
00435     }
00436     s->block_delta_pitch_nbits  = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00437     s->block_pitch_range        = s->block_conv_table[2] +
00438                                   s->block_conv_table[3] + 1 +
00439                                   2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00440     s->block_pitch_nbits        = av_ceil_log2(s->block_pitch_range);
00441 
00442     ctx->sample_fmt             = AV_SAMPLE_FMT_FLT;
00443 
00444     avcodec_get_frame_defaults(&s->frame);
00445     ctx->coded_frame = &s->frame;
00446 
00447     return 0;
00448 }
00449 
/**
 * Adaptive gain control: scale the postfiltered signal so that its level
 * follows the level of the unfiltered synthesized speech.
 *
 * The target gain is (1 - alpha) * sum|speech_synth| / sum|in| and is
 * smoothed per sample through a one-pole recursion whose state is kept in
 * *gain_mem across calls. If the postfiltered input is completely silent
 * the target gain is taken as 0, so that no Inf/NaN is produced or stored
 * in the gain memory.
 *
 * @param out          output samples (size entries)
 * @param in           postfiltered input samples (size entries)
 * @param speech_synth reference (unfiltered) speech samples (size entries)
 * @param size         number of samples to process
 * @param alpha        smoothing factor of the gain recursion
 * @param gain_mem     in/out: gain state carried between calls
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* guard the division: an all-zero input would otherwise yield Inf/NaN */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00492 
00511 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00512                            const float *in, float *out, int size)
00513 {
00514     int n;
00515     float optimal_gain = 0, dot;
00516     const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00517                 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00518                 *best_hist_ptr;
00519 
00520     /* find best fitting point in history */
00521     do {
00522         dot = ff_dot_productf(in, ptr, size);
00523         if (dot > optimal_gain) {
00524             optimal_gain  = dot;
00525             best_hist_ptr = ptr;
00526         }
00527     } while (--ptr >= end);
00528 
00529     if (optimal_gain <= 0)
00530         return -1;
00531     dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00532     if (dot <= 0) // would be 1.0
00533         return -1;
00534 
00535     if (optimal_gain <= dot) {
00536         dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
00537     } else
00538         dot = 0.625;
00539 
00540     /* actual smoothing */
00541     for (n = 0; n < size; n++)
00542         out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00543 
00544     return 0;
00545 }
00546 
/**
 * Compute the tilt factor of a set of LPCs: the lag-1 correlation of the
 * coefficient sequence (with an implicit leading 1.0) divided by its energy.
 *
 * @param lpcs   LPC coefficients
 * @param n_lpcs number of coefficients in lpcs
 * @return lag-1 correlation divided by lag-0 correlation
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float energy = 1.0     + ff_dot_productf(lpcs, lpcs,     n_lpcs);
    const float lag1   = lpcs[0] + ff_dot_productf(lpcs, lpcs + 1, n_lpcs - 1);

    return lag1 / energy;
}
00566 
/**
 * Derive the time-domain coefficients of the denoise filter from the
 * (tilt-compensated) LPCs.
 *
 * @param s         decoder context; uses the RDFT/DCT/DST contexts, the
 *                  sin/cos tables, denoise_strength and denoise_tilt_corr
 * @param lpcs      in: signal to analyse; it is transformed in place and
 *                  reused as scratch (entries 0..64 are rewritten), so the
 *                  contents are destroyed
 * @param fcb_type  FCB_TYPE_* of the current frame; FCB_TYPE_HARDCODED picks
 *                  a different gain multiplier
 * @param coeffs    out: filter coefficients; entries [remainder, 128) are
 *                  zeroed, the first remainder entries are normalized
 * @param remainder number of leading coefficients that are kept
 */
00570 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00571                                 int fcb_type, float *coeffs, int remainder)
00572 {
00573     float last_coeff, min = 15.0, max = -15.0;
00574     float irange, angle_mul, gain_mul, range, sq;
00575     int n, idx;
00576 
00577     /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
00578     s->rdft.rdft_calc(&s->rdft, lpcs);
      /* log_range(): store log10 of a power value and track the running
       * minimum/maximum over all bins */
00579 #define log_range(var, assign) do { \
00580         float tmp = log10f(assign);  var = tmp; \
00581         max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00582     } while (0)
      /* lpcs[1] and lpcs[0] are single real values, bins 1..63 are
       * (re, im) pairs; bin n is written to lpcs[n], which is safe because
       * it is read from lpcs[2n] and lpcs[2n + 1] */
00583     log_range(last_coeff,  lpcs[1]         * lpcs[1]);
00584     for (n = 1; n < 64; n++)
00585         log_range(lpcs[n], lpcs[n * 2]     * lpcs[n * 2] +
00586                            lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00587     log_range(lpcs[0],     lpcs[0]         * lpcs[0]);
00588 #undef log_range
00589     range    = max - min;
00590     lpcs[64] = last_coeff;
00591 
00592     /* Now, use this spectrum to pick out these frequencies with higher
00593      * (relative) power/energy (which we then take to be "not noise"),
00594      * and set up a table (still in lpc[]) of (relative) gains per frequency.
00595      * These frequencies will be maintained, while others ("noise") will be
00596      * decreased in the filter output. */
00597     irange    = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
00598     gain_mul  = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00599                                                           (5.0 / 14.7));
00600     angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00601     for (n = 0; n <= 64; n++) {
00602         float pwr;
00603 
          /* NOTE(review): if FFMAX is a plain macro, lrint() is evaluated
           * twice here - confirm */
00604         idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00605         pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00606         lpcs[n] = angle_mul * pwr;
00607 
00608         /* 70.57 =~ 1/log10(1.0331663) */
00609         idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00610         if (idx > 127) { // fallback if index falls outside table range
00611             coeffs[n] = wmavoice_energy_table[127] *
00612                         powf(1.0331663, idx - 127);
00613         } else
00614             coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00615     }
00616 
00617     /* calculate the Hilbert transform of the gains, which we do (since this
00618      * is a sinus input) by doing a phase shift (in theory, H(sin())=cos()).
00619      * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
00620      * "moment" of the LPCs in this filter. */
00621     s->dct.dct_calc(&s->dct, lpcs);
00622     s->dst.dct_calc(&s->dst, lpcs);
00623 
00624     /* Split out the coefficient indexes into phase/magnitude pairs */
00625     idx = 255 + av_clip(lpcs[64],               -255, 255);
00626     coeffs[0]  = coeffs[0]  * s->cos[idx];
00627     idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00628     last_coeff = coeffs[64] * s->cos[idx];
      /* n is decremented twice per pass (once by the --n below, once by
       * the loop header): odd n use -lpcs[64], even n use +lpcs[64].
       * Walking downwards keeps coeffs[n] intact until it is expanded into
       * coeffs[2n] / coeffs[2n + 1]; the loop ends after n == 1. */
00629     for (n = 63;; n--) {
00630         idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00631         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00632         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00633 
00634         if (!--n) break;
00635 
00636         idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00637         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00638         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00639     }
      /* slot 1 receives the value computed from bin 64 (presumably the
       * packed layout expected by the inverse RDFT - TODO confirm) */
00640     coeffs[1] = last_coeff;
00641 
00642     /* move into real domain */
00643     s->irdft.rdft_calc(&s->irdft, coeffs);
00644 
00645     /* tilt correction and normalize scale */
00646     memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00647     if (s->denoise_tilt_corr) {
00648         float tilt_mem = 0;
00649 
00650         coeffs[remainder - 1] = 0;
00651         ff_tilt_compensation(&tilt_mem,
00652                              -1.8 * tilt_factor(coeffs, remainder - 1),
00653                              coeffs, remainder);
00654     }
      /* scale so that the kept coefficients have an energy of 1 / 64^2 */
00655     sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00656     for (n = 0; n < remainder; n++)
00657         coeffs[n] *= sq;
00658 }
00659 
/**
 * Apply the denoise filter to one block of synthesized samples, in place.
 *
 * For non-silent frames the filter is derived from the LPCs and applied by
 * multiplication in the frequency domain; the part of the filter output
 * that extends past the block is cached and added to following blocks.
 * For FCB_TYPE_SILENCE frames only the cached tail of earlier blocks is
 * added.
 *
 * @param s        decoder context (scratch buffers, transforms, cache)
 * @param fcb_type FCB_TYPE_* of the current frame
 * @param synth_pf in/out: samples to filter; the buffer is used as a
 *                 128-float transform buffer, so it must have room for that
 * @param size     number of valid samples in synth_pf
 * @param lpcs     LPC coefficients of the block (s->lsps entries)
 */
00686 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00687                            float *synth_pf, int size,
00688                            const float *lpcs)
00689 {
      /* remainder is assigned and read only on the non-silence paths */
00690     int remainder, lim, n;
00691 
00692     if (fcb_type != FCB_TYPE_SILENCE) {
00693         float *tilted_lpcs = s->tilted_lpcs_pf,
00694               *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00695 
          /* build { 1.0, lpcs[0..lsps-1], 0, ... } as a 128-entry signal */
00696         tilted_lpcs[0]           = 1.0;
00697         memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00698         memset(&tilted_lpcs[s->lsps + 1], 0,
00699                sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00700         ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00701                              tilted_lpcs, s->lsps + 2);
00702 
00703         /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
00704          * size is applied to the next frame. All input beyond this is zero,
00705          * and thus all output beyond this will go towards zero, hence we can
00706          * limit to min(size-1, 127-size) as a performance consideration. */
00707         remainder = FFMIN(127 - size, size - 1);
00708         calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00709 
00710         /* apply coefficients (in frequency spectrum domain), i.e. complex
00711          * number multiplication */
00712         memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00713         s->rdft.rdft_calc(&s->rdft, synth_pf);
00714         s->rdft.rdft_calc(&s->rdft, coeffs);
          /* entries 0 and 1 are multiplied as plain real values, the
           * remaining pairs as complex numbers */
00715         synth_pf[0] *= coeffs[0];
00716         synth_pf[1] *= coeffs[1];
00717         for (n = 1; n < 64; n++) {
00718             float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00719             synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00720             synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00721         }
00722         s->irdft.rdft_calc(&s->irdft, synth_pf);
00723     }
00724 
00725     /* merge filter output with the history of previous runs */
00726     if (s->denoise_filter_cache_size) {
00727         lim = FFMIN(s->denoise_filter_cache_size, size);
00728         for (n = 0; n < lim; n++)
00729             synth_pf[n] += s->denoise_filter_cache[n];
00730         s->denoise_filter_cache_size -= lim;
          /* shift the not yet consumed cache entries to the front; the
           * count is zero whenever the cache held fewer than size entries */
00731         memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00732                 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00733     }
00734 
00735     /* move remainder of filter output into a cache for future runs */
00736     if (fcb_type != FCB_TYPE_SILENCE) {
          /* add onto what is still cached, then append the rest */
00737         lim = FFMIN(remainder, s->denoise_filter_cache_size);
00738         for (n = 0; n < lim; n++)
00739             s->denoise_filter_cache[n] += synth_pf[size + n];
00740         if (lim < remainder) {
00741             memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00742                    sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00743             s->denoise_filter_cache_size = remainder;
00744         }
00745     }
00746 }
00747 
00768 static void postfilter(WMAVoiceContext *s, const float *synth,
00769                        float *samples,    int size,
00770                        const float *lpcs, float *zero_exc_pf,
00771                        int fcb_type,      int pitch)
00772 {
00773     float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00774           *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00775           *synth_filter_in = zero_exc_pf;
00776 
00777     assert(size <= MAX_FRAMESIZE / 2);
00778 
00779     /* generate excitation from input signal */
00780     ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00781 
00782     if (fcb_type >= FCB_TYPE_AW_PULSES &&
00783         !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00784         synth_filter_in = synth_filter_in_buf;
00785 
00786     /* re-synthesize speech after smoothening, and keep history */
00787     ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00788                                  synth_filter_in, size, s->lsps);
00789     memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00790            sizeof(synth_pf[0]) * s->lsps);
00791 
00792     wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00793 
00794     adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00795                           &s->postfilter_agc);
00796 
00797     if (s->dc_level > 8) {
00798         /* remove ultra-low frequency DC noise / highpass filter;
00799          * coefficients are identical to those used in SIPR decoding,
00800          * and very closely resemble those used in AMR-NB decoding. */
00801         ff_acelp_apply_order_2_transfer_function(samples, samples,
00802             (const float[2]) { -1.99997,      1.0 },
00803             (const float[2]) { -1.9330735188, 0.93589198496 },
00804             0.93980580475, s->dcf_mem, size);
00805     }
00806 }
/**
 * Dequantize LSPs from a multi-stage vector codebook.
 *
 * Each stage owns sizes[stage] consecutive vectors of num bytes in table.
 * The vector selected by values[stage] is scaled by mul_q[stage], offset by
 * base_q[stage] and accumulated into lsps.
 *
 * @param lsps     output array (num entries); zeroed before accumulation
 * @param num      number of LSPs per vector
 * @param values   selected vector index per stage
 * @param sizes    number of vectors per stage
 * @param n_stages number of stages
 * @param table    codebook with the vectors of all stages back to back
 * @param mul_q    scale factor per stage
 * @param base_q   offset per stage
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *stage_tab = table;
    int stage, i;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *row    = stage_tab + values[stage] * num;
        const double   offset = base_q[stage];
        const double   scale  = mul_q[stage];

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * row[i];

        /* step over this stage's vectors */
        stage_tab += sizes[stage] * num;
    }
}
00842 
00854 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00855 {
00856     static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00857     static const double mul_lsf[4] = {
00858         5.2187144800e-3,    1.4626986422e-3,
00859         9.6179549166e-4,    1.1325736225e-3
00860     };
00861     static const double base_lsf[4] = {
00862         M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00863         M_PI * -3.3486e-2,  M_PI * -5.7408e-2
00864     };
00865     uint16_t v[4];
00866 
00867     v[0] = get_bits(gb, 8);
00868     v[1] = get_bits(gb, 6);
00869     v[2] = get_bits(gb, 5);
00870     v[3] = get_bits(gb, 5);
00871 
00872     dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00873                  mul_lsf, base_lsf);
00874 }
00875 
00880 static void dequant_lsp10r(GetBitContext *gb,
00881                            double *i_lsps, const double *old,
00882                            double *a1, double *a2, int q_mode)
00883 {
00884     static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00885     static const double mul_lsf[3] = {
00886         2.5807601174e-3,    1.2354460219e-3,   1.1763821673e-3
00887     };
00888     static const double base_lsf[3] = {
00889         M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00890     };
00891     const float (*ipol_tab)[2][10] = q_mode ?
00892         wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00893     uint16_t interpol, v[3];
00894     int n;
00895 
00896     dequant_lsp10i(gb, i_lsps);
00897 
00898     interpol = get_bits(gb, 5);
00899     v[0]     = get_bits(gb, 7);
00900     v[1]     = get_bits(gb, 6);
00901     v[2]     = get_bits(gb, 6);
00902 
00903     for (n = 0; n < 10; n++) {
00904         double delta = old[n] - i_lsps[n];
00905         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00906         a1[10 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00907     }
00908 
00909     dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00910                  mul_lsf, base_lsf);
00911 }
00912 
00916 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00917 {
00918     static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00919     static const double mul_lsf[5] = {
00920         3.3439586280e-3,    6.9908173703e-4,
00921         3.3216608306e-3,    1.0334960326e-3,
00922         3.1899104283e-3
00923     };
00924     static const double base_lsf[5] = {
00925         M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00926         M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00927         M_PI * -1.29816e-1
00928     };
00929     uint16_t v[5];
00930 
00931     v[0] = get_bits(gb, 8);
00932     v[1] = get_bits(gb, 6);
00933     v[2] = get_bits(gb, 7);
00934     v[3] = get_bits(gb, 6);
00935     v[4] = get_bits(gb, 7);
00936 
00937     dequant_lsps( lsps,     5,  v,     vec_sizes,    2,
00938                  wmavoice_dq_lsp16i1,  mul_lsf,     base_lsf);
00939     dequant_lsps(&lsps[5],  5, &v[2], &vec_sizes[2], 2,
00940                  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00941     dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00942                  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00943 }
00944 
00949 static void dequant_lsp16r(GetBitContext *gb,
00950                            double *i_lsps, const double *old,
00951                            double *a1, double *a2, int q_mode)
00952 {
00953     static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00954     static const double mul_lsf[3] = {
00955         1.2232979501e-3,   1.4062241527e-3,   1.6114744851e-3
00956     };
00957     static const double base_lsf[3] = {
00958         M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00959     };
00960     const float (*ipol_tab)[2][16] = q_mode ?
00961         wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00962     uint16_t interpol, v[3];
00963     int n;
00964 
00965     dequant_lsp16i(gb, i_lsps);
00966 
00967     interpol = get_bits(gb, 5);
00968     v[0]     = get_bits(gb, 7);
00969     v[1]     = get_bits(gb, 7);
00970     v[2]     = get_bits(gb, 7);
00971 
00972     for (n = 0; n < 16; n++) {
00973         double delta = old[n] - i_lsps[n];
00974         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00975         a1[16 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00976     }
00977 
00978     dequant_lsps( a2,     10,  v,     vec_sizes,    1,
00979                  wmavoice_dq_lsp16r1,  mul_lsf,     base_lsf);
00980     dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00981                  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00982     dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00983                  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00984 }
00985 
00999 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
01000                             const int *pitch)
01001 {
01002     static const int16_t start_offset[94] = {
01003         -11,  -9,  -7,  -5,  -3,  -1,   1,   3,   5,   7,   9,  11,
01004          13,  15,  18,  17,  19,  20,  21,  22,  23,  24,  25,  26,
01005          27,  28,  29,  30,  31,  32,  33,  35,  37,  39,  41,  43,
01006          45,  47,  49,  51,  53,  55,  57,  59,  61,  63,  65,  67,
01007          69,  71,  73,  75,  77,  79,  81,  83,  85,  87,  89,  91,
01008          93,  95,  97,  99, 101, 103, 105, 107, 109, 111, 113, 115,
01009         117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01010         141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01011     };
01012     int bits, offset;
01013 
01014     /* position of pulse */
01015     s->aw_idx_is_ext = 0;
01016     if ((bits = get_bits(gb, 6)) >= 54) {
01017         s->aw_idx_is_ext = 1;
01018         bits += (bits - 54) * 3 + get_bits(gb, 2);
01019     }
01020 
01021     /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
01022      * the distribution of the pulses in each block contained in this frame. */
01023     s->aw_pulse_range        = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01024     for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01025     s->aw_n_pulses[0]        = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01026     s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01027     offset                  += s->aw_n_pulses[0] * pitch[0];
01028     s->aw_n_pulses[1]        = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01029     s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01030 
01031     /* if continuing from a position before the block, reset position to
01032      * start of block (when corrected for the range over which it can be
01033      * spread in aw_pulse_set1()). */
01034     if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01035         while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01036             s->aw_first_pulse_off[1] -= pitch[1];
01037         if (start_offset[bits] < 0)
01038             while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01039                 s->aw_first_pulse_off[0] -= pitch[0];
01040     }
01041 }
01042 
01050 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01051                           int block_idx, AMRFixed *fcb)
01052 {
01053     uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
01054     uint16_t *use_mask = use_mask_mem + 2;
01055     /* in this function, idx is the index in the 80-bit (+ padding) use_mask
01056      * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
01057      * of idx are the position of the bit within a particular item in the
01058      * array (0 being the most significant bit, and 15 being the least
01059      * significant bit), and the remainder (>> 4) is the index in the
01060      * use_mask[]-array. This is faster and uses less memory than using a
01061      * 80-byte/80-int array. */
01062     int pulse_off = s->aw_first_pulse_off[block_idx],
01063         pulse_start, n, idx, range, aidx, start_off = 0;
01064 
01065     /* set offset of first pulse to within this block */
01066     if (s->aw_n_pulses[block_idx] > 0)
01067         while (pulse_off + s->aw_pulse_range < 1)
01068             pulse_off += fcb->pitch_lag;
01069 
01070     /* find range per pulse */
01071     if (s->aw_n_pulses[0] > 0) {
01072         if (block_idx == 0) {
01073             range = 32;
01074         } else /* block_idx = 1 */ {
01075             range = 8;
01076             if (s->aw_n_pulses[block_idx] > 0)
01077                 pulse_off = s->aw_next_pulse_off_cache;
01078         }
01079     } else
01080         range = 16;
01081     pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01082 
01083     /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
01084      * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
01085      * we exclude that range from being pulsed again in this function. */
01086     memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01087     memset( use_mask,   -1, 5 * sizeof(use_mask[0]));
01088     memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01089     if (s->aw_n_pulses[block_idx] > 0)
01090         for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01091             int excl_range         = s->aw_pulse_range; // always 16 or 24
01092             uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01093             int first_sh           = 16 - (idx & 15);
01094             *use_mask_ptr++       &= 0xFFFFu << first_sh;
01095             excl_range            -= first_sh;
01096             if (excl_range >= 16) {
01097                 *use_mask_ptr++    = 0;
01098                 *use_mask_ptr     &= 0xFFFF >> (excl_range - 16);
01099             } else
01100                 *use_mask_ptr     &= 0xFFFF >> excl_range;
01101         }
01102 
01103     /* find the 'aidx'th offset that is not excluded */
01104     aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01105     for (n = 0; n <= aidx; pulse_start++) {
01106         for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01107         if (idx >= MAX_FRAMESIZE / 2) { // find from zero
01108             if (use_mask[0])      idx = 0x0F;
01109             else if (use_mask[1]) idx = 0x1F;
01110             else if (use_mask[2]) idx = 0x2F;
01111             else if (use_mask[3]) idx = 0x3F;
01112             else if (use_mask[4]) idx = 0x4F;
01113             else                  return;
01114             idx -= av_log2_16bit(use_mask[idx >> 4]);
01115         }
01116         if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01117             use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01118             n++;
01119             start_off = idx;
01120         }
01121     }
01122 
01123     fcb->x[fcb->n] = start_off;
01124     fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01125     fcb->n++;
01126 
01127     /* set offset for next block, relative to start of that block */
01128     n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01129     s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01130 }
01131 
01139 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01140                           int block_idx, AMRFixed *fcb)
01141 {
01142     int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01143     float v;
01144 
01145     if (s->aw_n_pulses[block_idx] > 0) {
01146         int n, v_mask, i_mask, sh, n_pulses;
01147 
01148         if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
01149             n_pulses = 3;
01150             v_mask   = 8;
01151             i_mask   = 7;
01152             sh       = 4;
01153         } else { // 4 pulses, 1:sign + 2:index each
01154             n_pulses = 4;
01155             v_mask   = 4;
01156             i_mask   = 3;
01157             sh       = 3;
01158         }
01159 
01160         for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01161             fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01162             fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01163                                  s->aw_first_pulse_off[block_idx];
01164             while (fcb->x[fcb->n] < 0)
01165                 fcb->x[fcb->n] += fcb->pitch_lag;
01166             if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01167                 fcb->n++;
01168         }
01169     } else {
01170         int num2 = (val & 0x1FF) >> 1, delta, idx;
01171 
01172         if (num2 < 1 * 79)      { delta = 1; idx = num2 + 1; }
01173         else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01174         else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01175         else                    { delta = 7; idx = num2 + 1 - 3 * 75; }
01176         v = (val & 0x200) ? -1.0 : 1.0;
01177 
01178         fcb->no_repeat_mask |= 3 << fcb->n;
01179         fcb->x[fcb->n]       = idx - delta;
01180         fcb->y[fcb->n]       = v;
01181         fcb->x[fcb->n + 1]   = idx;
01182         fcb->y[fcb->n + 1]   = (val & 1) ? -v : v;
01183         fcb->n              += 2;
01184     }
01185 }
01186 
/**
 * Deterministic pseudo-random offset generator.
 *
 * Computes, without any division by a variable:
 *   x = (block_num * 1877 + frame_cntr) mod 0xFFFF
 *   y = (x % 9) * 5 + 6
 *   z = (49995 * x) / y, truncated to 16 bits
 * and returns z % (1000 - block_size).
 *
 * @param frame_cntr current frame counter
 * @param block_num  index of the block within the frame
 * @param block_size number of samples in the block
 * @return an offset in the range [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* One row per possible value of x % 9 (so per value of y). The first
     * entry is 49995 / y; the second is (49995 % y) / y expressed as a
     * 32-bit fixed-point fraction, so that
     *   z = x * (49995 / y) + x * ((49995 % y) / y)
     * can be evaluated with one multiply and one multiply-high. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 *  93368855U }  // y = 46
    };
    unsigned int x, row, whole, frac, z;

    x = block_num * 1877 + frame_cntr;
    /* max value of x is 8*1877+0xFFFE=0x13AA6, so a single conditional
     * subtraction is effectively a modulo (%) */
    if (x >= 0xFFFF)
        x -= 0xFFFF;

    /* x % 9, using the high 32 bits of x * ceil(2^32 / 9) as x / 9 */
    row = x - 9 * (int) (((int64_t) 477218589 * (int64_t) (int) x) >> 32);

    /* z = x * 49995 / (row * 5 + 6) */
    whole = x * div_tbl[row][0];
    frac  = (unsigned int) (((uint64_t) x * (uint64_t) div_tbl[row][1]) >> 32);
    z     = (uint16_t) (whole + frac);

    return z % (1000 - block_size);
}
01231 
01236 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01237                                  int block_idx, int size,
01238                                  const struct frame_type_desc *frame_desc,
01239                                  float *excitation)
01240 {
01241     float gain;
01242     int n, r_idx;
01243 
01244     assert(size <= MAX_FRAMESIZE);
01245 
01246     /* Set the offset from which we start reading wmavoice_std_codebook */
01247     if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01248         r_idx = pRNG(s->frame_cntr, block_idx, size);
01249         gain  = s->silence_gain;
01250     } else /* FCB_TYPE_HARDCODED */ {
01251         r_idx = get_bits(gb, 8);
01252         gain  = wmavoice_gain_universal[get_bits(gb, 6)];
01253     }
01254 
01255     /* Clear gain prediction parameters */
01256     memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01257 
01258     /* Apply gain to hardcoded codebook and use that as excitation signal */
01259     for (n = 0; n < size; n++)
01260         excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01261 }
01262 
01267 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01268                                 int block_idx, int size,
01269                                 int block_pitch_sh2,
01270                                 const struct frame_type_desc *frame_desc,
01271                                 float *excitation)
01272 {
01273     static const float gain_coeff[6] = {
01274         0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01275     };
01276     float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01277     int n, idx, gain_weight;
01278     AMRFixed fcb;
01279 
01280     assert(size <= MAX_FRAMESIZE / 2);
01281     memset(pulses, 0, sizeof(*pulses) * size);
01282 
01283     fcb.pitch_lag      = block_pitch_sh2 >> 2;
01284     fcb.pitch_fac      = 1.0;
01285     fcb.no_repeat_mask = 0;
01286     fcb.n              = 0;
01287 
01288     /* For the other frame types, this is where we apply the innovation
01289      * (fixed) codebook pulses of the speech signal. */
01290     if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01291         aw_pulse_set1(s, gb, block_idx, &fcb);
01292         aw_pulse_set2(s, gb, block_idx, &fcb);
01293     } else /* FCB_TYPE_EXC_PULSES */ {
01294         int offset_nbits = 5 - frame_desc->log_n_blocks;
01295 
01296         fcb.no_repeat_mask = -1;
01297         /* similar to ff_decode_10_pulses_35bits(), but with single pulses
01298          * (instead of double) for a subset of pulses */
01299         for (n = 0; n < 5; n++) {
01300             float sign;
01301             int pos1, pos2;
01302 
01303             sign           = get_bits1(gb) ? 1.0 : -1.0;
01304             pos1           = get_bits(gb, offset_nbits);
01305             fcb.x[fcb.n]   = n + 5 * pos1;
01306             fcb.y[fcb.n++] = sign;
01307             if (n < frame_desc->dbl_pulses) {
01308                 pos2           = get_bits(gb, offset_nbits);
01309                 fcb.x[fcb.n]   = n + 5 * pos2;
01310                 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01311             }
01312         }
01313     }
01314     ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01315 
01316     /* Calculate gain for adaptive & fixed codebook signal.
01317      * see ff_amr_set_fixed_gain(). */
01318     idx = get_bits(gb, 7);
01319     fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01320                     5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01321     acb_gain = wmavoice_gain_codebook_acb[idx];
01322     pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01323                         -2.9957322736 /* log(0.05) */,
01324                          1.6094379124 /* log(5.0)  */);
01325 
01326     gain_weight = 8 >> frame_desc->log_n_blocks;
01327     memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01328             sizeof(*s->gain_pred_err) * (6 - gain_weight));
01329     for (n = 0; n < gain_weight; n++)
01330         s->gain_pred_err[n] = pred_err;
01331 
01332     /* Calculation of adaptive codebook */
01333     if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01334         int len;
01335         for (n = 0; n < size; n += len) {
01336             int next_idx_sh16;
01337             int abs_idx    = block_idx * size + n;
01338             int pitch_sh16 = (s->last_pitch_val << 16) +
01339                              s->pitch_diff_sh16 * abs_idx;
01340             int pitch      = (pitch_sh16 + 0x6FFF) >> 16;
01341             int idx_sh16   = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01342             idx            = idx_sh16 >> 16;
01343             if (s->pitch_diff_sh16) {
01344                 if (s->pitch_diff_sh16 > 0) {
01345                     next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01346                 } else
01347                     next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01348                 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01349                               1, size - n);
01350             } else
01351                 len = size;
01352 
01353             ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01354                                   wmavoice_ipol1_coeffs, 17,
01355                                   idx, 9, len);
01356         }
01357     } else /* ACB_TYPE_HAMMING */ {
01358         int block_pitch = block_pitch_sh2 >> 2;
01359         idx             = block_pitch_sh2 & 3;
01360         if (idx) {
01361             ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01362                                   wmavoice_ipol2_coeffs, 4,
01363                                   idx, 8, size);
01364         } else
01365             av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01366                               sizeof(float) * size);
01367     }
01368 
01369     /* Interpolate ACB/FCB and use as excitation signal */
01370     ff_weighted_vector_sumf(excitation, excitation, pulses,
01371                             acb_gain, fcb_gain, size);
01372 }
01373 
01390 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01391                         int block_idx, int size,
01392                         int block_pitch_sh2,
01393                         const double *lsps, const double *prev_lsps,
01394                         const struct frame_type_desc *frame_desc,
01395                         float *excitation, float *synth)
01396 {
01397     double i_lsps[MAX_LSPS];
01398     float lpcs[MAX_LSPS];
01399     float fac;
01400     int n;
01401 
01402     if (frame_desc->acb_type == ACB_TYPE_NONE)
01403         synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01404     else
01405         synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01406                             frame_desc, excitation);
01407 
01408     /* convert interpolated LSPs to LPCs */
01409     fac = (block_idx + 0.5) / frame_desc->n_blocks;
01410     for (n = 0; n < s->lsps; n++) // LSF -> LSP
01411         i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01412     ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01413 
01414     /* Speech synthesis */
01415     ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01416 }
01417 
01433 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01434                        float *samples,
01435                        const double *lsps, const double *prev_lsps,
01436                        float *excitation, float *synth)
01437 {
01438     WMAVoiceContext *s = ctx->priv_data;
01439     int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01440     int pitch[MAX_BLOCKS], last_block_pitch;
01441 
01442     /* Parse frame type ("frame header"), see frame_descs */
01443     int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
01444 
01445     if (bd_idx < 0) {
01446         av_log(ctx, AV_LOG_ERROR,
01447                "Invalid frame type VLC code, skipping\n");
01448         return -1;
01449     }
01450 
01451     block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01452 
01453     /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
01454     if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01455         /* Pitch is provided per frame, which is interpreted as the pitch of
01456          * the last sample of the last block of this frame. We can interpolate
01457          * the pitch of other blocks (and even pitch-per-sample) by gradually
01458          * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
01459         n_blocks_x2      = frame_descs[bd_idx].n_blocks << 1;
01460         log_n_blocks_x2  = frame_descs[bd_idx].log_n_blocks + 1;
01461         cur_pitch_val    = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01462         cur_pitch_val    = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01463         if (s->last_acb_type == ACB_TYPE_NONE ||
01464             20 * abs(cur_pitch_val - s->last_pitch_val) >
01465                 (cur_pitch_val + s->last_pitch_val))
01466             s->last_pitch_val = cur_pitch_val;
01467 
01468         /* pitch per block */
01469         for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01470             int fac = n * 2 + 1;
01471 
01472             pitch[n] = (MUL16(fac,                 cur_pitch_val) +
01473                         MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01474                         frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01475         }
01476 
01477         /* "pitch-diff-per-sample" for calculation of pitch per sample */
01478         s->pitch_diff_sh16 =
01479             ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01480     }
01481 
01482     /* Global gain (if silence) and pitch-adaptive window coordinates */
01483     switch (frame_descs[bd_idx].fcb_type) {
01484     case FCB_TYPE_SILENCE:
01485         s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01486         break;
01487     case FCB_TYPE_AW_PULSES:
01488         aw_parse_coords(s, gb, pitch);
01489         break;
01490     }
01491 
01492     for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01493         int bl_pitch_sh2;
01494 
01495         /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
01496         switch (frame_descs[bd_idx].acb_type) {
01497         case ACB_TYPE_HAMMING: {
01498             /* Pitch is given per block. Per-block pitches are encoded as an
01499              * absolute value for the first block, and then delta values
01500              * relative to this value) for all subsequent blocks. The scale of
01501              * this pitch value is semi-logaritmic compared to its use in the
01502              * decoder, so we convert it to normal scale also. */
01503             int block_pitch,
01504                 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01505                 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01506                 t3 =  s->block_conv_table[3] - s->block_conv_table[2] + 1;
01507 
01508             if (n == 0) {
01509                 block_pitch = get_bits(gb, s->block_pitch_nbits);
01510             } else
01511                 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01512                                  get_bits(gb, s->block_delta_pitch_nbits);
01513             /* Convert last_ so that any next delta is within _range */
01514             last_block_pitch = av_clip(block_pitch,
01515                                        s->block_delta_pitch_hrange,
01516                                        s->block_pitch_range -
01517                                            s->block_delta_pitch_hrange);
01518 
01519             /* Convert semi-log-style scale back to normal scale */
01520             if (block_pitch < t1) {
01521                 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01522             } else {
01523                 block_pitch -= t1;
01524                 if (block_pitch < t2) {
01525                     bl_pitch_sh2 =
01526                         (s->block_conv_table[1] << 2) + (block_pitch << 1);
01527                 } else {
01528                     block_pitch -= t2;
01529                     if (block_pitch < t3) {
01530                         bl_pitch_sh2 =
01531                             (s->block_conv_table[2] + block_pitch) << 2;
01532                     } else
01533                         bl_pitch_sh2 = s->block_conv_table[3] << 2;
01534                 }
01535             }
01536             pitch[n] = bl_pitch_sh2 >> 2;
01537             break;
01538         }
01539 
01540         case ACB_TYPE_ASYMMETRIC: {
01541             bl_pitch_sh2 = pitch[n] << 2;
01542             break;
01543         }
01544 
01545         default: // ACB_TYPE_NONE has no pitch
01546             bl_pitch_sh2 = 0;
01547             break;
01548         }
01549 
01550         synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01551                     lsps, prev_lsps, &frame_descs[bd_idx],
01552                     &excitation[n * block_nsamples],
01553                     &synth[n * block_nsamples]);
01554     }
01555 
01556     /* Averaging projection filter, if applicable. Else, just copy samples
01557      * from synthesis buffer */
01558     if (s->do_apf) {
01559         double i_lsps[MAX_LSPS];
01560         float lpcs[MAX_LSPS];
01561 
01562         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01563             i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01564         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01565         postfilter(s, synth, samples, 80, lpcs,
01566                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01567                    frame_descs[bd_idx].fcb_type, pitch[0]);
01568 
01569         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01570             i_lsps[n] = cos(lsps[n]);
01571         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01572         postfilter(s, &synth[80], &samples[80], 80, lpcs,
01573                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01574                    frame_descs[bd_idx].fcb_type, pitch[0]);
01575     } else
01576         memcpy(samples, synth, 160 * sizeof(synth[0]));
01577 
01578     /* Cache values for next frame */
01579     s->frame_cntr++;
01580     if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
01581     s->last_acb_type = frame_descs[bd_idx].acb_type;
01582     switch (frame_descs[bd_idx].acb_type) {
01583     case ACB_TYPE_NONE:
01584         s->last_pitch_val = 0;
01585         break;
01586     case ACB_TYPE_ASYMMETRIC:
01587         s->last_pitch_val = cur_pitch_val;
01588         break;
01589     case ACB_TYPE_HAMMING:
01590         s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01591         break;
01592     }
01593 
01594     return 0;
01595 }
01596 
/**
 * Enforce a valid ordering on a set of LSF values, in place.
 *
 * Raises the first value to at least 0.0015 * pi, forces every following
 * value to be at least 0.0125 * pi above its predecessor, caps the last
 * value at 0.9985 * pi, and finally sorts the array if the cap broke the
 * ascending order.
 *
 * @param lsps array of num values, modified in place
 * @param num  number of values in lsps; must be at least 1
 */
static void stabilize_lsps(double *lsps, int num)
{
    const double min_first = 0.0015 * M_PI;
    const double min_gap   = 0.0125 * M_PI;
    const double max_last  = 0.9985 * M_PI;
    int i, j, in_order = 1;

    /* set minimum value for first, maximum value for last and minimum
     * spacing between LSF values.
     * Very similar to ff_set_min_dist_lsf(), but in double. */
    if (!(lsps[0] > min_first))
        lsps[0] = min_first;
    for (i = 1; i < num; i++) {
        const double lowest = lsps[i - 1] + min_gap;

        if (!(lsps[i] > lowest))
            lsps[i] = lowest;
    }
    if (lsps[num - 1] > max_last)
        lsps[num - 1] = max_last;

    /* nothing more to do unless some pair is out of order */
    for (i = 1; i < num; i++) {
        if (lsps[i] < lsps[i - 1]) {
            in_order = 0;
            break;
        }
    }
    if (in_order)
        return;

    /* reorder with a single insertion-sort pass over the whole array.
     * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
    for (i = 1; i < num; i++) {
        const double key = lsps[i];

        j = i - 1;
        while (j >= 0 && !(lsps[j] <= key)) {
            lsps[j + 1] = lsps[j];
            j--;
        }
        lsps[j + 1] = key;
    }
}
01637 
01647 static int check_bits_for_superframe(GetBitContext *orig_gb,
01648                                      WMAVoiceContext *s)
01649 {
01650     GetBitContext s_gb, *gb = &s_gb;
01651     int n, need_bits, bd_idx;
01652     const struct frame_type_desc *frame_desc;
01653 
01654     /* initialize a copy */
01655     init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01656     skip_bits_long(gb, get_bits_count(orig_gb));
01657     assert(get_bits_left(gb) == get_bits_left(orig_gb));
01658 
01659     /* superframe header */
01660     if (get_bits_left(gb) < 14)
01661         return 1;
01662     if (!get_bits1(gb))
01663         return -1;                        // WMAPro-in-WMAVoice superframe
01664     if (get_bits1(gb)) skip_bits(gb, 12); // number of  samples in superframe
01665     if (s->has_residual_lsps) {           // residual LSPs (for all frames)
01666         if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01667             return 1;
01668         skip_bits_long(gb, s->sframe_lsp_bitsize);
01669     }
01670 
01671     /* frames */
01672     for (n = 0; n < MAX_FRAMES; n++) {
01673         int aw_idx_is_ext = 0;
01674 
01675         if (!s->has_residual_lsps) {     // independent LSPs (per-frame)
01676            if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01677            skip_bits_long(gb, s->frame_lsp_bitsize);
01678         }
01679         bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01680         if (bd_idx < 0)
01681             return -1;                   // invalid frame type VLC code
01682         frame_desc = &frame_descs[bd_idx];
01683         if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01684             if (get_bits_left(gb) < s->pitch_nbits)
01685                 return 1;
01686             skip_bits_long(gb, s->pitch_nbits);
01687         }
01688         if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01689             skip_bits(gb, 8);
01690         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01691             int tmp = get_bits(gb, 6);
01692             if (tmp >= 0x36) {
01693                 skip_bits(gb, 2);
01694                 aw_idx_is_ext = 1;
01695             }
01696         }
01697 
01698         /* blocks */
01699         if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01700             need_bits = s->block_pitch_nbits +
01701                 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01702         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01703             need_bits = 2 * !aw_idx_is_ext;
01704         } else
01705             need_bits = 0;
01706         need_bits += frame_desc->frame_size;
01707         if (get_bits_left(gb) < need_bits)
01708             return 1;
01709         skip_bits_long(gb, need_bits);
01710     }
01711 
01712     return 0;
01713 }
01714 
01735 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01736 {
01737     WMAVoiceContext *s = ctx->priv_data;
01738     GetBitContext *gb = &s->gb, s_gb;
01739     int n, res, n_samples = 480;
01740     double lsps[MAX_FRAMES][MAX_LSPS];
01741     const double *mean_lsf = s->lsps == 16 ?
01742         wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01743     float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01744     float synth[MAX_LSPS + MAX_SFRAMESIZE];
01745     float *samples;
01746 
01747     memcpy(synth,      s->synth_history,
01748            s->lsps             * sizeof(*synth));
01749     memcpy(excitation, s->excitation_history,
01750            s->history_nsamples * sizeof(*excitation));
01751 
01752     if (s->sframe_cache_size > 0) {
01753         gb = &s_gb;
01754         init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01755         s->sframe_cache_size = 0;
01756     }
01757 
01758     if ((res = check_bits_for_superframe(gb, s)) == 1) {
01759         *got_frame_ptr = 0;
01760         return 1;
01761     }
01762 
01763     /* First bit is speech/music bit, it differentiates between WMAVoice
01764      * speech samples (the actual codec) and WMAVoice music samples, which
01765      * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
01766      * the wild yet. */
01767     if (!get_bits1(gb)) {
01768         av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01769         return -1;
01770     }
01771 
01772     /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
01773     if (get_bits1(gb)) {
01774         if ((n_samples = get_bits(gb, 12)) > 480) {
01775             av_log(ctx, AV_LOG_ERROR,
01776                    "Superframe encodes >480 samples (%d), not allowed\n",
01777                    n_samples);
01778             return -1;
01779         }
01780     }
01781     /* Parse LSPs, if global for the superframe (can also be per-frame). */
01782     if (s->has_residual_lsps) {
01783         double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01784 
01785         for (n = 0; n < s->lsps; n++)
01786             prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01787 
01788         if (s->lsps == 10) {
01789             dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01790         } else /* s->lsps == 16 */
01791             dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01792 
01793         for (n = 0; n < s->lsps; n++) {
01794             lsps[0][n]  = mean_lsf[n] + (a1[n]           - a2[n * 2]);
01795             lsps[1][n]  = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01796             lsps[2][n] += mean_lsf[n];
01797         }
01798         for (n = 0; n < 3; n++)
01799             stabilize_lsps(lsps[n], s->lsps);
01800     }
01801 
01802     /* get output buffer */
01803     s->frame.nb_samples = 480;
01804     if ((res = ctx->get_buffer(ctx, &s->frame)) < 0) {
01805         av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01806         return res;
01807     }
01808     s->frame.nb_samples = n_samples;
01809     samples = (float *)s->frame.data[0];
01810 
01811     /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
01812     for (n = 0; n < 3; n++) {
01813         if (!s->has_residual_lsps) {
01814             int m;
01815 
01816             if (s->lsps == 10) {
01817                 dequant_lsp10i(gb, lsps[n]);
01818             } else /* s->lsps == 16 */
01819                 dequant_lsp16i(gb, lsps[n]);
01820 
01821             for (m = 0; m < s->lsps; m++)
01822                 lsps[n][m] += mean_lsf[m];
01823             stabilize_lsps(lsps[n], s->lsps);
01824         }
01825 
01826         if ((res = synth_frame(ctx, gb, n,
01827                                &samples[n * MAX_FRAMESIZE],
01828                                lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01829                                &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01830                                &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01831             *got_frame_ptr = 0;
01832             return res;
01833         }
01834     }
01835 
01836     /* Statistics? FIXME - we don't check for length, a slight overrun
01837      * will be caught by internal buffer padding, and anything else
01838      * will be skipped, not read. */
01839     if (get_bits1(gb)) {
01840         res = get_bits(gb, 4);
01841         skip_bits(gb, 10 * (res + 1));
01842     }
01843 
01844     *got_frame_ptr = 1;
01845 
01846     /* Update history */
01847     memcpy(s->prev_lsps,           lsps[2],
01848            s->lsps             * sizeof(*s->prev_lsps));
01849     memcpy(s->synth_history,      &synth[MAX_SFRAMESIZE],
01850            s->lsps             * sizeof(*synth));
01851     memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01852            s->history_nsamples * sizeof(*excitation));
01853     if (s->do_apf)
01854         memmove(s->zero_exc_pf,       &s->zero_exc_pf[MAX_SFRAMESIZE],
01855                 s->history_nsamples * sizeof(*s->zero_exc_pf));
01856 
01857     return 0;
01858 }
01859 
01867 static int parse_packet_header(WMAVoiceContext *s)
01868 {
01869     GetBitContext *gb = &s->gb;
01870     unsigned int res;
01871 
01872     if (get_bits_left(gb) < 11)
01873         return 1;
01874     skip_bits(gb, 4);          // packet sequence number
01875     s->has_residual_lsps = get_bits1(gb);
01876     do {
01877         res = get_bits(gb, 6); // number of superframes per packet
01878                                // (minus first one if there is spillover)
01879         if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01880             return 1;
01881     } while (res == 0x3F);
01882     s->spillover_nbits   = get_bits(gb, s->spillover_bitsize);
01883 
01884     return 0;
01885 }
01886 
01902 static void copy_bits(PutBitContext *pb,
01903                       const uint8_t *data, int size,
01904                       GetBitContext *gb, int nbits)
01905 {
01906     int rmn_bytes, rmn_bits;
01907 
01908     rmn_bits = rmn_bytes = get_bits_left(gb);
01909     if (rmn_bits < nbits)
01910         return;
01911     if (nbits > pb->size_in_bits - put_bits_count(pb))
01912         return;
01913     rmn_bits &= 7; rmn_bytes >>= 3;
01914     if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01915         put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01916     avpriv_copy_bits(pb, data + size - rmn_bytes,
01917                  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01918 }
01919 
01931 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01932                                   int *got_frame_ptr, AVPacket *avpkt)
01933 {
01934     WMAVoiceContext *s = ctx->priv_data;
01935     GetBitContext *gb = &s->gb;
01936     int size, res, pos;
01937 
01938     /* Packets are sometimes a multiple of ctx->block_align, with a packet
01939      * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
01940      * feeds us ASF packets, which may concatenate multiple "codec" packets
01941      * in a single "muxer" packet, so we artificially emulate that by
01942      * capping the packet size at ctx->block_align. */
01943     for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01944     if (!size) {
01945         *got_frame_ptr = 0;
01946         return 0;
01947     }
01948     init_get_bits(&s->gb, avpkt->data, size << 3);
01949 
01950     /* size == ctx->block_align is used to indicate whether we are dealing with
01951      * a new packet or a packet of which we already read the packet header
01952      * previously. */
01953     if (size == ctx->block_align) { // new packet header
01954         if ((res = parse_packet_header(s)) < 0)
01955             return res;
01956 
01957         /* If the packet header specifies a s->spillover_nbits, then we want
01958          * to push out all data of the previous packet (+ spillover) before
01959          * continuing to parse new superframes in the current packet. */
01960         if (s->spillover_nbits > 0) {
01961             if (s->sframe_cache_size > 0) {
01962                 int cnt = get_bits_count(gb);
01963                 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01964                 flush_put_bits(&s->pb);
01965                 s->sframe_cache_size += s->spillover_nbits;
01966                 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01967                     *got_frame_ptr) {
01968                     cnt += s->spillover_nbits;
01969                     s->skip_bits_next = cnt & 7;
01970                     *(AVFrame *)data = s->frame;
01971                     return cnt >> 3;
01972                 } else
01973                     skip_bits_long (gb, s->spillover_nbits - cnt +
01974                                     get_bits_count(gb)); // resync
01975             } else
01976                 skip_bits_long(gb, s->spillover_nbits);  // resync
01977         }
01978     } else if (s->skip_bits_next)
01979         skip_bits(gb, s->skip_bits_next);
01980 
01981     /* Try parsing superframes in current packet */
01982     s->sframe_cache_size = 0;
01983     s->skip_bits_next = 0;
01984     pos = get_bits_left(gb);
01985     if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
01986         return res;
01987     } else if (*got_frame_ptr) {
01988         int cnt = get_bits_count(gb);
01989         s->skip_bits_next = cnt & 7;
01990         *(AVFrame *)data = s->frame;
01991         return cnt >> 3;
01992     } else if ((s->sframe_cache_size = pos) > 0) {
01993         /* rewind bit reader to start of last (incomplete) superframe... */
01994         init_get_bits(gb, avpkt->data, size << 3);
01995         skip_bits_long(gb, (size << 3) - pos);
01996         assert(get_bits_left(gb) == pos);
01997 
01998         /* ...and cache it for spillover in next packet */
01999         init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
02000         copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
02001         // FIXME bad - just copy bytes as whole and add use the
02002         // skip_bits_next field
02003     }
02004 
02005     return size;
02006 }
02007 
02008 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02009 {
02010     WMAVoiceContext *s = ctx->priv_data;
02011 
02012     if (s->do_apf) {
02013         ff_rdft_end(&s->rdft);
02014         ff_rdft_end(&s->irdft);
02015         ff_dct_end(&s->dct);
02016         ff_dct_end(&s->dst);
02017     }
02018 
02019     return 0;
02020 }
02021 
02022 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02023 {
02024     WMAVoiceContext *s = ctx->priv_data;
02025     int n;
02026 
02027     s->postfilter_agc    = 0;
02028     s->sframe_cache_size = 0;
02029     s->skip_bits_next    = 0;
02030     for (n = 0; n < s->lsps; n++)
02031         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02032     memset(s->excitation_history, 0,
02033            sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02034     memset(s->synth_history,      0,
02035            sizeof(*s->synth_history)      * MAX_LSPS);
02036     memset(s->gain_pred_err,      0,
02037            sizeof(s->gain_pred_err));
02038 
02039     if (s->do_apf) {
02040         memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02041                sizeof(*s->synth_filter_out_buf) * s->lsps);
02042         memset(s->dcf_mem,              0,
02043                sizeof(*s->dcf_mem)              * 2);
02044         memset(s->zero_exc_pf,          0,
02045                sizeof(*s->zero_exc_pf)          * s->history_nsamples);
02046         memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02047     }
02048 }
02049 
02050 AVCodec ff_wmavoice_decoder = {
02051     .name           = "wmavoice",
02052     .type           = AVMEDIA_TYPE_AUDIO,
02053     .id             = CODEC_ID_WMAVOICE,
02054     .priv_data_size = sizeof(WMAVoiceContext),
02055     .init           = wmavoice_decode_init,
02056     .close          = wmavoice_decode_end,
02057     .decode         = wmavoice_decode_packet,
02058     .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02059     .flush     = wmavoice_flush,
02060     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02061 };