FFmpeg: libavcodec/wmavoice.c Source File

00001 /*
00002  * Windows Media Audio Voice decoder.
00003  * Copyright (c) 2009 Ronald S. Bultje
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #define UNCHECKED_BITSTREAM_READER 1
00029 
00030 #include <math.h>
00031 
00032 #include "dsputil.h"
00033 #include "avcodec.h"
00034 #include "get_bits.h"
00035 #include "put_bits.h"
00036 #include "wmavoice_data.h"
00037 #include "celp_filters.h"
00038 #include "acelp_vectors.h"
00039 #include "acelp_filters.h"
00040 #include "lsp.h"
00041 #include "libavutil/lzo.h"
00042 #include "dct.h"
00043 #include "rdft.h"
00044 #include "sinewin.h"
00045 
/* Compile-time size limits used to dimension the decoder state below. */
00046 #define MAX_BLOCKS           8   ///< largest n_blocks value appearing in frame_descs[]
00047 #define MAX_LSPS             16  ///< largest LSP count selected in wmavoice_decode_init() (10 or 16)
00048 #define MAX_LSPS_ALIGN16     16  ///< head-room kept in front of synth_filter_out_buf[] (see postfilter())
00049 
00050 #define MAX_FRAMES           3   ///< multiplier used to derive MAX_SFRAMESIZE
00051 #define MAX_FRAMESIZE        160 ///< samples per frame; postfilter() works on at most half of this
00052 #define MAX_SIGNAL_HISTORY   416 ///< upper bound for history_nsamples (max_pitch_val + 8)
00053 #define MAX_SFRAMESIZE       (MAX_FRAMESIZE * MAX_FRAMES)
00055 #define SFRAME_CACHE_MAXSIZE 256 ///< payload bytes of sframe_cache[] (input padding is added on top)
00056 
00057 #define VLC_NBITS            6   ///< table bits passed to INIT_VLC_STATIC() for frame_type_vlc
00058 
00059 
/* VLC used for frame types; its static table is built in decode_vbmtree(). */
00062 static VLC frame_type_vlc;
00063 
/* Values stored in frame_type_desc.acb_type and WMAVoiceContext.last_acb_type.
 * NOTE(review): "ACB" presumably stands for adaptive codebook; the code that
 * interprets these values is outside this part of the file - confirm there. */
00067 enum {
00068     ACB_TYPE_NONE       = 0, ///< used by the first two frame_descs[] entries; initial last_acb_type
00069     ACB_TYPE_ASYMMETRIC = 1, ///< used by frame_descs[2..7]
00070 
00071 
00072 
00073 
00074     ACB_TYPE_HAMMING    = 2  ///< used by frame_descs[8..16]
00075 
00076 
00077 };
00078 
/* Values stored in frame_type_desc.fcb_type and passed around as fcb_type.
 * The numeric order matters: postfilter() tests
 * "fcb_type >= FCB_TYPE_AW_PULSES" to decide whether to run
 * kalman_smoothen().
 * NOTE(review): "FCB" presumably stands for fixed codebook - confirm. */
00082 enum {
00083     FCB_TYPE_SILENCE    = 0, ///< wiener_denoise() skips the filter computation for this type
00084 
00085 
00086     FCB_TYPE_HARDCODED  = 1, ///< selects the 5/13 gain multiplier in calc_input_response()
00087 
00088     FCB_TYPE_AW_PULSES  = 2, ///< lowest type for which postfilter() attempts smoothing
00089 
00090     FCB_TYPE_EXC_PULSES = 3, ///< used by all frame_descs[] entries from index 3 onwards
00091 
00092 
00093 };
00094 
/* Static description of each of the 17 frame types. The index into
 * frame_descs[] is the same 0..16 number that decode_vbmtree() stores into
 * the VBM tree. */
00098 static const struct frame_type_desc {
00099     uint8_t n_blocks;     ///< 1, 2, 4 or 8 in the table below
00100 
00101     uint8_t log_n_blocks; ///< log2(n_blocks) (0, 1, 2 or 3 in the table below)
00102     uint8_t acb_type;     ///< one of the ACB_TYPE_* values
00103     uint8_t fcb_type;     ///< one of the FCB_TYPE_* values
00104     uint8_t dbl_pulses;   ///< 0, 2 or 5 in the table below; NOTE(review): meaning not visible here
00105 
00106 
00107     uint16_t frame_size;  ///< NOTE(review): presumably the coded size of the frame in bits - confirm
00108 
00109 } frame_descs[17] = {
00110     { 1, 0, ACB_TYPE_NONE,       FCB_TYPE_SILENCE,    0,   0 },
00111     { 2, 1, ACB_TYPE_NONE,       FCB_TYPE_HARDCODED,  0,  28 },
00112     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
00113     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
00114     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00115     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00116     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00117     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00118     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0,  64 },
00119     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2,  80 },
00120     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 104 },
00121     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 108 },
00122     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 132 },
00123     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 168 },
00124     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 176 },
00125     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 208 },
00126     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 256 }
00127 };
00128 
/* Decoder private state (AVCodecContext.priv_data). Fields whose use is not
 * visible in this part of the file are left undocumented or marked
 * NOTE(review). */
00132 typedef struct {
00137     AVFrame frame;                ///< exported as ctx->coded_frame by wmavoice_decode_init()
00138     GetBitContext gb;             ///< bit reader; init points it at the extradata to parse the VBM tree
00139 
00140 
00141 
00142     int8_t vbm_tree[25];          ///< filled by decode_vbmtree(); unused slots hold 0xff (-1)
00143 
00144     int spillover_bitsize;        ///< 3 + ceil(log2(block_align))
00145 
00146 
00147     int history_nsamples;         ///< max_pitch_val + 8; must not exceed MAX_SIGNAL_HISTORY
00148 
00149 
00150     /* postfilter specific values */
00151     int do_apf;                   ///< extradata flag bit 0; when set, init creates the RDFT/DCT contexts
00152 
00153     int denoise_strength;         ///< extradata flag bits 2-5, valid range 0-11; row of wmavoice_denoise_power_table
00154 
00155     int denoise_tilt_corr;        ///< extradata flag 0x40; enables tilt compensation in calc_input_response()
00156 
00157     int dc_level;                 ///< extradata flag bits 7-10; values > 8 enable the high-pass in postfilter()
00158 
00159 
00160     int lsps;                     ///< number of LSPs: 16 if extradata flag 0x1000 is set, else 10
00161     int lsp_q_mode;               ///< extradata flag 0x2000
00162     int lsp_def_mode;             ///< extradata flag 0x4000
00163 
00164     int frame_lsp_bitsize;        ///< 34 for 16 LSPs, 24 for 10 LSPs
00165 
00166     int sframe_lsp_bitsize;       ///< 60 for 16 LSPs, 48 for 10 LSPs
00167 
00168 
00169     int min_pitch_val;            ///< derived from the sample rate (about rate / 400); must be >= 1
00170     int max_pitch_val;            ///< derived from the sample rate (about rate * 37 / 2000)
00171     int pitch_nbits;              ///< ceil(log2(max_pitch_val - min_pitch_val))
00172 
00173     int block_pitch_nbits;        ///< ceil(log2(block_pitch_range))
00174 
00175     int block_pitch_range;        ///< computed from block_conv_table[] and min_pitch_val in init
00176     int block_delta_pitch_nbits;  ///< 1 + ceil(log2(block_delta_pitch_hrange))
00177 
00178 
00179 
00180     int block_delta_pitch_hrange; ///< (pitch_range >> 3) & ~0xF; init rejects values <= 0
00181 
00182     uint16_t block_conv_table[4]; ///< { min_pitch_val, range*25/64, range*44/64, max_pitch_val-1 }
00183 
00184 
00194     int spillover_nbits;          
00195 
00196 
00197 
00198     int has_residual_lsps;        
00199 
00200 
00201 
00202 
00203     int skip_bits_next;           
00204 
00205 
00206 
00207     uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00210     int sframe_cache_size;        
00211 
00212 
00213 
00214 
00215     PutBitContext pb;             
00216 
00226     double prev_lsps[MAX_LSPS];   ///< initialised to lsps values evenly spaced inside (0, pi)
00227 
00228     int last_pitch_val;           ///< initialised to 40
00229     int last_acb_type;            ///< initialised to ACB_TYPE_NONE
00230     int pitch_diff_sh16;          
00231 
00232     float silence_gain;           
00233 
00234     int aw_idx_is_ext;            ///< set by aw_parse_coords(): 1 if the 6-bit pulse index was >= 54
00235 
00236     int aw_pulse_range;           ///< set by aw_parse_coords(): 24 if both pitch values exceed 32, else 16
00237 
00238 
00239 
00240 
00241 
00242     int aw_n_pulses[2];           ///< per-block pulse count, set by aw_parse_coords()
00243 
00244 
00245     int aw_first_pulse_off[2];    ///< per-block first pulse offset, set by aw_parse_coords()
00246 
00247     int aw_next_pulse_off_cache;  ///< read by aw_pulse_set2() for the second block
00248 
00249 
00250 
00251 
00252 
00253     int frame_cntr;               
00254 
00255     float gain_pred_err[6];       
00256     float excitation_history[MAX_SIGNAL_HISTORY];
00260     float synth_history[MAX_LSPS]; 
00261 
00270     RDFTContext rdft, irdft;      ///< 7-bit forward/inverse real FFT, created only when do_apf is set
00271 
00272     DCTContext dct, dst;          ///< 6-bit DCT-I / DST-I, created only when do_apf is set
00273 
00274     float sin[511], cos[511];     ///< lookup tables built in init; indexed with 255 + clipped value
00275 
00276     float postfilter_agc;         ///< gain memory handed to adaptive_gain_control()
00277 
00278     float dcf_mem[2];             ///< filter memory of the high-pass in postfilter()
00279     float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
00282     float denoise_filter_cache[MAX_FRAMESIZE]; ///< filter tail carried over between wiener_denoise() calls
00283     int   denoise_filter_cache_size; ///< number of valid entries in denoise_filter_cache[]
00284     DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80]; ///< scratch buffer used by wiener_denoise()
00286     DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80]; ///< filter coefficients produced by calc_input_response()
00288     DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; ///< postfilter() output; first MAX_LSPS_ALIGN16 floats hold history
00291 
00294 } WMAVoiceContext;
00295 
00305 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00306 {
00307     static const uint8_t bits[] = {
00308          2,  2,  2,  4,  4,  4,
00309          6,  6,  6,  8,  8,  8,
00310         10, 10, 10, 12, 12, 12,
00311         14, 14, 14, 14
00312     };
00313     static const uint16_t codes[] = {
00314           0x0000, 0x0001, 0x0002,        //              00/01/10
00315           0x000c, 0x000d, 0x000e,        //           11+00/01/10
00316           0x003c, 0x003d, 0x003e,        //         1111+00/01/10
00317           0x00fc, 0x00fd, 0x00fe,        //       111111+00/01/10
00318           0x03fc, 0x03fd, 0x03fe,        //     11111111+00/01/10
00319           0x0ffc, 0x0ffd, 0x0ffe,        //   1111111111+00/01/10
00320           0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
00321     };
00322     int cntr[8] = { 0 }, n, res;
00323 
00324     memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00325     for (n = 0; n < 17; n++) {
00326         res = get_bits(gb, 3);
00327         if (cntr[res] > 3) // should be >= 3 + (res == 7))
00328             return -1;
00329         vbm_tree[res * 3 + cntr[res]++] = n;
00330     }
00331     INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00332                     bits, 1, 1, codes, 2, 2, 132);
00333     return 0;
00334 }
00335 
00339 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00340 {
00341     int n, flags, pitch_range, lsp16_flag;
00342     WMAVoiceContext *s = ctx->priv_data;
00343 
00352     if (ctx->extradata_size != 46) {
00353         av_log(ctx, AV_LOG_ERROR,
00354                "Invalid extradata size %d (should be 46)\n",
00355                ctx->extradata_size);
00356         return -1;
00357     }
00358     flags                = AV_RL32(ctx->extradata + 18);
00359     s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00360     s->do_apf            =    flags & 0x1;
00361     if (s->do_apf) {
00362         ff_rdft_init(&s->rdft,  7, DFT_R2C);
00363         ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00364         ff_dct_init(&s->dct,  6, DCT_I);
00365         ff_dct_init(&s->dst,  6, DST_I);
00366 
00367         ff_sine_window_init(s->cos, 256);
00368         memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00369         for (n = 0; n < 255; n++) {
00370             s->sin[n]       = -s->sin[510 - n];
00371             s->cos[510 - n] =  s->cos[n];
00372         }
00373     }
00374     s->denoise_strength  =   (flags >> 2) & 0xF;
00375     if (s->denoise_strength >= 12) {
00376         av_log(ctx, AV_LOG_ERROR,
00377                "Invalid denoise filter strength %d (max=11)\n",
00378                s->denoise_strength);
00379         return -1;
00380     }
00381     s->denoise_tilt_corr = !!(flags & 0x40);
00382     s->dc_level          =   (flags >> 7) & 0xF;
00383     s->lsp_q_mode        = !!(flags & 0x2000);
00384     s->lsp_def_mode      = !!(flags & 0x4000);
00385     lsp16_flag           =    flags & 0x1000;
00386     if (lsp16_flag) {
00387         s->lsps               = 16;
00388         s->frame_lsp_bitsize  = 34;
00389         s->sframe_lsp_bitsize = 60;
00390     } else {
00391         s->lsps               = 10;
00392         s->frame_lsp_bitsize  = 24;
00393         s->sframe_lsp_bitsize = 48;
00394     }
00395     for (n = 0; n < s->lsps; n++)
00396         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00397 
00398     init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00399     if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00400         av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00401         return -1;
00402     }
00403 
00404     s->min_pitch_val    = ((ctx->sample_rate << 8)      /  400 + 50) >> 8;
00405     s->max_pitch_val    = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00406     pitch_range         = s->max_pitch_val - s->min_pitch_val;
00407     if (pitch_range <= 0) {
00408         av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00409         return -1;
00410     }
00411     s->pitch_nbits      = av_ceil_log2(pitch_range);
00412     s->last_pitch_val   = 40;
00413     s->last_acb_type    = ACB_TYPE_NONE;
00414     s->history_nsamples = s->max_pitch_val + 8;
00415 
00416     if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00417         int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00418             max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00419 
00420         av_log(ctx, AV_LOG_ERROR,
00421                "Unsupported samplerate %d (min=%d, max=%d)\n",
00422                ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
00423 
00424         return -1;
00425     }
00426 
00427     s->block_conv_table[0]      = s->min_pitch_val;
00428     s->block_conv_table[1]      = (pitch_range * 25) >> 6;
00429     s->block_conv_table[2]      = (pitch_range * 44) >> 6;
00430     s->block_conv_table[3]      = s->max_pitch_val - 1;
00431     s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00432     if (s->block_delta_pitch_hrange <= 0) {
00433         av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00434         return -1;
00435     }
00436     s->block_delta_pitch_nbits  = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00437     s->block_pitch_range        = s->block_conv_table[2] +
00438                                   s->block_conv_table[3] + 1 +
00439                                   2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00440     s->block_pitch_nbits        = av_ceil_log2(s->block_pitch_range);
00441 
00442     ctx->sample_fmt             = AV_SAMPLE_FMT_FLT;
00443 
00444     avcodec_get_frame_defaults(&s->frame);
00445     ctx->coded_frame = &s->frame;
00446 
00447     return 0;
00448 }
00449 
/**
 * Adaptive gain control: scale the post-filtered signal so that its level
 * follows that of the unfiltered synthesized speech.
 *
 * The target gain is the ratio of the summed absolute amplitudes of
 * speech_synth and in. It is approached gradually with a one-pole smoother:
 * mem = alpha * mem + (1 - alpha) * ratio.
 *
 * If the post-filtered input is all zero the ratio is undefined. The gain
 * contribution is then taken as 0, so that neither the output nor *gain_mem
 * turns into inf/NaN and corrupts all following calls.
 *
 * @param out          output samples (size entries); may not overlap in
 *                     unless identical to it
 * @param in           post-filtered input samples
 * @param speech_synth reference (unfiltered) speech samples
 * @param size         number of samples in each buffer
 * @param alpha        smoothing factor of the gain
 * @param gain_mem     gain carried over between calls; updated on return
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* guard against division by zero on silent post-filter output */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00492 
00511 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00512                            const float *in, float *out, int size)
00513 {
00514     int n;
00515     float optimal_gain = 0, dot;
00516     const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00517                 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00518                 *best_hist_ptr = NULL;
00519 
00520     /* find best fitting point in history */
00521     do {
00522         dot = ff_scalarproduct_float_c(in, ptr, size);
00523         if (dot > optimal_gain) {
00524             optimal_gain  = dot;
00525             best_hist_ptr = ptr;
00526         }
00527     } while (--ptr >= end);
00528 
00529     if (optimal_gain <= 0)
00530         return -1;
00531     dot = ff_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
00532     if (dot <= 0) // would be 1.0
00533         return -1;
00534 
00535     if (optimal_gain <= dot) {
00536         dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
00537     } else
00538         dot = 0.625;
00539 
00540     /* actual smoothing */
00541     for (n = 0; n < size; n++)
00542         out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00543 
00544     return 0;
00545 }
00546 
/**
 * Compute the tilt of a set of LPCs as the ratio of two correlation terms
 * of the sequence { 1, lpcs[0], ..., lpcs[n_lpcs - 1] }:
 * the lag-1 term divided by the lag-0 term.
 *
 * @param lpcs   LPC coefficients
 * @param n_lpcs number of coefficients in lpcs
 * @return lag-1 correlation divided by lag-0 correlation
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    float lag0 = 1.0     + ff_scalarproduct_float_c(lpcs, lpcs,     n_lpcs);
    float lag1 = lpcs[0] + ff_scalarproduct_float_c(lpcs, lpcs + 1, n_lpcs - 1);

    return lag1 / lag0;
}
00566 
/**
 * Derive the time-domain denoise filter coefficients from a set of LPCs.
 *
 * @param s         decoder context; uses the rdft/irdft/dct/dst transforms,
 *                  the sin[]/cos[] tables, denoise_strength and
 *                  denoise_tilt_corr
 * @param lpcs      in: zero-padded LPCs (the caller passes a 128-float
 *                  buffer); used as scratch and overwritten
 * @param fcb_type  FCB_TYPE_* of the frame; selects the gain multiplier
 * @param coeffs    out: 128 floats; the first remainder entries hold the
 *                  normalized filter, the rest is zeroed
 * @param remainder number of filter taps to keep
 *
 * NOTE(review): if the power spectrum contains an exact zero, log10f()
 * yields -inf and the table index computed from it is not obviously
 * bounded - worth confirming against the callers' value ranges.
 */
00570 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00571                                 int fcb_type, float *coeffs, int remainder)
00572 {
00573     float last_coeff, min = 15.0, max = -15.0;
00574     float irange, angle_mul, gain_mul, range, sq;
00575     int n, idx;
00576 
00577     /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
00578     s->rdft.rdft_calc(&s->rdft, lpcs);
      /* log_range(): store log10 of a power value and track the overall
       * minimum and maximum seen so far */
00579 #define log_range(var, assign) do { \
00580         float tmp = log10f(assign);  var = tmp; \
00581         max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00582     } while (0)
      /* lpcs[1] is handled separately and parked in last_coeff, because
       * lpcs[1] itself is overwritten by the n = 1 iteration below */
00583     log_range(last_coeff,  lpcs[1]         * lpcs[1]);
00584     for (n = 1; n < 64; n++)
00585         log_range(lpcs[n], lpcs[n * 2]     * lpcs[n * 2] +
00586                            lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00587     log_range(lpcs[0],     lpcs[0]         * lpcs[0]);
00588 #undef log_range
00589     range    = max - min;
00590     lpcs[64] = last_coeff;
00591 
00592     /* Now, use this spectrum to pick out these frequencies with higher
00593      * (relative) power/energy (which we then take to be "not noise"),
00594      * and set up a table (still in lpc[]) of (relative) gains per frequency.
00595      * These frequencies will be maintained, while others ("noise") will be
00596      * decreased in the filter output. */
00597     irange    = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
00598     gain_mul  = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00599                                                           (5.0 / 14.7));
00600     angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00601     for (n = 0; n <= 64; n++) {
00602         float pwr;
00603 
00604         idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00605         pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00606         lpcs[n] = angle_mul * pwr;
00607 
00608         /* 70.57 =~ 1/log10(1.0331663) */
00609         idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00610         if (idx > 127) { // fallback if index falls outside table range
00611             coeffs[n] = wmavoice_energy_table[127] *
00612                         powf(1.0331663, idx - 127);
00613         } else
00614             coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00615     }
00616 
00617     /* calculate the Hilbert transform of the gains, which we do (since this
00618      * is a sinus input) by doing a phase shift (in theory, H(sin())=cos()).
00619      * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
00620      * "moment" of the LPCs in this filter. */
00621     s->dct.dct_calc(&s->dct, lpcs);
00622     s->dst.dct_calc(&s->dst, lpcs);
00623 
00624     /* Split out the coefficient indexes into phase/magnitude pairs */
00625     idx = 255 + av_clip(lpcs[64],               -255, 255);
00626     coeffs[0]  = coeffs[0]  * s->cos[idx];
00627     idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00628     last_coeff = coeffs[64] * s->cos[idx];
      /* n is decremented twice per iteration (once in the for header, once
       * in the break test), so the loop body handles two values of n, with
       * opposite signs applied to lpcs[64]. The expansion runs from high to
       * low n so that coeffs[n] is still unmodified when it is read. */
00629     for (n = 63;; n--) {
00630         idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00631         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00632         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00633 
00634         if (!--n) break;
00635 
00636         idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00637         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00638         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00639     }
00640     coeffs[1] = last_coeff;
00641 
00642     /* move into real domain */
00643     s->irdft.rdft_calc(&s->irdft, coeffs);
00644 
00645     /* tilt correction and normalize scale */
00646     memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00647     if (s->denoise_tilt_corr) {
00648         float tilt_mem = 0;
00649 
00650         coeffs[remainder - 1] = 0;
00651         ff_tilt_compensation(&tilt_mem,
00652                              -1.8 * tilt_factor(coeffs, remainder - 1),
00653                              coeffs, remainder);
00654     }
      /* scale so that the kept taps have an energy of 1 / 64^2 */
00655     sq = (1.0 / 64.0) * sqrtf(1 / ff_scalarproduct_float_c(coeffs, coeffs, remainder));
00656     for (n = 0; n < remainder; n++)
00657         coeffs[n] *= sq;
00658 }
00659 
/**
 * Apply the frequency-domain denoise filter to one block of synthesized
 * speech and overlap-add the filter tail of previous blocks.
 *
 * For non-silent blocks the filter is derived from the tilted LPCs by
 * calc_input_response() and applied by complex multiplication in the RDFT
 * domain. The part of the result that extends beyond size samples is kept
 * in s->denoise_filter_cache and added to the start of following blocks.
 * For FCB_TYPE_SILENCE blocks only the cached tail is merged in.
 *
 * @param s        decoder context
 * @param fcb_type FCB_TYPE_* of the current frame
 * @param synth_pf in/out: size samples of speech; the buffer must provide
 *                 room for 128 floats, as it is used for the in-place
 *                 transforms
 * @param size     number of valid samples in synth_pf
 * @param lpcs     s->lsps LPC coefficients of the block
 */
00686 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00687                            float *synth_pf, int size,
00688                            const float *lpcs)
00689 {
      /* remainder is only assigned, and only read, when
       * fcb_type != FCB_TYPE_SILENCE */
00690     int remainder, lim, n;
00691 
00692     if (fcb_type != FCB_TYPE_SILENCE) {
00693         float *tilted_lpcs = s->tilted_lpcs_pf,
00694               *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00695 
          /* build { 1, lpcs[0..lsps-1], 0, ... } and tilt-compensate it */
00696         tilted_lpcs[0]           = 1.0;
00697         memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00698         memset(&tilted_lpcs[s->lsps + 1], 0,
00699                sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00700         ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00701                              tilted_lpcs, s->lsps + 2);
00702 
00703         /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
00704          * size is applied to the next frame. All input beyond this is zero,
00705          * and thus all output beyond this will go towards zero, hence we can
00706          * limit to min(size-1, 127-size) as a performance consideration. */
00707         remainder = FFMIN(127 - size, size - 1);
00708         calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00709 
00710         /* apply coefficients (in frequency spectrum domain), i.e. complex
00711          * number multiplication */
00712         memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00713         s->rdft.rdft_calc(&s->rdft, synth_pf);
00714         s->rdft.rdft_calc(&s->rdft, coeffs);
          /* entries 0 and 1 are multiplied as plain real values, unlike
           * the complex pairs handled in the loop */
00715         synth_pf[0] *= coeffs[0];
00716         synth_pf[1] *= coeffs[1];
00717         for (n = 1; n < 64; n++) {
00718             float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00719             synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00720             synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00721         }
00722         s->irdft.rdft_calc(&s->irdft, synth_pf);
00723     }
00724 
00725     /* merge filter output with the history of previous runs */
00726     if (s->denoise_filter_cache_size) {
00727         lim = FFMIN(s->denoise_filter_cache_size, size);
00728         for (n = 0; n < lim; n++)
00729             synth_pf[n] += s->denoise_filter_cache[n];
00730         s->denoise_filter_cache_size -= lim;
          /* shift whatever is left of the cache to the front; the count is
           * zero whenever the cache held no more than size entries */
00731         memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00732                 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00733     }
00734 
00735     /* move remainder of filter output into a cache for future runs */
00736     if (fcb_type != FCB_TYPE_SILENCE) {
          /* add onto the part of the cache that is still occupied, then
           * append the rest and grow the cache if needed */
00737         lim = FFMIN(remainder, s->denoise_filter_cache_size);
00738         for (n = 0; n < lim; n++)
00739             s->denoise_filter_cache[n] += synth_pf[size + n];
00740         if (lim < remainder) {
00741             memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00742                    sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00743             s->denoise_filter_cache_size = remainder;
00744         }
00745     }
00746 }
00747 
00768 static void postfilter(WMAVoiceContext *s, const float *synth,
00769                        float *samples,    int size,
00770                        const float *lpcs, float *zero_exc_pf,
00771                        int fcb_type,      int pitch)
00772 {
00773     float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00774           *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00775           *synth_filter_in = zero_exc_pf;
00776 
00777     av_assert0(size <= MAX_FRAMESIZE / 2);
00778 
00779     /* generate excitation from input signal */
00780     ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00781 
00782     if (fcb_type >= FCB_TYPE_AW_PULSES &&
00783         !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00784         synth_filter_in = synth_filter_in_buf;
00785 
00786     /* re-synthesize speech after smoothening, and keep history */
00787     ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00788                                  synth_filter_in, size, s->lsps);
00789     memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00790            sizeof(synth_pf[0]) * s->lsps);
00791 
00792     wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00793 
00794     adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00795                           &s->postfilter_agc);
00796 
00797     if (s->dc_level > 8) {
00798         /* remove ultra-low frequency DC noise / highpass filter;
00799          * coefficients are identical to those used in SIPR decoding,
00800          * and very closely resemble those used in AMR-NB decoding. */
00801         ff_acelp_apply_order_2_transfer_function(samples, samples,
00802             (const float[2]) { -1.99997,      1.0 },
00803             (const float[2]) { -1.9330735188, 0.93589198496 },
00804             0.93980580475, s->dcf_mem, size);
00805     }
00806 }
/**
 * Dequantize LSPs by summing the contributions of several codebook stages.
 *
 * For each stage n the codebook row values[n] (num bytes wide) is looked up
 * and base_q[n] + mul_q[n] * entry is accumulated per coefficient. The
 * codebooks of all stages are stored back to back in table, with stage n
 * holding sizes[n] rows.
 *
 * @param lsps     output, num entries; zeroed before accumulation
 * @param num      number of coefficients per codebook row
 * @param values   selected row index for each stage
 * @param sizes    number of rows in each stage's codebook
 * @param n_stages number of stages
 * @param table    concatenated codebooks
 * @param mul_q    per-stage scale
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;
    int stage, i;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *row   = codebook + values[stage] * num;
        const double  offset = base_q[stage];
        const double  scale  = mul_q[stage];

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * row[i];

        /* step over this stage's codebook */
        codebook += sizes[stage] * num;
    }
}
00842 
00854 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00855 {
00856     static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00857     static const double mul_lsf[4] = {
00858         5.2187144800e-3,    1.4626986422e-3,
00859         9.6179549166e-4,    1.1325736225e-3
00860     };
00861     static const double base_lsf[4] = {
00862         M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00863         M_PI * -3.3486e-2,  M_PI * -5.7408e-2
00864     };
00865     uint16_t v[4];
00866 
00867     v[0] = get_bits(gb, 8);
00868     v[1] = get_bits(gb, 6);
00869     v[2] = get_bits(gb, 5);
00870     v[3] = get_bits(gb, 5);
00871 
00872     dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00873                  mul_lsf, base_lsf);
00874 }
00875 
00880 static void dequant_lsp10r(GetBitContext *gb,
00881                            double *i_lsps, const double *old,
00882                            double *a1, double *a2, int q_mode)
00883 {
00884     static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00885     static const double mul_lsf[3] = {
00886         2.5807601174e-3,    1.2354460219e-3,   1.1763821673e-3
00887     };
00888     static const double base_lsf[3] = {
00889         M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00890     };
00891     const float (*ipol_tab)[2][10] = q_mode ?
00892         wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00893     uint16_t interpol, v[3];
00894     int n;
00895 
00896     dequant_lsp10i(gb, i_lsps);
00897 
00898     interpol = get_bits(gb, 5);
00899     v[0]     = get_bits(gb, 7);
00900     v[1]     = get_bits(gb, 6);
00901     v[2]     = get_bits(gb, 6);
00902 
00903     for (n = 0; n < 10; n++) {
00904         double delta = old[n] - i_lsps[n];
00905         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00906         a1[10 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00907     }
00908 
00909     dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00910                  mul_lsf, base_lsf);
00911 }
00912 
00916 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00917 {
00918     static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00919     static const double mul_lsf[5] = {
00920         3.3439586280e-3,    6.9908173703e-4,
00921         3.3216608306e-3,    1.0334960326e-3,
00922         3.1899104283e-3
00923     };
00924     static const double base_lsf[5] = {
00925         M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00926         M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00927         M_PI * -1.29816e-1
00928     };
00929     uint16_t v[5];
00930 
00931     v[0] = get_bits(gb, 8);
00932     v[1] = get_bits(gb, 6);
00933     v[2] = get_bits(gb, 7);
00934     v[3] = get_bits(gb, 6);
00935     v[4] = get_bits(gb, 7);
00936 
00937     dequant_lsps( lsps,     5,  v,     vec_sizes,    2,
00938                  wmavoice_dq_lsp16i1,  mul_lsf,     base_lsf);
00939     dequant_lsps(&lsps[5],  5, &v[2], &vec_sizes[2], 2,
00940                  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00941     dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00942                  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00943 }
00944 
00949 static void dequant_lsp16r(GetBitContext *gb,
00950                            double *i_lsps, const double *old,
00951                            double *a1, double *a2, int q_mode)
00952 {
00953     static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00954     static const double mul_lsf[3] = {
00955         1.2232979501e-3,   1.4062241527e-3,   1.6114744851e-3
00956     };
00957     static const double base_lsf[3] = {
00958         M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00959     };
00960     const float (*ipol_tab)[2][16] = q_mode ?
00961         wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00962     uint16_t interpol, v[3];
00963     int n;
00964 
00965     dequant_lsp16i(gb, i_lsps);
00966 
00967     interpol = get_bits(gb, 5);
00968     v[0]     = get_bits(gb, 7);
00969     v[1]     = get_bits(gb, 7);
00970     v[2]     = get_bits(gb, 7);
00971 
00972     for (n = 0; n < 16; n++) {
00973         double delta = old[n] - i_lsps[n];
00974         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00975         a1[16 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00976     }
00977 
00978     dequant_lsps( a2,     10,  v,     vec_sizes,    1,
00979                  wmavoice_dq_lsp16r1,  mul_lsf,     base_lsf);
00980     dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00981                  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00982     dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00983                  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00984 }
00985 
00999 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
01000                             const int *pitch)
01001 {
01002     static const int16_t start_offset[94] = {
01003         -11,  -9,  -7,  -5,  -3,  -1,   1,   3,   5,   7,   9,  11,
01004          13,  15,  18,  17,  19,  20,  21,  22,  23,  24,  25,  26,
01005          27,  28,  29,  30,  31,  32,  33,  35,  37,  39,  41,  43,
01006          45,  47,  49,  51,  53,  55,  57,  59,  61,  63,  65,  67,
01007          69,  71,  73,  75,  77,  79,  81,  83,  85,  87,  89,  91,
01008          93,  95,  97,  99, 101, 103, 105, 107, 109, 111, 113, 115,
01009         117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01010         141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01011     };
01012     int bits, offset;
01013 
01014     /* position of pulse */
01015     s->aw_idx_is_ext = 0;
01016     if ((bits = get_bits(gb, 6)) >= 54) {
01017         s->aw_idx_is_ext = 1;
01018         bits += (bits - 54) * 3 + get_bits(gb, 2);
01019     }
01020 
01021     /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
01022      * the distribution of the pulses in each block contained in this frame. */
01023     s->aw_pulse_range        = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01024     for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01025     s->aw_n_pulses[0]        = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01026     s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01027     offset                  += s->aw_n_pulses[0] * pitch[0];
01028     s->aw_n_pulses[1]        = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01029     s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01030 
01031     /* if continuing from a position before the block, reset position to
01032      * start of block (when corrected for the range over which it can be
01033      * spread in aw_pulse_set1()). */
01034     if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01035         while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01036             s->aw_first_pulse_off[1] -= pitch[1];
01037         if (start_offset[bits] < 0)
01038             while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01039                 s->aw_first_pulse_off[0] -= pitch[0];
01040     }
01041 }
01042 
01050 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01051                           int block_idx, AMRFixed *fcb)
01052 {
01053     uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
01054     uint16_t *use_mask = use_mask_mem + 2;
01055     /* in this function, idx is the index in the 80-bit (+ padding) use_mask
01056      * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
01057      * of idx are the position of the bit within a particular item in the
01058      * array (0 being the most significant bit, and 15 being the least
01059      * significant bit), and the remainder (>> 4) is the index in the
01060      * use_mask[]-array. This is faster and uses less memory than using a
01061      * 80-byte/80-int array. */
01062     int pulse_off = s->aw_first_pulse_off[block_idx],
01063         pulse_start, n, idx, range, aidx, start_off = 0;
01064 
01065     /* set offset of first pulse to within this block */
01066     if (s->aw_n_pulses[block_idx] > 0)
01067         while (pulse_off + s->aw_pulse_range < 1)
01068             pulse_off += fcb->pitch_lag;
01069 
01070     /* find range per pulse */
01071     if (s->aw_n_pulses[0] > 0) {
01072         if (block_idx == 0) {
01073             range = 32;
01074         } else /* block_idx = 1 */ {
01075             range = 8;
01076             if (s->aw_n_pulses[block_idx] > 0)
01077                 pulse_off = s->aw_next_pulse_off_cache;
01078         }
01079     } else
01080         range = 16;
01081     pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01082 
01083     /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
01084      * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
01085      * we exclude that range from being pulsed again in this function. */
01086     memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01087     memset( use_mask,   -1, 5 * sizeof(use_mask[0]));
01088     memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01089     if (s->aw_n_pulses[block_idx] > 0)
01090         for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01091             int excl_range         = s->aw_pulse_range; // always 16 or 24
01092             uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01093             int first_sh           = 16 - (idx & 15);
01094             *use_mask_ptr++       &= 0xFFFFu << first_sh;
01095             excl_range            -= first_sh;
01096             if (excl_range >= 16) {
01097                 *use_mask_ptr++    = 0;
01098                 *use_mask_ptr     &= 0xFFFF >> (excl_range - 16);
01099             } else
01100                 *use_mask_ptr     &= 0xFFFF >> excl_range;
01101         }
01102 
01103     /* find the 'aidx'th offset that is not excluded */
01104     aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01105     for (n = 0; n <= aidx; pulse_start++) {
01106         for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01107         if (idx >= MAX_FRAMESIZE / 2) { // find from zero
01108             if (use_mask[0])      idx = 0x0F;
01109             else if (use_mask[1]) idx = 0x1F;
01110             else if (use_mask[2]) idx = 0x2F;
01111             else if (use_mask[3]) idx = 0x3F;
01112             else if (use_mask[4]) idx = 0x4F;
01113             else                  return;
01114             idx -= av_log2_16bit(use_mask[idx >> 4]);
01115         }
01116         if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01117             use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01118             n++;
01119             start_off = idx;
01120         }
01121     }
01122 
01123     fcb->x[fcb->n] = start_off;
01124     fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01125     fcb->n++;
01126 
01127     /* set offset for next block, relative to start of that block */
01128     n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01129     s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01130 }
01131 
01139 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01140                           int block_idx, AMRFixed *fcb)
01141 {
01142     int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01143     float v;
01144 
01145     if (s->aw_n_pulses[block_idx] > 0) {
01146         int n, v_mask, i_mask, sh, n_pulses;
01147 
01148         if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
01149             n_pulses = 3;
01150             v_mask   = 8;
01151             i_mask   = 7;
01152             sh       = 4;
01153         } else { // 4 pulses, 1:sign + 2:index each
01154             n_pulses = 4;
01155             v_mask   = 4;
01156             i_mask   = 3;
01157             sh       = 3;
01158         }
01159 
01160         for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01161             fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01162             fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01163                                  s->aw_first_pulse_off[block_idx];
01164             while (fcb->x[fcb->n] < 0)
01165                 fcb->x[fcb->n] += fcb->pitch_lag;
01166             if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01167                 fcb->n++;
01168         }
01169     } else {
01170         int num2 = (val & 0x1FF) >> 1, delta, idx;
01171 
01172         if (num2 < 1 * 79)      { delta = 1; idx = num2 + 1; }
01173         else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01174         else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01175         else                    { delta = 7; idx = num2 + 1 - 3 * 75; }
01176         v = (val & 0x200) ? -1.0 : 1.0;
01177 
01178         fcb->no_repeat_mask |= 3 << fcb->n;
01179         fcb->x[fcb->n]       = idx - delta;
01180         fcb->y[fcb->n]       = v;
01181         fcb->x[fcb->n + 1]   = idx;
01182         fcb->y[fcb->n + 1]   = (val & 1) ? -v : v;
01183         fcb->n              += 2;
01184     }
01185 }
01186 
/**
 * Deterministic pseudo-random number derived from the frame counter and
 * block number.
 *
 * With x = (block_num * 1877 + frame_cntr) reduced by 0xFFFF and
 * y = (x % 9) * 5 + 6, the function computes z = (49995 * x) / y truncated
 * to 16 bits, and returns z modulo (1000 - block_size).
 *
 * @param frame_cntr current frame counter
 * @param block_num  index of the block inside the frame
 * @param block_size number of samples in the block
 * @return a value in [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* The division by y is avoided with a lookup table: y only takes 9
     * values, and for each of them
     *   z = x * (49995 / y) + x * ((49995 % y) / y).
     * Each row belongs to one value of y; the first entry is 49995 / y,
     * the second is (49995 % y) multiplied by a 32-bit fixed-point
     * reciprocal of y, so that the high word of its product with x gives
     * x * (49995 % y) / y. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 *  93368855U }  // y = 46
    };
    unsigned int seed = block_num * 1877 + frame_cntr;
    unsigned int rem9, frac_part, quot;

    /* max value of seed is 8*1877+0xFFFE=0x13AA6, so a single subtraction
     * is effectively a modulo (%) */
    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    /* seed % 9: high 32 bits of seed * (2^32 / 9) give seed / 9 */
    rem9 = seed - 9 * (int) (((int64_t) 477218589 * (int) seed) >> 32);

    /* high 32 bits of the unsigned 64-bit product */
    frac_part = (unsigned int) (((uint64_t) seed * div_tbl[rem9][1]) >> 32);

    /* quot = seed * 49995 / (rem9 * 5 + 6), kept to 16 bits */
    quot = (uint16_t) (seed * div_tbl[rem9][0] + frac_part);

    return quot % (1000 - block_size);
}
01231 
01236 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01237                                  int block_idx, int size,
01238                                  const struct frame_type_desc *frame_desc,
01239                                  float *excitation)
01240 {
01241     float gain;
01242     int n, r_idx;
01243 
01244     av_assert0(size <= MAX_FRAMESIZE);
01245 
01246     /* Set the offset from which we start reading wmavoice_std_codebook */
01247     if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01248         r_idx = pRNG(s->frame_cntr, block_idx, size);
01249         gain  = s->silence_gain;
01250     } else /* FCB_TYPE_HARDCODED */ {
01251         r_idx = get_bits(gb, 8);
01252         gain  = wmavoice_gain_universal[get_bits(gb, 6)];
01253     }
01254 
01255     /* Clear gain prediction parameters */
01256     memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01257 
01258     /* Apply gain to hardcoded codebook and use that as excitation signal */
01259     for (n = 0; n < size; n++)
01260         excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01261 }
01262 
01267 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01268                                 int block_idx, int size,
01269                                 int block_pitch_sh2,
01270                                 const struct frame_type_desc *frame_desc,
01271                                 float *excitation)
01272 {
01273     static const float gain_coeff[6] = {
01274         0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01275     };
01276     float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01277     int n, idx, gain_weight;
01278     AMRFixed fcb;
01279 
01280     av_assert0(size <= MAX_FRAMESIZE / 2);
01281     memset(pulses, 0, sizeof(*pulses) * size);
01282 
01283     fcb.pitch_lag      = block_pitch_sh2 >> 2;
01284     fcb.pitch_fac      = 1.0;
01285     fcb.no_repeat_mask = 0;
01286     fcb.n              = 0;
01287 
01288     /* For the other frame types, this is where we apply the innovation
01289      * (fixed) codebook pulses of the speech signal. */
01290     if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01291         aw_pulse_set1(s, gb, block_idx, &fcb);
01292         aw_pulse_set2(s, gb, block_idx, &fcb);
01293     } else /* FCB_TYPE_EXC_PULSES */ {
01294         int offset_nbits = 5 - frame_desc->log_n_blocks;
01295 
01296         fcb.no_repeat_mask = -1;
01297         /* similar to ff_decode_10_pulses_35bits(), but with single pulses
01298          * (instead of double) for a subset of pulses */
01299         for (n = 0; n < 5; n++) {
01300             float sign;
01301             int pos1, pos2;
01302 
01303             sign           = get_bits1(gb) ? 1.0 : -1.0;
01304             pos1           = get_bits(gb, offset_nbits);
01305             fcb.x[fcb.n]   = n + 5 * pos1;
01306             fcb.y[fcb.n++] = sign;
01307             if (n < frame_desc->dbl_pulses) {
01308                 pos2           = get_bits(gb, offset_nbits);
01309                 fcb.x[fcb.n]   = n + 5 * pos2;
01310                 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01311             }
01312         }
01313     }
01314     ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01315 
01316     /* Calculate gain for adaptive & fixed codebook signal.
01317      * see ff_amr_set_fixed_gain(). */
01318     idx = get_bits(gb, 7);
01319     fcb_gain = expf(ff_scalarproduct_float_c(s->gain_pred_err, gain_coeff, 6) -
01320                     5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01321     acb_gain = wmavoice_gain_codebook_acb[idx];
01322     pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01323                         -2.9957322736 /* log(0.05) */,
01324                          1.6094379124 /* log(5.0)  */);
01325 
01326     gain_weight = 8 >> frame_desc->log_n_blocks;
01327     memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01328             sizeof(*s->gain_pred_err) * (6 - gain_weight));
01329     for (n = 0; n < gain_weight; n++)
01330         s->gain_pred_err[n] = pred_err;
01331 
01332     /* Calculation of adaptive codebook */
01333     if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01334         int len;
01335         for (n = 0; n < size; n += len) {
01336             int next_idx_sh16;
01337             int abs_idx    = block_idx * size + n;
01338             int pitch_sh16 = (s->last_pitch_val << 16) +
01339                              s->pitch_diff_sh16 * abs_idx;
01340             int pitch      = (pitch_sh16 + 0x6FFF) >> 16;
01341             int idx_sh16   = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01342             idx            = idx_sh16 >> 16;
01343             if (s->pitch_diff_sh16) {
01344                 if (s->pitch_diff_sh16 > 0) {
01345                     next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01346                 } else
01347                     next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01348                 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01349                               1, size - n);
01350             } else
01351                 len = size;
01352 
01353             ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01354                                   wmavoice_ipol1_coeffs, 17,
01355                                   idx, 9, len);
01356         }
01357     } else /* ACB_TYPE_HAMMING */ {
01358         int block_pitch = block_pitch_sh2 >> 2;
01359         idx             = block_pitch_sh2 & 3;
01360         if (idx) {
01361             ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01362                                   wmavoice_ipol2_coeffs, 4,
01363                                   idx, 8, size);
01364         } else
01365             av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01366                               sizeof(float) * size);
01367     }
01368 
01369     /* Interpolate ACB/FCB and use as excitation signal */
01370     ff_weighted_vector_sumf(excitation, excitation, pulses,
01371                             acb_gain, fcb_gain, size);
01372 }
01373 
01390 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01391                         int block_idx, int size,
01392                         int block_pitch_sh2,
01393                         const double *lsps, const double *prev_lsps,
01394                         const struct frame_type_desc *frame_desc,
01395                         float *excitation, float *synth)
01396 {
01397     double i_lsps[MAX_LSPS];
01398     float lpcs[MAX_LSPS];
01399     float fac;
01400     int n;
01401 
01402     if (frame_desc->acb_type == ACB_TYPE_NONE)
01403         synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01404     else
01405         synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01406                             frame_desc, excitation);
01407 
01408     /* convert interpolated LSPs to LPCs */
01409     fac = (block_idx + 0.5) / frame_desc->n_blocks;
01410     for (n = 0; n < s->lsps; n++) // LSF -> LSP
01411         i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01412     ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01413 
01414     /* Speech synthesis */
01415     ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01416 }
01417 
01433 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01434                        float *samples,
01435                        const double *lsps, const double *prev_lsps,
01436                        float *excitation, float *synth)
01437 {
01438     WMAVoiceContext *s = ctx->priv_data;
01439     int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01440     int pitch[MAX_BLOCKS], last_block_pitch;
01441 
01442     /* Parse frame type ("frame header"), see frame_descs */
01443     int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
01444 
01445     if (bd_idx < 0) {
01446         av_log(ctx, AV_LOG_ERROR,
01447                "Invalid frame type VLC code, skipping\n");
01448         return -1;
01449     }
01450 
01451     block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01452 
01453     /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
01454     if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01455         /* Pitch is provided per frame, which is interpreted as the pitch of
01456          * the last sample of the last block of this frame. We can interpolate
01457          * the pitch of other blocks (and even pitch-per-sample) by gradually
01458          * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
01459         n_blocks_x2      = frame_descs[bd_idx].n_blocks << 1;
01460         log_n_blocks_x2  = frame_descs[bd_idx].log_n_blocks + 1;
01461         cur_pitch_val    = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01462         cur_pitch_val    = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01463         if (s->last_acb_type == ACB_TYPE_NONE ||
01464             20 * abs(cur_pitch_val - s->last_pitch_val) >
01465                 (cur_pitch_val + s->last_pitch_val))
01466             s->last_pitch_val = cur_pitch_val;
01467 
01468         /* pitch per block */
01469         for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01470             int fac = n * 2 + 1;
01471 
01472             pitch[n] = (MUL16(fac,                 cur_pitch_val) +
01473                         MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01474                         frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01475         }
01476 
01477         /* "pitch-diff-per-sample" for calculation of pitch per sample */
01478         s->pitch_diff_sh16 =
01479             ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01480     }
01481 
01482     /* Global gain (if silence) and pitch-adaptive window coordinates */
01483     switch (frame_descs[bd_idx].fcb_type) {
01484     case FCB_TYPE_SILENCE:
01485         s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01486         break;
01487     case FCB_TYPE_AW_PULSES:
01488         aw_parse_coords(s, gb, pitch);
01489         break;
01490     }
01491 
01492     for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01493         int bl_pitch_sh2;
01494 
01495         /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
01496         switch (frame_descs[bd_idx].acb_type) {
01497         case ACB_TYPE_HAMMING: {
01498             /* Pitch is given per block. Per-block pitches are encoded as an
01499              * absolute value for the first block, and then delta values
01500              * relative to this value) for all subsequent blocks. The scale of
01501              * this pitch value is semi-logaritmic compared to its use in the
01502              * decoder, so we convert it to normal scale also. */
01503             int block_pitch,
01504                 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01505                 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01506                 t3 =  s->block_conv_table[3] - s->block_conv_table[2] + 1;
01507 
01508             if (n == 0) {
01509                 block_pitch = get_bits(gb, s->block_pitch_nbits);
01510             } else
01511                 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01512                                  get_bits(gb, s->block_delta_pitch_nbits);
01513             /* Convert last_ so that any next delta is within _range */
01514             last_block_pitch = av_clip(block_pitch,
01515                                        s->block_delta_pitch_hrange,
01516                                        s->block_pitch_range -
01517                                            s->block_delta_pitch_hrange);
01518 
01519             /* Convert semi-log-style scale back to normal scale */
01520             if (block_pitch < t1) {
01521                 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01522             } else {
01523                 block_pitch -= t1;
01524                 if (block_pitch < t2) {
01525                     bl_pitch_sh2 =
01526                         (s->block_conv_table[1] << 2) + (block_pitch << 1);
01527                 } else {
01528                     block_pitch -= t2;
01529                     if (block_pitch < t3) {
01530                         bl_pitch_sh2 =
01531                             (s->block_conv_table[2] + block_pitch) << 2;
01532                     } else
01533                         bl_pitch_sh2 = s->block_conv_table[3] << 2;
01534                 }
01535             }
01536             pitch[n] = bl_pitch_sh2 >> 2;
01537             break;
01538         }
01539 
01540         case ACB_TYPE_ASYMMETRIC: {
01541             bl_pitch_sh2 = pitch[n] << 2;
01542             break;
01543         }
01544 
01545         default: // ACB_TYPE_NONE has no pitch
01546             bl_pitch_sh2 = 0;
01547             break;
01548         }
01549 
01550         synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01551                     lsps, prev_lsps, &frame_descs[bd_idx],
01552                     &excitation[n * block_nsamples],
01553                     &synth[n * block_nsamples]);
01554     }
01555 
01556     /* Averaging projection filter, if applicable. Else, just copy samples
01557      * from synthesis buffer */
01558     if (s->do_apf) {
01559         double i_lsps[MAX_LSPS];
01560         float lpcs[MAX_LSPS];
01561 
01562         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01563             i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01564         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01565         postfilter(s, synth, samples, 80, lpcs,
01566                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01567                    frame_descs[bd_idx].fcb_type, pitch[0]);
01568 
01569         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01570             i_lsps[n] = cos(lsps[n]);
01571         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01572         postfilter(s, &synth[80], &samples[80], 80, lpcs,
01573                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01574                    frame_descs[bd_idx].fcb_type, pitch[0]);
01575     } else
01576         memcpy(samples, synth, 160 * sizeof(synth[0]));
01577 
01578     /* Cache values for next frame */
01579     s->frame_cntr++;
01580     if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
01581     s->last_acb_type = frame_descs[bd_idx].acb_type;
01582     switch (frame_descs[bd_idx].acb_type) {
01583     case ACB_TYPE_NONE:
01584         s->last_pitch_val = 0;
01585         break;
01586     case ACB_TYPE_ASYMMETRIC:
01587         s->last_pitch_val = cur_pitch_val;
01588         break;
01589     case ACB_TYPE_HAMMING:
01590         s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01591         break;
01592     }
01593 
01594     return 0;
01595 }
01596 
/**
 * Make a set of LSF values well-behaved: enforce a minimum for the first
 * value, a minimum distance between neighbours and a maximum for the last
 * value, then re-sort the array if that left it out of order.
 *
 * @param lsps array of num LSF values, modified in place
 * @param num  number of entries in lsps
 */
static void stabilize_lsps(double *lsps, int num)
{
    const double first_min = 0.0015 * M_PI;
    const double last_max  = 0.9985 * M_PI;
    int i, j;

    /* Lower bound for the first value and minimum spacing between values.
     * Very similar to ff_set_min_dist_lsf(), but in double. */
    lsps[0] = lsps[0] > first_min ? lsps[0] : first_min;
    for (i = 1; i < num; i++) {
        const double floor_val = lsps[i - 1] + 0.0125 * M_PI;

        lsps[i] = lsps[i] > floor_val ? lsps[i] : floor_val;
    }
    /* Upper bound for the last value; this is the step that can push it
     * below its predecessor. */
    lsps[num - 1] = lsps[num - 1] > last_max ? last_max : lsps[num - 1];

    /* Look for the first out-of-order pair; nothing to do if there is none. */
    for (i = 1; i < num; i++) {
        if (lsps[i] < lsps[i - 1])
            break;
    }
    if (i >= num)
        return;

    /* One full insertion sort pass over the array.
     * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
    for (i = 1; i < num; i++) {
        const double cur = lsps[i];

        for (j = i; j > 0 && !(lsps[j - 1] <= cur); j--)
            lsps[j] = lsps[j - 1];
        lsps[j] = cur;
    }
}
01637 
01647 static int check_bits_for_superframe(GetBitContext *orig_gb,
01648                                      WMAVoiceContext *s)
01649 {
01650     GetBitContext s_gb, *gb = &s_gb;
01651     int n, need_bits, bd_idx;
01652     const struct frame_type_desc *frame_desc;
01653 
01654     /* initialize a copy */
01655     init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01656     skip_bits_long(gb, get_bits_count(orig_gb));
01657     av_assert1(get_bits_left(gb) == get_bits_left(orig_gb));
01658 
01659     /* superframe header */
01660     if (get_bits_left(gb) < 14)
01661         return 1;
01662     if (!get_bits1(gb))
01663         return -1;                        // WMAPro-in-WMAVoice superframe
01664     if (get_bits1(gb)) skip_bits(gb, 12); // number of  samples in superframe
01665     if (s->has_residual_lsps) {           // residual LSPs (for all frames)
01666         if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01667             return 1;
01668         skip_bits_long(gb, s->sframe_lsp_bitsize);
01669     }
01670 
01671     /* frames */
01672     for (n = 0; n < MAX_FRAMES; n++) {
01673         int aw_idx_is_ext = 0;
01674 
01675         if (!s->has_residual_lsps) {     // independent LSPs (per-frame)
01676            if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01677            skip_bits_long(gb, s->frame_lsp_bitsize);
01678         }
01679         bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01680         if (bd_idx < 0)
01681             return -1;                   // invalid frame type VLC code
01682         frame_desc = &frame_descs[bd_idx];
01683         if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01684             if (get_bits_left(gb) < s->pitch_nbits)
01685                 return 1;
01686             skip_bits_long(gb, s->pitch_nbits);
01687         }
01688         if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01689             skip_bits(gb, 8);
01690         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01691             int tmp = get_bits(gb, 6);
01692             if (tmp >= 0x36) {
01693                 skip_bits(gb, 2);
01694                 aw_idx_is_ext = 1;
01695             }
01696         }
01697 
01698         /* blocks */
01699         if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01700             need_bits = s->block_pitch_nbits +
01701                 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01702         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01703             need_bits = 2 * !aw_idx_is_ext;
01704         } else
01705             need_bits = 0;
01706         need_bits += frame_desc->frame_size;
01707         if (get_bits_left(gb) < need_bits)
01708             return 1;
01709         skip_bits_long(gb, need_bits);
01710     }
01711 
01712     return 0;
01713 }
01714 
01732 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01733 {
01734     WMAVoiceContext *s = ctx->priv_data;
01735     GetBitContext *gb = &s->gb, s_gb;
01736     int n, res, n_samples = 480;
01737     double lsps[MAX_FRAMES][MAX_LSPS];
01738     const double *mean_lsf = s->lsps == 16 ?
01739         wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01740     float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01741     float synth[MAX_LSPS + MAX_SFRAMESIZE];
01742     float *samples;
01743 
01744     memcpy(synth,      s->synth_history,
01745            s->lsps             * sizeof(*synth));
01746     memcpy(excitation, s->excitation_history,
01747            s->history_nsamples * sizeof(*excitation));
01748 
01749     if (s->sframe_cache_size > 0) {
01750         gb = &s_gb;
01751         init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01752         s->sframe_cache_size = 0;
01753     }
01754 
01755     if ((res = check_bits_for_superframe(gb, s)) == 1) {
01756         *got_frame_ptr = 0;
01757         return 1;
01758     }
01759 
01760     /* First bit is speech/music bit, it differentiates between WMAVoice
01761      * speech samples (the actual codec) and WMAVoice music samples, which
01762      * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
01763      * the wild yet. */
01764     if (!get_bits1(gb)) {
01765         av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01766         return -1;
01767     }
01768 
01769     /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
01770     if (get_bits1(gb)) {
01771         if ((n_samples = get_bits(gb, 12)) > 480) {
01772             av_log(ctx, AV_LOG_ERROR,
01773                    "Superframe encodes >480 samples (%d), not allowed\n",
01774                    n_samples);
01775             return -1;
01776         }
01777     }
01778     /* Parse LSPs, if global for the superframe (can also be per-frame). */
01779     if (s->has_residual_lsps) {
01780         double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01781 
01782         for (n = 0; n < s->lsps; n++)
01783             prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01784 
01785         if (s->lsps == 10) {
01786             dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01787         } else /* s->lsps == 16 */
01788             dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01789 
01790         for (n = 0; n < s->lsps; n++) {
01791             lsps[0][n]  = mean_lsf[n] + (a1[n]           - a2[n * 2]);
01792             lsps[1][n]  = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01793             lsps[2][n] += mean_lsf[n];
01794         }
01795         for (n = 0; n < 3; n++)
01796             stabilize_lsps(lsps[n], s->lsps);
01797     }
01798 
01799     /* get output buffer */
01800     s->frame.nb_samples = 480;
01801     if ((res = ctx->get_buffer(ctx, &s->frame)) < 0) {
01802         av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01803         return res;
01804     }
01805     s->frame.nb_samples = n_samples;
01806     samples = (float *)s->frame.data[0];
01807 
01808     /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
01809     for (n = 0; n < 3; n++) {
01810         if (!s->has_residual_lsps) {
01811             int m;
01812 
01813             if (s->lsps == 10) {
01814                 dequant_lsp10i(gb, lsps[n]);
01815             } else /* s->lsps == 16 */
01816                 dequant_lsp16i(gb, lsps[n]);
01817 
01818             for (m = 0; m < s->lsps; m++)
01819                 lsps[n][m] += mean_lsf[m];
01820             stabilize_lsps(lsps[n], s->lsps);
01821         }
01822 
01823         if ((res = synth_frame(ctx, gb, n,
01824                                &samples[n * MAX_FRAMESIZE],
01825                                lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01826                                &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01827                                &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01828             *got_frame_ptr = 0;
01829             return res;
01830         }
01831     }
01832 
01833     /* Statistics? FIXME - we don't check for length, a slight overrun
01834      * will be caught by internal buffer padding, and anything else
01835      * will be skipped, not read. */
01836     if (get_bits1(gb)) {
01837         res = get_bits(gb, 4);
01838         skip_bits(gb, 10 * (res + 1));
01839     }
01840 
01841     *got_frame_ptr = 1;
01842 
01843     /* Update history */
01844     memcpy(s->prev_lsps,           lsps[2],
01845            s->lsps             * sizeof(*s->prev_lsps));
01846     memcpy(s->synth_history,      &synth[MAX_SFRAMESIZE],
01847            s->lsps             * sizeof(*synth));
01848     memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01849            s->history_nsamples * sizeof(*excitation));
01850     if (s->do_apf)
01851         memmove(s->zero_exc_pf,       &s->zero_exc_pf[MAX_SFRAMESIZE],
01852                 s->history_nsamples * sizeof(*s->zero_exc_pf));
01853 
01854     return 0;
01855 }
01856 
01864 static int parse_packet_header(WMAVoiceContext *s)
01865 {
01866     GetBitContext *gb = &s->gb;
01867     unsigned int res;
01868 
01869     if (get_bits_left(gb) < 11)
01870         return 1;
01871     skip_bits(gb, 4);          // packet sequence number
01872     s->has_residual_lsps = get_bits1(gb);
01873     do {
01874         res = get_bits(gb, 6); // number of superframes per packet
01875                                // (minus first one if there is spillover)
01876         if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01877             return 1;
01878     } while (res == 0x3F);
01879     s->spillover_nbits   = get_bits(gb, s->spillover_bitsize);
01880 
01881     return 0;
01882 }
01883 
01899 static void copy_bits(PutBitContext *pb,
01900                       const uint8_t *data, int size,
01901                       GetBitContext *gb, int nbits)
01902 {
01903     int rmn_bytes, rmn_bits;
01904 
01905     rmn_bits = rmn_bytes = get_bits_left(gb);
01906     if (rmn_bits < nbits)
01907         return;
01908     if (nbits > pb->size_in_bits - put_bits_count(pb))
01909         return;
01910     rmn_bits &= 7; rmn_bytes >>= 3;
01911     if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01912         put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01913     avpriv_copy_bits(pb, data + size - rmn_bytes,
01914                  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01915 }
01916 
01928 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01929                                   int *got_frame_ptr, AVPacket *avpkt)
01930 {
01931     WMAVoiceContext *s = ctx->priv_data;
01932     GetBitContext *gb = &s->gb;
01933     int size, res, pos;
01934 
01935     /* Packets are sometimes a multiple of ctx->block_align, with a packet
01936      * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
01937      * feeds us ASF packets, which may concatenate multiple "codec" packets
01938      * in a single "muxer" packet, so we artificially emulate that by
01939      * capping the packet size at ctx->block_align. */
01940     for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01941     if (!size) {
01942         *got_frame_ptr = 0;
01943         return 0;
01944     }
01945     init_get_bits(&s->gb, avpkt->data, size << 3);
01946 
01947     /* size == ctx->block_align is used to indicate whether we are dealing with
01948      * a new packet or a packet of which we already read the packet header
01949      * previously. */
01950     if (size == ctx->block_align) { // new packet header
01951         if ((res = parse_packet_header(s)) < 0)
01952             return res;
01953 
01954         /* If the packet header specifies a s->spillover_nbits, then we want
01955          * to push out all data of the previous packet (+ spillover) before
01956          * continuing to parse new superframes in the current packet. */
01957         if (s->spillover_nbits > 0) {
01958             if (s->sframe_cache_size > 0) {
01959                 int cnt = get_bits_count(gb);
01960                 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01961                 flush_put_bits(&s->pb);
01962                 s->sframe_cache_size += s->spillover_nbits;
01963                 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01964                     *got_frame_ptr) {
01965                     cnt += s->spillover_nbits;
01966                     s->skip_bits_next = cnt & 7;
01967                     *(AVFrame *)data = s->frame;
01968                     return cnt >> 3;
01969                 } else
01970                     skip_bits_long (gb, s->spillover_nbits - cnt +
01971                                     get_bits_count(gb)); // resync
01972             } else
01973                 skip_bits_long(gb, s->spillover_nbits);  // resync
01974         }
01975     } else if (s->skip_bits_next)
01976         skip_bits(gb, s->skip_bits_next);
01977 
01978     /* Try parsing superframes in current packet */
01979     s->sframe_cache_size = 0;
01980     s->skip_bits_next = 0;
01981     pos = get_bits_left(gb);
01982     if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
01983         return res;
01984     } else if (*got_frame_ptr) {
01985         int cnt = get_bits_count(gb);
01986         s->skip_bits_next = cnt & 7;
01987         *(AVFrame *)data = s->frame;
01988         return cnt >> 3;
01989     } else if ((s->sframe_cache_size = pos) > 0) {
01990         /* rewind bit reader to start of last (incomplete) superframe... */
01991         init_get_bits(gb, avpkt->data, size << 3);
01992         skip_bits_long(gb, (size << 3) - pos);
01993         av_assert1(get_bits_left(gb) == pos);
01994 
01995         /* ...and cache it for spillover in next packet */
01996         init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01997         copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
01998         // FIXME bad - just copy bytes as whole and add use the
01999         // skip_bits_next field
02000     }
02001 
02002     return size;
02003 }
02004 
02005 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02006 {
02007     WMAVoiceContext *s = ctx->priv_data;
02008 
02009     if (s->do_apf) {
02010         ff_rdft_end(&s->rdft);
02011         ff_rdft_end(&s->irdft);
02012         ff_dct_end(&s->dct);
02013         ff_dct_end(&s->dst);
02014     }
02015 
02016     return 0;
02017 }
02018 
02019 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02020 {
02021     WMAVoiceContext *s = ctx->priv_data;
02022     int n;
02023 
02024     s->postfilter_agc    = 0;
02025     s->sframe_cache_size = 0;
02026     s->skip_bits_next    = 0;
02027     for (n = 0; n < s->lsps; n++)
02028         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02029     memset(s->excitation_history, 0,
02030            sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02031     memset(s->synth_history,      0,
02032            sizeof(*s->synth_history)      * MAX_LSPS);
02033     memset(s->gain_pred_err,      0,
02034            sizeof(s->gain_pred_err));
02035 
02036     if (s->do_apf) {
02037         memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02038                sizeof(*s->synth_filter_out_buf) * s->lsps);
02039         memset(s->dcf_mem,              0,
02040                sizeof(*s->dcf_mem)              * 2);
02041         memset(s->zero_exc_pf,          0,
02042                sizeof(*s->zero_exc_pf)          * s->history_nsamples);
02043         memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02044     }
02045 }
02046 
02047 AVCodec ff_wmavoice_decoder = {
02048     .name           = "wmavoice",
02049     .type           = AVMEDIA_TYPE_AUDIO,
02050     .id             = AV_CODEC_ID_WMAVOICE,
02051     .priv_data_size = sizeof(WMAVoiceContext),
02052     .init           = wmavoice_decode_init,
02053     .close          = wmavoice_decode_end,
02054     .decode         = wmavoice_decode_packet,
02055     .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02056     .flush          = wmavoice_flush,
02057     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02058 };