00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #define UNCHECKED_BITSTREAM_READER 1
00029
00030 #include <math.h>
00031 #include "avcodec.h"
00032 #include "get_bits.h"
00033 #include "put_bits.h"
00034 #include "wmavoice_data.h"
00035 #include "celp_math.h"
00036 #include "celp_filters.h"
00037 #include "acelp_vectors.h"
00038 #include "acelp_filters.h"
00039 #include "lsp.h"
00040 #include "libavutil/lzo.h"
00041 #include "dct.h"
00042 #include "rdft.h"
00043 #include "sinewin.h"
00044
/* Largest number of blocks a frame can be split into (the biggest n_blocks
 * value in frame_descs[]); sizes the per-block pitch[] array in synth_frame(). */
00045 #define MAX_BLOCKS 8
/* Largest supported number of LSPs; sizes prev_lsps[], synth_history[] and the
 * per-block LSP/LPC scratch arrays in synth_block(). */
00046 #define MAX_LSPS 16
/* Number of history slots kept in front of the writable part of
 * synth_filter_out_buf[] (see postfilter()).
 * NOTE(review): presumably MAX_LSPS rounded up to a multiple of 16 - confirm. */
00047 #define MAX_LSPS_ALIGN16 16
00048
/* Number of frames used to derive MAX_SFRAMESIZE below. */
00049 #define MAX_FRAMES 3
/* Number of samples in one frame; a block holds MAX_FRAMESIZE / n_blocks samples. */
00050 #define MAX_FRAMESIZE 160
/* Capacity of the signal history buffers; decoder init rejects sample rates for
 * which max_pitch_val + 8 would exceed this value. */
00051 #define MAX_SIGNAL_HISTORY 416
/* Number of samples in MAX_FRAMES frames. */
00052 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
/* Size in bytes of WMAVoiceContext.sframe_cache[], excluding input padding. */
00054 #define SFRAME_CACHE_MAXSIZE 256
00055
/* Number of bits in the first-level lookup table of frame_type_vlc. */
00056 #define VLC_NBITS 6
00057
00058
/* VLC used to read the frame type code. It is built in decode_vbmtree() and
 * read in synth_frame(), where the decoded symbol indexes
 * WMAVoiceContext.vbm_tree[] to obtain the frame_descs[] entry. */
00061 static VLC frame_type_vlc;
00062
/* Adaptive codebook (ACB) types, stored in frame_type_desc.acb_type. */
00066 enum {
/* No adaptive codebook: synth_block() uses synth_block_hardcoded(). */
00067 ACB_TYPE_NONE = 0,
/* synth_block_fcb_acb() recomputes the pitch per run of samples from
 * last_pitch_val and pitch_diff_sh16 and interpolates the excitation history
 * with wmavoice_ipol1_coeffs. */
00068 ACB_TYPE_ASYMMETRIC = 1,
00069
00070
00071
00072
/* synth_block_fcb_acb() uses one pitch per block (block_pitch_sh2, in quarter
 * samples) and interpolates with wmavoice_ipol2_coeffs, or copies the history
 * directly when the fractional part is zero. */
00073 ACB_TYPE_HAMMING = 2
00074
00075
00076 };
00077
/* Fixed codebook (FCB) types, stored in frame_type_desc.fcb_type. */
00081 enum {
/* No bits are read for the excitation: the codebook offset comes from pRNG()
 * and the gain from WMAVoiceContext.silence_gain. wiener_denoise() skips its
 * filter computation for this type. */
00082 FCB_TYPE_SILENCE = 0,
00083
00084
/* Codebook offset (8 bits) and gain index (6 bits) are read from the
 * bitstream; samples come from wmavoice_std_codebook. */
00085 FCB_TYPE_HARDCODED = 1,
00086
/* Pulses are placed by aw_pulse_set1() and aw_pulse_set2(). */
00087 FCB_TYPE_AW_PULSES = 2,
00088
/* Five sign/position pairs are read per block, with a second position for the
 * first dbl_pulses of them. */
00089 FCB_TYPE_EXC_PULSES = 3,
00090
00091
00092 };
00093
/* Description of each of the 17 frame types. The index into this table is the
 * value stored in WMAVoiceContext.vbm_tree[] for the decoded frame type VLC. */
00097 static const struct frame_type_desc {
/* Number of blocks per frame; a block has MAX_FRAMESIZE / n_blocks samples. */
00098 uint8_t n_blocks;
00099
/* log2(n_blocks); every table row below satisfies n_blocks == 1 << log_n_blocks. */
00100 uint8_t log_n_blocks;
/* One of the ACB_TYPE_* values. */
00101 uint8_t acb_type;
/* One of the FCB_TYPE_* values. */
00102 uint8_t fcb_type;
/* For FCB_TYPE_EXC_PULSES: how many of the 5 pulses per block carry a second
 * position (see synth_block_fcb_acb()). */
00103 uint8_t dbl_pulses;
00104
00105
/* NOTE(review): not used in the visible part of the file; presumably the
 * size of the frame in bits - confirm against the frame parsing code. */
00106 uint16_t frame_size;
00107
00108 } frame_descs[17] = {
00109 { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
00110 { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
00111 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
00112 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
00113 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00114 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00115 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00116 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00117 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
00118 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
00119 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
00120 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
00121 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
00122 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
00123 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
00124 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
00125 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
00126 };
00127
/* Private decoder state (ctx->priv_data). */
00131 typedef struct {
/* Frame object registered as ctx->coded_frame by wmavoice_decode_init(). */
00136 AVFrame frame;
/* Bit reader; wmavoice_decode_init() uses it to parse the extradata. */
00137 GetBitContext gb;
00138
00139
00140
/* Maps a frame type VLC symbol to an index into frame_descs[]; entries left
 * unassigned by decode_vbmtree() stay at -1. */
00141 int8_t vbm_tree[25];
00142
/* Set to 3 + av_ceil_log2(block_align) at init. */
00143 int spillover_bitsize;
00144
00145
/* Set to max_pitch_val + 8 at init; must not exceed MAX_SIGNAL_HISTORY. */
00146 int history_nsamples;
00147
00148
00149
/* Bit 0 of the extradata flags; when set, init prepares the RDFT/DCT contexts
 * and the sin[]/cos[] tables used by the denoise filter. */
00150 int do_apf;
00151
/* Bits 2-5 of the extradata flags (must be below 12); row index into
 * wmavoice_denoise_power_table. */
00152 int denoise_strength;
00153
/* Bit 6 of the extradata flags; enables tilt compensation of the denoise
 * filter coefficients in calc_input_response(). */
00154 int denoise_tilt_corr;
00155
/* Bits 7-10 of the extradata flags; values above 8 make postfilter() apply
 * an order-2 transfer function to its output. */
00156 int dc_level;
00157
00158
/* Number of LSPs: 16 if bit 12 of the extradata flags is set, otherwise 10. */
00159 int lsps;
/* Bit 13 of the extradata flags.
 * NOTE(review): presumably passed as q_mode to dequant_lsp10r()/dequant_lsp16r() - confirm. */
00160 int lsp_q_mode;
/* Bit 14 of the extradata flags; not used in the visible part of the file. */
00161 int lsp_def_mode;
00162
/* 34 for 16 LSPs, 24 for 10 LSPs. */
00163 int frame_lsp_bitsize;
00164
/* 60 for 16 LSPs, 48 for 10 LSPs. */
00165 int sframe_lsp_bitsize;
00166
00167
/* Pitch bounds derived from the sample rate at init; also limit the lag
 * search in kalman_smoothen(). */
00168 int min_pitch_val;
00169 int max_pitch_val;
/* av_ceil_log2(max_pitch_val - min_pitch_val). */
00170 int pitch_nbits;
00171
/* av_ceil_log2(block_pitch_range). */
00172 int block_pitch_nbits;
00173
/* Computed at init from block_conv_table[] and min_pitch_val. */
00174 int block_pitch_range;
/* 1 + av_ceil_log2(block_delta_pitch_hrange). */
00175 int block_delta_pitch_nbits;
00176
00177
00178
/* (pitch range >> 3) rounded down to a multiple of 16; init fails if this is
 * not positive. */
00179 int block_delta_pitch_hrange;
00180
/* Four pitch thresholds computed at init from the pitch range. */
00181 uint16_t block_conv_table[4];
00182
00183
/* Not used in the visible part of the file. */
00193 int spillover_nbits;
00194
00195
00196
/* Not used in the visible part of the file. */
00197 int has_residual_lsps;
00198
00199
00200
00201
/* Not used in the visible part of the file. */
00202 int skip_bits_next;
00203
00204
00205
/* Byte cache with input padding appended; not used in the visible part of the file. */
00206 uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00209 int sframe_cache_size;
00210
00211
00212
00213
/* Bit writer.
 * NOTE(review): presumably used to fill sframe_cache[] - confirm. */
00214 PutBitContext pb;
00215
/* LSPs carried over from the previous frame; initialised at init to
 * M_PI * (n + 1) / (lsps + 1). */
00225 double prev_lsps[MAX_LSPS];
00226
/* Starts at 40; base value for the per-sample pitch computed in
 * synth_block_fcb_acb() for ACB_TYPE_ASYMMETRIC. */
00227 int last_pitch_val;
/* Starts at ACB_TYPE_NONE. */
00228 int last_acb_type;
/* Pitch change per sample, scaled by 1 << 16 (it is added to
 * last_pitch_val << 16 in synth_block_fcb_acb()). */
00229 int pitch_diff_sh16;
00230
/* Gain applied to FCB_TYPE_SILENCE blocks in synth_block_hardcoded(). */
00231 float silence_gain;
00232
/* Set by aw_parse_coords() when the 6-bit position code is 54 or more and two
 * extra bits were read; shortens the value read by aw_pulse_set1() for block 0. */
00233 int aw_idx_is_ext;
00234
/* 24 if both block pitches exceed 32, otherwise 16 (aw_parse_coords()). */
00235 int aw_pulse_range;
00236
00237
00238
00239
00240
/* Per-block pulse count computed by aw_parse_coords(). */
00241 int aw_n_pulses[2];
00242
00243
/* Per-block offset of the first pulse computed by aw_parse_coords(). */
00244 int aw_first_pulse_off[2];
00245
/* Written at the end of aw_pulse_set2() and used as the pulse offset for
 * block 1 when both blocks contain pulses. */
00246 int aw_next_pulse_off_cache;
00247
00248
00249
00250
00251
/* Input to pRNG() for FCB_TYPE_SILENCE blocks. */
00252 int frame_cntr;
00253
/* History of clipped codebook gain values used to predict the fixed codebook
 * gain; cleared by synth_block_hardcoded(). */
00254 float gain_pred_err[6];
/* Not used in the visible part of the file. */
00255 float excitation_history[MAX_SIGNAL_HISTORY];
/* Not used in the visible part of the file. */
00259 float synth_history[MAX_LSPS];
00260
/* Forward (DFT_R2C) and inverse (IDFT_C2R) real FFT contexts of size 1 << 7;
 * initialised only when do_apf is set. */
00269 RDFTContext rdft, irdft;
00270
/* DCT_I and DST_I contexts initialised with nbits = 6 when do_apf is set. */
00271 DCTContext dct, dst;
00272
/* Lookup tables read in calc_input_response() at index
 * 255 + value clipped to [-255, 255]; filled at init when do_apf is set. */
00273 float sin[511], cos[511];
00274
/* Gain memory passed to adaptive_gain_control() between calls. */
00275 float postfilter_agc;
00276
/* Filter memory of the order-2 transfer function applied in postfilter(). */
00277 float dcf_mem[2];
/* Not used as a member in the visible part of the file. */
00278 float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
/* Tail of the denoise filter output that wiener_denoise() adds to the
 * following block(s), and the number of valid entries in it. */
00281 float denoise_filter_cache[MAX_FRAMESIZE];
00282 int denoise_filter_cache_size;
/* 128-entry scratch buffers used by wiener_denoise(). */
00283 DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
00285 DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];
/* Output buffer of the synthesis filter in postfilter(); the first
 * MAX_LSPS_ALIGN16 entries are reserved for filter history. */
00287 DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
00290
00293 } WMAVoiceContext;
00294
00304 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00305 {
00306 static const uint8_t bits[] = {
00307 2, 2, 2, 4, 4, 4,
00308 6, 6, 6, 8, 8, 8,
00309 10, 10, 10, 12, 12, 12,
00310 14, 14, 14, 14
00311 };
00312 static const uint16_t codes[] = {
00313 0x0000, 0x0001, 0x0002,
00314 0x000c, 0x000d, 0x000e,
00315 0x003c, 0x003d, 0x003e,
00316 0x00fc, 0x00fd, 0x00fe,
00317 0x03fc, 0x03fd, 0x03fe,
00318 0x0ffc, 0x0ffd, 0x0ffe,
00319 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
00320 };
00321 int cntr[8], n, res;
00322
00323 memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00324 memset(cntr, 0, sizeof(cntr));
00325 for (n = 0; n < 17; n++) {
00326 res = get_bits(gb, 3);
00327 if (cntr[res] > 3)
00328 return -1;
00329 vbm_tree[res * 3 + cntr[res]++] = n;
00330 }
00331 INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00332 bits, 1, 1, codes, 2, 2, 132);
00333 return 0;
00334 }
00335
00339 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00340 {
00341 int n, flags, pitch_range, lsp16_flag;
00342 WMAVoiceContext *s = ctx->priv_data;
00343
00352 if (ctx->extradata_size != 46) {
00353 av_log(ctx, AV_LOG_ERROR,
00354 "Invalid extradata size %d (should be 46)\n",
00355 ctx->extradata_size);
00356 return -1;
00357 }
00358 flags = AV_RL32(ctx->extradata + 18);
00359 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00360 s->do_apf = flags & 0x1;
00361 if (s->do_apf) {
00362 ff_rdft_init(&s->rdft, 7, DFT_R2C);
00363 ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00364 ff_dct_init(&s->dct, 6, DCT_I);
00365 ff_dct_init(&s->dst, 6, DST_I);
00366
00367 ff_sine_window_init(s->cos, 256);
00368 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00369 for (n = 0; n < 255; n++) {
00370 s->sin[n] = -s->sin[510 - n];
00371 s->cos[510 - n] = s->cos[n];
00372 }
00373 }
00374 s->denoise_strength = (flags >> 2) & 0xF;
00375 if (s->denoise_strength >= 12) {
00376 av_log(ctx, AV_LOG_ERROR,
00377 "Invalid denoise filter strength %d (max=11)\n",
00378 s->denoise_strength);
00379 return -1;
00380 }
00381 s->denoise_tilt_corr = !!(flags & 0x40);
00382 s->dc_level = (flags >> 7) & 0xF;
00383 s->lsp_q_mode = !!(flags & 0x2000);
00384 s->lsp_def_mode = !!(flags & 0x4000);
00385 lsp16_flag = flags & 0x1000;
00386 if (lsp16_flag) {
00387 s->lsps = 16;
00388 s->frame_lsp_bitsize = 34;
00389 s->sframe_lsp_bitsize = 60;
00390 } else {
00391 s->lsps = 10;
00392 s->frame_lsp_bitsize = 24;
00393 s->sframe_lsp_bitsize = 48;
00394 }
00395 for (n = 0; n < s->lsps; n++)
00396 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00397
00398 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00399 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00400 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00401 return -1;
00402 }
00403
00404 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
00405 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00406 pitch_range = s->max_pitch_val - s->min_pitch_val;
00407 if (pitch_range <= 0) {
00408 av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00409 return -1;
00410 }
00411 s->pitch_nbits = av_ceil_log2(pitch_range);
00412 s->last_pitch_val = 40;
00413 s->last_acb_type = ACB_TYPE_NONE;
00414 s->history_nsamples = s->max_pitch_val + 8;
00415
00416 if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00417 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00418 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00419
00420 av_log(ctx, AV_LOG_ERROR,
00421 "Unsupported samplerate %d (min=%d, max=%d)\n",
00422 ctx->sample_rate, min_sr, max_sr);
00423
00424 return -1;
00425 }
00426
00427 s->block_conv_table[0] = s->min_pitch_val;
00428 s->block_conv_table[1] = (pitch_range * 25) >> 6;
00429 s->block_conv_table[2] = (pitch_range * 44) >> 6;
00430 s->block_conv_table[3] = s->max_pitch_val - 1;
00431 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00432 if (s->block_delta_pitch_hrange <= 0) {
00433 av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00434 return -1;
00435 }
00436 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00437 s->block_pitch_range = s->block_conv_table[2] +
00438 s->block_conv_table[3] + 1 +
00439 2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00440 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
00441
00442 ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
00443
00444 avcodec_get_frame_defaults(&s->frame);
00445 ctx->coded_frame = &s->frame;
00446
00447 return 0;
00448 }
00449
/**
 * Scale the post-filtered signal so that its level follows the level of the
 * unfiltered synthesized speech.
 *
 * The target gain is (1 - alpha) * sum|speech_synth| / sum|in|. It is fed
 * into a one-pole smoother (mem = alpha * mem + target) whose state persists
 * between calls, and every input sample is multiplied by the smoothed gain.
 *
 * @param out          output samples (size entries)
 * @param in           post-filtered input samples (size entries)
 * @param speech_synth reference signal that sets the target level
 * @param size         number of samples
 * @param alpha        smoothing factor of the gain
 * @param gain_mem     smoothed gain carried over between calls (read and
 *                     updated)
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* An all-zero input block would otherwise divide by zero; the resulting
     * NaN/Inf would be stored in *gain_mem and corrupt every later block.
     * With a silent input the output is zero whatever the gain, so use a
     * zero target and let the stored gain simply decay. */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00492
00511 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00512 const float *in, float *out, int size)
00513 {
00514 int n;
00515 float optimal_gain = 0, dot;
00516 const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00517 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00518 *best_hist_ptr;
00519
00520
00521 do {
00522 dot = ff_dot_productf(in, ptr, size);
00523 if (dot > optimal_gain) {
00524 optimal_gain = dot;
00525 best_hist_ptr = ptr;
00526 }
00527 } while (--ptr >= end);
00528
00529 if (optimal_gain <= 0)
00530 return -1;
00531 dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00532 if (dot <= 0)
00533 return -1;
00534
00535 if (optimal_gain <= dot) {
00536 dot = dot / (dot + 0.6 * optimal_gain);
00537 } else
00538 dot = 0.625;
00539
00540
00541 for (n = 0; n < size; n++)
00542 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00543
00544 return 0;
00545 }
00546
/**
 * Compute the ratio of the lag-1 to the lag-0 autocorrelation of the sequence
 * { 1, lpcs[0], ..., lpcs[n_lpcs - 1] }.
 *
 * @param lpcs   coefficients (the leading 1 is implied)
 * @param n_lpcs number of coefficients in lpcs
 * @return lag-1 autocorrelation divided by lag-0 autocorrelation
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    float lag0 = 1.0 + ff_dot_productf(lpcs, lpcs, n_lpcs);
    float lag1 = lpcs[0] + ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);

    return lag1 / lag0;
}
00566
/*
 * Derive the time-domain coefficients of the denoise filter from a
 * (tilt-compensated) LPC vector.
 *
 * s          decoder context (transform contexts, sin/cos tables,
 *            denoise_strength and denoise_tilt_corr are read)
 * lpcs       128-entry buffer holding the LPC vector on input; it is
 *            overwritten and used as scratch space
 * fcb_type   fixed codebook type of the block; selects the gain multiplier
 * coeffs     128-entry output buffer; on return the first remainder entries
 *            hold the normalized filter and the rest is zero
 * remainder  number of filter taps to keep
 */
00570 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00571 int fcb_type, float *coeffs, int remainder)
00572 {
00573 float last_coeff, min = 15.0, max = -15.0;
00574 float irange, angle_mul, gain_mul, range, sq;
00575 int n, idx;
00576
/* Forward real FFT of the LPC vector, in place. */
00577
00578 s->rdft.rdft_calc(&s->rdft, lpcs);
/* log_range stores log10(assign) in var and updates the running min/max. */
00579 #define log_range(var, assign) do { \
00580 float tmp = log10f(assign); var = tmp; \
00581 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00582 } while (0)
/* Replace the transform output by the log10 of the squared magnitude per bin.
 * lpcs[1] is handled separately and stored as entry 64 afterwards.
 * NOTE(review): presumably because the RDFT packs the highest-frequency bin
 * into element 1 - confirm against the RDFT output layout. */
00583 log_range(last_coeff, lpcs[1] * lpcs[1]);
00584 for (n = 1; n < 64; n++)
00585 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
00586 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00587 log_range(lpcs[0], lpcs[0] * lpcs[0]);
00588 #undef log_range
00589 range = max - min;
00590 lpcs[64] = last_coeff;
00591
00592
/* Map each log power to a row entry of the denoise power table: the distance
 * from the maximum is scaled by irange, reduced by one and clamped at 0. */
00593
00594
00595
00596
00597 irange = 64.0 / range;
00598 gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00599 (5.0 / 14.7));
00600 angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00601 for (n = 0; n <= 64; n++) {
00602 float pwr;
00603
00604 idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00605 pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00606 lpcs[n] = angle_mul * pwr;
00607
/* 70.570526123 is approximately 1 / log10(1.0331663); indices above the
 * 128-entry energy table are extrapolated with powf(), negative ones are
 * clamped to 0. */
00608
00609 idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00610 if (idx > 127) {
00611 coeffs[n] = wmavoice_energy_table[127] *
00612 powf(1.0331663, idx - 127);
00613 } else
00614 coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00615 }
00616
00617
/* Transform the scaled power values in lpcs[] in place with the DCT-I and
 * then the DST-I context; the result is used below as a phase value per bin. */
00618
00619
00620
00621 s->dct.dct_calc(&s->dct, lpcs);
00622 s->dst.dct_calc(&s->dst, lpcs);
00623
/* Combine magnitude (coeffs[n]) and phase (from lpcs[]) into interleaved
 * real/imaginary pairs via the sin/cos tables, which are indexed by
 * 255 + value clipped to [-255, 255]. The loop walks from bin 63 down to
 * bin 1, two bins per iteration, so that coeffs[n] is read before the pair
 * at 2n / 2n+1 overwrites it. */
00624
00625 idx = 255 + av_clip(lpcs[64], -255, 255);
00626 coeffs[0] = coeffs[0] * s->cos[idx];
00627 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00628 last_coeff = coeffs[64] * s->cos[idx];
00629 for (n = 63;; n--) {
00630 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00631 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00632 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00633
00634 if (!--n) break;
00635
00636 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00637 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00638 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00639 }
00640 coeffs[1] = last_coeff;
00641
/* Inverse real FFT: back to a time-domain filter. */
00642
00643 s->irdft.rdft_calc(&s->irdft, coeffs);
00644
/* Keep only the first remainder taps, optionally apply tilt compensation, and
 * scale so that the energy of the kept taps becomes (1 / 64)^2. */
00645
00646 memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00647 if (s->denoise_tilt_corr) {
00648 float tilt_mem = 0;
00649
00650 coeffs[remainder - 1] = 0;
00651 ff_tilt_compensation(&tilt_mem,
00652 -1.8 * tilt_factor(coeffs, remainder - 1),
00653 coeffs, remainder);
00654 }
00655 sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00656 for (n = 0; n < remainder; n++)
00657 coeffs[n] *= sq;
00658 }
00659
00686 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00687 float *synth_pf, int size,
00688 const float *lpcs)
00689 {
00690 int remainder, lim, n;
00691
00692 if (fcb_type != FCB_TYPE_SILENCE) {
00693 float *tilted_lpcs = s->tilted_lpcs_pf,
00694 *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00695
00696 tilted_lpcs[0] = 1.0;
00697 memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00698 memset(&tilted_lpcs[s->lsps + 1], 0,
00699 sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00700 ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00701 tilted_lpcs, s->lsps + 2);
00702
00703
00704
00705
00706
00707 remainder = FFMIN(127 - size, size - 1);
00708 calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00709
00710
00711
00712 memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00713 s->rdft.rdft_calc(&s->rdft, synth_pf);
00714 s->rdft.rdft_calc(&s->rdft, coeffs);
00715 synth_pf[0] *= coeffs[0];
00716 synth_pf[1] *= coeffs[1];
00717 for (n = 1; n < 64; n++) {
00718 float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00719 synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00720 synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00721 }
00722 s->irdft.rdft_calc(&s->irdft, synth_pf);
00723 }
00724
00725
00726 if (s->denoise_filter_cache_size) {
00727 lim = FFMIN(s->denoise_filter_cache_size, size);
00728 for (n = 0; n < lim; n++)
00729 synth_pf[n] += s->denoise_filter_cache[n];
00730 s->denoise_filter_cache_size -= lim;
00731 memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00732 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00733 }
00734
00735
00736 if (fcb_type != FCB_TYPE_SILENCE) {
00737 lim = FFMIN(remainder, s->denoise_filter_cache_size);
00738 for (n = 0; n < lim; n++)
00739 s->denoise_filter_cache[n] += synth_pf[size + n];
00740 if (lim < remainder) {
00741 memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00742 sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00743 s->denoise_filter_cache_size = remainder;
00744 }
00745 }
00746 }
00747
00768 static void postfilter(WMAVoiceContext *s, const float *synth,
00769 float *samples, int size,
00770 const float *lpcs, float *zero_exc_pf,
00771 int fcb_type, int pitch)
00772 {
00773 float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00774 *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00775 *synth_filter_in = zero_exc_pf;
00776
00777 assert(size <= MAX_FRAMESIZE / 2);
00778
00779
00780 ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00781
00782 if (fcb_type >= FCB_TYPE_AW_PULSES &&
00783 !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00784 synth_filter_in = synth_filter_in_buf;
00785
00786
00787 ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00788 synth_filter_in, size, s->lsps);
00789 memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00790 sizeof(synth_pf[0]) * s->lsps);
00791
00792 wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00793
00794 adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00795 &s->postfilter_agc);
00796
00797 if (s->dc_level > 8) {
00798
00799
00800
00801 ff_acelp_apply_order_2_transfer_function(samples, samples,
00802 (const float[2]) { -1.99997, 1.0 },
00803 (const float[2]) { -1.9330735188, 0.93589198496 },
00804 0.93980580475, s->dcf_mem, size);
00805 }
00806 }
/**
 * Dequantize LSPs from a multi-stage codebook.
 *
 * For every stage the selected codebook vector is scaled and offset, and the
 * results of all stages are summed into lsps.
 *
 * @param lsps     output vector (num entries); overwritten
 * @param num      number of entries per codebook vector
 * @param values   selected vector index per stage
 * @param sizes    number of vectors in the codebook of each stage
 * @param n_stages number of stages
 * @param table    codebooks of all stages, stored back to back
 * @param mul_q    scale factor per stage
 * @param base_q   offset per stage
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;
    int stage, i;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *entry = codebook + values[stage] * num;
        const double offset  = base_q[stage];
        const double scale   = mul_q[stage];

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * entry[i];

        /* the next stage's codebook follows directly */
        codebook += sizes[stage] * num;
    }
}
00842
00854 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00855 {
00856 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00857 static const double mul_lsf[4] = {
00858 5.2187144800e-3, 1.4626986422e-3,
00859 9.6179549166e-4, 1.1325736225e-3
00860 };
00861 static const double base_lsf[4] = {
00862 M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00863 M_PI * -3.3486e-2, M_PI * -5.7408e-2
00864 };
00865 uint16_t v[4];
00866
00867 v[0] = get_bits(gb, 8);
00868 v[1] = get_bits(gb, 6);
00869 v[2] = get_bits(gb, 5);
00870 v[3] = get_bits(gb, 5);
00871
00872 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00873 mul_lsf, base_lsf);
00874 }
00875
00880 static void dequant_lsp10r(GetBitContext *gb,
00881 double *i_lsps, const double *old,
00882 double *a1, double *a2, int q_mode)
00883 {
00884 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00885 static const double mul_lsf[3] = {
00886 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
00887 };
00888 static const double base_lsf[3] = {
00889 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00890 };
00891 const float (*ipol_tab)[2][10] = q_mode ?
00892 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00893 uint16_t interpol, v[3];
00894 int n;
00895
00896 dequant_lsp10i(gb, i_lsps);
00897
00898 interpol = get_bits(gb, 5);
00899 v[0] = get_bits(gb, 7);
00900 v[1] = get_bits(gb, 6);
00901 v[2] = get_bits(gb, 6);
00902
00903 for (n = 0; n < 10; n++) {
00904 double delta = old[n] - i_lsps[n];
00905 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00906 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00907 }
00908
00909 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00910 mul_lsf, base_lsf);
00911 }
00912
00916 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00917 {
00918 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00919 static const double mul_lsf[5] = {
00920 3.3439586280e-3, 6.9908173703e-4,
00921 3.3216608306e-3, 1.0334960326e-3,
00922 3.1899104283e-3
00923 };
00924 static const double base_lsf[5] = {
00925 M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00926 M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00927 M_PI * -1.29816e-1
00928 };
00929 uint16_t v[5];
00930
00931 v[0] = get_bits(gb, 8);
00932 v[1] = get_bits(gb, 6);
00933 v[2] = get_bits(gb, 7);
00934 v[3] = get_bits(gb, 6);
00935 v[4] = get_bits(gb, 7);
00936
00937 dequant_lsps( lsps, 5, v, vec_sizes, 2,
00938 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
00939 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
00940 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00941 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00942 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00943 }
00944
00949 static void dequant_lsp16r(GetBitContext *gb,
00950 double *i_lsps, const double *old,
00951 double *a1, double *a2, int q_mode)
00952 {
00953 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00954 static const double mul_lsf[3] = {
00955 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
00956 };
00957 static const double base_lsf[3] = {
00958 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00959 };
00960 const float (*ipol_tab)[2][16] = q_mode ?
00961 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00962 uint16_t interpol, v[3];
00963 int n;
00964
00965 dequant_lsp16i(gb, i_lsps);
00966
00967 interpol = get_bits(gb, 5);
00968 v[0] = get_bits(gb, 7);
00969 v[1] = get_bits(gb, 7);
00970 v[2] = get_bits(gb, 7);
00971
00972 for (n = 0; n < 16; n++) {
00973 double delta = old[n] - i_lsps[n];
00974 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00975 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00976 }
00977
00978 dequant_lsps( a2, 10, v, vec_sizes, 1,
00979 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
00980 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00981 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00982 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00983 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00984 }
00985
00999 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
01000 const int *pitch)
01001 {
01002 static const int16_t start_offset[94] = {
01003 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
01004 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
01005 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
01006 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
01007 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
01008 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
01009 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01010 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01011 };
01012 int bits, offset;
01013
01014
01015 s->aw_idx_is_ext = 0;
01016 if ((bits = get_bits(gb, 6)) >= 54) {
01017 s->aw_idx_is_ext = 1;
01018 bits += (bits - 54) * 3 + get_bits(gb, 2);
01019 }
01020
01021
01022
01023 s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01024 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01025 s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01026 s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01027 offset += s->aw_n_pulses[0] * pitch[0];
01028 s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01029 s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01030
01031
01032
01033
01034 if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01035 while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01036 s->aw_first_pulse_off[1] -= pitch[1];
01037 if (start_offset[bits] < 0)
01038 while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01039 s->aw_first_pulse_off[0] -= pitch[0];
01040 }
01041 }
01042
/*
 * Place one additional pulse in a block of an FCB_TYPE_AW_PULSES frame, at a
 * position that is not inside the ranges already covered around the repeated
 * pulse offsets.
 *
 * s          decoder context (aw_* fields are read; aw_next_pulse_off_cache
 *            is written)
 * gb         bit reader; a position index and one sign bit are read
 * block_idx  index of the block within the frame (0 or 1)
 * fcb        fixed codebook pulse list; one pulse is appended unless no free
 *            position is left, in which case the function returns early
 */
01050 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01051 int block_idx, AMRFixed *fcb)
01052 {
/* use_mask is a bit array with one bit per sample position of the block
 * (5 words of 16 bits = 80 positions). Position idx lives in word idx >> 4,
 * with idx & 15 == 0 being the most significant bit. Two zeroed padding
 * words on either side absorb writes that run past the ends. */
01053 uint16_t use_mask_mem[9];
01054 uint16_t *use_mask = use_mask_mem + 2;
01055
01056
01057
01058
01059
01060
01061
01062 int pulse_off = s->aw_first_pulse_off[block_idx],
01063 pulse_start, n, idx, range, aidx, start_off = 0;
01064
/* Advance the first pulse offset by whole pitch lags until its exclusion
 * range reaches into this block. */
01065
01066 if (s->aw_n_pulses[block_idx] > 0)
01067 while (pulse_off + s->aw_pulse_range < 1)
01068 pulse_off += fcb->pitch_lag;
01069
/* Search range for the extra pulse: 32 (block 0) or 8 (block 1) when block 0
 * has pulses, otherwise 16. For block 1 the offset cached by the previous
 * call replaces pulse_off. */
01070
01071 if (s->aw_n_pulses[0] > 0) {
01072 if (block_idx == 0) {
01073 range = 32;
01074 } else {
01075 range = 8;
01076 if (s->aw_n_pulses[block_idx] > 0)
01077 pulse_off = s->aw_next_pulse_off_cache;
01078 }
01079 } else
01080 range = 16;
01081 pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01082
01083
/* Mark all 80 positions as free, then clear aw_pulse_range bits starting at
 * every repetition of pulse_off inside the block. */
01084
01085
01086 memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01087 memset( use_mask, -1, 5 * sizeof(use_mask[0]));
01088 memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01089 if (s->aw_n_pulses[block_idx] > 0)
01090 for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01091 int excl_range = s->aw_pulse_range;
01092 uint16_t *use_mask_ptr = &use_mask[idx >> 4];
/* number of bits from position idx to the end of its word */
01093 int first_sh = 16 - (idx & 15);
01094 *use_mask_ptr++ &= 0xFFFFu << first_sh;
01095 excl_range -= first_sh;
01096 if (excl_range >= 16) {
01097 *use_mask_ptr++ = 0;
01098 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
01099 } else
01100 *use_mask_ptr &= 0xFFFF >> excl_range;
01101 }
01102
/* Read the index aidx and walk forward from pulse_start, counting free
 * positions until aidx + 1 of them have been consumed. Once the walk passes
 * the end of the block, the first still-free position of the whole block is
 * taken instead; if none is left the function returns without adding a pulse. */
01103
01104 aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01105 for (n = 0; n <= aidx; pulse_start++) {
01106 for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01107 if (idx >= MAX_FRAMESIZE / 2) {
01108 if (use_mask[0]) idx = 0x0F;
01109 else if (use_mask[1]) idx = 0x1F;
01110 else if (use_mask[2]) idx = 0x2F;
01111 else if (use_mask[3]) idx = 0x3F;
01112 else if (use_mask[4]) idx = 0x4F;
01113 else return;
01114 idx -= av_log2_16bit(use_mask[idx >> 4]);
01115 }
01116 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01117 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01118 n++;
01119 start_off = idx;
01120 }
01121 }
01122
/* Append the pulse at the last position found; the sign bit selects -1 or +1. */
01123 fcb->x[fcb->n] = start_off;
01124 fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01125 fcb->n++;
01126
/* Remember where the pitch-repeated pulse falls in the next block, relative
 * to the start of that block. */
01127
01128 n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01129 s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01130 }
01131
01139 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01140 int block_idx, AMRFixed *fcb)
01141 {
01142 int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01143 float v;
01144
01145 if (s->aw_n_pulses[block_idx] > 0) {
01146 int n, v_mask, i_mask, sh, n_pulses;
01147
01148 if (s->aw_pulse_range == 24) {
01149 n_pulses = 3;
01150 v_mask = 8;
01151 i_mask = 7;
01152 sh = 4;
01153 } else {
01154 n_pulses = 4;
01155 v_mask = 4;
01156 i_mask = 3;
01157 sh = 3;
01158 }
01159
01160 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01161 fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01162 fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01163 s->aw_first_pulse_off[block_idx];
01164 while (fcb->x[fcb->n] < 0)
01165 fcb->x[fcb->n] += fcb->pitch_lag;
01166 if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01167 fcb->n++;
01168 }
01169 } else {
01170 int num2 = (val & 0x1FF) >> 1, delta, idx;
01171
01172 if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
01173 else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01174 else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01175 else { delta = 7; idx = num2 + 1 - 3 * 75; }
01176 v = (val & 0x200) ? -1.0 : 1.0;
01177
01178 fcb->no_repeat_mask |= 3 << fcb->n;
01179 fcb->x[fcb->n] = idx - delta;
01180 fcb->y[fcb->n] = v;
01181 fcb->x[fcb->n + 1] = idx;
01182 fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
01183 fcb->n += 2;
01184 }
01185 }
01186
/**
 * Generate a deterministic pseudo-random offset from the frame counter and
 * the block number.
 *
 * @param frame_cntr frame counter
 * @param block_num  index of the block within the frame
 * @param block_size number of samples in the block
 * @return a value in [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U },
        { 4545,  0 * 390451573U },
        { 3124, 11 * 268435456U },
        { 2380, 15 * 204522253U },
        { 1922, 23 * 165191050U },
        { 1612, 23 * 138547333U },
        { 1388, 27 * 119304648U },
        { 1219, 16 * 104755300U },
        { 1086, 39 *  93368855U }
    };
    unsigned int seed = MUL16(block_num, 1877) + frame_cntr;
    unsigned int row, mixed;

    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    /* NOTE(review): presumably seed % 9 computed by reciprocal multiplication
     * (477218589 is about 2^32 / 9); the result indexes the 9-row table. */
    row   = seed - 9 * MULH(477218589, seed);
    mixed = (uint16_t) (seed * div_tbl[row][0] + UMULH(seed, div_tbl[row][1]));

    return mixed % (1000 - block_size);
}
01231
01236 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01237 int block_idx, int size,
01238 const struct frame_type_desc *frame_desc,
01239 float *excitation)
01240 {
01241 float gain;
01242 int n, r_idx;
01243
01244 assert(size <= MAX_FRAMESIZE);
01245
01246
01247 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01248 r_idx = pRNG(s->frame_cntr, block_idx, size);
01249 gain = s->silence_gain;
01250 } else {
01251 r_idx = get_bits(gb, 8);
01252 gain = wmavoice_gain_universal[get_bits(gb, 6)];
01253 }
01254
01255
01256 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01257
01258
01259 for (n = 0; n < size; n++)
01260 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01261 }
01262
/*
 * Generate the excitation of a block from the fixed codebook (pulses) and the
 * adaptive codebook (pitch-delayed copy of earlier excitation).
 *
 * s                decoder context
 * gb               bit reader
 * block_idx        index of the block within the frame
 * size             number of samples, at most MAX_FRAMESIZE / 2
 * block_pitch_sh2  pitch of this block in quarter samples (the integer part is
 *                  block_pitch_sh2 >> 2, the fraction block_pitch_sh2 & 3)
 * frame_desc       description of the frame type
 * excitation       output excitation (size entries); the samples before
 *                  excitation[0] are read as history
 */
01267 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01268 int block_idx, int size,
01269 int block_pitch_sh2,
01270 const struct frame_type_desc *frame_desc,
01271 float *excitation)
01272 {
/* weights of the six stored gain values in the fixed codebook gain prediction */
01273 static const float gain_coeff[6] = {
01274 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01275 };
01276 float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01277 int n, idx, gain_weight;
01278 AMRFixed fcb;
01279
01280 assert(size <= MAX_FRAMESIZE / 2);
01281 memset(pulses, 0, sizeof(*pulses) * size);
01282
01283 fcb.pitch_lag = block_pitch_sh2 >> 2;
01284 fcb.pitch_fac = 1.0;
01285 fcb.no_repeat_mask = 0;
01286 fcb.n = 0;
01287
01288
/* Build the pulse list: either the two AW pulse sets, or five sign/position
 * pairs of which the first dbl_pulses carry a second position. */
01289
01290 if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01291 aw_pulse_set1(s, gb, block_idx, &fcb);
01292 aw_pulse_set2(s, gb, block_idx, &fcb);
01293 } else {
01294 int offset_nbits = 5 - frame_desc->log_n_blocks;
01295
/* all bits set: no pulse of this list is marked as repeating */
01296 fcb.no_repeat_mask = -1;
01297
01298
01299 for (n = 0; n < 5; n++) {
01300 float sign;
01301 int pos1, pos2;
01302
01303 sign = get_bits1(gb) ? 1.0 : -1.0;
01304 pos1 = get_bits(gb, offset_nbits);
01305 fcb.x[fcb.n] = n + 5 * pos1;
01306 fcb.y[fcb.n++] = sign;
01307 if (n < frame_desc->dbl_pulses) {
01308 pos2 = get_bits(gb, offset_nbits);
01309 fcb.x[fcb.n] = n + 5 * pos2;
/* the second pulse has the opposite sign when it lies after the first */
01310 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01311 }
01312 }
01313 }
01314 ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01315
01316
/* One 7-bit index selects both gains. The fixed codebook gain is predicted in
 * the log domain from the stored history; the value stored for later
 * predictions is the codebook entry clipped to [log(0.05), log(5.0)]. */
01317
01318 idx = get_bits(gb, 7);
01319 fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01320 5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01321 acb_gain = wmavoice_gain_codebook_acb[idx];
01322 pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01323 -2.9957322736 ,
01324 1.6094379124 );
01325
/* Shift the history by gain_weight entries (4, 2 or 1 for 2, 4 or 8 blocks
 * per frame) and fill the freed slots with the new value. */
01326 gain_weight = 8 >> frame_desc->log_n_blocks;
01327 memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01328 sizeof(*s->gain_pred_err) * (6 - gain_weight));
01329 for (n = 0; n < gain_weight; n++)
01330 s->gain_pred_err[n] = pred_err;
01331
01332
/* Adaptive codebook. For ACB_TYPE_ASYMMETRIC the pitch is recomputed in 16.16
 * fixed point from last_pitch_val and pitch_diff_sh16, and the block is
 * processed in runs of len samples that share one interpolation filter index. */
01333 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01334 int len;
01335 for (n = 0; n < size; n += len) {
01336 int next_idx_sh16;
/* sample position counted from the start of the frame */
01337 int abs_idx = block_idx * size + n;
01338 int pitch_sh16 = (s->last_pitch_val << 16) +
01339 s->pitch_diff_sh16 * abs_idx;
01340 int pitch = (pitch_sh16 + 0x6FFF) >> 16;
01341 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01342 idx = idx_sh16 >> 16;
01343 if (s->pitch_diff_sh16) {
01344 if (s->pitch_diff_sh16 > 0) {
01345 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01346 } else
01347 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
/* samples until idx changes, limited to [1, samples left in the block] */
01348 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01349 1, size - n);
01350 } else
01351 len = size;
01352
01353 ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01354 wmavoice_ipol1_coeffs, 17,
01355 idx, 9, len);
01356 }
01357 } else {
/* Otherwise: one pitch per block; interpolate for a fractional pitch, or copy
 * the history directly for an integer pitch. */
01358 int block_pitch = block_pitch_sh2 >> 2;
01359 idx = block_pitch_sh2 & 3;
01360 if (idx) {
01361 ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01362 wmavoice_ipol2_coeffs, 4,
01363 idx, 8, size);
01364 } else
01365 av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01366 sizeof(float) * size);
01367 }
01368
01369
/* excitation = acb_gain * adaptive part + fcb_gain * pulses */
01370 ff_weighted_vector_sumf(excitation, excitation, pulses,
01371 acb_gain, fcb_gain, size);
01372 }
01373
01390 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01391 int block_idx, int size,
01392 int block_pitch_sh2,
01393 const double *lsps, const double *prev_lsps,
01394 const struct frame_type_desc *frame_desc,
01395 float *excitation, float *synth)
01396 {
01397 double i_lsps[MAX_LSPS];
01398 float lpcs[MAX_LSPS];
01399 float fac;
01400 int n;
01401
01402 if (frame_desc->acb_type == ACB_TYPE_NONE)
01403 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01404 else
01405 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01406 frame_desc, excitation);
01407
01408
01409 fac = (block_idx + 0.5) / frame_desc->n_blocks;
01410 for (n = 0; n < s->lsps; n++)
01411 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01412 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01413
01414
01415 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01416 }
01417
01433 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01434 float *samples,
01435 const double *lsps, const double *prev_lsps,
01436 float *excitation, float *synth)
01437 {
01438 WMAVoiceContext *s = ctx->priv_data;
01439 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01440 int pitch[MAX_BLOCKS], last_block_pitch;
01441
01442
01443 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
01444
01445 if (bd_idx < 0) {
01446 av_log(ctx, AV_LOG_ERROR,
01447 "Invalid frame type VLC code, skipping\n");
01448 return -1;
01449 }
01450
01451 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01452
01453
01454 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01455
01456
01457
01458
01459 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
01460 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
01461 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01462 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01463 if (s->last_acb_type == ACB_TYPE_NONE ||
01464 20 * abs(cur_pitch_val - s->last_pitch_val) >
01465 (cur_pitch_val + s->last_pitch_val))
01466 s->last_pitch_val = cur_pitch_val;
01467
01468
01469 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01470 int fac = n * 2 + 1;
01471
01472 pitch[n] = (MUL16(fac, cur_pitch_val) +
01473 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01474 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01475 }
01476
01477
01478 s->pitch_diff_sh16 =
01479 ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01480 }
01481
01482
01483 switch (frame_descs[bd_idx].fcb_type) {
01484 case FCB_TYPE_SILENCE:
01485 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01486 break;
01487 case FCB_TYPE_AW_PULSES:
01488 aw_parse_coords(s, gb, pitch);
01489 break;
01490 }
01491
01492 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01493 int bl_pitch_sh2;
01494
01495
01496 switch (frame_descs[bd_idx].acb_type) {
01497 case ACB_TYPE_HAMMING: {
01498
01499
01500
01501
01502
01503 int block_pitch,
01504 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01505 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01506 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
01507
01508 if (n == 0) {
01509 block_pitch = get_bits(gb, s->block_pitch_nbits);
01510 } else
01511 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01512 get_bits(gb, s->block_delta_pitch_nbits);
01513
01514 last_block_pitch = av_clip(block_pitch,
01515 s->block_delta_pitch_hrange,
01516 s->block_pitch_range -
01517 s->block_delta_pitch_hrange);
01518
01519
01520 if (block_pitch < t1) {
01521 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01522 } else {
01523 block_pitch -= t1;
01524 if (block_pitch < t2) {
01525 bl_pitch_sh2 =
01526 (s->block_conv_table[1] << 2) + (block_pitch << 1);
01527 } else {
01528 block_pitch -= t2;
01529 if (block_pitch < t3) {
01530 bl_pitch_sh2 =
01531 (s->block_conv_table[2] + block_pitch) << 2;
01532 } else
01533 bl_pitch_sh2 = s->block_conv_table[3] << 2;
01534 }
01535 }
01536 pitch[n] = bl_pitch_sh2 >> 2;
01537 break;
01538 }
01539
01540 case ACB_TYPE_ASYMMETRIC: {
01541 bl_pitch_sh2 = pitch[n] << 2;
01542 break;
01543 }
01544
01545 default:
01546 bl_pitch_sh2 = 0;
01547 break;
01548 }
01549
01550 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01551 lsps, prev_lsps, &frame_descs[bd_idx],
01552 &excitation[n * block_nsamples],
01553 &synth[n * block_nsamples]);
01554 }
01555
01556
01557
01558 if (s->do_apf) {
01559 double i_lsps[MAX_LSPS];
01560 float lpcs[MAX_LSPS];
01561
01562 for (n = 0; n < s->lsps; n++)
01563 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01564 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01565 postfilter(s, synth, samples, 80, lpcs,
01566 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01567 frame_descs[bd_idx].fcb_type, pitch[0]);
01568
01569 for (n = 0; n < s->lsps; n++)
01570 i_lsps[n] = cos(lsps[n]);
01571 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01572 postfilter(s, &synth[80], &samples[80], 80, lpcs,
01573 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01574 frame_descs[bd_idx].fcb_type, pitch[0]);
01575 } else
01576 memcpy(samples, synth, 160 * sizeof(synth[0]));
01577
01578
01579 s->frame_cntr++;
01580 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF;
01581 s->last_acb_type = frame_descs[bd_idx].acb_type;
01582 switch (frame_descs[bd_idx].acb_type) {
01583 case ACB_TYPE_NONE:
01584 s->last_pitch_val = 0;
01585 break;
01586 case ACB_TYPE_ASYMMETRIC:
01587 s->last_pitch_val = cur_pitch_val;
01588 break;
01589 case ACB_TYPE_HAMMING:
01590 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01591 break;
01592 }
01593
01594 return 0;
01595 }
01596
/**
 * Force a vector of line spectral values into a usable shape, in place.
 *
 * First the values are constrained: the first entry is at least
 * 0.0015 * pi, every following entry is at least 0.0125 * pi above its
 * predecessor, and the last entry is at most 0.9985 * pi. Clamping the last
 * entry can leave it below its predecessor; if any such inversion exists,
 * the whole vector is put in ascending order with one insertion-sort run.
 *
 * @param lsps values to stabilize, modified in place
 * @param num  number of entries in lsps
 */
static void stabilize_lsps(double *lsps, int num)
{
    const double floor_first = 0.0015 * M_PI;
    const double min_gap     = 0.0125 * M_PI;
    const double ceil_last   = 0.9985 * M_PI;
    int i, j, inverted = 0;

    /* Lower bound, minimum spacing, upper bound. */
    if (!(lsps[0] > floor_first))
        lsps[0] = floor_first;
    for (i = 1; i < num; i++) {
        const double lowest = lsps[i - 1] + min_gap;

        if (!(lsps[i] > lowest))
            lsps[i] = lowest;
    }
    if (lsps[num - 1] > ceil_last)
        lsps[num - 1] = ceil_last;

    /* Nothing more to do when the vector is already ascending. */
    for (i = 1; i < num && !inverted; i++)
        inverted = lsps[i] < lsps[i - 1];
    if (!inverted)
        return;

    /* Insertion sort; cheap because the input is nearly sorted. */
    for (i = 1; i < num; i++) {
        const double key = lsps[i];

        j = i - 1;
        while (j >= 0 && !(lsps[j] <= key)) {
            lsps[j + 1] = lsps[j];
            j--;
        }
        lsps[j + 1] = key;
    }
}
01637
01647 static int check_bits_for_superframe(GetBitContext *orig_gb,
01648 WMAVoiceContext *s)
01649 {
01650 GetBitContext s_gb, *gb = &s_gb;
01651 int n, need_bits, bd_idx;
01652 const struct frame_type_desc *frame_desc;
01653
01654
01655 init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01656 skip_bits_long(gb, get_bits_count(orig_gb));
01657 assert(get_bits_left(gb) == get_bits_left(orig_gb));
01658
01659
01660 if (get_bits_left(gb) < 14)
01661 return 1;
01662 if (!get_bits1(gb))
01663 return -1;
01664 if (get_bits1(gb)) skip_bits(gb, 12);
01665 if (s->has_residual_lsps) {
01666 if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01667 return 1;
01668 skip_bits_long(gb, s->sframe_lsp_bitsize);
01669 }
01670
01671
01672 for (n = 0; n < MAX_FRAMES; n++) {
01673 int aw_idx_is_ext = 0;
01674
01675 if (!s->has_residual_lsps) {
01676 if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01677 skip_bits_long(gb, s->frame_lsp_bitsize);
01678 }
01679 bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01680 if (bd_idx < 0)
01681 return -1;
01682 frame_desc = &frame_descs[bd_idx];
01683 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01684 if (get_bits_left(gb) < s->pitch_nbits)
01685 return 1;
01686 skip_bits_long(gb, s->pitch_nbits);
01687 }
01688 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01689 skip_bits(gb, 8);
01690 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01691 int tmp = get_bits(gb, 6);
01692 if (tmp >= 0x36) {
01693 skip_bits(gb, 2);
01694 aw_idx_is_ext = 1;
01695 }
01696 }
01697
01698
01699 if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01700 need_bits = s->block_pitch_nbits +
01701 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01702 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01703 need_bits = 2 * !aw_idx_is_ext;
01704 } else
01705 need_bits = 0;
01706 need_bits += frame_desc->frame_size;
01707 if (get_bits_left(gb) < need_bits)
01708 return 1;
01709 skip_bits_long(gb, need_bits);
01710 }
01711
01712 return 0;
01713 }
01714
01735 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01736 {
01737 WMAVoiceContext *s = ctx->priv_data;
01738 GetBitContext *gb = &s->gb, s_gb;
01739 int n, res, n_samples = 480;
01740 double lsps[MAX_FRAMES][MAX_LSPS];
01741 const double *mean_lsf = s->lsps == 16 ?
01742 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01743 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01744 float synth[MAX_LSPS + MAX_SFRAMESIZE];
01745 float *samples;
01746
01747 memcpy(synth, s->synth_history,
01748 s->lsps * sizeof(*synth));
01749 memcpy(excitation, s->excitation_history,
01750 s->history_nsamples * sizeof(*excitation));
01751
01752 if (s->sframe_cache_size > 0) {
01753 gb = &s_gb;
01754 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01755 s->sframe_cache_size = 0;
01756 }
01757
01758 if ((res = check_bits_for_superframe(gb, s)) == 1) {
01759 *got_frame_ptr = 0;
01760 return 1;
01761 }
01762
01763
01764
01765
01766
01767 if (!get_bits1(gb)) {
01768 av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01769 return -1;
01770 }
01771
01772
01773 if (get_bits1(gb)) {
01774 if ((n_samples = get_bits(gb, 12)) > 480) {
01775 av_log(ctx, AV_LOG_ERROR,
01776 "Superframe encodes >480 samples (%d), not allowed\n",
01777 n_samples);
01778 return -1;
01779 }
01780 }
01781
01782 if (s->has_residual_lsps) {
01783 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01784
01785 for (n = 0; n < s->lsps; n++)
01786 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01787
01788 if (s->lsps == 10) {
01789 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01790 } else
01791 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01792
01793 for (n = 0; n < s->lsps; n++) {
01794 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
01795 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01796 lsps[2][n] += mean_lsf[n];
01797 }
01798 for (n = 0; n < 3; n++)
01799 stabilize_lsps(lsps[n], s->lsps);
01800 }
01801
01802
01803 s->frame.nb_samples = 480;
01804 if ((res = ctx->get_buffer(ctx, &s->frame)) < 0) {
01805 av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01806 return res;
01807 }
01808 s->frame.nb_samples = n_samples;
01809 samples = (float *)s->frame.data[0];
01810
01811
01812 for (n = 0; n < 3; n++) {
01813 if (!s->has_residual_lsps) {
01814 int m;
01815
01816 if (s->lsps == 10) {
01817 dequant_lsp10i(gb, lsps[n]);
01818 } else
01819 dequant_lsp16i(gb, lsps[n]);
01820
01821 for (m = 0; m < s->lsps; m++)
01822 lsps[n][m] += mean_lsf[m];
01823 stabilize_lsps(lsps[n], s->lsps);
01824 }
01825
01826 if ((res = synth_frame(ctx, gb, n,
01827 &samples[n * MAX_FRAMESIZE],
01828 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01829 &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01830 &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01831 *got_frame_ptr = 0;
01832 return res;
01833 }
01834 }
01835
01836
01837
01838
01839 if (get_bits1(gb)) {
01840 res = get_bits(gb, 4);
01841 skip_bits(gb, 10 * (res + 1));
01842 }
01843
01844 *got_frame_ptr = 1;
01845
01846
01847 memcpy(s->prev_lsps, lsps[2],
01848 s->lsps * sizeof(*s->prev_lsps));
01849 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
01850 s->lsps * sizeof(*synth));
01851 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01852 s->history_nsamples * sizeof(*excitation));
01853 if (s->do_apf)
01854 memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
01855 s->history_nsamples * sizeof(*s->zero_exc_pf));
01856
01857 return 0;
01858 }
01859
01867 static int parse_packet_header(WMAVoiceContext *s)
01868 {
01869 GetBitContext *gb = &s->gb;
01870 unsigned int res;
01871
01872 if (get_bits_left(gb) < 11)
01873 return 1;
01874 skip_bits(gb, 4);
01875 s->has_residual_lsps = get_bits1(gb);
01876 do {
01877 res = get_bits(gb, 6);
01878
01879 if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01880 return 1;
01881 } while (res == 0x3F);
01882 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
01883
01884 return 0;
01885 }
01886
01902 static void copy_bits(PutBitContext *pb,
01903 const uint8_t *data, int size,
01904 GetBitContext *gb, int nbits)
01905 {
01906 int rmn_bytes, rmn_bits;
01907
01908 rmn_bits = rmn_bytes = get_bits_left(gb);
01909 if (rmn_bits < nbits)
01910 return;
01911 if (nbits > pb->size_in_bits - put_bits_count(pb))
01912 return;
01913 rmn_bits &= 7; rmn_bytes >>= 3;
01914 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01915 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01916 avpriv_copy_bits(pb, data + size - rmn_bytes,
01917 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01918 }
01919
01931 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01932 int *got_frame_ptr, AVPacket *avpkt)
01933 {
01934 WMAVoiceContext *s = ctx->priv_data;
01935 GetBitContext *gb = &s->gb;
01936 int size, res, pos;
01937
01938
01939
01940
01941
01942
01943 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01944 if (!size) {
01945 *got_frame_ptr = 0;
01946 return 0;
01947 }
01948 init_get_bits(&s->gb, avpkt->data, size << 3);
01949
01950
01951
01952
01953 if (size == ctx->block_align) {
01954 if ((res = parse_packet_header(s)) < 0)
01955 return res;
01956
01957
01958
01959
01960 if (s->spillover_nbits > 0) {
01961 if (s->sframe_cache_size > 0) {
01962 int cnt = get_bits_count(gb);
01963 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01964 flush_put_bits(&s->pb);
01965 s->sframe_cache_size += s->spillover_nbits;
01966 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01967 *got_frame_ptr) {
01968 cnt += s->spillover_nbits;
01969 s->skip_bits_next = cnt & 7;
01970 *(AVFrame *)data = s->frame;
01971 return cnt >> 3;
01972 } else
01973 skip_bits_long (gb, s->spillover_nbits - cnt +
01974 get_bits_count(gb));
01975 } else
01976 skip_bits_long(gb, s->spillover_nbits);
01977 }
01978 } else if (s->skip_bits_next)
01979 skip_bits(gb, s->skip_bits_next);
01980
01981
01982 s->sframe_cache_size = 0;
01983 s->skip_bits_next = 0;
01984 pos = get_bits_left(gb);
01985 if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
01986 return res;
01987 } else if (*got_frame_ptr) {
01988 int cnt = get_bits_count(gb);
01989 s->skip_bits_next = cnt & 7;
01990 *(AVFrame *)data = s->frame;
01991 return cnt >> 3;
01992 } else if ((s->sframe_cache_size = pos) > 0) {
01993
01994 init_get_bits(gb, avpkt->data, size << 3);
01995 skip_bits_long(gb, (size << 3) - pos);
01996 assert(get_bits_left(gb) == pos);
01997
01998
01999 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
02000 copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
02001
02002
02003 }
02004
02005 return size;
02006 }
02007
02008 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02009 {
02010 WMAVoiceContext *s = ctx->priv_data;
02011
02012 if (s->do_apf) {
02013 ff_rdft_end(&s->rdft);
02014 ff_rdft_end(&s->irdft);
02015 ff_dct_end(&s->dct);
02016 ff_dct_end(&s->dst);
02017 }
02018
02019 return 0;
02020 }
02021
02022 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02023 {
02024 WMAVoiceContext *s = ctx->priv_data;
02025 int n;
02026
02027 s->postfilter_agc = 0;
02028 s->sframe_cache_size = 0;
02029 s->skip_bits_next = 0;
02030 for (n = 0; n < s->lsps; n++)
02031 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02032 memset(s->excitation_history, 0,
02033 sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02034 memset(s->synth_history, 0,
02035 sizeof(*s->synth_history) * MAX_LSPS);
02036 memset(s->gain_pred_err, 0,
02037 sizeof(s->gain_pred_err));
02038
02039 if (s->do_apf) {
02040 memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02041 sizeof(*s->synth_filter_out_buf) * s->lsps);
02042 memset(s->dcf_mem, 0,
02043 sizeof(*s->dcf_mem) * 2);
02044 memset(s->zero_exc_pf, 0,
02045 sizeof(*s->zero_exc_pf) * s->history_nsamples);
02046 memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02047 }
02048 }
02049
02050 AVCodec ff_wmavoice_decoder = {
02051 .name = "wmavoice",
02052 .type = AVMEDIA_TYPE_AUDIO,
02053 .id = CODEC_ID_WMAVOICE,
02054 .priv_data_size = sizeof(WMAVoiceContext),
02055 .init = wmavoice_decode_init,
02056 .close = wmavoice_decode_end,
02057 .decode = wmavoice_decode_packet,
02058 .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02059 .flush = wmavoice_flush,
02060 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02061 };