00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
/* Defined ahead of the get_bits.h include; presumably selects that header's
 * bitstream reader variant without overread checks.
 * NOTE(review): confirm against get_bits.h. */
00028 #define UNCHECKED_BITSTREAM_READER 1
00029
00030 #include <math.h>
00031
00032 #include "dsputil.h"
00033 #include "avcodec.h"
00034 #include "get_bits.h"
00035 #include "put_bits.h"
00036 #include "wmavoice_data.h"
00037 #include "celp_filters.h"
00038 #include "acelp_vectors.h"
00039 #include "acelp_filters.h"
00040 #include "lsp.h"
00041 #include "libavutil/lzo.h"
00042 #include "dct.h"
00043 #include "rdft.h"
00044 #include "sinewin.h"
00045
/* Compile-time size limits used throughout the decoder. */
00046 #define MAX_BLOCKS 8 /* sizes the per-block pitch[] array in synth_frame(); largest n_blocks in frame_descs[] */
00047 #define MAX_LSPS 16 /* largest LSP order selected by wmavoice_decode_init() (10 or 16) */
00048 #define MAX_LSPS_ALIGN16 16 /* floats reserved in front of the payload of synth_filter_out_buf */
00049
00050 #define MAX_FRAMES 3 /* presumably frames per superframe - TODO confirm */
00051 #define MAX_FRAMESIZE 160 /* samples per frame; a block is MAX_FRAMESIZE / n_blocks samples */
00052 #define MAX_SIGNAL_HISTORY 416 /* upper limit for history_nsamples (max_pitch_val + 8) */
00053 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
00055 #define SFRAME_CACHE_MAXSIZE 256 /* payload bytes of WMAVoiceContext.sframe_cache (padding is added on top) */
00056
00057 #define VLC_NBITS 6 /* table bits handed to INIT_VLC_STATIC() for frame_type_vlc */
00058
00059
/* Frame type VLC; built in decode_vbmtree(), read in synth_frame(). */
00062 static VLC frame_type_vlc;
00063
/**
 * Adaptive codebook (ACB) types. Stored in frame_type_desc.acb_type and
 * dispatched on in synth_block(), synth_block_fcb_acb() and synth_frame().
 */
00067 enum {
00068 ACB_TYPE_NONE = 0, /* no adaptive codebook: block is produced by synth_block_hardcoded() */
00069 ACB_TYPE_ASYMMETRIC = 1, /* pitch is interpolated per sample from last_pitch_val and
                                 * pitch_diff_sh16; uses wmavoice_ipol1_coeffs */
00070
00071
00072
00073
00074 ACB_TYPE_HAMMING = 2 /* one pitch value per block (block_pitch_sh2); uses
                             * wmavoice_ipol2_coeffs or a plain back-copy of the history */
00075
00076
00077 };
00078
/**
 * Fixed codebook (FCB) types. Stored in frame_type_desc.fcb_type.
 */
00082 enum {
00083 FCB_TYPE_SILENCE = 0, /* codebook offset comes from pRNG(), gain from s->silence_gain
                              * (see synth_block_hardcoded()) */
00084
00085
00086 FCB_TYPE_HARDCODED = 1, /* codebook offset (8 bits) and gain index (6 bits) are read
                                * from the bitstream (see synth_block_hardcoded()) */
00087
00088 FCB_TYPE_AW_PULSES = 2, /* pulses are decoded by aw_pulse_set1() and aw_pulse_set2() */
00089
00090 FCB_TYPE_EXC_PULSES = 3, /* five pulse tracks with explicitly coded sign and position
                                 * (see synth_block_fcb_acb()) */
00091
00092
00093 };
00094
/**
 * Static description of each of the 17 frame types. synth_frame() picks the
 * entry through s->vbm_tree[] (index decoded with frame_type_vlc).
 */
00098 static const struct frame_type_desc {
00099 uint8_t n_blocks; /* blocks per frame; block length is MAX_FRAMESIZE / n_blocks */
00100
00101 uint8_t log_n_blocks; /* log2(n_blocks), see the table values below */
00102 uint8_t acb_type; /* one of ACB_TYPE_* */
00103 uint8_t fcb_type; /* one of FCB_TYPE_* */
00104 uint8_t dbl_pulses; /* in synth_block_fcb_acb(): pulse tracks n < dbl_pulses carry
                            * a second pulse */
00105
00106
00107 uint16_t frame_size; /* not read in this part of the file; presumably the coded
                             * size of a frame of this type - TODO confirm unit */
00108
00109 } frame_descs[17] = {
00110 { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
00111 { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
00112 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
00113 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
00114 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00115 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00116 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00117 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00118 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
00119 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
00120 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
00121 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
00122 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
00123 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
00124 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
00125 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
00126 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
00127 };
00128
/**
 * Decoder private context (ctx->priv_data).
 *
 * Field comments below describe only what this part of the file shows;
 * fields without a comment are not referenced in the visible code.
 */
00132 typedef struct {
00137 AVFrame frame; /* registered as ctx->coded_frame by wmavoice_decode_init() */
00138 GetBitContext gb; /* bit reader; init uses it to parse the VBM tree from extradata */
00139
00140
00141
00142 int8_t vbm_tree[25]; /* VLC code index -> frame_descs[] index, -1 (0xff) = unused */
00143
00144 int spillover_bitsize; /* 3 + ceil(log2(ctx->block_align)) */
00145
00146
00147 int history_nsamples; /* max_pitch_val + 8; checked against MAX_SIGNAL_HISTORY */
00148
00149
00150
00151 int do_apf; /* extradata flag bit 0; when set, init creates the
                    * RDFT/DCT contexts and sin/cos tables used by the postfilter */
00152
00153 int denoise_strength; /* flag bits 2-5, must be < 12; row index into
                              * wmavoice_denoise_power_table */
00154
00155 int denoise_tilt_corr; /* flag 0x40; enables tilt compensation of the
                               * denoise coefficients in calc_input_response() */
00156
00157 int dc_level; /* flag bits 7-10; postfilter() applies its order-2 filter
                      * when this is > 8 */
00158
00159
00160 int lsps; /* LSP order: 16 if flag 0x1000 is set, else 10 */
00161 int lsp_q_mode; /* flag 0x2000 */
00162 int lsp_def_mode; /* flag 0x4000 */
00163
00164 int frame_lsp_bitsize; /* 34 (16 LSPs) or 24 (10 LSPs) */
00165
00166 int sframe_lsp_bitsize; /* 60 (16 LSPs) or 48 (10 LSPs) */
00167
00168
00169 int min_pitch_val; /* derived from ctx->sample_rate in init */
00170 int max_pitch_val; /* derived from ctx->sample_rate in init */
00171 int pitch_nbits; /* ceil(log2(max_pitch_val - min_pitch_val)) */
00172
00173 int block_pitch_nbits; /* ceil(log2(block_pitch_range)) */
00174
00175 int block_pitch_range; /* computed from block_conv_table[] in init */
00176 int block_delta_pitch_nbits; /* 1 + ceil(log2(block_delta_pitch_hrange)) */
00177
00178
00179
00180 int block_delta_pitch_hrange; /* (pitch_range >> 3) & ~0xF, must be > 0 */
00181
00182 uint16_t block_conv_table[4]; /* filled in init from min/max pitch and pitch range */
00183
00184
00194 int spillover_nbits;
00195
00196
00197
00198 int has_residual_lsps;
00199
00200
00201
00202
00203 int skip_bits_next;
00204
00205
00206
00207 uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00210 int sframe_cache_size;
00211
00212
00213
00214
00215 PutBitContext pb;
00216
00226 double prev_lsps[MAX_LSPS]; /* init sets entry n to pi * (n + 1) / (lsps + 1) */
00227
00228 int last_pitch_val; /* initialised to 40; base of the per-sample pitch
                            * interpolation in synth_block_fcb_acb() */
00229 int last_acb_type; /* initialised to ACB_TYPE_NONE */
00230 int pitch_diff_sh16; /* per-sample pitch increment, scaled by 1 << 16 */
00231
00232 float silence_gain; /* gain used for FCB_TYPE_SILENCE blocks */
00233
00234 int aw_idx_is_ext; /* set by aw_parse_coords() when the 6-bit index is >= 54;
                           * shortens the first-block value read in aw_pulse_set1() by 2 bits */
00235
00236 int aw_pulse_range; /* 24 if min(pitch[0], pitch[1]) > 32, else 16 */
00237
00238
00239
00240
00241
00242 int aw_n_pulses[2]; /* per-block pulse count computed by aw_parse_coords() */
00243
00244
00245 int aw_first_pulse_off[2]; /* per-block offset of the first pulse (may be negative) */
00246
00247 int aw_next_pulse_off_cache; /* written at the end of aw_pulse_set2(), reused there
                                     * for block 1 */
00248
00249
00250
00251
00252
00253 int frame_cntr; /* seed input of pRNG() */
00254
00255 float gain_pred_err[6]; /* gain prediction history; cleared by
                                * synth_block_hardcoded(), updated by synth_block_fcb_acb() */
00256 float excitation_history[MAX_SIGNAL_HISTORY];
00260 float synth_history[MAX_LSPS];
00261
00270 RDFTContext rdft, irdft; /* created with nbits = 7 when do_apf is set */
00271
00272 DCTContext dct, dst; /* created with nbits = 6 (DCT_I / DST_I) when do_apf is set */
00273
00274 float sin[511], cos[511]; /* lookup tables indexed with 255 + clipped value in
                                  * calc_input_response() */
00275
00276 float postfilter_agc; /* gain memory of adaptive_gain_control() */
00277
00278 float dcf_mem[2]; /* filter memory of the order-2 filter in postfilter() */
00279 float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
00282 float denoise_filter_cache[MAX_FRAMESIZE]; /* filter tail carried between
                                                   * wiener_denoise() calls */
00283 int denoise_filter_cache_size; /* valid floats in denoise_filter_cache */
00284 DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80]; /* work buffer of wiener_denoise() */
00286 DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80]; /* work buffer of wiener_denoise() */
00288 DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; /* postfilter()
                                                   * output starts MAX_LSPS_ALIGN16 floats in */
00291
00294 } WMAVoiceContext;
00295
00305 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00306 {
00307 static const uint8_t bits[] = {
00308 2, 2, 2, 4, 4, 4,
00309 6, 6, 6, 8, 8, 8,
00310 10, 10, 10, 12, 12, 12,
00311 14, 14, 14, 14
00312 };
00313 static const uint16_t codes[] = {
00314 0x0000, 0x0001, 0x0002,
00315 0x000c, 0x000d, 0x000e,
00316 0x003c, 0x003d, 0x003e,
00317 0x00fc, 0x00fd, 0x00fe,
00318 0x03fc, 0x03fd, 0x03fe,
00319 0x0ffc, 0x0ffd, 0x0ffe,
00320 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
00321 };
00322 int cntr[8] = { 0 }, n, res;
00323
00324 memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00325 for (n = 0; n < 17; n++) {
00326 res = get_bits(gb, 3);
00327 if (cntr[res] > 3)
00328 return -1;
00329 vbm_tree[res * 3 + cntr[res]++] = n;
00330 }
00331 INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00332 bits, 1, 1, codes, 2, 2, 132);
00333 return 0;
00334 }
00335
00339 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00340 {
00341 int n, flags, pitch_range, lsp16_flag;
00342 WMAVoiceContext *s = ctx->priv_data;
00343
00352 if (ctx->extradata_size != 46) {
00353 av_log(ctx, AV_LOG_ERROR,
00354 "Invalid extradata size %d (should be 46)\n",
00355 ctx->extradata_size);
00356 return -1;
00357 }
00358 flags = AV_RL32(ctx->extradata + 18);
00359 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00360 s->do_apf = flags & 0x1;
00361 if (s->do_apf) {
00362 ff_rdft_init(&s->rdft, 7, DFT_R2C);
00363 ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00364 ff_dct_init(&s->dct, 6, DCT_I);
00365 ff_dct_init(&s->dst, 6, DST_I);
00366
00367 ff_sine_window_init(s->cos, 256);
00368 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00369 for (n = 0; n < 255; n++) {
00370 s->sin[n] = -s->sin[510 - n];
00371 s->cos[510 - n] = s->cos[n];
00372 }
00373 }
00374 s->denoise_strength = (flags >> 2) & 0xF;
00375 if (s->denoise_strength >= 12) {
00376 av_log(ctx, AV_LOG_ERROR,
00377 "Invalid denoise filter strength %d (max=11)\n",
00378 s->denoise_strength);
00379 return -1;
00380 }
00381 s->denoise_tilt_corr = !!(flags & 0x40);
00382 s->dc_level = (flags >> 7) & 0xF;
00383 s->lsp_q_mode = !!(flags & 0x2000);
00384 s->lsp_def_mode = !!(flags & 0x4000);
00385 lsp16_flag = flags & 0x1000;
00386 if (lsp16_flag) {
00387 s->lsps = 16;
00388 s->frame_lsp_bitsize = 34;
00389 s->sframe_lsp_bitsize = 60;
00390 } else {
00391 s->lsps = 10;
00392 s->frame_lsp_bitsize = 24;
00393 s->sframe_lsp_bitsize = 48;
00394 }
00395 for (n = 0; n < s->lsps; n++)
00396 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00397
00398 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00399 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00400 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00401 return -1;
00402 }
00403
00404 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
00405 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00406 pitch_range = s->max_pitch_val - s->min_pitch_val;
00407 if (pitch_range <= 0) {
00408 av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00409 return -1;
00410 }
00411 s->pitch_nbits = av_ceil_log2(pitch_range);
00412 s->last_pitch_val = 40;
00413 s->last_acb_type = ACB_TYPE_NONE;
00414 s->history_nsamples = s->max_pitch_val + 8;
00415
00416 if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00417 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00418 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00419
00420 av_log(ctx, AV_LOG_ERROR,
00421 "Unsupported samplerate %d (min=%d, max=%d)\n",
00422 ctx->sample_rate, min_sr, max_sr);
00423
00424 return -1;
00425 }
00426
00427 s->block_conv_table[0] = s->min_pitch_val;
00428 s->block_conv_table[1] = (pitch_range * 25) >> 6;
00429 s->block_conv_table[2] = (pitch_range * 44) >> 6;
00430 s->block_conv_table[3] = s->max_pitch_val - 1;
00431 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00432 if (s->block_delta_pitch_hrange <= 0) {
00433 av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00434 return -1;
00435 }
00436 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00437 s->block_pitch_range = s->block_conv_table[2] +
00438 s->block_conv_table[3] + 1 +
00439 2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00440 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
00441
00442 ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
00443
00444 avcodec_get_frame_defaults(&s->frame);
00445 ctx->coded_frame = &s->frame;
00446
00447 return 0;
00448 }
00449
/**
 * Adaptive gain control: scale the postfiltered signal so that its level
 * follows that of the unfiltered speech synthesis.
 *
 * The target gain is (1 - alpha) * sum|speech_synth| / sum|in|; it is
 * smoothed per sample with mem = alpha * mem + target. When the input is
 * completely silent (sum|in| == 0) the target gain is taken as 0 instead of
 * dividing by zero, so the gain memory stays finite.
 *
 * @param out          output, size samples (may be the same buffer as in)
 * @param in           postfiltered input, size samples
 * @param speech_synth reference signal before postfiltering, size samples
 * @param size         number of samples
 * @param alpha        smoothing factor of the gain
 * @param gain_mem     gain state carried over between calls (read and written)
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* a silent input must not turn the gain memory into inf/NaN */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem    = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00492
00511 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00512 const float *in, float *out, int size)
00513 {
00514 int n;
00515 float optimal_gain = 0, dot;
00516 const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00517 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00518 *best_hist_ptr = NULL;
00519
00520
00521 do {
00522 dot = ff_scalarproduct_float_c(in, ptr, size);
00523 if (dot > optimal_gain) {
00524 optimal_gain = dot;
00525 best_hist_ptr = ptr;
00526 }
00527 } while (--ptr >= end);
00528
00529 if (optimal_gain <= 0)
00530 return -1;
00531 dot = ff_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
00532 if (dot <= 0)
00533 return -1;
00534
00535 if (optimal_gain <= dot) {
00536 dot = dot / (dot + 0.6 * optimal_gain);
00537 } else
00538 dot = 0.625;
00539
00540
00541 for (n = 0; n < size; n++)
00542 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00543
00544 return 0;
00545 }
00546
/**
 * Tilt factor of a coefficient vector that has an implicit leading 1.0:
 * the lag-1 autocorrelation divided by the lag-0 autocorrelation.
 *
 * @param lpcs   coefficients (without the leading 1.0)
 * @param n_lpcs number of coefficients
 * @return (lpcs[0] + sum lpcs[i] * lpcs[i + 1]) / (1 + sum lpcs[i]^2)
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    float energy, lag1;

    energy = 1.0     + ff_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
    lag1   = lpcs[0] + ff_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);

    return lag1 / energy;
}
00566
/**
 * Derive the denoise filter coefficients from a (tilted) LPC vector.
 *
 * @param s         decoder context; uses the RDFT/DCT contexts and sin/cos
 *                  tables set up by wmavoice_decode_init() when do_apf is set
 * @param lpcs      input vector; transformed in place and used as scratch
 *                  space (entries 0..127 are accessed)
 * @param fcb_type  fixed codebook type; FCB_TYPE_HARDCODED selects a
 *                  different gain multiplier
 * @param coeffs    output filter coefficients (entries 0..127 are written)
 * @param remainder number of leading coefficients that are kept; the rest
 *                  up to 128 is zeroed
 */
00570 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00571 int fcb_type, float *coeffs, int remainder)
00572 {
00573 float last_coeff, min = 15.0, max = -15.0;
00574 float irange, angle_mul, gain_mul, range, sq;
00575 int n, idx;
00576
00577
/* Forward RDFT in place, then replace each bin by log10 of its power while
 * tracking the smallest and largest value. The value derived from lpcs[1]
 * is kept in last_coeff and stored as entry 64 once the loop no longer
 * needs the packed layout. */
00578 s->rdft.rdft_calc(&s->rdft, lpcs);
00579 #define log_range(var, assign) do { \
00580 float tmp = log10f(assign); var = tmp; \
00581 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00582 } while (0)
00583 log_range(last_coeff, lpcs[1] * lpcs[1]);
00584 for (n = 1; n < 64; n++)
00585 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
00586 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00587 log_range(lpcs[0], lpcs[0] * lpcs[0]);
00588 #undef log_range
00589 range = max - min;
00590 lpcs[64] = last_coeff;
00591
00592
00593
00594
00595
00596
/* irange maps the distance from the maximum onto table indices
 * (range -> 64). */
00597 irange = 64.0 / range;
00598 gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00599 (5.0 / 14.7));
00600 angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00601 for (n = 0; n <= 64; n++) {
00602 float pwr;
00603
/* look up the power for this bin in the row selected by the denoise
 * strength; the index is clamped at 0 */
00604 idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00605 pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00606 lpcs[n] = angle_mul * pwr;
00607
00608
/* magnitude from wmavoice_energy_table; indices above 127 are extrapolated
 * by a factor of 1.0331663 per step, negative indices use entry 0 */
00609 idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00610 if (idx > 127) {
00611 coeffs[n] = wmavoice_energy_table[127] *
00612 powf(1.0331663, idx - 127);
00613 } else
00614 coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00615 }
00616
00617
00618
00619
00620
/* DCT-I followed by DST-I on the scaled values; the results are used as
 * angles below. NOTE(review): presumably this derives the phase that
 * belongs to the magnitudes - confirm against the codec description. */
00621 s->dct.dct_calc(&s->dct, lpcs);
00622 s->dst.dct_calc(&s->dst, lpcs);
00623
00624
/* Combine magnitude and angle into interleaved (real, imaginary) pairs.
 * Angles are clipped to [-255, 255] and offset by 255 to index the
 * sin/cos tables. The loop handles two bins per pass: n is decremented
 * once in the body and once in the loop header, and the loop ends when the
 * decrement in the body reaches 0. */
00625 idx = 255 + av_clip(lpcs[64], -255, 255);
00626 coeffs[0] = coeffs[0] * s->cos[idx];
00627 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00628 last_coeff = coeffs[64] * s->cos[idx];
00629 for (n = 63;; n--) {
00630 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00631 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00632 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00633
00634 if (!--n) break;
00635
00636 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00637 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00638 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00639 }
00640 coeffs[1] = last_coeff;
00641
00642
/* back to the time domain */
00643 s->irdft.rdft_calc(&s->irdft, coeffs);
00644
00645
/* keep only the first 'remainder' coefficients, optionally apply tilt
 * compensation, then scale by (1 / 64) / sqrt(energy) */
00646 memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00647 if (s->denoise_tilt_corr) {
00648 float tilt_mem = 0;
00649
00650 coeffs[remainder - 1] = 0;
00651 ff_tilt_compensation(&tilt_mem,
00652 -1.8 * tilt_factor(coeffs, remainder - 1),
00653 coeffs, remainder);
00654 }
00655 sq = (1.0 / 64.0) * sqrtf(1 / ff_scalarproduct_float_c(coeffs, coeffs, remainder));
00656 for (n = 0; n < remainder; n++)
00657 coeffs[n] *= sq;
00658 }
00659
/**
 * Denoise filter of the postfilter: filters synth_pf in the frequency
 * domain with coefficients derived from the LPCs, and carries the part of
 * the filter output that extends past 'size' over to the next call.
 *
 * @param s        decoder context (work buffers, transforms, filter cache)
 * @param fcb_type fixed codebook type; for FCB_TYPE_SILENCE no filtering is
 *                 done and only the cached tail of earlier calls is added
 * @param synth_pf signal, filtered in place; entries up to index 127 are
 *                 written when filtering is done
 * @param size     number of valid samples in synth_pf
 * @param lpcs     LPC coefficients, s->lsps entries
 */
00686 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00687 float *synth_pf, int size,
00688 const float *lpcs)
00689 {
00690 int remainder, lim, n;
00691
00692 if (fcb_type != FCB_TYPE_SILENCE) {
00693 float *tilted_lpcs = s->tilted_lpcs_pf,
00694 *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00695
/* build { 1.0, lpcs[0..lsps-1], 0, ... } (128 floats) and apply tilt
 * compensation to its first lsps + 2 entries */
00696 tilted_lpcs[0] = 1.0;
00697 memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00698 memset(&tilted_lpcs[s->lsps + 1], 0,
00699 sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00700 ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00701 tilted_lpcs, s->lsps + 2);
00702
00703
00704
00705
00706
/* remainder = filter length: limited so that size + remainder <= 127 and
 * remainder < size */
00707 remainder = FFMIN(127 - size, size - 1);
00708 calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00709
00710
00711
/* zero-pad the signal to 128 samples, transform signal and coefficients,
 * multiply them bin by bin (entries 0 and 1 are multiplied as plain
 * scalars, the others as complex pairs) and transform back */
00712 memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00713 s->rdft.rdft_calc(&s->rdft, synth_pf);
00714 s->rdft.rdft_calc(&s->rdft, coeffs);
00715 synth_pf[0] *= coeffs[0];
00716 synth_pf[1] *= coeffs[1];
00717 for (n = 1; n < 64; n++) {
00718 float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00719 synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00720 synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00721 }
00722 s->irdft.rdft_calc(&s->irdft, synth_pf);
00723 }
00724
00725
/* add the tail cached by earlier calls to the start of this output and
 * drop the consumed part of the cache */
00726 if (s->denoise_filter_cache_size) {
00727 lim = FFMIN(s->denoise_filter_cache_size, size);
00728 for (n = 0; n < lim; n++)
00729 synth_pf[n] += s->denoise_filter_cache[n];
00730 s->denoise_filter_cache_size -= lim;
00731 memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00732 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00733 }
00734
00735
/* store this call's tail (synth_pf[size .. size + remainder - 1]): summed
 * onto what is still cached, copied where the cache is shorter */
00736 if (fcb_type != FCB_TYPE_SILENCE) {
00737 lim = FFMIN(remainder, s->denoise_filter_cache_size);
00738 for (n = 0; n < lim; n++)
00739 s->denoise_filter_cache[n] += synth_pf[size + n];
00740 if (lim < remainder) {
00741 memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00742 sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00743 s->denoise_filter_cache_size = remainder;
00744 }
00745 }
00746 }
00747
00768 static void postfilter(WMAVoiceContext *s, const float *synth,
00769 float *samples, int size,
00770 const float *lpcs, float *zero_exc_pf,
00771 int fcb_type, int pitch)
00772 {
00773 float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00774 *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00775 *synth_filter_in = zero_exc_pf;
00776
00777 av_assert0(size <= MAX_FRAMESIZE / 2);
00778
00779
00780 ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00781
00782 if (fcb_type >= FCB_TYPE_AW_PULSES &&
00783 !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00784 synth_filter_in = synth_filter_in_buf;
00785
00786
00787 ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00788 synth_filter_in, size, s->lsps);
00789 memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00790 sizeof(synth_pf[0]) * s->lsps);
00791
00792 wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00793
00794 adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00795 &s->postfilter_agc);
00796
00797 if (s->dc_level > 8) {
00798
00799
00800
00801 ff_acelp_apply_order_2_transfer_function(samples, samples,
00802 (const float[2]) { -1.99997, 1.0 },
00803 (const float[2]) { -1.9330735188, 0.93589198496 },
00804 0.93980580475, s->dcf_mem, size);
00805 }
00806 }
/**
 * Dequantize LSPs from a multi-stage codebook.
 *
 * The output is the sum over all stages of base_q[stage] + mul_q[stage] *
 * entry, where entry comes from row values[stage] of that stage's section
 * of the table. Each row has num bytes, and stage sections follow each
 * other, sizes[stage] rows each.
 *
 * @param lsps     output, num entries
 * @param num      number of LSPs per codebook row
 * @param values   selected row per stage
 * @param sizes    number of rows per stage
 * @param n_stages number of stages
 * @param table    concatenated codebooks of all stages
 * @param mul_q    per-stage multiplier
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *stage_tab = table;
    int stage;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *row   = stage_tab + values[stage] * num;
        const double  offset = base_q[stage];
        const double  scale  = mul_q[stage];
        int i;

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * row[i];

        /* skip to the codebook of the next stage */
        stage_tab += sizes[stage] * num;
    }
}
00842
00854 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00855 {
00856 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00857 static const double mul_lsf[4] = {
00858 5.2187144800e-3, 1.4626986422e-3,
00859 9.6179549166e-4, 1.1325736225e-3
00860 };
00861 static const double base_lsf[4] = {
00862 M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00863 M_PI * -3.3486e-2, M_PI * -5.7408e-2
00864 };
00865 uint16_t v[4];
00866
00867 v[0] = get_bits(gb, 8);
00868 v[1] = get_bits(gb, 6);
00869 v[2] = get_bits(gb, 5);
00870 v[3] = get_bits(gb, 5);
00871
00872 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00873 mul_lsf, base_lsf);
00874 }
00875
00880 static void dequant_lsp10r(GetBitContext *gb,
00881 double *i_lsps, const double *old,
00882 double *a1, double *a2, int q_mode)
00883 {
00884 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00885 static const double mul_lsf[3] = {
00886 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
00887 };
00888 static const double base_lsf[3] = {
00889 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00890 };
00891 const float (*ipol_tab)[2][10] = q_mode ?
00892 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00893 uint16_t interpol, v[3];
00894 int n;
00895
00896 dequant_lsp10i(gb, i_lsps);
00897
00898 interpol = get_bits(gb, 5);
00899 v[0] = get_bits(gb, 7);
00900 v[1] = get_bits(gb, 6);
00901 v[2] = get_bits(gb, 6);
00902
00903 for (n = 0; n < 10; n++) {
00904 double delta = old[n] - i_lsps[n];
00905 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00906 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00907 }
00908
00909 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00910 mul_lsf, base_lsf);
00911 }
00912
00916 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00917 {
00918 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00919 static const double mul_lsf[5] = {
00920 3.3439586280e-3, 6.9908173703e-4,
00921 3.3216608306e-3, 1.0334960326e-3,
00922 3.1899104283e-3
00923 };
00924 static const double base_lsf[5] = {
00925 M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00926 M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00927 M_PI * -1.29816e-1
00928 };
00929 uint16_t v[5];
00930
00931 v[0] = get_bits(gb, 8);
00932 v[1] = get_bits(gb, 6);
00933 v[2] = get_bits(gb, 7);
00934 v[3] = get_bits(gb, 6);
00935 v[4] = get_bits(gb, 7);
00936
00937 dequant_lsps( lsps, 5, v, vec_sizes, 2,
00938 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
00939 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
00940 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00941 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00942 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00943 }
00944
00949 static void dequant_lsp16r(GetBitContext *gb,
00950 double *i_lsps, const double *old,
00951 double *a1, double *a2, int q_mode)
00952 {
00953 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00954 static const double mul_lsf[3] = {
00955 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
00956 };
00957 static const double base_lsf[3] = {
00958 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00959 };
00960 const float (*ipol_tab)[2][16] = q_mode ?
00961 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00962 uint16_t interpol, v[3];
00963 int n;
00964
00965 dequant_lsp16i(gb, i_lsps);
00966
00967 interpol = get_bits(gb, 5);
00968 v[0] = get_bits(gb, 7);
00969 v[1] = get_bits(gb, 7);
00970 v[2] = get_bits(gb, 7);
00971
00972 for (n = 0; n < 16; n++) {
00973 double delta = old[n] - i_lsps[n];
00974 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00975 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00976 }
00977
00978 dequant_lsps( a2, 10, v, vec_sizes, 1,
00979 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
00980 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00981 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00982 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00983 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00984 }
00985
00999 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
01000 const int *pitch)
01001 {
01002 static const int16_t start_offset[94] = {
01003 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
01004 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
01005 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
01006 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
01007 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
01008 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
01009 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01010 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01011 };
01012 int bits, offset;
01013
01014
01015 s->aw_idx_is_ext = 0;
01016 if ((bits = get_bits(gb, 6)) >= 54) {
01017 s->aw_idx_is_ext = 1;
01018 bits += (bits - 54) * 3 + get_bits(gb, 2);
01019 }
01020
01021
01022
01023 s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01024 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01025 s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01026 s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01027 offset += s->aw_n_pulses[0] * pitch[0];
01028 s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01029 s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01030
01031
01032
01033
01034 if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01035 while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01036 s->aw_first_pulse_off[1] -= pitch[1];
01037 if (start_offset[bits] < 0)
01038 while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01039 s->aw_first_pulse_off[0] -= pitch[0];
01040 }
01041 }
01042
/**
 * Parse one pulse that is placed outside the ranges covered by the
 * pitch-repeated pulses, and append it to the fixed codebook vector.
 *
 * @param s         decoder context (pulse layout from aw_parse_coords())
 * @param gb        bit reader
 * @param block_idx block index within the frame (0 or 1)
 * @param fcb       fixed codebook vector; one entry (x, y) is appended and
 *                  fcb->n incremented, unless no free position is left, in
 *                  which case the function returns without adding a pulse
 */
01050 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01051 int block_idx, AMRFixed *fcb)
01052 {
/* One bit per sample position of the block: words 0..4 hold the
 * 5 * 16 = 80 = MAX_FRAMESIZE / 2 positions (MSB first), with two spare
 * words in front and two behind so that the masking below may touch
 * neighbouring words. */
01053 uint16_t use_mask_mem[9];
01054 uint16_t *use_mask = use_mask_mem + 2;
01055
01056
01057
01058
01059
01060
01061
01062 int pulse_off = s->aw_first_pulse_off[block_idx],
01063 pulse_start, n, idx, range, aidx, start_off = 0;
01064
01065
/* advance the first pulse by whole pitch lags until its range reaches
 * into this block */
01066 if (s->aw_n_pulses[block_idx] > 0)
01067 while (pulse_off + s->aw_pulse_range < 1)
01068 pulse_off += fcb->pitch_lag;
01069
01070
/* size of the index range (32, 8 or 16), matching the number of bits read
 * for aidx below (5, 3 or 4); block 1 starts from the offset cached at the
 * end of the previous call */
01071 if (s->aw_n_pulses[0] > 0) {
01072 if (block_idx == 0) {
01073 range = 32;
01074 } else {
01075 range = 8;
01076 if (s->aw_n_pulses[block_idx] > 0)
01077 pulse_off = s->aw_next_pulse_off_cache;
01078 }
01079 } else
01080 range = 16;
01081 pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01082
01083
01084
01085
/* mark all 80 positions as free, then clear aw_pulse_range bits starting
 * at every pitch-repeated pulse position */
01086 memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01087 memset( use_mask, -1, 5 * sizeof(use_mask[0]));
01088 memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01089 if (s->aw_n_pulses[block_idx] > 0)
01090 for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01091 int excl_range = s->aw_pulse_range;
01092 uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01093 int first_sh = 16 - (idx & 15);
01094 *use_mask_ptr++ &= 0xFFFFu << first_sh;
01095 excl_range -= first_sh;
01096 if (excl_range >= 16) {
01097 *use_mask_ptr++ = 0;
01098 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
01099 } else
01100 *use_mask_ptr &= 0xFFFF >> excl_range;
01101 }
01102
01103
/* Walk forward from pulse_start and consume free positions until
 * aidx + 1 of them have been taken; the last one taken is the pulse
 * position. A start before the block is first moved forward by whole
 * pitch lags; once the walk runs past the end of the block, the free
 * position picked is the one for the least significant set bit of the
 * first non-empty mask word. */
01104 aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01105 for (n = 0; n <= aidx; pulse_start++) {
01106 for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01107 if (idx >= MAX_FRAMESIZE / 2) {
01108 if (use_mask[0]) idx = 0x0F;
01109 else if (use_mask[1]) idx = 0x1F;
01110 else if (use_mask[2]) idx = 0x2F;
01111 else if (use_mask[3]) idx = 0x3F;
01112 else if (use_mask[4]) idx = 0x4F;
01113 else return;
01114 idx -= av_log2_16bit(use_mask[idx >> 4]);
01115 }
01116 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01117 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01118 n++;
01119 start_off = idx;
01120 }
01121 }
01122
/* append the pulse; its sign comes from one more bit */
01123 fcb->x[fcb->n] = start_off;
01124 fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01125 fcb->n++;
01126
01127
/* remember where the pitch-lag grid anchored at this pulse falls in the
 * next block */
01128 n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01129 s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01130 }
01131
01139 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01140 int block_idx, AMRFixed *fcb)
01141 {
01142 int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01143 float v;
01144
01145 if (s->aw_n_pulses[block_idx] > 0) {
01146 int n, v_mask, i_mask, sh, n_pulses;
01147
01148 if (s->aw_pulse_range == 24) {
01149 n_pulses = 3;
01150 v_mask = 8;
01151 i_mask = 7;
01152 sh = 4;
01153 } else {
01154 n_pulses = 4;
01155 v_mask = 4;
01156 i_mask = 3;
01157 sh = 3;
01158 }
01159
01160 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01161 fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01162 fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01163 s->aw_first_pulse_off[block_idx];
01164 while (fcb->x[fcb->n] < 0)
01165 fcb->x[fcb->n] += fcb->pitch_lag;
01166 if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01167 fcb->n++;
01168 }
01169 } else {
01170 int num2 = (val & 0x1FF) >> 1, delta, idx;
01171
01172 if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
01173 else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01174 else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01175 else { delta = 7; idx = num2 + 1 - 3 * 75; }
01176 v = (val & 0x200) ? -1.0 : 1.0;
01177
01178 fcb->no_repeat_mask |= 3 << fcb->n;
01179 fcb->x[fcb->n] = idx - delta;
01180 fcb->y[fcb->n] = v;
01181 fcb->x[fcb->n + 1] = idx;
01182 fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
01183 fcb->n += 2;
01184 }
01185 }
01186
/**
 * Deterministic pseudo-random number derived from the frame counter and
 * the block number.
 *
 * A seed is formed from block_num * 1877 + frame_cntr (reduced once by
 * 0xFFFF), its remainder modulo 9 (computed with a multiply-high) selects
 * a row of div_tbl, and the 16-bit result built from that row is reduced
 * modulo (1000 - block_size).
 *
 * @param frame_cntr current frame counter
 * @param block_num  block index within the frame
 * @param block_size number of samples per block
 * @return value in the range [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U },
        { 4545,  0 * 390451573U },
        { 3124, 11 * 268435456U },
        { 2380, 15 * 204522253U },
        { 1922, 23 * 165191050U },
        { 1612, 23 * 138547333U },
        { 1388, 27 * 119304648U },
        { 1219, 16 * 104755300U },
        { 1086, 39 *  93368855U }
    };
    unsigned int seed = MUL16(block_num, 1877) + frame_cntr;
    unsigned int row, rnd;

    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    /* row = seed % 9 */
    row = seed - 9 * MULH(477218589, seed);
    rnd = (uint16_t) (seed * div_tbl[row][0] + UMULH(seed, div_tbl[row][1]));

    return rnd % (1000 - block_size);
}
01231
01236 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01237 int block_idx, int size,
01238 const struct frame_type_desc *frame_desc,
01239 float *excitation)
01240 {
01241 float gain;
01242 int n, r_idx;
01243
01244 av_assert0(size <= MAX_FRAMESIZE);
01245
01246
01247 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01248 r_idx = pRNG(s->frame_cntr, block_idx, size);
01249 gain = s->silence_gain;
01250 } else {
01251 r_idx = get_bits(gb, 8);
01252 gain = wmavoice_gain_universal[get_bits(gb, 6)];
01253 }
01254
01255
01256 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01257
01258
01259 for (n = 0; n < size; n++)
01260 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01261 }
01262
/**
 * Generate the excitation of a block from a fixed codebook (pulse) vector
 * and an adaptive codebook vector taken from the excitation history.
 *
 * @param s               decoder context
 * @param gb              bit reader
 * @param block_idx       block index within the frame
 * @param size            number of samples, at most MAX_FRAMESIZE / 2
 * @param block_pitch_sh2 block pitch in quarter samples (integer part is
 *                        block_pitch_sh2 >> 2, fraction block_pitch_sh2 & 3)
 * @param frame_desc      frame type descriptor
 * @param excitation      output, size samples; samples before it are read
 *                        as history
 */
01267 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01268 int block_idx, int size,
01269 int block_pitch_sh2,
01270 const struct frame_type_desc *frame_desc,
01271 float *excitation)
01272 {
/* weights applied to the gain prediction history (s->gain_pred_err) */
01273 static const float gain_coeff[6] = {
01274 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01275 };
01276 float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01277 int n, idx, gain_weight;
01278 AMRFixed fcb;
01279
01280 av_assert0(size <= MAX_FRAMESIZE / 2);
01281 memset(pulses, 0, sizeof(*pulses) * size);
01282
01283 fcb.pitch_lag = block_pitch_sh2 >> 2;
01284 fcb.pitch_fac = 1.0;
01285 fcb.no_repeat_mask = 0;
01286 fcb.n = 0;
01287
01288
01289
/* collect the pulses: either the two AW pulse sets, or five tracks with a
 * sign bit and a position of offset_nbits bits each (position is
 * n + 5 * pos); tracks n < dbl_pulses carry a second pulse whose sign is
 * inverted when it lies behind the first one */
01290 if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01291 aw_pulse_set1(s, gb, block_idx, &fcb);
01292 aw_pulse_set2(s, gb, block_idx, &fcb);
01293 } else {
01294 int offset_nbits = 5 - frame_desc->log_n_blocks;
01295
01296 fcb.no_repeat_mask = -1;
01297
01298
01299 for (n = 0; n < 5; n++) {
01300 float sign;
01301 int pos1, pos2;
01302
01303 sign = get_bits1(gb) ? 1.0 : -1.0;
01304 pos1 = get_bits(gb, offset_nbits);
01305 fcb.x[fcb.n] = n + 5 * pos1;
01306 fcb.y[fcb.n++] = sign;
01307 if (n < frame_desc->dbl_pulses) {
01308 pos2 = get_bits(gb, offset_nbits);
01309 fcb.x[fcb.n] = n + 5 * pos2;
01310 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01311 }
01312 }
01313 }
01314 ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01315
01316
01317
/* one 7-bit index selects both gains; the fixed codebook gain is
 * exp(prediction from the history - 5.2409161640 + codebook value) */
01318 idx = get_bits(gb, 7);
01319 fcb_gain = expf(ff_scalarproduct_float_c(s->gain_pred_err, gain_coeff, 6) -
01320 5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01321 acb_gain = wmavoice_gain_codebook_acb[idx];
01322 pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01323 -2.9957322736 ,
01324 1.6094379124 );
01325
/* shift the prediction history by gain_weight entries (4, 2 or 1 for 2, 4
 * or 8 blocks per frame) and fill the freed entries with the clipped
 * codebook value */
01326 gain_weight = 8 >> frame_desc->log_n_blocks;
01327 memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01328 sizeof(*s->gain_pred_err) * (6 - gain_weight));
01329 for (n = 0; n < gain_weight; n++)
01330 s->gain_pred_err[n] = pred_err;
01331
01332
/* adaptive codebook vector, read from the excitation history */
01333 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01334 int len;
/* the pitch changes linearly per sample (values scaled by 1 << 16);
 * process runs of 'len' samples that share the same integer pitch and
 * interpolation filter index */
01335 for (n = 0; n < size; n += len) {
01336 int next_idx_sh16;
01337 int abs_idx = block_idx * size + n;
01338 int pitch_sh16 = (s->last_pitch_val << 16) +
01339 s->pitch_diff_sh16 * abs_idx;
01340 int pitch = (pitch_sh16 + 0x6FFF) >> 16;
01341 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01342 idx = idx_sh16 >> 16;
01343 if (s->pitch_diff_sh16) {
01344 if (s->pitch_diff_sh16 > 0) {
01345 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01346 } else
01347 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01348 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01349 1, size - n);
01350 } else
01351 len = size;
01352
01353 ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01354 wmavoice_ipol1_coeffs, 17,
01355 idx, 9, len);
01356 }
01357 } else {
/* constant pitch over the block: interpolate when the pitch has a
 * fractional part, otherwise repeat the history with a back-copy */
01358 int block_pitch = block_pitch_sh2 >> 2;
01359 idx = block_pitch_sh2 & 3;
01360 if (idx) {
01361 ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01362 wmavoice_ipol2_coeffs, 4,
01363 idx, 8, size);
01364 } else
01365 av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01366 sizeof(float) * size);
01367 }
01368
01369
/* mix the adaptive codebook vector (weight acb_gain) with the pulses
 * (weight fcb_gain), in place */
01370 ff_weighted_vector_sumf(excitation, excitation, pulses,
01371 acb_gain, fcb_gain, size);
01372 }
01373
01390 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01391 int block_idx, int size,
01392 int block_pitch_sh2,
01393 const double *lsps, const double *prev_lsps,
01394 const struct frame_type_desc *frame_desc,
01395 float *excitation, float *synth)
01396 {
01397 double i_lsps[MAX_LSPS];
01398 float lpcs[MAX_LSPS];
01399 float fac;
01400 int n;
01401
01402 if (frame_desc->acb_type == ACB_TYPE_NONE)
01403 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01404 else
01405 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01406 frame_desc, excitation);
01407
01408
01409 fac = (block_idx + 0.5) / frame_desc->n_blocks;
01410 for (n = 0; n < s->lsps; n++)
01411 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01412 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01413
01414
01415 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01416 }
01417
01433 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01434 float *samples,
01435 const double *lsps, const double *prev_lsps,
01436 float *excitation, float *synth)
01437 {
01438 WMAVoiceContext *s = ctx->priv_data;
01439 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01440 int pitch[MAX_BLOCKS], last_block_pitch;
01441
01442
01443 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
01444
01445 if (bd_idx < 0) {
01446 av_log(ctx, AV_LOG_ERROR,
01447 "Invalid frame type VLC code, skipping\n");
01448 return -1;
01449 }
01450
01451 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01452
01453
01454 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01455
01456
01457
01458
01459 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
01460 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
01461 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01462 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01463 if (s->last_acb_type == ACB_TYPE_NONE ||
01464 20 * abs(cur_pitch_val - s->last_pitch_val) >
01465 (cur_pitch_val + s->last_pitch_val))
01466 s->last_pitch_val = cur_pitch_val;
01467
01468
01469 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01470 int fac = n * 2 + 1;
01471
01472 pitch[n] = (MUL16(fac, cur_pitch_val) +
01473 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01474 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01475 }
01476
01477
01478 s->pitch_diff_sh16 =
01479 ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01480 }
01481
01482
01483 switch (frame_descs[bd_idx].fcb_type) {
01484 case FCB_TYPE_SILENCE:
01485 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01486 break;
01487 case FCB_TYPE_AW_PULSES:
01488 aw_parse_coords(s, gb, pitch);
01489 break;
01490 }
01491
01492 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01493 int bl_pitch_sh2;
01494
01495
01496 switch (frame_descs[bd_idx].acb_type) {
01497 case ACB_TYPE_HAMMING: {
01498
01499
01500
01501
01502
01503 int block_pitch,
01504 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01505 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01506 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
01507
01508 if (n == 0) {
01509 block_pitch = get_bits(gb, s->block_pitch_nbits);
01510 } else
01511 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01512 get_bits(gb, s->block_delta_pitch_nbits);
01513
01514 last_block_pitch = av_clip(block_pitch,
01515 s->block_delta_pitch_hrange,
01516 s->block_pitch_range -
01517 s->block_delta_pitch_hrange);
01518
01519
01520 if (block_pitch < t1) {
01521 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01522 } else {
01523 block_pitch -= t1;
01524 if (block_pitch < t2) {
01525 bl_pitch_sh2 =
01526 (s->block_conv_table[1] << 2) + (block_pitch << 1);
01527 } else {
01528 block_pitch -= t2;
01529 if (block_pitch < t3) {
01530 bl_pitch_sh2 =
01531 (s->block_conv_table[2] + block_pitch) << 2;
01532 } else
01533 bl_pitch_sh2 = s->block_conv_table[3] << 2;
01534 }
01535 }
01536 pitch[n] = bl_pitch_sh2 >> 2;
01537 break;
01538 }
01539
01540 case ACB_TYPE_ASYMMETRIC: {
01541 bl_pitch_sh2 = pitch[n] << 2;
01542 break;
01543 }
01544
01545 default:
01546 bl_pitch_sh2 = 0;
01547 break;
01548 }
01549
01550 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01551 lsps, prev_lsps, &frame_descs[bd_idx],
01552 &excitation[n * block_nsamples],
01553 &synth[n * block_nsamples]);
01554 }
01555
01556
01557
01558 if (s->do_apf) {
01559 double i_lsps[MAX_LSPS];
01560 float lpcs[MAX_LSPS];
01561
01562 for (n = 0; n < s->lsps; n++)
01563 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01564 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01565 postfilter(s, synth, samples, 80, lpcs,
01566 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01567 frame_descs[bd_idx].fcb_type, pitch[0]);
01568
01569 for (n = 0; n < s->lsps; n++)
01570 i_lsps[n] = cos(lsps[n]);
01571 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01572 postfilter(s, &synth[80], &samples[80], 80, lpcs,
01573 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01574 frame_descs[bd_idx].fcb_type, pitch[0]);
01575 } else
01576 memcpy(samples, synth, 160 * sizeof(synth[0]));
01577
01578
01579 s->frame_cntr++;
01580 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF;
01581 s->last_acb_type = frame_descs[bd_idx].acb_type;
01582 switch (frame_descs[bd_idx].acb_type) {
01583 case ACB_TYPE_NONE:
01584 s->last_pitch_val = 0;
01585 break;
01586 case ACB_TYPE_ASYMMETRIC:
01587 s->last_pitch_val = cur_pitch_val;
01588 break;
01589 case ACB_TYPE_HAMMING:
01590 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01591 break;
01592 }
01593
01594 return 0;
01595 }
01596
01609 static void stabilize_lsps(double *lsps, int num)
01610 {
01611 int n, m, l;
01612
01613
01614
01615
01616 lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
01617 for (n = 1; n < num; n++)
01618 lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
01619 lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
01620
01621
01622
01623 for (n = 1; n < num; n++) {
01624 if (lsps[n] < lsps[n - 1]) {
01625 for (m = 1; m < num; m++) {
01626 double tmp = lsps[m];
01627 for (l = m - 1; l >= 0; l--) {
01628 if (lsps[l] <= tmp) break;
01629 lsps[l + 1] = lsps[l];
01630 }
01631 lsps[l + 1] = tmp;
01632 }
01633 break;
01634 }
01635 }
01636 }
01637
01647 static int check_bits_for_superframe(GetBitContext *orig_gb,
01648 WMAVoiceContext *s)
01649 {
01650 GetBitContext s_gb, *gb = &s_gb;
01651 int n, need_bits, bd_idx;
01652 const struct frame_type_desc *frame_desc;
01653
01654
01655 init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01656 skip_bits_long(gb, get_bits_count(orig_gb));
01657 av_assert1(get_bits_left(gb) == get_bits_left(orig_gb));
01658
01659
01660 if (get_bits_left(gb) < 14)
01661 return 1;
01662 if (!get_bits1(gb))
01663 return -1;
01664 if (get_bits1(gb)) skip_bits(gb, 12);
01665 if (s->has_residual_lsps) {
01666 if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01667 return 1;
01668 skip_bits_long(gb, s->sframe_lsp_bitsize);
01669 }
01670
01671
01672 for (n = 0; n < MAX_FRAMES; n++) {
01673 int aw_idx_is_ext = 0;
01674
01675 if (!s->has_residual_lsps) {
01676 if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01677 skip_bits_long(gb, s->frame_lsp_bitsize);
01678 }
01679 bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01680 if (bd_idx < 0)
01681 return -1;
01682 frame_desc = &frame_descs[bd_idx];
01683 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01684 if (get_bits_left(gb) < s->pitch_nbits)
01685 return 1;
01686 skip_bits_long(gb, s->pitch_nbits);
01687 }
01688 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01689 skip_bits(gb, 8);
01690 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01691 int tmp = get_bits(gb, 6);
01692 if (tmp >= 0x36) {
01693 skip_bits(gb, 2);
01694 aw_idx_is_ext = 1;
01695 }
01696 }
01697
01698
01699 if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01700 need_bits = s->block_pitch_nbits +
01701 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01702 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01703 need_bits = 2 * !aw_idx_is_ext;
01704 } else
01705 need_bits = 0;
01706 need_bits += frame_desc->frame_size;
01707 if (get_bits_left(gb) < need_bits)
01708 return 1;
01709 skip_bits_long(gb, need_bits);
01710 }
01711
01712 return 0;
01713 }
01714
01732 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01733 {
01734 WMAVoiceContext *s = ctx->priv_data;
01735 GetBitContext *gb = &s->gb, s_gb;
01736 int n, res, n_samples = 480;
01737 double lsps[MAX_FRAMES][MAX_LSPS];
01738 const double *mean_lsf = s->lsps == 16 ?
01739 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01740 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01741 float synth[MAX_LSPS + MAX_SFRAMESIZE];
01742 float *samples;
01743
01744 memcpy(synth, s->synth_history,
01745 s->lsps * sizeof(*synth));
01746 memcpy(excitation, s->excitation_history,
01747 s->history_nsamples * sizeof(*excitation));
01748
01749 if (s->sframe_cache_size > 0) {
01750 gb = &s_gb;
01751 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01752 s->sframe_cache_size = 0;
01753 }
01754
01755 if ((res = check_bits_for_superframe(gb, s)) == 1) {
01756 *got_frame_ptr = 0;
01757 return 1;
01758 }
01759
01760
01761
01762
01763
01764 if (!get_bits1(gb)) {
01765 av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01766 return -1;
01767 }
01768
01769
01770 if (get_bits1(gb)) {
01771 if ((n_samples = get_bits(gb, 12)) > 480) {
01772 av_log(ctx, AV_LOG_ERROR,
01773 "Superframe encodes >480 samples (%d), not allowed\n",
01774 n_samples);
01775 return -1;
01776 }
01777 }
01778
01779 if (s->has_residual_lsps) {
01780 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01781
01782 for (n = 0; n < s->lsps; n++)
01783 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01784
01785 if (s->lsps == 10) {
01786 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01787 } else
01788 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01789
01790 for (n = 0; n < s->lsps; n++) {
01791 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
01792 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01793 lsps[2][n] += mean_lsf[n];
01794 }
01795 for (n = 0; n < 3; n++)
01796 stabilize_lsps(lsps[n], s->lsps);
01797 }
01798
01799
01800 s->frame.nb_samples = 480;
01801 if ((res = ctx->get_buffer(ctx, &s->frame)) < 0) {
01802 av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01803 return res;
01804 }
01805 s->frame.nb_samples = n_samples;
01806 samples = (float *)s->frame.data[0];
01807
01808
01809 for (n = 0; n < 3; n++) {
01810 if (!s->has_residual_lsps) {
01811 int m;
01812
01813 if (s->lsps == 10) {
01814 dequant_lsp10i(gb, lsps[n]);
01815 } else
01816 dequant_lsp16i(gb, lsps[n]);
01817
01818 for (m = 0; m < s->lsps; m++)
01819 lsps[n][m] += mean_lsf[m];
01820 stabilize_lsps(lsps[n], s->lsps);
01821 }
01822
01823 if ((res = synth_frame(ctx, gb, n,
01824 &samples[n * MAX_FRAMESIZE],
01825 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01826 &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01827 &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01828 *got_frame_ptr = 0;
01829 return res;
01830 }
01831 }
01832
01833
01834
01835
01836 if (get_bits1(gb)) {
01837 res = get_bits(gb, 4);
01838 skip_bits(gb, 10 * (res + 1));
01839 }
01840
01841 *got_frame_ptr = 1;
01842
01843
01844 memcpy(s->prev_lsps, lsps[2],
01845 s->lsps * sizeof(*s->prev_lsps));
01846 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
01847 s->lsps * sizeof(*synth));
01848 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01849 s->history_nsamples * sizeof(*excitation));
01850 if (s->do_apf)
01851 memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
01852 s->history_nsamples * sizeof(*s->zero_exc_pf));
01853
01854 return 0;
01855 }
01856
01864 static int parse_packet_header(WMAVoiceContext *s)
01865 {
01866 GetBitContext *gb = &s->gb;
01867 unsigned int res;
01868
01869 if (get_bits_left(gb) < 11)
01870 return 1;
01871 skip_bits(gb, 4);
01872 s->has_residual_lsps = get_bits1(gb);
01873 do {
01874 res = get_bits(gb, 6);
01875
01876 if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01877 return 1;
01878 } while (res == 0x3F);
01879 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
01880
01881 return 0;
01882 }
01883
01899 static void copy_bits(PutBitContext *pb,
01900 const uint8_t *data, int size,
01901 GetBitContext *gb, int nbits)
01902 {
01903 int rmn_bytes, rmn_bits;
01904
01905 rmn_bits = rmn_bytes = get_bits_left(gb);
01906 if (rmn_bits < nbits)
01907 return;
01908 if (nbits > pb->size_in_bits - put_bits_count(pb))
01909 return;
01910 rmn_bits &= 7; rmn_bytes >>= 3;
01911 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01912 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01913 avpriv_copy_bits(pb, data + size - rmn_bytes,
01914 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01915 }
01916
01928 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01929 int *got_frame_ptr, AVPacket *avpkt)
01930 {
01931 WMAVoiceContext *s = ctx->priv_data;
01932 GetBitContext *gb = &s->gb;
01933 int size, res, pos;
01934
01935
01936
01937
01938
01939
01940 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01941 if (!size) {
01942 *got_frame_ptr = 0;
01943 return 0;
01944 }
01945 init_get_bits(&s->gb, avpkt->data, size << 3);
01946
01947
01948
01949
01950 if (size == ctx->block_align) {
01951 if ((res = parse_packet_header(s)) < 0)
01952 return res;
01953
01954
01955
01956
01957 if (s->spillover_nbits > 0) {
01958 if (s->sframe_cache_size > 0) {
01959 int cnt = get_bits_count(gb);
01960 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01961 flush_put_bits(&s->pb);
01962 s->sframe_cache_size += s->spillover_nbits;
01963 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01964 *got_frame_ptr) {
01965 cnt += s->spillover_nbits;
01966 s->skip_bits_next = cnt & 7;
01967 *(AVFrame *)data = s->frame;
01968 return cnt >> 3;
01969 } else
01970 skip_bits_long (gb, s->spillover_nbits - cnt +
01971 get_bits_count(gb));
01972 } else
01973 skip_bits_long(gb, s->spillover_nbits);
01974 }
01975 } else if (s->skip_bits_next)
01976 skip_bits(gb, s->skip_bits_next);
01977
01978
01979 s->sframe_cache_size = 0;
01980 s->skip_bits_next = 0;
01981 pos = get_bits_left(gb);
01982 if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
01983 return res;
01984 } else if (*got_frame_ptr) {
01985 int cnt = get_bits_count(gb);
01986 s->skip_bits_next = cnt & 7;
01987 *(AVFrame *)data = s->frame;
01988 return cnt >> 3;
01989 } else if ((s->sframe_cache_size = pos) > 0) {
01990
01991 init_get_bits(gb, avpkt->data, size << 3);
01992 skip_bits_long(gb, (size << 3) - pos);
01993 av_assert1(get_bits_left(gb) == pos);
01994
01995
01996 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01997 copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
01998
01999
02000 }
02001
02002 return size;
02003 }
02004
02005 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02006 {
02007 WMAVoiceContext *s = ctx->priv_data;
02008
02009 if (s->do_apf) {
02010 ff_rdft_end(&s->rdft);
02011 ff_rdft_end(&s->irdft);
02012 ff_dct_end(&s->dct);
02013 ff_dct_end(&s->dst);
02014 }
02015
02016 return 0;
02017 }
02018
02019 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02020 {
02021 WMAVoiceContext *s = ctx->priv_data;
02022 int n;
02023
02024 s->postfilter_agc = 0;
02025 s->sframe_cache_size = 0;
02026 s->skip_bits_next = 0;
02027 for (n = 0; n < s->lsps; n++)
02028 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02029 memset(s->excitation_history, 0,
02030 sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02031 memset(s->synth_history, 0,
02032 sizeof(*s->synth_history) * MAX_LSPS);
02033 memset(s->gain_pred_err, 0,
02034 sizeof(s->gain_pred_err));
02035
02036 if (s->do_apf) {
02037 memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02038 sizeof(*s->synth_filter_out_buf) * s->lsps);
02039 memset(s->dcf_mem, 0,
02040 sizeof(*s->dcf_mem) * 2);
02041 memset(s->zero_exc_pf, 0,
02042 sizeof(*s->zero_exc_pf) * s->history_nsamples);
02043 memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02044 }
02045 }
02046
02047 AVCodec ff_wmavoice_decoder = {
02048 .name = "wmavoice",
02049 .type = AVMEDIA_TYPE_AUDIO,
02050 .id = AV_CODEC_ID_WMAVOICE,
02051 .priv_data_size = sizeof(WMAVoiceContext),
02052 .init = wmavoice_decode_init,
02053 .close = wmavoice_decode_end,
02054 .decode = wmavoice_decode_packet,
02055 .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02056 .flush = wmavoice_flush,
02057 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02058 };