FFmpeg
wmavoice.c
Go to the documentation of this file.
1 /*
2  * Windows Media Audio Voice decoder.
3  * Copyright (c) 2009 Ronald S. Bultje
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * @brief Windows Media Audio Voice compatible decoder
25  * @author Ronald S. Bultje <rsbultje@gmail.com>
26  */
27 
28 #include <math.h>
29 
31 #include "libavutil/float_dsp.h"
32 #include "libavutil/mem_internal.h"
33 #include "libavutil/thread.h"
34 #include "libavutil/tx.h"
35 #include "avcodec.h"
36 #include "codec_internal.h"
37 #include "decode.h"
38 #include "get_bits.h"
39 #include "put_bits.h"
40 #include "wmavoice_data.h"
41 #include "celp_filters.h"
42 #include "acelp_vectors.h"
43 #include "acelp_filters.h"
44 #include "lsp.h"
45 #include "sinewin.h"
46 
47 #define MAX_BLOCKS 8 ///< maximum number of blocks per frame
48 #define MAX_LSPS 16 ///< maximum filter order
49 #define MAX_LSPS_ALIGN16 16 ///< same as #MAX_LSPS; needs to be multiple
50  ///< of 16 for ASM input buffer alignment
51 #define MAX_FRAMES 3 ///< maximum number of frames per superframe
52 #define MAX_FRAMESIZE 160 ///< maximum number of samples per frame
53 #define MAX_SIGNAL_HISTORY 416 ///< maximum excitation signal history
54 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
55  ///< maximum number of samples per superframe
56 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size for frame data that
57  ///< was split over two packets
58 #define VLC_NBITS 6 ///< number of bits to read per VLC iteration
59 
60 /**
61  * Frame type VLC coding.
 *
 * Statically allocated storage for 132 VLC table elements.
 * NOTE(review): presumably populated once by the static-init routine and
 * read VLC_NBITS bits at a time while parsing frame headers - confirm
 * against the code that fills and reads it (not visible here).
62  */
63 static VLCElem frame_type_vlc[132];
64 
65 /**
66  * Adaptive codebook types.
 *
 * One of these values is stored for every frame type in
 * frame_type_desc.acb_type; the numeric values are fixed explicitly
 * (0, 1, 2).
67  */
68 enum {
69  ACB_TYPE_NONE = 0, ///< no adaptive codebook (only hardcoded fixed)
70  ACB_TYPE_ASYMMETRIC = 1, ///< adaptive codebook with per-frame pitch, which
71  ///< we interpolate to get a per-sample pitch.
72  ///< Signal is generated using an asymmetric sinc
73  ///< window function
74  ///< @note see #wmavoice_ipol1_coeffs
75  ACB_TYPE_HAMMING = 2 ///< Per-block pitch with signal generation using
76  ///< a Hamming sinc window function
77  ///< @note see #wmavoice_ipol2_coeffs
78 };
79 
80 /**
81  * Fixed codebook types.
 *
 * One of these values is stored for every frame type in
 * frame_type_desc.fcb_type. The postfilter code in this file compares
 * against them both by equality (FCB_TYPE_SILENCE, FCB_TYPE_HARDCODED)
 * and by ordering (>= FCB_TYPE_AW_PULSES), so the numeric order matters.
82  */
83 enum {
84  FCB_TYPE_SILENCE = 0, ///< comfort noise during silence
85  ///< generated from a hardcoded (fixed) codebook
86  ///< with per-frame (low) gain values
87  FCB_TYPE_HARDCODED = 1, ///< hardcoded (fixed) codebook with per-block
88  ///< gain values
89  FCB_TYPE_AW_PULSES = 2, ///< Pitch-adaptive window (AW) pulse signals,
90  ///< used in particular for low-bitrate streams
91  FCB_TYPE_EXC_PULSES = 3, ///< Innovation (fixed) codebook pulse sets in
92  ///< combinations of either single pulses or
93  ///< pulse pairs
94 };
95 
96 /**
97  * Description of frame types.
 *
 * Table of 17 entries, one per frame type index.
 * NOTE(review): only the first two initializer rows are visible in this
 * listing; the remaining rows appear to have been dropped by the
 * extraction - restore them from upstream before relying on this table.
98  */
99 static const struct frame_type_desc {
100  uint8_t n_blocks; ///< amount of blocks per frame (each block
101  ///< contains 160/#n_blocks samples)
102  uint8_t log_n_blocks; ///< log2(#n_blocks)
103  uint8_t acb_type; ///< Adaptive codebook type (ACB_TYPE_*)
104  uint8_t fcb_type; ///< Fixed codebook type (FCB_TYPE_*)
105  uint8_t dbl_pulses; ///< how many pulse vectors have pulse pairs
106  ///< (rather than just one single pulse)
107  ///< only if #fcb_type == #FCB_TYPE_EXC_PULSES
108 } frame_descs[17] = {
109  { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0 },
110  { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0 },
126 };
127 
128 /**
129  * WMA Voice decoding context.
130  */
131 typedef struct WMAVoiceContext {
132  /**
133  * @name Global values specified in the stream header / extradata or used all over.
134  * @{
135  */
136  GetBitContext gb; ///< packet bitreader. During decoder init,
137  ///< it contains the extradata from the
138  ///< demuxer. During decoding, it contains
139  ///< packet data.
140  int8_t vbm_tree[25]; ///< converts VLC codes to frame type
141 
142  int spillover_bitsize; ///< number of bits used to specify
143  ///< #spillover_nbits in the packet header
144  ///< = ceil(log2(ctx->block_align << 3))
145  int history_nsamples; ///< number of samples in history for signal
146  ///< prediction (through ACB)
147 
148  /* postfilter specific values */
149  int do_apf; ///< whether to apply the averaged
150  ///< projection filter (APF)
151  int denoise_strength; ///< strength of denoising in Wiener filter
152  ///< [0-11]
153  int denoise_tilt_corr; ///< Whether to apply tilt correction to the
154  ///< Wiener filter coefficients (postfilter)
155  int dc_level; ///< Predicted amount of DC noise, based
156  ///< on which a DC removal filter is used
157 
158  int lsps; ///< number of LSPs per frame [10 or 16]
159  int lsp_q_mode; ///< defines quantizer defaults [0, 1]
160  int lsp_def_mode; ///< defines different sets of LSP defaults
161  ///< [0, 1]
162 
163  int min_pitch_val; ///< base value for pitch parsing code
164  int max_pitch_val; ///< max value + 1 for pitch parsing
165  int pitch_nbits; ///< number of bits used to specify the
166  ///< pitch value in the frame header
167  int block_pitch_nbits; ///< number of bits used to specify the
168  ///< first block's pitch value
169  int block_pitch_range; ///< range of the block pitch
170  int block_delta_pitch_nbits; ///< number of bits used to specify the
171  ///< delta pitch between this and the last
172  ///< block's pitch value, used in all but
173  ///< first block
174  int block_delta_pitch_hrange; ///< 1/2 range of the delta (full range is
175  ///< from -this to +this-1)
176  uint16_t block_conv_table[4]; ///< boundaries for block pitch unit/scale
177  ///< conversion
178 
179  /**
180  * @}
181  *
182  * @name Packet values specified in the packet header or related to a packet.
183  *
184  * A packet is considered to be a single unit of data provided to this
185  * decoder by the demuxer.
186  * @{
187  */
188  int spillover_nbits; ///< number of bits of the previous packet's
189  ///< last superframe preceding this
190  ///< packet's first full superframe (useful
191  ///< for re-synchronization also)
192  int has_residual_lsps; ///< if set, superframes contain one set of
193  ///< LSPs that cover all frames, encoded as
194  ///< independent and residual LSPs; if not
195  ///< set, each frame contains its own, fully
196  ///< independent, LSPs
197  int skip_bits_next; ///< number of bits to skip at the next call
198  ///< to #wmavoice_decode_packet() (since
199  ///< they're part of the previous superframe)
200 
202  ///< cache for superframe data split over
203  ///< multiple packets
204  int sframe_cache_size; ///< set to >0 if we have data from an
205  ///< (incomplete) superframe from a previous
206  ///< packet that spilled over in the current
207  ///< packet; specifies the amount of bits in
208  ///< #sframe_cache
209  PutBitContext pb; ///< bitstream writer for #sframe_cache
210 
211  /**
212  * @}
213  *
214  * @name Frame and superframe values
215  * Superframe and frame data - these can change from frame to frame,
216  * although some of them do in that case serve as a cache / history for
217  * the next frame or superframe.
218  * @{
219  */
220  double prev_lsps[MAX_LSPS]; ///< LSPs of the last frame of the previous
221  ///< superframe
222  int last_pitch_val; ///< pitch value of the previous frame
223  int last_acb_type; ///< frame type [0-2] of the previous frame
224  int pitch_diff_sh16; ///< ((cur_pitch_val - #last_pitch_val)
225  ///< << 16) / #MAX_FRAMESIZE
226  float silence_gain; ///< set for use in blocks if #ACB_TYPE_NONE
227 
228  int aw_idx_is_ext; ///< whether the AW index was encoded in
229  ///< 8 bits (instead of 6)
230  int aw_pulse_range; ///< the range over which #aw_pulse_set1()
231  ///< can apply the pulse, relative to the
232  ///< value in aw_first_pulse_off. The exact
233  ///< position of the first AW-pulse is within
234  ///< [pulse_off, pulse_off + this], and
235  ///< depends on bitstream values; [16 or 24]
236  int aw_n_pulses[2]; ///< number of AW-pulses in each block; note
237  ///< that this number can be negative (in
238  ///< which case it basically means "zero")
239  int aw_first_pulse_off[2]; ///< index of first sample to which to
240  ///< apply AW-pulses, or -0xff if unset
241  int aw_next_pulse_off_cache; ///< the position (relative to start of the
242  ///< second block) at which pulses should
243  ///< start to be positioned, serves as a
244  ///< cache for pitch-adaptive window pulses
245  ///< between blocks
246 
247  int frame_cntr; ///< current frame index [0 - 0xFFFE]; is
248  ///< only used for comfort noise in #pRNG()
249  int nb_superframes; ///< number of superframes in current packet
250  float gain_pred_err[6]; ///< cache for gain prediction
252  ///< cache of the signal of previous
253  ///< superframes, used as a history for
254  ///< signal generation
255  float synth_history[MAX_LSPS]; ///< see #excitation_history
256  /**
257  * @}
258  *
259  * @name Postfilter values
260  *
261  * Variables used for postfilter implementation, mostly history for
262  * smoothing and so on, and context variables for FFT/iFFT.
263  * @{
264  */
265  AVTXContext *rdft, *irdft; ///< contexts for FFT-calculation in the
266  av_tx_fn rdft_fn, irdft_fn; ///< postfilter (for denoise filter)
267  AVTXContext *dct, *dst; ///< contexts for phase shift (in Hilbert
268  av_tx_fn dct_fn, dst_fn; ///< transform, part of postfilter)
269  float sin[511], cos[511]; ///< 8-bit cosine/sine windows over [-pi,pi]
270  ///< range
271  float postfilter_agc; ///< gain control memory, used in
272  ///< #adaptive_gain_control()
273  float dcf_mem[2]; ///< DC filter history
275  ///< zero filter output (i.e. excitation)
276  ///< by postfilter
278  int denoise_filter_cache_size; ///< samples in #denoise_filter_cache
279  DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x82];
280  ///< aligned buffer for LPC tilting
282  ///< aligned buffer for denoise coefficients
284  ///< aligned buffer for postfilter speech
285  ///< synthesis
286  /**
287  * @}
288  */
290 
291 /**
292  * Set up the variable bit mode (VBM) tree from container extradata.
293  * @param gb bit I/O context.
294  * The bit context (s->gb) should be loaded with byte 23-46 of the
295  * container extradata (i.e. the ones containing the VBM tree).
296  * @param vbm_tree pointer to array to which the decoded VBM tree will be
297  * written.
298  * @return 0 on success, <0 on error.
299  */
300 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
301 {
302  int cntr[8] = { 0 }, n, res;
303 
304  memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
305  for (n = 0; n < 17; n++) {
306  res = get_bits(gb, 3);
307  if (cntr[res] > 3) // should be >= 3 + (res == 7))
308  return -1;
309  vbm_tree[res * 3 + cntr[res]++] = n;
310  }
311  return 0;
312 }
313 
315 {
316  static const uint8_t bits[] = {
317  2, 2, 2, 4, 4, 4,
318  6, 6, 6, 8, 8, 8,
319  10, 10, 10, 12, 12, 12,
320  14, 14, 14, 14
321  };
322 
325  1, NULL, 0, 0, 0, 0);
326 }
327 
329 {
331  int n;
332 
333  s->postfilter_agc = 0;
334  s->sframe_cache_size = 0;
335  s->skip_bits_next = 0;
336  for (n = 0; n < s->lsps; n++)
337  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
338  memset(s->excitation_history, 0,
339  sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
340  memset(s->synth_history, 0,
341  sizeof(*s->synth_history) * MAX_LSPS);
342  memset(s->gain_pred_err, 0,
343  sizeof(s->gain_pred_err));
344 
345  if (s->do_apf) {
346  memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
347  sizeof(*s->synth_filter_out_buf) * s->lsps);
348  memset(s->dcf_mem, 0,
349  sizeof(*s->dcf_mem) * 2);
350  memset(s->zero_exc_pf, 0,
351  sizeof(*s->zero_exc_pf) * s->history_nsamples);
352  memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
353  }
354 }
355 
356 /**
357  * Set up decoder with parameters from demuxer (extradata etc.).
358  */
360 {
361  static AVOnce init_static_once = AV_ONCE_INIT;
362  int n, flags, pitch_range, lsp16_flag, ret;
364 
365  ff_thread_once(&init_static_once, wmavoice_init_static_data);
366 
367  /**
368  * Extradata layout:
369  * - byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
370  * - byte 19-22: flags field (annoyingly in LE; see below for known
371  * values),
372  * - byte 23-46: variable bitmode tree (really just 17 * 3 bits,
373  * rest is 0).
374  */
375  if (ctx->extradata_size != 46) {
377  "Invalid extradata size %d (should be 46)\n",
378  ctx->extradata_size);
379  return AVERROR_INVALIDDATA;
380  }
381  if (ctx->block_align <= 0 || ctx->block_align > (1<<22)) {
382  av_log(ctx, AV_LOG_ERROR, "Invalid block alignment %d.\n", ctx->block_align);
383  return AVERROR_INVALIDDATA;
384  }
385 
386  flags = AV_RL32(ctx->extradata + 18);
387  s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
388  s->do_apf = flags & 0x1;
389  if (s->do_apf) {
390  float scale = 1.0f;
391 
392  ret = av_tx_init(&s->rdft, &s->rdft_fn, AV_TX_FLOAT_RDFT, 0, 1 << 7, &scale, 0);
393  if (ret < 0)
394  return ret;
395 
396  ret = av_tx_init(&s->irdft, &s->irdft_fn, AV_TX_FLOAT_RDFT, 1, 1 << 7, &scale, 0);
397  if (ret < 0)
398  return ret;
399 
400  scale = 1.0 / (1 << 6);
401  ret = av_tx_init(&s->dct, &s->dct_fn, AV_TX_FLOAT_DCT_I, 0, 1 << 6, &scale, 0);
402  if (ret < 0)
403  return ret;
404 
405  scale = 1.0 / (1 << 6);
406  ret = av_tx_init(&s->dst, &s->dst_fn, AV_TX_FLOAT_DST_I, 0, 1 << 6, &scale, 0);
407  if (ret < 0)
408  return ret;
409 
410  ff_sine_window_init(s->cos, 256);
411  memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
412  for (n = 0; n < 255; n++) {
413  s->sin[n] = -s->sin[510 - n];
414  s->cos[510 - n] = s->cos[n];
415  }
416  }
417  s->denoise_strength = (flags >> 2) & 0xF;
418  if (s->denoise_strength >= 12) {
420  "Invalid denoise filter strength %d (max=11)\n",
421  s->denoise_strength);
422  return AVERROR_INVALIDDATA;
423  }
424  s->denoise_tilt_corr = !!(flags & 0x40);
425  s->dc_level = (flags >> 7) & 0xF;
426  s->lsp_q_mode = !!(flags & 0x2000);
427  s->lsp_def_mode = !!(flags & 0x4000);
428  lsp16_flag = flags & 0x1000;
429  if (lsp16_flag) {
430  s->lsps = 16;
431  } else {
432  s->lsps = 10;
433  }
434  for (n = 0; n < s->lsps; n++)
435  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
436 
437  init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
438  if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
439  av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
440  return AVERROR_INVALIDDATA;
441  }
442 
443  if (ctx->sample_rate >= INT_MAX / (256 * 37))
444  return AVERROR_INVALIDDATA;
445 
446  s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
447  s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
448  pitch_range = s->max_pitch_val - s->min_pitch_val;
449  if (pitch_range <= 0) {
450  av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
451  return AVERROR_INVALIDDATA;
452  }
453  s->pitch_nbits = av_ceil_log2(pitch_range);
454  s->last_pitch_val = 40;
455  s->last_acb_type = ACB_TYPE_NONE;
456  s->history_nsamples = s->max_pitch_val + 8;
457 
458  if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
459  int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
460  max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
461 
463  "Unsupported samplerate %d (min=%d, max=%d)\n",
464  ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
465 
466  return AVERROR(ENOSYS);
467  }
468 
469  s->block_conv_table[0] = s->min_pitch_val;
470  s->block_conv_table[1] = (pitch_range * 25) >> 6;
471  s->block_conv_table[2] = (pitch_range * 44) >> 6;
472  s->block_conv_table[3] = s->max_pitch_val - 1;
473  s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
474  if (s->block_delta_pitch_hrange <= 0) {
475  av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
476  return AVERROR_INVALIDDATA;
477  }
478  s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
479  s->block_pitch_range = s->block_conv_table[2] +
480  s->block_conv_table[3] + 1 +
481  2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
482  s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
483 
484  av_channel_layout_uninit(&ctx->ch_layout);
486  ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
487 
488  return 0;
489 }
490 
491 /**
492  * @name Postfilter functions
493  * Postfilter functions (gain control, wiener denoise filter, DC filter,
494  * kalman smoothening, plus surrounding code to wrap it)
495  * @{
496  */
/**
 * Adaptive gain control (as used in postfilter).
 *
 * Same idea as #ff_adaptive_gain_control() in acelp_vectors.c, but the
 * energy estimate used here is sum(abs(...)) instead of the
 * sqrt(dotproduct(...)) used by other codecs (e.g. AMR-NB, SIPRO).
 *
 * The per-sample gain is a first-order recursion:
 * gain = alpha * gain + (1 - alpha) * speech_energy / postfilter_energy,
 * where the second term is 0 if the postfiltered signal has zero energy.
 *
 * @param out output buffer for filtered samples
 * @param in input buffer containing the samples as they are after the
 *           postfilter steps so far
 * @param speech_synth input buffer containing speech synth before postfilter
 * @param size input buffer size
 * @param alpha exponential filter factor
 * @param gain_mem pointer to filter memory (single float); read on entry
 *                 and updated with the final gain on return
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    float ref_sum = 0.0, pf_sum = 0.0;
    float step;
    float gain = *gain_mem;
    int pos;

    /* accumulate absolute-value "energies" of both signals */
    for (pos = 0; pos < size; pos++) {
        ref_sum += fabsf(speech_synth[pos]);
        pf_sum  += fabsf(in[pos]);
    }

    /* constant term of the gain recursion; avoid dividing by zero */
    if (pf_sum == 0.0)
        step = 0.0;
    else
        step = (1.0 - alpha) * ref_sum / pf_sum;

    /* smooth the gain sample by sample while applying it */
    for (pos = 0; pos < size; pos++) {
        gain     = alpha * gain + step;
        out[pos] = in[pos] * gain;
    }

    *gain_mem = gain;
}
534 
535 /**
536  * Kalman smoothing function.
537  *
538  * This function looks back pitch +/- 3 samples back into history to find
539  * the best fitting curve (that one giving the optimal gain of the two
540  * signals, i.e. the highest dot product between the two), and then
541  * uses that signal history to smoothen the output of the speech synthesis
542  * filter.
543  *
544  * @param s WMA Voice decoding context
545  * @param pitch pitch of the speech signal
546  * @param in input speech signal
547  * @param out output pointer for smoothened signal
548  * @param size input/output buffer size
549  *
550  * @returns -1 if no smoothening took place, e.g. because no optimal
551  * fit could be found, or 0 on success.
552  */
553 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
554  const float *in, float *out, int size)
555 {
556  int n;
557  float optimal_gain = 0, dot;
558  const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
559  *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
560  *best_hist_ptr = NULL;
561 
562  /* find best fitting point in history */
563  do {
564  dot = avpriv_scalarproduct_float_c(in, ptr, size);
565  if (dot > optimal_gain) {
566  optimal_gain = dot;
567  best_hist_ptr = ptr;
568  }
569  } while (--ptr >= end);
570 
571  if (optimal_gain <= 0)
572  return -1;
573  dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
574  if (dot <= 0) // would be 1.0
575  return -1;
576 
577  if (optimal_gain <= dot) {
578  dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
579  } else
580  dot = 0.625;
581 
582  /* actual smoothing */
583  for (n = 0; n < size; n++)
584  out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
585 
586  return 0;
587 }
588 
/**
 * Get the tilt factor of a formant filter from its transfer function
 * @see #tilt_factor() in amrnbdec.c, which does essentially the same,
 *      but somehow (??) it does a speech synthesis filter in the
 *      middle, which is missing here
 *
 * The result is (lpcs[0] + sum(lpcs[i] * lpcs[i + 1])) divided by
 * (1 + sum(lpcs[i] * lpcs[i])).
 *
 * @param lpcs LPC coefficients
 * @param n_lpcs Size of LPC buffer
 * @returns the tilt factor
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    /* zero-lag term, including the implicit leading 1.0 coefficient */
    const float energy = 1.0 + avpriv_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
    /* one-sample-lag term */
    const float lag_one = lpcs[0] +
                          avpriv_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);

    return lag_one / energy;
}
608 
609 /**
610  * Derive denoise filter coefficients (in real domain) from the LPCs.
 *
 * @param s          WMA Voice decoding context; supplies the transform
 *                   contexts, the sin/cos tables, the denoise strength and
 *                   the tilt-correction flag
 * @param lpcs_src   input to the forward RDFT (the LPC-derived signal)
 * @param fcb_type   fixed codebook type of the frame; only selects the
 *                   gain multiplier (5/13 for #FCB_TYPE_HARDCODED,
 *                   5/14.7 otherwise)
 * @param coeffs_dst on entry, its first 0x82 floats are copied into the
 *                   local work buffer; on return it holds the real-domain
 *                   coefficients, with entries [remainder, 128) zeroed and
 *                   the first @p remainder entries normalized
 * @param remainder  number of leading coefficients that are kept
611  */
612 static void calc_input_response(WMAVoiceContext *s, float *lpcs_src,
613  int fcb_type, float *coeffs_dst, int remainder)
614 {
615  float last_coeff, min = 15.0, max = -15.0;
616  float irange, angle_mul, gain_mul, range, sq;
617  LOCAL_ALIGNED_32(float, coeffs, [0x82]);
618  LOCAL_ALIGNED_32(float, lpcs, [0x82]);
619  LOCAL_ALIGNED_32(float, lpcs_dct, [0x82]);
620  int n, idx;
621 
622  memcpy(coeffs, coeffs_dst, 0x82*sizeof(float));
623 
624  /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
625  s->rdft_fn(s->rdft, lpcs, lpcs_src, sizeof(float));
/* log_range(): store log10(assign) in var and widen the running
 * [min, max] interval with that value. */
626 #define log_range(var, assign) do { \
627  float tmp = log10f(assign); var = tmp; \
628  max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
629  } while (0)
/* lpcs[] is converted in place from interleaved transform output to one
 * log-power value per bin; the value for bin 64 is parked in last_coeff
 * until the loop below has finished reading the interleaved data. */
630  log_range(last_coeff, lpcs[64] * lpcs[64]);
631  for (n = 1; n < 64; n++)
632  log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
633  lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
634  log_range(lpcs[0], lpcs[0] * lpcs[0]);
635 #undef log_range
636  range = max - min;
637  lpcs[64] = last_coeff;
638 
639  /* Now, use this spectrum to pick out these frequencies with higher
640  * (relative) power/energy (which we then take to be "not noise"),
641  * and set up a table (still in lpc[]) of (relative) gains per frequency.
642  * These frequencies will be maintained, while others ("noise") will be
643  * decreased in the filter output. */
644  irange = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
645  gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
646  (5.0 / 14.7));
647  angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
648  for (n = 0; n <= 64; n++) {
649  float pwr;
650 
/* table row is chosen by the denoise strength, column by how far
 * this bin lies below the spectral maximum (clamped at 0 from below) */
651  idx = lrint((max - lpcs[n]) * irange - 1);
652  idx = FFMAX(0, idx);
653  pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
654  lpcs[n] = angle_mul * pwr;
655 
656  /* 70.57 =~ 1/log10(1.0331663) */
657  idx = av_clipf((pwr * gain_mul - 0.0295) * 70.570526123, 0, INT_MAX / 2);
658 
659  if (idx > 127) { // fall back if index falls outside table range
660  coeffs[n] = wmavoice_energy_table[127] *
661  powf(1.0331663, idx - 127);
662  } else
663  coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
664  }
665 
666  /* calculate the Hilbert transform of the gains, which we do (since this
667  * is a sine input) by doing a phase shift (in theory, H(sin())=cos()).
668  * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
669  * "moment" of the LPCs in this filter. */
670  s->dct_fn(s->dct, lpcs_dct, lpcs, sizeof(float));
671  s->dst_fn(s->dst, lpcs, lpcs_dct, sizeof(float));
672 
673  /* Split out the coefficient indexes into phase/magnitude pairs */
/* 255 is the centre index of the 511-entry sin/cos tables; the clip keeps
 * the final index within [0, 510]. */
674  idx = 255 + av_clip(lpcs[64], -255, 255);
675  coeffs[0] = coeffs[0] * s->cos[idx];
676  idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
677  last_coeff = coeffs[64] * s->cos[idx];
/* Walk n from 63 down to 1, expanding coeffs[n] in place into the pair
 * (coeffs[2n], coeffs[2n + 1]). Each pass handles two values of n: the
 * first with -lpcs[64], the second (after the inner --n) with +lpcs[64];
 * the loop leaves when the inner decrement reaches 0. Going downwards
 * means coeffs[n] is read before its slot is overwritten by a pair. */
678  for (n = 63;; n--) {
679  idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
680  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
681  coeffs[n * 2] = coeffs[n] * s->cos[idx];
682 
683  if (!--n) break;
684 
685  idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
686  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
687  coeffs[n * 2] = coeffs[n] * s->cos[idx];
688  }
689  coeffs[64] = last_coeff;
690 
691  /* move into real domain */
692  s->irdft_fn(s->irdft, coeffs_dst, coeffs, sizeof(AVComplexFloat));
693 
694  /* tilt correction and normalize scale */
695  memset(&coeffs_dst[remainder], 0, sizeof(coeffs_dst[0]) * (128 - remainder));
696  if (s->denoise_tilt_corr) {
697  float tilt_mem = 0;
698 
699  coeffs_dst[remainder - 1] = 0;
700  ff_tilt_compensation(&tilt_mem,
701  -1.8 * tilt_factor(coeffs_dst, remainder - 1),
702  coeffs_dst, remainder);
703  }
/* scale the kept coefficients by (1/64) / sqrt(their energy) */
704  sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs_dst, coeffs_dst,
705  remainder));
706  for (n = 0; n < remainder; n++)
707  coeffs_dst[n] *= sq;
708 }
709 
710 /**
711  * This function applies a Wiener filter on the (noisy) speech signal as
712  * a means to denoise it.
713  *
714  * - take RDFT of LPCs to get the power spectrum of the noise + speech;
715  * - using this power spectrum, calculate (for each frequency) the Wiener
716  * filter gain, which depends on the frequency power and desired level
717  * of noise subtraction (when set too high, this leads to artifacts)
718  * We can do this symmetrically over the X-axis (so 0-4kHz is the inverse
719  * of 4-8kHz);
720  * - by doing a phase shift, calculate the Hilbert transform of this array
721  * of per-frequency filter-gains to get the filtering coefficients;
722  * - smoothen/normalize/de-tilt these filter coefficients as desired;
723  * - take RDFT of noisy sound, apply the coefficients and take its IRDFT
724  * to get the denoised speech signal;
725  * - the leftover (i.e. output of the IRDFT on denoised speech data beyond
726  * the frame boundary) are saved and applied to subsequent frames by an
727  * overlap-add method (otherwise you get clicking-artifacts).
 *
 * For #FCB_TYPE_SILENCE frames no filtering is done; only the cached
 * leftover of earlier frames is added to the signal.
728  *
729  * @param s WMA Voice decoding context
730  * @param fcb_type Frame (codebook) type
731  * @param synth_pf input: the noisy speech signal, output: denoised speech
732  * data; should be 16-byte aligned (for ASM purposes)
 * The buffer is written up to index 127 (zero padding and
 * transform output), so it must hold at least 128 floats.
733  * @param size size of the speech data
734  * @param lpcs LPCs used to synthesize this frame's speech data
735  */
736 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
737  float *synth_pf, int size,
738  const float *lpcs)
739 {
/* remainder is only assigned in the non-silence branch below and only
 * read in the matching non-silence branch at the end. */
740  int remainder, lim, n;
741 
742  if (fcb_type != FCB_TYPE_SILENCE) {
743  LOCAL_ALIGNED_32(float, coeffs_f, [0x82]);
744  LOCAL_ALIGNED_32(float, synth_f, [0x82]);
745  float *tilted_lpcs = s->tilted_lpcs_pf,
746  *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
747 
/* build { 1.0, lpcs[0..lsps-1], 0, ... } (128 entries), then apply
 * tilt compensation to its first lsps + 2 entries */
748  tilted_lpcs[0] = 1.0;
749  memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
750  memset(&tilted_lpcs[s->lsps + 1], 0,
751  sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
752  ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
753  tilted_lpcs, s->lsps + 2);
754 
755  /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
756  * size is applied to the next frame. All input beyond this is zero,
757  * and thus all output beyond this will go towards zero, hence we can
758  * limit to min(size-1, 127-size) as a performance consideration. */
759  remainder = FFMIN(127 - size, size - 1);
760  calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
761 
762  /* apply coefficients (in frequency spectrum domain), i.e. complex
763  * number multiplication */
764  memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
765  s->rdft_fn(s->rdft, synth_f, synth_pf, sizeof(float));
766  s->rdft_fn(s->rdft, coeffs_f, coeffs, sizeof(float));
/* elements 0 and 1 are scaled individually first; the loop then
 * performs a complex multiply on each (2n, 2n + 1) pair */
767  synth_f[0] *= coeffs_f[0];
768  synth_f[1] *= coeffs_f[1];
769  for (n = 1; n <= 64; n++) {
770  float v1 = synth_f[n * 2], v2 = synth_f[n * 2 + 1];
771  synth_f[n * 2] = v1 * coeffs_f[n * 2] - v2 * coeffs_f[n * 2 + 1];
772  synth_f[n * 2 + 1] = v2 * coeffs_f[n * 2] + v1 * coeffs_f[n * 2 + 1];
773  }
774  s->irdft_fn(s->irdft, synth_pf, synth_f, sizeof(AVComplexFloat));
775  }
776 
777  /* merge filter output with the history of previous runs */
778  if (s->denoise_filter_cache_size) {
779  lim = FFMIN(s->denoise_filter_cache_size, size);
780  for (n = 0; n < lim; n++)
781  synth_pf[n] += s->denoise_filter_cache[n];
/* drop the consumed samples and shift what is left to the front */
782  s->denoise_filter_cache_size -= lim;
783  memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
784  sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
785  }
786 
787  /* move remainder of filter output into a cache for future runs */
788  if (fcb_type != FCB_TYPE_SILENCE) {
/* add onto the part of the cache that is still occupied, then copy
 * the rest and grow the cache if this frame's tail is longer */
789  lim = FFMIN(remainder, s->denoise_filter_cache_size);
790  for (n = 0; n < lim; n++)
791  s->denoise_filter_cache[n] += synth_pf[size + n];
792  if (lim < remainder) {
793  memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
794  sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
795  s->denoise_filter_cache_size = remainder;
796  }
797  }
798 }
799 
800 /**
801  * Averaging projection filter, the postfilter used in WMAVoice.
802  *
803  * This uses the following steps:
804  * - A zero-synthesis filter (generate excitation from synth signal)
805  * - Kalman smoothing on excitation, based on pitch
806  * - Re-synthesized smoothened output
807  * - Iterative Wiener denoise filter
808  * - Adaptive gain filter
809  * - DC filter
810  *
811  * @param s WMAVoice decoding context
812  * @param synth Speech synthesis output (before postfilter)
813  * @param samples Output buffer for filtered samples
814  * @param size Buffer size of synth & samples
 * (asserted to be at most MAX_FRAMESIZE / 2)
815  * @param lpcs Generated LPCs used for speech synthesis
816  * @param zero_exc_pf destination for zero synthesis filter (16-byte aligned)
817  * @param fcb_type Frame type (silence, hardcoded, AW-pulses or FCB-pulses)
818  * @param pitch Pitch of the input signal
819  */
820 static void postfilter(WMAVoiceContext *s, const float *synth,
821  float *samples, int size,
822  const float *lpcs, float *zero_exc_pf,
823  int fcb_type, int pitch)
824 {
/* synth_pf points MAX_LSPS_ALIGN16 floats into the context buffer so that
 * negative indices (written by the memcpy below) hold the filter history.
 * synth_filter_in defaults to the raw excitation and is only redirected
 * to the local buffer when smoothing succeeds. */
825  float synth_filter_in_buf[MAX_FRAMESIZE / 2],
826  *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
827  *synth_filter_in = zero_exc_pf;
828 
829  av_assert0(size <= MAX_FRAMESIZE / 2);
830 
831  /* generate excitation from input signal */
832  ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
833 
/* smoothing is attempted only for the pulse-based codebook types;
 * kalman_smoothen() returns 0 when it produced output */
834  if (fcb_type >= FCB_TYPE_AW_PULSES &&
835  !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
836  synth_filter_in = synth_filter_in_buf;
837 
838  /* re-synthesize speech after smoothening, and keep history */
839  ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
840  synth_filter_in, size, s->lsps);
841  memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
842  sizeof(synth_pf[0]) * s->lsps);
843 
844  wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
845 
846  adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
847  &s->postfilter_agc);
848 
849  if (s->dc_level > 8) {
850  /* remove ultra-low frequency DC noise / highpass filter;
851  * coefficients are identical to those used in SIPR decoding,
852  * and very closely resemble those used in AMR-NB decoding. */
/* NOTE(review): the first line of the following call (callee name and
 * leading arguments) is absent from this listing; only the trailing
 * arguments are shown. Restore it from upstream before building. */
854  (const float[2]) { -1.99997, 1.0 },
855  (const float[2]) { -1.9330735188, 0.93589198496 },
856  0.93980580475, s->dcf_mem, size);
857  }
858 }
859 /**
860  * @}
861  */
862 
/**
 * Dequantize LSPs
 *
 * The output is the sum over all stages of
 * base_q[stage] + mul_q[stage] * entry, where entry is taken from the row
 * of the stage's sub-table selected by values[stage]. The sub-tables are
 * stored back to back in @p table, each holding sizes[stage] rows of
 * @p num bytes.
 *
 * @param lsps output pointer to the array that will hold the LSPs
 * @param num number of LSPs to be dequantized
 * @param values quantized values, contains n_stages values
 * @param sizes range (i.e. max value) of each quantized value
 * @param n_stages number of dequantization runs
 * @param table dequantization table to be used
 * @param mul_q LSF multiplier
 * @param base_q base (lowest) LSF values
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *stage_tab = table;
    int stage, k;

    memset(lsps, 0, sizeof(lsps[0]) * num);

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *row = stage_tab + values[stage] * num;
        const double offset = base_q[stage];
        const double scale  = mul_q[stage];

        for (k = 0; k < num; k++)
            lsps[k] += offset + scale * row[k];

        /* step over this stage's sub-table */
        stage_tab += sizes[stage] * num;
    }
}
894 
895 /**
896  * @name LSP dequantization routines
897  * LSP dequantization routines, for 10/16LSPs and independent/residual coding.
898  * lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits;
899  * lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.
900  * @{
901  */
902 /**
903  * Parse 10 independently-coded LSPs.
904  */
905 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
906 {
907  static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
908  static const double mul_lsf[4] = {
909  5.2187144800e-3, 1.4626986422e-3,
910  9.6179549166e-4, 1.1325736225e-3
911  };
912  static const double base_lsf[4] = {
913  M_PI * -2.15522e-1, M_PI * -6.1646e-2,
914  M_PI * -3.3486e-2, M_PI * -5.7408e-2
915  };
916  uint16_t v[4];
917 
918  v[0] = get_bits(gb, 8);
919  v[1] = get_bits(gb, 6);
920  v[2] = get_bits(gb, 5);
921  v[3] = get_bits(gb, 5);
922 
923  dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
924  mul_lsf, base_lsf);
925 }
926 
927 /**
928  * Parse 10 independently-coded LSPs, and then derive the tables to
929  * generate LSPs for the other frames from them (residual coding).
930  */
932  double *i_lsps, const double *old,
933  double *a1, double *a2, int q_mode)
934 {
935  static const uint16_t vec_sizes[3] = { 128, 64, 64 };
936  static const double mul_lsf[3] = {
937  2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
938  };
939  static const double base_lsf[3] = {
940  M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
941  };
942  const float (*ipol_tab)[2][10] = q_mode ?
944  uint16_t interpol, v[3];
945  int n;
946 
947  dequant_lsp10i(gb, i_lsps);
948 
949  interpol = get_bits(gb, 5);
950  v[0] = get_bits(gb, 7);
951  v[1] = get_bits(gb, 6);
952  v[2] = get_bits(gb, 6);
953 
954  for (n = 0; n < 10; n++) {
955  double delta = old[n] - i_lsps[n];
956  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
957  a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
958  }
959 
960  dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
961  mul_lsf, base_lsf);
962 }
963 
964 /**
965  * Parse 16 independently-coded LSPs.
966  */
967 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
968 {
969  static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
970  static const double mul_lsf[5] = {
971  3.3439586280e-3, 6.9908173703e-4,
972  3.3216608306e-3, 1.0334960326e-3,
973  3.1899104283e-3
974  };
975  static const double base_lsf[5] = {
976  M_PI * -1.27576e-1, M_PI * -2.4292e-2,
977  M_PI * -1.28094e-1, M_PI * -3.2128e-2,
978  M_PI * -1.29816e-1
979  };
980  uint16_t v[5];
981 
982  v[0] = get_bits(gb, 8);
983  v[1] = get_bits(gb, 6);
984  v[2] = get_bits(gb, 7);
985  v[3] = get_bits(gb, 6);
986  v[4] = get_bits(gb, 7);
987 
988  dequant_lsps( lsps, 5, v, vec_sizes, 2,
989  wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
990  dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
991  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
992  dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
993  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
994 }
995 
996 /**
997  * Parse 16 independently-coded LSPs, and then derive the tables to
998  * generate LSPs for the other frames from them (residual coding).
999  */
1001  double *i_lsps, const double *old,
1002  double *a1, double *a2, int q_mode)
1003 {
1004  static const uint16_t vec_sizes[3] = { 128, 128, 128 };
1005  static const double mul_lsf[3] = {
1006  1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
1007  };
1008  static const double base_lsf[3] = {
1009  M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
1010  };
1011  const float (*ipol_tab)[2][16] = q_mode ?
1013  uint16_t interpol, v[3];
1014  int n;
1015 
1016  dequant_lsp16i(gb, i_lsps);
1017 
1018  interpol = get_bits(gb, 5);
1019  v[0] = get_bits(gb, 7);
1020  v[1] = get_bits(gb, 7);
1021  v[2] = get_bits(gb, 7);
1022 
1023  for (n = 0; n < 16; n++) {
1024  double delta = old[n] - i_lsps[n];
1025  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
1026  a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
1027  }
1028 
1029  dequant_lsps( a2, 10, v, vec_sizes, 1,
1030  wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
1031  dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
1032  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
1033  dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
1034  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
1035 }
1036 
1037 /**
1038  * @}
1039  * @name Pitch-adaptive window coding functions
1040  * The next few functions are for pitch-adaptive window coding.
1041  * @{
1042  */
1043 /**
1044  * Parse the offset of the first pitch-adaptive window pulses, and
1045  * the distribution of pulses between the two blocks in this frame.
1046  * @param s WMA Voice decoding context private data
1047  * @param gb bit I/O context
1048  * @param pitch pitch for each block in this frame
1049  */
1051  const int *pitch)
1052 {
1053  static const int16_t start_offset[94] = {
1054  -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
1055  13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
1056  27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
1057  45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
1058  69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
1059  93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
1060  117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
1061  141, 143, 145, 147, 149, 151, 153, 155, 157, 159
1062  };
1063  int bits, offset;
1064 
1065  /* position of pulse */
1066  s->aw_idx_is_ext = 0;
1067  if ((bits = get_bits(gb, 6)) >= 54) {
1068  s->aw_idx_is_ext = 1;
1069  bits += (bits - 54) * 3 + get_bits(gb, 2);
1070  }
1071 
1072  /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
1073  * the distribution of the pulses in each block contained in this frame. */
1074  s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
1075  for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
1076  s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
1077  s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
1078  offset += s->aw_n_pulses[0] * pitch[0];
1079  s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
1080  s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
1081 
1082  /* if continuing from a position before the block, reset position to
1083  * start of block (when corrected for the range over which it can be
1084  * spread in aw_pulse_set1()). */
1085  if (start_offset[bits] < MAX_FRAMESIZE / 2) {
1086  while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
1087  s->aw_first_pulse_off[1] -= pitch[1];
1088  if (start_offset[bits] < 0)
1089  while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
1090  s->aw_first_pulse_off[0] -= pitch[0];
1091  }
1092 }
1093 
1094 /**
1095  * Apply second set of pitch-adaptive window pulses.
1096  * @param s WMA Voice decoding context private data
1097  * @param gb bit I/O context
1098  * @param block_idx block index in frame [0, 1]
1099  * @param fcb structure containing fixed codebook vector info
1100  * @return -1 on error, 0 otherwise
1101  */
1103  int block_idx, AMRFixed *fcb)
1104 {
1105  uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
1106  uint16_t *use_mask = use_mask_mem + 2;
1107  /* in this function, idx is the index in the 80-bit (+ padding) use_mask
1108  * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
1109  * of idx are the position of the bit within a particular item in the
1110  * array (0 being the most significant bit, and 15 being the least
1111  * significant bit), and the remainder (>> 4) is the index in the
1112  * use_mask[]-array. This is faster and uses less memory than using a
1113  * 80-byte/80-int array. */
1114  int pulse_off = s->aw_first_pulse_off[block_idx],
1115  pulse_start, n, idx, range, aidx, start_off = 0;
1116 
1117  /* set offset of first pulse to within this block */
1118  if (s->aw_n_pulses[block_idx] > 0)
1119  while (pulse_off + s->aw_pulse_range < 1)
1120  pulse_off += fcb->pitch_lag;
1121 
1122  /* find range per pulse */
1123  if (s->aw_n_pulses[0] > 0) {
1124  if (block_idx == 0) {
1125  range = 32;
1126  } else /* block_idx = 1 */ {
1127  range = 8;
1128  if (s->aw_n_pulses[block_idx] > 0)
1129  pulse_off = s->aw_next_pulse_off_cache;
1130  }
1131  } else
1132  range = 16;
1133  pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
1134 
1135  /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
1136  * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
1137  * we exclude that range from being pulsed again in this function. */
1138  memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
1139  memset( use_mask, -1, 5 * sizeof(use_mask[0]));
1140  memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
1141  if (s->aw_n_pulses[block_idx] > 0)
1142  for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
1143  int excl_range = s->aw_pulse_range; // always 16 or 24
1144  uint16_t *use_mask_ptr = &use_mask[idx >> 4];
1145  int first_sh = 16 - (idx & 15);
1146  *use_mask_ptr++ &= 0xFFFFu << first_sh;
1147  excl_range -= first_sh;
1148  if (excl_range >= 16) {
1149  *use_mask_ptr++ = 0;
1150  *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
1151  } else
1152  *use_mask_ptr &= 0xFFFF >> excl_range;
1153  }
1154 
1155  /* find the 'aidx'th offset that is not excluded */
1156  aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
1157  for (n = 0; n <= aidx; pulse_start++) {
1158  for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
1159  if (idx >= MAX_FRAMESIZE / 2) { // find from zero
1160  if (use_mask[0]) idx = 0x0F;
1161  else if (use_mask[1]) idx = 0x1F;
1162  else if (use_mask[2]) idx = 0x2F;
1163  else if (use_mask[3]) idx = 0x3F;
1164  else if (use_mask[4]) idx = 0x4F;
1165  else return -1;
1166  idx -= av_log2_16bit(use_mask[idx >> 4]);
1167  }
1168  if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
1169  use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
1170  n++;
1171  start_off = idx;
1172  }
1173  }
1174 
1175  fcb->x[fcb->n] = start_off;
1176  fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
1177  fcb->n++;
1178 
1179  /* set offset for next block, relative to start of that block */
1180  n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
1181  s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
1182  return 0;
1183 }
1184 
1185 /**
1186  * Apply first set of pitch-adaptive window pulses.
1187  * @param s WMA Voice decoding context private data
1188  * @param gb bit I/O context
1189  * @param block_idx block index in frame [0, 1]
1190  * @param fcb storage location for fixed codebook pulse info
1191  */
1193  int block_idx, AMRFixed *fcb)
1194 {
1195  int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
1196  float v;
1197 
1198  if (s->aw_n_pulses[block_idx] > 0) {
1199  int n, v_mask, i_mask, sh, n_pulses;
1200 
1201  if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
1202  n_pulses = 3;
1203  v_mask = 8;
1204  i_mask = 7;
1205  sh = 4;
1206  } else { // 4 pulses, 1:sign + 2:index each
1207  n_pulses = 4;
1208  v_mask = 4;
1209  i_mask = 3;
1210  sh = 3;
1211  }
1212 
1213  for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
1214  fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
1215  fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
1216  s->aw_first_pulse_off[block_idx];
1217  while (fcb->x[fcb->n] < 0)
1218  fcb->x[fcb->n] += fcb->pitch_lag;
1219  if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
1220  fcb->n++;
1221  }
1222  } else {
1223  int num2 = (val & 0x1FF) >> 1, delta, idx;
1224 
1225  if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
1226  else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
1227  else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
1228  else { delta = 7; idx = num2 + 1 - 3 * 75; }
1229  v = (val & 0x200) ? -1.0 : 1.0;
1230 
1231  fcb->no_repeat_mask |= 3 << fcb->n;
1232  fcb->x[fcb->n] = idx - delta;
1233  fcb->y[fcb->n] = v;
1234  fcb->x[fcb->n + 1] = idx;
1235  fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
1236  fcb->n += 2;
1237  }
1238 }
1239 
/**
 * @}
 *
 * Derive a deterministic pseudo-random table offset from the frame counter
 * and the block index. The result lies in [0, 1000 - block_size), so that
 * block_size consecutive entries can be read from a 1000-entry table
 * starting at the returned index.
 *
 * @param frame_cntr current frame number
 * @param block_num current block index
 * @param block_size amount of entries we want to read from a table
 *                   that has 1000 entries
 * @return a (non-)random number smaller than 1000 - block_size.
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* Lookup table replacing the division in
     *   y = (x % 9) * 5 + 6;
     *   z = (49995 * x) / y;
     * y can only take 9 different values, so z is rewritten as
     *   z = x * (49995 / y) + x * ((49995 % y) / y)
     * Each row belongs to one value of y: the first entry is 49995 / y,
     * the second is the FASTDIV-style multiplier for 49995 % y / y. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 *  93368855U }  // y = 46
    };
    unsigned int seed, row, quot;

    seed = MUL16(block_num, 1877) + frame_cntr;
    /* the seed never exceeds 8 * 1877 + 0xFFFE = 0x13AA6, so a single
     * subtraction is effectively a modulo (%) */
    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    row  = seed - 9 * MULH(477218589, seed); // seed % 9
    // quot = seed * 49995 / (row * 5 + 6), truncated to 16 bits
    quot = (uint16_t) (seed * div_tbl[row][0] + UMULH(seed, div_tbl[row][1]));

    return quot % (1000 - block_size);
}
1284 
1285 /**
1286  * Parse hardcoded signal for a single block.
1287  * @note see #synth_block().
1288  */
1290  int block_idx, int size,
1291  const struct frame_type_desc *frame_desc,
1292  float *excitation)
1293 {
1294  float gain;
1295  int n, r_idx;
1296 
1298 
1299  /* Set the offset from which we start reading wmavoice_std_codebook */
1300  if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
1301  r_idx = pRNG(s->frame_cntr, block_idx, size);
1302  gain = s->silence_gain;
1303  } else /* FCB_TYPE_HARDCODED */ {
1304  r_idx = get_bits(gb, 8);
1305  gain = wmavoice_gain_universal[get_bits(gb, 6)];
1306  }
1307 
1308  /* Clear gain prediction parameters */
1309  memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
1310 
1311  /* Apply gain to hardcoded codebook and use that as excitation signal */
1312  for (n = 0; n < size; n++)
1313  excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
1314 }
1315 
1316 /**
1317  * Parse FCB/ACB signal for a single block.
1318  * @note see #synth_block().
1319  */
1321  int block_idx, int size,
1322  int block_pitch_sh2,
1323  const struct frame_type_desc *frame_desc,
1324  float *excitation)
1325 {
1326  static const float gain_coeff[6] = {
1327  0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
1328  };
1329  float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
1330  int n, idx, gain_weight;
1331  AMRFixed fcb;
1332 
1333  av_assert0(size <= MAX_FRAMESIZE / 2);
1334  memset(pulses, 0, sizeof(*pulses) * size);
1335 
1336  fcb.pitch_lag = block_pitch_sh2 >> 2;
1337  fcb.pitch_fac = 1.0;
1338  fcb.no_repeat_mask = 0;
1339  fcb.n = 0;
1340 
1341  /* For the other frame types, this is where we apply the innovation
1342  * (fixed) codebook pulses of the speech signal. */
1343  if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
1344  aw_pulse_set1(s, gb, block_idx, &fcb);
1345  if (aw_pulse_set2(s, gb, block_idx, &fcb)) {
1346  /* Conceal the block with silence and return.
1347  * Skip the correct amount of bits to read the next
1348  * block from the correct offset. */
1349  int r_idx = pRNG(s->frame_cntr, block_idx, size);
1350 
1351  for (n = 0; n < size; n++)
1352  excitation[n] =
1353  wmavoice_std_codebook[r_idx + n] * s->silence_gain;
1354  skip_bits(gb, 7 + 1);
1355  return;
1356  }
1357  } else /* FCB_TYPE_EXC_PULSES */ {
1358  int offset_nbits = 5 - frame_desc->log_n_blocks;
1359 
1360  fcb.no_repeat_mask = -1;
1361  /* similar to ff_decode_10_pulses_35bits(), but with single pulses
1362  * (instead of double) for a subset of pulses */
1363  for (n = 0; n < 5; n++) {
1364  float sign;
1365  int pos1, pos2;
1366 
1367  sign = get_bits1(gb) ? 1.0 : -1.0;
1368  pos1 = get_bits(gb, offset_nbits);
1369  fcb.x[fcb.n] = n + 5 * pos1;
1370  fcb.y[fcb.n++] = sign;
1371  if (n < frame_desc->dbl_pulses) {
1372  pos2 = get_bits(gb, offset_nbits);
1373  fcb.x[fcb.n] = n + 5 * pos2;
1374  fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
1375  }
1376  }
1377  }
1378  ff_set_fixed_vector(pulses, &fcb, 1.0, size);
1379 
1380  /* Calculate gain for adaptive & fixed codebook signal.
1381  * see ff_amr_set_fixed_gain(). */
1382  idx = get_bits(gb, 7);
1383  fcb_gain = expf(avpriv_scalarproduct_float_c(s->gain_pred_err,
1384  gain_coeff, 6) -
1385  5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
1386  acb_gain = wmavoice_gain_codebook_acb[idx];
1387  pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
1388  -2.9957322736 /* log(0.05) */,
1389  1.6094379124 /* log(5.0) */);
1390 
1391  gain_weight = 8 >> frame_desc->log_n_blocks;
1392  memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
1393  sizeof(*s->gain_pred_err) * (6 - gain_weight));
1394  for (n = 0; n < gain_weight; n++)
1395  s->gain_pred_err[n] = pred_err;
1396 
1397  /* Calculation of adaptive codebook */
1398  if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
1399  int len;
1400  for (n = 0; n < size; n += len) {
1401  int next_idx_sh16;
1402  int abs_idx = block_idx * size + n;
1403  int pitch_sh16 = (s->last_pitch_val << 16) +
1404  s->pitch_diff_sh16 * abs_idx;
1405  int pitch = (pitch_sh16 + 0x6FFF) >> 16;
1406  int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
1407  idx = idx_sh16 >> 16;
1408  if (s->pitch_diff_sh16) {
1409  if (s->pitch_diff_sh16 > 0) {
1410  next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
1411  } else
1412  next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
1413  len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
1414  1, size - n);
1415  } else
1416  len = size;
1417 
1418  ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
1420  idx, 9, len);
1421  }
1422  } else /* ACB_TYPE_HAMMING */ {
1423  int block_pitch = block_pitch_sh2 >> 2;
1424  idx = block_pitch_sh2 & 3;
1425  if (idx) {
1426  ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
1428  idx, 8, size);
1429  } else
1430  av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
1431  sizeof(float) * size);
1432  }
1433 
1434  /* Interpolate ACB/FCB and use as excitation signal */
1435  ff_weighted_vector_sumf(excitation, excitation, pulses,
1436  acb_gain, fcb_gain, size);
1437 }
1438 
1439 /**
1440  * Parse data in a single block.
1441  *
1442  * @param s WMA Voice decoding context private data
1443  * @param gb bit I/O context
1444  * @param block_idx index of the to-be-read block
1445  * @param size amount of samples to be read in this block
1446  * @param block_pitch_sh2 pitch for this block << 2
1447  * @param lsps LSPs for (the end of) this frame
1448  * @param prev_lsps LSPs for the last frame
1449  * @param frame_desc frame type descriptor
1450  * @param excitation target memory for the ACB+FCB interpolated signal
1451  * @param synth target memory for the speech synthesis filter output
1452  * @return 0 on success, <0 on error.
1453  */
1455  int block_idx, int size,
1456  int block_pitch_sh2,
1457  const double *lsps, const double *prev_lsps,
1458  const struct frame_type_desc *frame_desc,
1459  float *excitation, float *synth)
1460 {
1461  double i_lsps[MAX_LSPS];
1462  float lpcs[MAX_LSPS];
1463  float fac;
1464  int n;
1465 
1466  if (frame_desc->acb_type == ACB_TYPE_NONE)
1467  synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
1468  else
1469  synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
1470  frame_desc, excitation);
1471 
1472  /* convert interpolated LSPs to LPCs */
1473  fac = (block_idx + 0.5) / frame_desc->n_blocks;
1474  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1475  i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
1476  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1477 
1478  /* Speech synthesis */
1479  ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
1480 }
1481 
1482 /**
1483  * Synthesize output samples for a single frame.
1484  *
1485  * @param ctx WMA Voice decoder context
1486  * @param gb bit I/O context (s->gb or one for cross-packet superframes)
1487  * @param frame_idx Frame number within superframe [0-2]
1488  * @param samples pointer to output sample buffer, has space for at least 160
1489  * samples
1490  * @param lsps LSP array
1491  * @param prev_lsps array of previous frame's LSPs
1492  * @param excitation target buffer for excitation signal
1493  * @param synth target buffer for synthesized speech data
1494  * @return 0 on success, <0 on error.
1495  */
1496 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
1497  float *samples,
1498  const double *lsps, const double *prev_lsps,
1499  float *excitation, float *synth)
1500 {
1502  int n, n_blocks_x2, log_n_blocks_x2, av_uninit(cur_pitch_val);
1503  int pitch[MAX_BLOCKS], av_uninit(last_block_pitch);
1504 
1505  /* Parse frame type ("frame header"), see frame_descs */
1506  int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc, 6, 3)], block_nsamples;
1507 
1508  if (bd_idx < 0) {
1510  "Invalid frame type VLC code, skipping\n");
1511  return AVERROR_INVALIDDATA;
1512  }
1513 
1514  block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
1515 
1516  /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
1517  if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
1518  /* Pitch is provided per frame, which is interpreted as the pitch of
1519  * the last sample of the last block of this frame. We can interpolate
1520  * the pitch of other blocks (and even pitch-per-sample) by gradually
1521  * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
1522  n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
1523  log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
1524  cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
1525  cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
1526  if (s->last_acb_type == ACB_TYPE_NONE ||
1527  20 * abs(cur_pitch_val - s->last_pitch_val) >
1528  (cur_pitch_val + s->last_pitch_val))
1529  s->last_pitch_val = cur_pitch_val;
1530 
1531  /* pitch per block */
1532  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1533  int fac = n * 2 + 1;
1534 
1535  pitch[n] = (MUL16(fac, cur_pitch_val) +
1536  MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
1537  frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
1538  }
1539 
1540  /* "pitch-diff-per-sample" for calculation of pitch per sample */
1541  s->pitch_diff_sh16 =
1542  (cur_pitch_val - s->last_pitch_val) * (1 << 16) / MAX_FRAMESIZE;
1543  }
1544 
1545  /* Global gain (if silence) and pitch-adaptive window coordinates */
1546  switch (frame_descs[bd_idx].fcb_type) {
1547  case FCB_TYPE_SILENCE:
1548  s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
1549  break;
1550  case FCB_TYPE_AW_PULSES:
1551  aw_parse_coords(s, gb, pitch);
1552  break;
1553  }
1554 
1555  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1556  int bl_pitch_sh2;
1557 
1558  /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
1559  switch (frame_descs[bd_idx].acb_type) {
1560  case ACB_TYPE_HAMMING: {
1561  /* Pitch is given per block. Per-block pitches are encoded as an
1562  * absolute value for the first block, and then delta values
1563  * relative to this value) for all subsequent blocks. The scale of
1564  * this pitch value is semi-logarithmic compared to its use in the
1565  * decoder, so we convert it to normal scale also. */
1566  int block_pitch,
1567  t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
1568  t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
1569  t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
1570 
1571  if (n == 0) {
1572  block_pitch = get_bits(gb, s->block_pitch_nbits);
1573  } else
1574  block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
1575  get_bits(gb, s->block_delta_pitch_nbits);
1576  /* Convert last_ so that any next delta is within _range */
1577  last_block_pitch = av_clip(block_pitch,
1578  s->block_delta_pitch_hrange,
1579  s->block_pitch_range -
1580  s->block_delta_pitch_hrange);
1581 
1582  /* Convert semi-log-style scale back to normal scale */
1583  if (block_pitch < t1) {
1584  bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
1585  } else {
1586  block_pitch -= t1;
1587  if (block_pitch < t2) {
1588  bl_pitch_sh2 =
1589  (s->block_conv_table[1] << 2) + (block_pitch << 1);
1590  } else {
1591  block_pitch -= t2;
1592  if (block_pitch < t3) {
1593  bl_pitch_sh2 =
1594  (s->block_conv_table[2] + block_pitch) << 2;
1595  } else
1596  bl_pitch_sh2 = s->block_conv_table[3] << 2;
1597  }
1598  }
1599  pitch[n] = bl_pitch_sh2 >> 2;
1600  break;
1601  }
1602 
1603  case ACB_TYPE_ASYMMETRIC: {
1604  bl_pitch_sh2 = pitch[n] << 2;
1605  break;
1606  }
1607 
1608  default: // ACB_TYPE_NONE has no pitch
1609  bl_pitch_sh2 = 0;
1610  break;
1611  }
1612 
1613  synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
1614  lsps, prev_lsps, &frame_descs[bd_idx],
1615  &excitation[n * block_nsamples],
1616  &synth[n * block_nsamples]);
1617  }
1618 
1619  /* Averaging projection filter, if applicable. Else, just copy samples
1620  * from synthesis buffer */
1621  if (s->do_apf) {
1622  double i_lsps[MAX_LSPS];
1623  float lpcs[MAX_LSPS];
1624 
1625  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1626  i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
1627  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1628  postfilter(s, synth, samples, 80, lpcs,
1629  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
1630  frame_descs[bd_idx].fcb_type, pitch[0]);
1631 
1632  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1633  i_lsps[n] = cos(lsps[n]);
1634  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1635  postfilter(s, &synth[80], &samples[80], 80, lpcs,
1636  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
1637  frame_descs[bd_idx].fcb_type, pitch[0]);
1638  } else
1639  memcpy(samples, synth, 160 * sizeof(synth[0]));
1640 
1641  /* Cache values for next frame */
1642  s->frame_cntr++;
1643  if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
1644  s->last_acb_type = frame_descs[bd_idx].acb_type;
1645  switch (frame_descs[bd_idx].acb_type) {
1646  case ACB_TYPE_NONE:
1647  s->last_pitch_val = 0;
1648  break;
1649  case ACB_TYPE_ASYMMETRIC:
1650  s->last_pitch_val = cur_pitch_val;
1651  break;
1652  case ACB_TYPE_HAMMING:
1653  s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
1654  break;
1655  }
1656 
1657  return 0;
1658 }
1659 
1660 /**
1661  * Ensure minimum value for first item, maximum value for last value,
1662  * proper spacing between each value and proper ordering.
1663  *
1664  * @param lsps array of LSPs
1665  * @param num size of LSP array
1666  *
1667  * @note basically a double version of #ff_acelp_reorder_lsf(), might be
1668  * useful to put in a generic location later on. Parts are also
1669  * present in #ff_set_min_dist_lsf() + #ff_sort_nearly_sorted_floats(),
1670  * which is in float.
1671  */
1672 static void stabilize_lsps(double *lsps, int num)
1673 {
1674  int n, m, l;
1675 
1676  /* set minimum value for first, maximum value for last and minimum
1677  * spacing between LSF values.
1678  * Very similar to ff_set_min_dist_lsf(), but in double. */
1679  lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
1680  for (n = 1; n < num; n++)
1681  lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
1682  lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
1683 
1684  /* reorder (looks like one-time / non-recursed bubblesort).
1685  * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
1686  for (n = 1; n < num; n++) {
1687  if (lsps[n] < lsps[n - 1]) {
1688  for (m = 1; m < num; m++) {
1689  double tmp = lsps[m];
1690  for (l = m - 1; l >= 0; l--) {
1691  if (lsps[l] <= tmp) break;
1692  lsps[l + 1] = lsps[l];
1693  }
1694  lsps[l + 1] = tmp;
1695  }
1696  break;
1697  }
1698  }
1699 }
1700 
1701 /**
1702  * Synthesize output samples for a single superframe. If we have any data
1703  * cached in s->sframe_cache, that will be used instead of whatever is loaded
1704  * in s->gb.
1705  *
1706  * WMA Voice superframes contain 3 frames, each containing 160 audio samples,
1707  * to give a total of 480 samples per superframe. See #synth_frame() for frame
1708  * parsing. In addition to 3 frames, superframes can also contain the LSPs
1709  * (if these are globally specified for all frames (residually); they can
1710  * also be specified individually per-frame. See the s->has_residual_lsps
1711  * option), and can specify the number of samples encoded in this superframe
1712  * (if less than 480), usually used to prevent blanks at track boundaries.
1713  *
1714  * @param ctx WMA Voice decoder context
1715  * @return 0 on success, <0 on error or 1 if there was not enough data to
1716  * fully parse the superframe
1717  */
1719  int *got_frame_ptr)
1720 {
1722  GetBitContext *gb = &s->gb, s_gb;
1723  int n, res, n_samples = MAX_SFRAMESIZE;
1724  double lsps[MAX_FRAMES][MAX_LSPS];
1725  const double *mean_lsf = s->lsps == 16 ?
1726  wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
1727  float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
1728  float synth[MAX_LSPS + MAX_SFRAMESIZE];
1729  float *samples;
1730 
1731  memcpy(synth, s->synth_history,
1732  s->lsps * sizeof(*synth));
1733  memcpy(excitation, s->excitation_history,
1734  s->history_nsamples * sizeof(*excitation));
1735 
1736  if (s->sframe_cache_size > 0) {
1737  gb = &s_gb;
1738  init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
1739  s->sframe_cache_size = 0;
1740  }
1741 
1742  /* First bit is speech/music bit, it differentiates between WMAVoice
1743  * speech samples (the actual codec) and WMAVoice music samples, which
1744  * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
1745  * the wild yet. */
1746  if (!get_bits1(gb)) {
1747  avpriv_request_sample(ctx, "WMAPro-in-WMAVoice");
1748  return AVERROR_PATCHWELCOME;
1749  }
1750 
1751  /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
1752  if (get_bits1(gb)) {
1753  if ((n_samples = get_bits(gb, 12)) > MAX_SFRAMESIZE) {
1755  "Superframe encodes > %d samples (%d), not allowed\n",
1756  MAX_SFRAMESIZE, n_samples);
1757  return AVERROR_INVALIDDATA;
1758  }
1759  }
1760 
1761  /* Parse LSPs, if global for the superframe (can also be per-frame). */
1762  if (s->has_residual_lsps) {
1763  double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
1764 
1765  for (n = 0; n < s->lsps; n++)
1766  prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
1767 
1768  if (s->lsps == 10) {
1769  dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1770  } else /* s->lsps == 16 */
1771  dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1772 
1773  for (n = 0; n < s->lsps; n++) {
1774  lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
1775  lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
1776  lsps[2][n] += mean_lsf[n];
1777  }
1778  for (n = 0; n < 3; n++)
1779  stabilize_lsps(lsps[n], s->lsps);
1780  }
1781 
1782  /* synth_superframe can run multiple times per packet
1783  * free potential previous frame */
1785 
1786  /* get output buffer */
1788  if ((res = ff_get_buffer(ctx, frame, 0)) < 0)
1789  return res;
1790  frame->nb_samples = n_samples;
1791  samples = (float *)frame->data[0];
1792 
1793  /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
1794  for (n = 0; n < 3; n++) {
1795  if (!s->has_residual_lsps) {
1796  int m;
1797 
1798  if (s->lsps == 10) {
1799  dequant_lsp10i(gb, lsps[n]);
1800  } else /* s->lsps == 16 */
1801  dequant_lsp16i(gb, lsps[n]);
1802 
1803  for (m = 0; m < s->lsps; m++)
1804  lsps[n][m] += mean_lsf[m];
1805  stabilize_lsps(lsps[n], s->lsps);
1806  }
1807 
1808  if ((res = synth_frame(ctx, gb, n,
1809  &samples[n * MAX_FRAMESIZE],
1810  lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
1811  &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
1812  &synth[s->lsps + n * MAX_FRAMESIZE]))) {
1813  *got_frame_ptr = 0;
1814  return res;
1815  }
1816  }
1817 
1818  /* Statistics? FIXME - we don't check for length, a slight overrun
1819  * will be caught by internal buffer padding, and anything else
1820  * will be skipped, not read. */
1821  if (get_bits1(gb)) {
1822  res = get_bits(gb, 4);
1823  skip_bits(gb, 10 * (res + 1));
1824  }
1825 
1826  if (get_bits_left(gb) < 0) {
1828  return AVERROR_INVALIDDATA;
1829  }
1830 
1831  *got_frame_ptr = 1;
1832 
1833  /* Update history */
1834  memcpy(s->prev_lsps, lsps[2],
1835  s->lsps * sizeof(*s->prev_lsps));
1836  memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
1837  s->lsps * sizeof(*synth));
1838  memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
1839  s->history_nsamples * sizeof(*excitation));
1840  if (s->do_apf)
1841  memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
1842  s->history_nsamples * sizeof(*s->zero_exc_pf));
1843 
1844  return 0;
1845 }
1846 
1847 /**
1848  * Parse the packet header at the start of each packet (input data to this
1849  * decoder).
1850  *
1851  * @param s WMA Voice decoding context private data
1852  * @return <0 on error, nb_superframes on success.
1853  */
1855 {
1856  GetBitContext *gb = &s->gb;
1857  unsigned int res, n_superframes = 0;
1858 
1859  skip_bits(gb, 4); // packet sequence number
1860  s->has_residual_lsps = get_bits1(gb);
1861  do {
1862  if (get_bits_left(gb) < 6 + s->spillover_bitsize)
1863  return AVERROR_INVALIDDATA;
1864 
1865  res = get_bits(gb, 6); // number of superframes per packet
1866  // (minus first one if there is spillover)
1867  n_superframes += res;
1868  } while (res == 0x3F);
1869  s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
1870 
1871  return get_bits_left(gb) >= 0 ? n_superframes : AVERROR_INVALIDDATA;
1872 }
1873 
1874 /**
1875  * Copy (unaligned) bits from gb/data/size to pb.
1876  *
1877  * @param pb target buffer to copy bits into
1878  * @param data source buffer to copy bits from
1879  * @param size size of the source data, in bytes
1880  * @param gb bit I/O context specifying the current position in the source.
1881  * data. This function might use this to align the bit position to
1882  * a whole-byte boundary before calling #ff_copy_bits() on aligned
1883  * source data
1884  * @param nbits the amount of bits to copy from source to target
1885  *
1886  * @note after calling this function, the current position in the input bit
1887  * I/O context is undefined.
1888  */
1889 static void copy_bits(PutBitContext *pb,
1890  const uint8_t *data, int size,
1891  GetBitContext *gb, int nbits)
1892 {
1893  int rmn_bytes, rmn_bits;
1894 
1895  rmn_bits = rmn_bytes = get_bits_left(gb);
1896  if (rmn_bits < nbits)
1897  return;
1898  if (nbits > put_bits_left(pb))
1899  return;
1900  rmn_bits &= 7; rmn_bytes >>= 3;
1901  if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
1902  put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
1903  ff_copy_bits(pb, data + size - rmn_bytes,
1904  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
1905 }
1906 
1907 /**
1908  * Packet decoding: a packet is anything that the (ASF) demuxer contains,
1909  * and we expect that the demuxer / application provides it to us as such
1910  * (else you'll probably get garbage as output). Every packet has a size of
1911  * ctx->block_align bytes, starts with a packet header (see
1912  * #parse_packet_header()), and then a series of superframes. Superframe
1913  * boundaries may exceed packets, i.e. superframes can split data over
1914  * multiple (two) packets.
1915  *
1916  * For more information about frames, see #synth_superframe().
1917  */
1919  int *got_frame_ptr, AVPacket *avpkt)
1920 {
1922  GetBitContext *gb = &s->gb;
1923  const uint8_t *buf = avpkt->data;
1924  uint8_t dummy[1];
1925  int size, res, pos;
1926 
1927  /* Packets are sometimes a multiple of ctx->block_align, with a packet
1928  * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
1929  * feeds us ASF packets, which may concatenate multiple "codec" packets
1930  * in a single "muxer" packet, so we artificially emulate that by
1931  * capping the packet size at ctx->block_align. */
1932  for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
1933  buf = size ? buf : dummy;
1934  res = init_get_bits8(&s->gb, buf, size);
1935  if (res < 0)
1936  return res;
1937 
1938  /* size == ctx->block_align is used to indicate whether we are dealing with
1939  * a new packet or a packet of which we already read the packet header
1940  * previously. */
1941  if (!(size % ctx->block_align)) { // new packet header
1942  if (!size) {
1943  s->spillover_nbits = 0;
1944  s->nb_superframes = 0;
1945  } else {
1946  if ((res = parse_packet_header(s)) < 0)
1947  return res;
1948  s->nb_superframes = res;
1949  }
1950 
1951  /* If the packet header specifies a s->spillover_nbits, then we want
1952  * to push out all data of the previous packet (+ spillover) before
1953  * continuing to parse new superframes in the current packet. */
1954  if (s->sframe_cache_size > 0) {
1955  int cnt = get_bits_count(gb);
1956  if (cnt + s->spillover_nbits > avpkt->size * 8) {
1957  s->spillover_nbits = avpkt->size * 8 - cnt;
1958  }
1959  copy_bits(&s->pb, buf, size, gb, s->spillover_nbits);
1960  flush_put_bits(&s->pb);
1961  s->sframe_cache_size += s->spillover_nbits;
1962  if ((res = synth_superframe(ctx, frame, got_frame_ptr)) == 0 &&
1963  *got_frame_ptr) {
1964  cnt += s->spillover_nbits;
1965  s->skip_bits_next = cnt & 7;
1966  res = cnt >> 3;
1967  return res;
1968  } else
1969  skip_bits_long (gb, s->spillover_nbits - cnt +
1970  get_bits_count(gb)); // resync
1971  } else if (s->spillover_nbits) {
1972  skip_bits_long(gb, s->spillover_nbits); // resync
1973  }
1974  } else if (s->skip_bits_next)
1975  skip_bits(gb, s->skip_bits_next);
1976 
1977  /* Try parsing superframes in current packet */
1978  s->sframe_cache_size = 0;
1979  s->skip_bits_next = 0;
1980  pos = get_bits_left(gb);
1981  if (s->nb_superframes-- == 0) {
1982  *got_frame_ptr = 0;
1983  return size;
1984  } else if (s->nb_superframes > 0) {
1985  if ((res = synth_superframe(ctx, frame, got_frame_ptr)) < 0) {
1986  return res;
1987  } else if (*got_frame_ptr) {
1988  int cnt = get_bits_count(gb);
1989  s->skip_bits_next = cnt & 7;
1990  res = cnt >> 3;
1991  return res;
1992  }
1993  } else if ((s->sframe_cache_size = pos) > 0) {
1994  /* ... cache it for spillover in next packet */
1995  init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
1996  copy_bits(&s->pb, buf, size, gb, s->sframe_cache_size);
1997  // FIXME bad - just copy bytes as whole and add use the
1998  // skip_bits_next field
1999  }
2000 
2001  return size;
2002 }
2003 
2005 {
2007 
2008  if (s->do_apf) {
2009  av_tx_uninit(&s->rdft);
2010  av_tx_uninit(&s->irdft);
2011  av_tx_uninit(&s->dct);
2012  av_tx_uninit(&s->dst);
2013  }
2014 
2015  return 0;
2016 }
2017 
2019  .p.name = "wmavoice",
2020  CODEC_LONG_NAME("Windows Media Audio Voice"),
2021  .p.type = AVMEDIA_TYPE_AUDIO,
2022  .p.id = AV_CODEC_ID_WMAVOICE,
2023  .priv_data_size = sizeof(WMAVoiceContext),
2025  .close = wmavoice_decode_end,
2027  .p.capabilities =
2028 #if FF_API_SUBFRAMES
2029  AV_CODEC_CAP_SUBFRAMES |
2030 #endif
2032  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
2033  .flush = wmavoice_flush,
2034 };
WMAVoiceContext::has_residual_lsps
int has_residual_lsps
if set, superframes contain one set of LSPs that cover all frames, encoded as independent and residua...
Definition: wmavoice.c:192
skip_bits_long
static void skip_bits_long(GetBitContext *s, int n)
Skips the specified number of bits.
Definition: get_bits.h:278
AMRFixed::x
int x[10]
Definition: acelp_vectors.h:55
wmavoice_std_codebook
static const float wmavoice_std_codebook[1000]
Definition: wmavoice_data.h:2585
interpol
static int interpol(MBContext *s, uint32_t *color, int x, int y, int linesize)
Definition: vsrc_mandelbrot.c:184
MAX_LSPS
#define MAX_LSPS
maximum filter order
Definition: wmavoice.c:48
WMAVoiceContext::aw_next_pulse_off_cache
int aw_next_pulse_off_cache
the position (relative to start of the second block) at which pulses should start to be positioned,...
Definition: wmavoice.c:241
WMAVoiceContext::max_pitch_val
int max_pitch_val
max value + 1 for pitch parsing
Definition: wmavoice.c:164
av_clip
#define av_clip
Definition: common.h:98
aw_pulse_set2
static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply second set of pitch-adaptive window pulses.
Definition: wmavoice.c:1102
FF_CODEC_CAP_INIT_CLEANUP
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: codec_internal.h:42
acelp_vectors.h
get_bits_left
static int get_bits_left(GetBitContext *gb)
Definition: get_bits.h:695
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
wmavoice_dq_lsp10i
static const uint8_t wmavoice_dq_lsp10i[0xf00]
Definition: wmavoice_data.h:33
mem_internal.h
WMAVoiceContext::tilted_lpcs_pf
float tilted_lpcs_pf[0x82]
aligned buffer for LPC tilting
Definition: wmavoice.c:279
out
FILE * out
Definition: movenc.c:54
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:250
thread.h
frame_descs
static const struct frame_type_desc frame_descs[17]
AVTXContext
Definition: tx_priv.h:235
wmavoice_dq_lsp16r3
static const uint8_t wmavoice_dq_lsp16r3[0x600]
Definition: wmavoice_data.h:1526
dequant_lsps
static void dequant_lsps(double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
Dequantize LSPs.
Definition: wmavoice.c:874
init_put_bits
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:62
WMAVoiceContext::excitation_history
float excitation_history[MAX_SIGNAL_HISTORY]
cache of the signal of previous superframes, used as a history for signal generation
Definition: wmavoice.c:251
get_bits_count
static int get_bits_count(const GetBitContext *s)
Definition: get_bits.h:266
av_log2_16bit
int av_log2_16bit(unsigned v)
Definition: intmath.c:31
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:344
put_bits
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:222
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
aw_pulse_set1
static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply first set of pitch-adaptive window pulses.
Definition: wmavoice.c:1192
ff_acelp_apply_order_2_transfer_function
void ff_acelp_apply_order_2_transfer_function(float *out, const float *in, const float zero_coeffs[2], const float pole_coeffs[2], float gain, float mem[2], int n)
Apply an order 2 rational transfer function in-place.
Definition: acelp_filters.c:121
AVPacket::data
uint8_t * data
Definition: packet.h:522
pRNG
static int pRNG(int frame_cntr, int block_num, int block_size)
Generate a random number from frame_cntr and block_idx, which will live in the range [0,...
Definition: wmavoice.c:1253
FCB_TYPE_EXC_PULSES
@ FCB_TYPE_EXC_PULSES
Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.
Definition: wmavoice.c:91
ff_wmavoice_decoder
const FFCodec ff_wmavoice_decoder
Definition: wmavoice.c:2018
table
static const uint16_t table[]
Definition: prosumer.c:205
data
const char data[16]
Definition: mxf.c:148
WMAVoiceContext::silence_gain
float silence_gain
set for use in blocks if ACB_TYPE_NONE
Definition: wmavoice.c:226
expf
#define expf(x)
Definition: libm.h:283
WMAVoiceContext::denoise_filter_cache_size
int denoise_filter_cache_size
samples in denoise_filter_cache
Definition: wmavoice.c:278
wmavoice_denoise_power_table
static const float wmavoice_denoise_power_table[12][64]
LUT for f(x,y) = pow((y + 6.9) / 64, 0.025 * (x + 1)).
Definition: wmavoice_data.h:3064
wmavoice_gain_codebook_acb
static const float wmavoice_gain_codebook_acb[128]
Definition: wmavoice_data.h:2874
FFCodec
Definition: codec_internal.h:127
base
uint8_t base
Definition: vp3data.h:128
AVComplexFloat
Definition: tx.h:27
t1
#define t1
Definition: regdef.h:29
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
ff_celp_lp_synthesis_filterf
void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP synthesis filter.
Definition: celp_filters.c:85
WMAVoiceContext::aw_idx_is_ext
int aw_idx_is_ext
whether the AW index was encoded in 8 bits (instead of 6)
Definition: wmavoice.c:228
init_get_bits
static int init_get_bits(GetBitContext *s, const uint8_t *buffer, int bit_size)
Initialize GetBitContext.
Definition: get_bits.h:514
av_tx_init
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...
Definition: tx.c:902
WMAVoiceContext::dc_level
int dc_level
Predicted amount of DC noise, based on which a DC removal filter is used.
Definition: wmavoice.c:155
wmavoice_dq_lsp16i1
static const uint8_t wmavoice_dq_lsp16i1[0x640]
Definition: wmavoice_data.h:420
WMAVoiceContext::block_conv_table
uint16_t block_conv_table[4]
boundaries for block pitch unit/scale conversion
Definition: wmavoice.c:176
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:365
frame_type_desc::log_n_blocks
uint8_t log_n_blocks
log2(n_blocks)
Definition: wmavoice.c:102
skip_bits
static void skip_bits(GetBitContext *s, int n)
Definition: get_bits.h:381
WMAVoiceContext::aw_pulse_range
int aw_pulse_range
the range over which aw_pulse_set1() can apply the pulse, relative to the value in aw_first_pulse_off...
Definition: wmavoice.c:230
get_bits
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:335
FFCodec::p
AVCodec p
The public AVCodec.
Definition: codec_internal.h:131
FCB_TYPE_HARDCODED
@ FCB_TYPE_HARDCODED
hardcoded (fixed) codebook with per-block gain values
Definition: wmavoice.c:87
FCB_TYPE_SILENCE
@ FCB_TYPE_SILENCE
comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain va...
Definition: wmavoice.c:84
av_ceil_log2
#define av_ceil_log2
Definition: common.h:95
AMRFixed::pitch_fac
float pitch_fac
Definition: acelp_vectors.h:59
dummy
int dummy
Definition: motion.c:66
GetBitContext
Definition: get_bits.h:108
MULH
#define MULH
Definition: mathops.h:42
wmavoice_flush
static av_cold void wmavoice_flush(AVCodecContext *ctx)
Definition: wmavoice.c:328
put_bits_left
static int put_bits_left(PutBitContext *s)
Definition: put_bits.h:125
frame_type_desc::n_blocks
uint8_t n_blocks
number of blocks per frame (each block contains 160/n_blocks samples)
Definition: wmavoice.c:100
val
static double val(void *priv, double ch)
Definition: aeval.c:78
WMAVoiceContext::irdft_fn
av_tx_fn irdft_fn
postfilter (for denoise filter)
Definition: wmavoice.c:266
dequant_lsp10i
static void dequant_lsp10i(GetBitContext *gb, double *lsps)
Parse 10 independently-coded LSPs.
Definition: wmavoice.c:905
synth_block
static void synth_block(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
Parse data in a single block.
Definition: wmavoice.c:1454
MAX_SFRAMESIZE
#define MAX_SFRAMESIZE
maximum number of samples per superframe
Definition: wmavoice.c:54
wmavoice_gain_codebook_fcb
static const float wmavoice_gain_codebook_fcb[128]
Definition: wmavoice_data.h:2893
WMAVoiceContext::denoise_filter_cache
float denoise_filter_cache[MAX_FRAMESIZE]
Definition: wmavoice.c:277
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
WMAVoiceContext::sin
float sin[511]
Definition: wmavoice.c:269
calc_input_response
static void calc_input_response(WMAVoiceContext *s, float *lpcs_src, int fcb_type, float *coeffs_dst, int remainder)
Derive denoise filter coefficients (in real domain) from the LPCs.
Definition: wmavoice.c:612
a1
#define a1
Definition: regdef.h:47
AV_CODEC_ID_WMAVOICE
@ AV_CODEC_ID_WMAVOICE
Definition: codec_id.h:476
lrint
#define lrint
Definition: tablegen.h:53
MUL16
#define MUL16(ra, rb)
Definition: mathops.h:89
ff_thread_once
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:205
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
init_get_bits8
static int init_get_bits8(GetBitContext *s, const uint8_t *buffer, int byte_size)
Initialize GetBitContext.
Definition: get_bits.h:545
MAX_LSPS_ALIGN16
#define MAX_LSPS_ALIGN16
same as MAX_LSPS; needs to be multiple
Definition: wmavoice.c:49
av_tx_fn
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:151
av_memcpy_backptr
void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
Overlapping memcpy() implementation.
Definition: mem.c:445
float
float
Definition: af_crystalizer.c:121
wmavoice_dq_lsp10r
static const uint8_t wmavoice_dq_lsp10r[0x1400]
Definition: wmavoice_data.h:749
FF_CODEC_DECODE_CB
#define FF_CODEC_DECODE_CB(func)
Definition: codec_internal.h:287
WMAVoiceContext::sframe_cache_size
int sframe_cache_size
set to >0 if we have data from an (incomplete) superframe from a previous packet that spilled over in...
Definition: wmavoice.c:204
WMAVoiceContext::dst
AVTXContext * dst
contexts for phase shift (in Hilbert
Definition: wmavoice.c:267
s
#define s(width, name)
Definition: cbs_vp9.c:198
WMAVoiceContext::lsp_q_mode
int lsp_q_mode
defines quantizer defaults [0, 1]
Definition: wmavoice.c:159
frame_type_desc::fcb_type
uint8_t fcb_type
Fixed codebook type (FCB_TYPE_*)
Definition: wmavoice.c:104
log_range
#define log_range(var, assign)
WMAVoiceContext::prev_lsps
double prev_lsps[MAX_LSPS]
LSPs of the last frame of the previous superframe.
Definition: wmavoice.c:220
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
WMAVoiceContext::aw_n_pulses
int aw_n_pulses[2]
number of AW-pulses in each block; note that this number can be negative (in which case it basically ...
Definition: wmavoice.c:236
AMRFixed
Sparse representation for the algebraic codebook (fixed) vector.
Definition: acelp_vectors.h:53
bits
uint8_t bits
Definition: vp3data.h:128
adaptive_gain_control
static void adaptive_gain_control(float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
Adaptive gain control (as used in postfilter).
Definition: wmavoice.c:512
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
wmavoice_lsp16_intercoeff_a
static const float wmavoice_lsp16_intercoeff_a[32][2][16]
Definition: wmavoice_data.h:2047
ctx
AVFormatContext * ctx
Definition: movenc.c:48
decode.h
get_bits.h
wmavoice_mean_lsf10
static const double wmavoice_mean_lsf10[2][10]
Definition: wmavoice_data.h:2565
WMAVoiceContext::spillover_nbits
int spillover_nbits
number of bits of the previous packet's last superframe preceding this packet's first full superframe...
Definition: wmavoice.c:188
UMULH
static av_always_inline unsigned UMULH(unsigned a, unsigned b)
Definition: mathops.h:69
AMRFixed::y
float y[10]
Definition: acelp_vectors.h:56
WMAVoiceContext::denoise_coeffs_pf
float denoise_coeffs_pf[0x82]
aligned buffer for denoise coefficients
Definition: wmavoice.c:281
wmavoice_gain_silence
static const float wmavoice_gain_silence[256]
Definition: wmavoice_data.h:2788
PutBitContext
Definition: put_bits.h:50
WMAVoiceContext::vbm_tree
int8_t vbm_tree[25]
converts VLC codes to frame type
Definition: wmavoice.c:140
CODEC_LONG_NAME
#define CODEC_LONG_NAME(str)
Definition: codec_internal.h:272
frame
static AVFrame * frame
Definition: demux_decode.c:54
WMAVoiceContext::dct_fn
av_tx_fn dct_fn
Definition: wmavoice.c:268
wmavoice_dq_lsp16i3
static const uint8_t wmavoice_dq_lsp16i3[0x300]
Definition: wmavoice_data.h:682
if
if(ret)
Definition: filter_design.txt:179
AMRFixed::no_repeat_mask
int no_repeat_mask
Definition: acelp_vectors.h:57
postfilter
static void postfilter(WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
Averaging projection filter, the postfilter used in WMAVoice.
Definition: wmavoice.c:820
AV_ONCE_INIT
#define AV_ONCE_INIT
Definition: thread.h:203
NULL
#define NULL
Definition: coverity.c:32
sizes
static const int sizes[][2]
Definition: img2dec.c:60
WMAVoiceContext::history_nsamples
int history_nsamples
number of samples in history for signal prediction (through ACB)
Definition: wmavoice.c:145
WMAVoiceContext::synth_history
float synth_history[MAX_LSPS]
see excitation_history
Definition: wmavoice.c:255
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:156
AVERROR_PATCHWELCOME
#define AVERROR_PATCHWELCOME
Not yet implemented in FFmpeg, patches welcome.
Definition: error.h:64
last_coeff
static const uint8_t last_coeff[3]
Definition: qdm2data.h:187
ACB_TYPE_HAMMING
@ ACB_TYPE_HAMMING
Per-block pitch with signal generation using a Hamming sinc window function.
Definition: wmavoice.c:75
WMAVoiceContext::denoise_strength
int denoise_strength
strength of denoising in Wiener filter [0-11]
Definition: wmavoice.c:151
MAX_SIGNAL_HISTORY
#define MAX_SIGNAL_HISTORY
maximum excitation signal history
Definition: wmavoice.c:53
WMAVoiceContext::sframe_cache
uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE+AV_INPUT_BUFFER_PADDING_SIZE]
cache for superframe data split over multiple packets
Definition: wmavoice.c:201
get_bits1
static unsigned int get_bits1(GetBitContext *s)
Definition: get_bits.h:388
dequant_lsp10r
static void dequant_lsp10r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:931
WMAVoiceContext::pitch_nbits
int pitch_nbits
number of bits used to specify the pitch value in the frame header
Definition: wmavoice.c:165
WMAVoiceContext::block_delta_pitch_nbits
int block_delta_pitch_nbits
number of bits used to specify the delta pitch between this and the last block's pitch value,...
Definition: wmavoice.c:170
kalman_smoothen
static int kalman_smoothen(WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
Kalman smoothing function.
Definition: wmavoice.c:553
WMAVoiceContext::skip_bits_next
int skip_bits_next
number of bits to skip at the next call to wmavoice_decode_packet() (since they're part of the previo...
Definition: wmavoice.c:197
sqrtf
static __device__ float sqrtf(float a)
Definition: cuda_runtime.h:184
abs
#define abs(x)
Definition: cuda_runtime.h:35
WMAVoiceContext::dst_fn
av_tx_fn dst_fn
transform, part of postfilter)
Definition: wmavoice.c:268
WMAVoiceContext::rdft
AVTXContext * rdft
Definition: wmavoice.c:265
celp_filters.h
MAX_FRAMESIZE
#define MAX_FRAMESIZE
maximum number of samples per frame
Definition: wmavoice.c:52
av_clipf
av_clipf
Definition: af_crystalizer.c:121
MAX_FRAMES
#define MAX_FRAMES
maximum number of frames per superframe
Definition: wmavoice.c:51
get_vlc2
static av_always_inline int get_vlc2(GetBitContext *s, const VLCElem *table, int bits, int max_depth)
Parse a vlc code.
Definition: get_bits.h:652
decode_vbmtree
static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
Set up the variable bit mode (VBM) tree from container extradata.
Definition: wmavoice.c:300
AVOnce
#define AVOnce
Definition: thread.h:202
aw_parse_coords
static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between th...
Definition: wmavoice.c:1050
wmavoice_init_static_data
static av_cold void wmavoice_init_static_data(void)
Definition: wmavoice.c:314
float_dsp.h
WMAVoiceContext::dcf_mem
float dcf_mem[2]
DC filter history.
Definition: wmavoice.c:273
ff_get_buffer
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
Get a buffer for a frame.
Definition: decode.c:1569
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:365
AV_CODEC_CAP_DR1
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() or get_encode_buffer() for allocating buffers and supports custom allocators.
Definition: codec.h:52
parse_packet_header
static int parse_packet_header(WMAVoiceContext *s)
Parse the packet header at the start of each packet (input data to this decoder).
Definition: wmavoice.c:1854
AV_TX_FLOAT_DCT_I
@ AV_TX_FLOAT_DCT_I
Discrete Cosine Transform I.
Definition: tx.h:116
AVPacket::size
int size
Definition: packet.h:523
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: vvc_intra.c:291
powf
#define powf(x, y)
Definition: libm.h:50
AVChannelLayout
An AVChannelLayout holds information about the channel layout of audio data.
Definition: channel_layout.h:303
codec_internal.h
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:109
WMAVoiceContext::spillover_bitsize
int spillover_bitsize
number of bits used to specify spillover_nbits in the packet header = ceil(log2(ctx->block_align << 3...
Definition: wmavoice.c:142
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
WMAVoiceContext::pb
PutBitContext pb
bitstream writer for sframe_cache
Definition: wmavoice.c:209
WMAVoiceContext::last_pitch_val
int last_pitch_val
pitch value of the previous frame
Definition: wmavoice.c:222
size
int size
Definition: twinvq_data.h:10344
wiener_denoise
static void wiener_denoise(WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.
Definition: wmavoice.c:736
VLCElem
Definition: vlc.h:32
wmavoice_lsp10_intercoeff_b
static const float wmavoice_lsp10_intercoeff_b[32][2][10]
Definition: wmavoice_data.h:1852
range
enum AVColorRange range
Definition: mediacodec_wrapper.c:2557
dequant_lsp16i
static void dequant_lsp16i(GetBitContext *gb, double *lsps)
Parse 16 independently-coded LSPs.
Definition: wmavoice.c:967
wmavoice_dq_lsp16r1
static const uint8_t wmavoice_dq_lsp16r1[0x500]
Definition: wmavoice_data.h:1264
WMAVoiceContext::aw_first_pulse_off
int aw_first_pulse_off[2]
index of first sample to which to apply AW-pulses, or -0xff if unset
Definition: wmavoice.c:239
WMAVoiceContext::zero_exc_pf
float zero_exc_pf[MAX_SIGNAL_HISTORY+MAX_SFRAMESIZE]
zero filter output (i.e.
Definition: wmavoice.c:274
sinewin.h
wmavoice_dq_lsp16r2
static const uint8_t wmavoice_dq_lsp16r2[0x500]
Definition: wmavoice_data.h:1395
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
frame_type_desc
Description of frame types.
Definition: wmavoice.c:99
WMAVoiceContext::block_pitch_range
int block_pitch_range
range of the block pitch
Definition: wmavoice.c:169
stabilize_lsps
static void stabilize_lsps(double *lsps, int num)
Ensure minimum value for first item, maximum value for last value, proper spacing between each value ...
Definition: wmavoice.c:1672
M_PI
#define M_PI
Definition: mathematics.h:67
ff_tilt_compensation
void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
Apply tilt compensation filter, 1 - tilt * z-1.
Definition: acelp_filters.c:138
av_tx_uninit
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
Definition: tx.c:294
wmavoice_energy_table
static const float wmavoice_energy_table[128]
LUT for 1.071575641632 * pow(1.0331663, n - 127)
Definition: wmavoice_data.h:3026
ff_sine_window_init
void ff_sine_window_init(float *window, int n)
Generate a sine window.
Definition: sinewin_tablegen.h:59
wmavoice_decode_init
static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
Set up decoder with parameters from demuxer (extradata etc.).
Definition: wmavoice.c:359
WMAVoiceContext::block_delta_pitch_hrange
int block_delta_pitch_hrange
1/2 range of the delta (full range is from -this to +this-1)
Definition: wmavoice.c:174
wmavoice_ipol2_coeffs
static const float wmavoice_ipol2_coeffs[32]
Hamming-window sinc function (num = 32, x = [ 0, 31 ]): (0.54 + 0.46 * cos(2 * M_PI * x / (num - 1)))...
Definition: wmavoice_data.h:3012
WMAVoiceContext::pitch_diff_sh16
int pitch_diff_sh16
((cur_pitch_val - last_pitch_val) << 16) / MAX_FRAMESIZE
Definition: wmavoice.c:224
WMAVoiceContext::gain_pred_err
float gain_pred_err[6]
cache for gain prediction
Definition: wmavoice.c:250
AVFrame::nb_samples
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:424
WMAVoiceContext::rdft_fn
av_tx_fn rdft_fn
Definition: wmavoice.c:266
ACB_TYPE_ASYMMETRIC
@ ACB_TYPE_ASYMMETRIC
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
Definition: wmavoice.c:70
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
WMAVoiceContext::nb_superframes
int nb_superframes
number of superframes in current packet
Definition: wmavoice.c:249
t3
#define t3
Definition: regdef.h:31
WMAVoiceContext::cos
float cos[511]
8-bit cosine/sine windows over [-pi,pi] range
Definition: wmavoice.c:269
a2
#define a2
Definition: regdef.h:48
WMAVoiceContext::denoise_tilt_corr
int denoise_tilt_corr
Whether to apply tilt correction to the Wiener filter coefficients (postfilter)
Definition: wmavoice.c:153
delta
float delta
Definition: vorbis_enc_data.h:430
wmavoice_lsp16_intercoeff_b
static const float wmavoice_lsp16_intercoeff_b[32][2][16]
Definition: wmavoice_data.h:2306
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
av_frame_unref
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:576
acelp_filters.h
ff_weighted_vector_sumf
void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b, float weight_coeff_a, float weight_coeff_b, int length)
float implementation of weighted sum of two vectors.
Definition: acelp_vectors.c:182
WMAVoiceContext::lsp_def_mode
int lsp_def_mode
defines different sets of LSP defaults [0, 1]
Definition: wmavoice.c:160
wmavoice_gain_universal
static const float wmavoice_gain_universal[64]
Definition: wmavoice_data.h:2855
AVCodec::name
const char * name
Name of the codec implementation.
Definition: codec.h:194
len
int len
Definition: vorbis_enc_data.h:426
WMAVoiceContext::synth_filter_out_buf
float synth_filter_out_buf[0x80+MAX_LSPS_ALIGN16]
aligned buffer for postfilter speech synthesis
Definition: wmavoice.c:283
tilt_factor
static float tilt_factor(const float *lpcs, int n_lpcs)
Get the tilt factor of a formant filter from its transfer function.
Definition: wmavoice.c:599
VLC_NBITS
#define VLC_NBITS
number of bits to read per VLC iteration
Definition: wmavoice.c:58
wmavoice_data.h
Windows Media Voice (WMAVoice) tables.
avcodec.h
WMAVoiceContext::min_pitch_val
int min_pitch_val
base value for pitch parsing code
Definition: wmavoice.c:163
WMAVoiceContext::last_acb_type
int last_acb_type
frame type [0-2] of the previous frame
Definition: wmavoice.c:223
WMAVoiceContext::dct
AVTXContext * dct
Definition: wmavoice.c:267
av_uninit
#define av_uninit(x)
Definition: attributes.h:154
ret
ret
Definition: filter_design.txt:187
lsp.h
ff_celp_lp_zero_synthesis_filterf
void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP zero synthesis filter.
Definition: celp_filters.c:200
WMAVoiceContext::do_apf
int do_apf
whether to apply the averaged projection filter (APF)
Definition: wmavoice.c:149
pos
unsigned int pos
Definition: spdifenc.c:413
AMRFixed::n
int n
Definition: acelp_vectors.h:54
wmavoice_dq_lsp16i2
static const uint8_t wmavoice_dq_lsp16i2[0x3c0]
Definition: wmavoice_data.h:583
AV_INPUT_BUFFER_PADDING_SIZE
#define AV_INPUT_BUFFER_PADDING_SIZE
Definition: defs.h:40
wmavoice_mean_lsf16
static const double wmavoice_mean_lsf16[2][16]
Definition: wmavoice_data.h:2574
ff_copy_bits
void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
Copy the content of src to the bitstream.
Definition: bitstream.c:49
AV_RL32
uint64_t_TMPL AV_WL64 unsigned int_TMPL AV_RL32
Definition: bytestream.h:92
U
#define U(x)
Definition: vpx_arith.h:37
AV_TX_FLOAT_RDFT
@ AV_TX_FLOAT_RDFT
Real to complex and complex to real DFTs.
Definition: tx.h:90
wmavoice_decode_end
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
Definition: wmavoice.c:2004
WMAVoiceContext::lsps
int lsps
number of LSPs per frame [10 or 16]
Definition: wmavoice.c:158
AVCodecContext
main external API structure.
Definition: avcodec.h:445
wmavoice_decode_packet
static int wmavoice_decode_packet(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt)
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output).
Definition: wmavoice.c:1918
channel_layout.h
t2
#define t2
Definition: regdef.h:30
WMAVoiceContext::block_pitch_nbits
int block_pitch_nbits
number of bits used to specify the first block's pitch value
Definition: wmavoice.c:167
AV_TX_FLOAT_DST_I
@ AV_TX_FLOAT_DST_I
Discrete Sine Transform I.
Definition: tx.h:128
synth_superframe
static int synth_superframe(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr)
Synthesize output samples for a single superframe.
Definition: wmavoice.c:1718
av_channel_layout_uninit
void av_channel_layout_uninit(AVChannelLayout *channel_layout)
Free any allocated data in the channel layout and reset the channel count to 0.
Definition: channel_layout.c:432
WMAVoiceContext::frame_cntr
int frame_cntr
current frame index [0 - 0xFFFE]; is only used for comfort noise in pRNG()
Definition: wmavoice.c:247
wmavoice_ipol1_coeffs
static const float wmavoice_ipol1_coeffs[17 *9]
Definition: wmavoice_data.h:2960
values
these buffered frames must be flushed immediately if a new input produces new output. If the input frame is not enough to produce output, then the filter must not call request_frame to get more. It must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs, the filter must be ready for frames arriving randomly on any input. Therefore, any filter with several inputs will most likely require some kind of queuing mechanism. It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced. request_frame: For filters that do not use the activate() callback, this method is called when a frame is wanted on an output. For a source, it should directly call filter_frame on the corresponding output. For a filter, if there are queued frames already ready, one of these frames should be pushed. If not, the filter should request a frame on one of its inputs, repeatedly until at least one frame has been pushed. Return values:
Definition: filter_design.txt:263
ff_set_fixed_vector
void ff_set_fixed_vector(float *out, const AMRFixed *in, float scale, int size)
Add fixed vector to an array from a sparse representation.
Definition: acelp_vectors.c:224
mean_lsf
static const float mean_lsf[10]
Definition: siprdata.h:27
AV_CODEC_CAP_DELAY
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and correct output.
Definition: codec.h:76
samples
The word “frame” indicates either a video frame or a group of audio samples.
Definition: filter_design.txt:8
ACB_TYPE_NONE
@ ACB_TYPE_NONE
no adaptive codebook (only hardcoded fixed)
Definition: wmavoice.c:69
copy_bits
static void copy_bits(PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
Copy (unaligned) bits from gb/data/size to pb.
Definition: wmavoice.c:1889
avpriv_scalarproduct_float_c
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
Return the scalar product of two vectors.
Definition: float_dsp.c:124
synth_frame
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
Synthesize output samples for a single frame.
Definition: wmavoice.c:1496
M_LN10
#define M_LN10
Definition: mathematics.h:49
WMAVoiceContext::gb
GetBitContext gb
packet bitreader.
Definition: wmavoice.c:136
avpriv_request_sample
#define avpriv_request_sample(...)
Definition: tableprint_vlc.h:36
synth_block_fcb_acb
static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
Parse FCB/ACB signal for a single block.
Definition: wmavoice.c:1320
flush_put_bits
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:143
AV_CHANNEL_LAYOUT_MONO
#define AV_CHANNEL_LAYOUT_MONO
Definition: channel_layout.h:378
VLC_INIT_STATIC_TABLE_FROM_LENGTHS
#define VLC_INIT_STATIC_TABLE_FROM_LENGTHS(vlc_table, nb_bits, nb_codes, lens, lens_wrap, syms, syms_wrap, syms_size, offset, flags)
Definition: vlc.h:277
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
AVPacket
This structure stores compressed data.
Definition: packet.h:499
synth_block_hardcoded
static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
Parse hardcoded signal for a single block.
Definition: wmavoice.c:1289
SFRAME_CACHE_MAXSIZE
#define SFRAME_CACHE_MAXSIZE
maximum cache size for frame data that was split over two packets
Definition: wmavoice.c:56
frame_type_vlc
static VLCElem frame_type_vlc[132]
Frame type VLC coding.
Definition: wmavoice.c:63
AMRFixed::pitch_lag
int pitch_lag
Definition: acelp_vectors.h:58
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:482
WMAVoiceContext::irdft
AVTXContext * irdft
contexts for FFT-calculation in the postfilter (for denoise filter)
Definition: wmavoice.c:265
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
wmavoice_lsp10_intercoeff_a
static const float wmavoice_lsp10_intercoeff_a[32][2][10]
Definition: wmavoice_data.h:1657
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:61
dequant_lsp16r
static void dequant_lsp16r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).
Definition: wmavoice.c:1000
frame_type_desc::dbl_pulses
uint8_t dbl_pulses
how many pulse vectors have pulse pairs (rather than just one single pulse) only if fcb_type == FCB_TYPE_EXC_PULSES
Definition: wmavoice.c:105
frame_type_desc::acb_type
uint8_t acb_type
Adaptive codebook type (ACB_TYPE_*)
Definition: wmavoice.c:103
MAX_BLOCKS
#define MAX_BLOCKS
maximum number of blocks per frame
Definition: wmavoice.c:47
ff_acelp_lspd2lpc
void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order)
Reconstruct LPC coefficients from the line spectral pair frequencies.
Definition: lsp.c:220
WMAVoiceContext::postfilter_agc
float postfilter_agc
gain control memory, used in adaptive_gain_control()
Definition: wmavoice.c:271
put_bits.h
pulses
static const int8_t pulses[4]
Number of non-zero pulses in the MP-MLQ excitation.
Definition: g723_1.h:260
ff_acelp_interpolatef
void ff_acelp_interpolatef(float *out, const float *in, const float *filter_coeffs, int precision, int frac_pos, int filter_length, int length)
Floating point version of ff_acelp_interpolate()
Definition: acelp_filters.c:80
AVFormatContext::priv_data
void * priv_data
Format private data.
Definition: avformat.h:1283
AV_SAMPLE_FMT_FLT
@ AV_SAMPLE_FMT_FLT
float
Definition: samplefmt.h:60
FCB_TYPE_AW_PULSES
@ FCB_TYPE_AW_PULSES
Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
Definition: wmavoice.c:89
tx.h
min
float min
Definition: vorbis_enc_data.h:429
WMAVoiceContext
WMA Voice decoding context.
Definition: wmavoice.c:131