FFmpeg
wmavoice.c
Go to the documentation of this file.
1 /*
2  * Windows Media Audio Voice decoder.
3  * Copyright (c) 2009 Ronald S. Bultje
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * @brief Windows Media Audio Voice compatible decoder
25  * @author Ronald S. Bultje <rsbultje@gmail.com>
26  */
27 
28 #include <math.h>
29 
31 #include "libavutil/float_dsp.h"
32 #include "libavutil/mem.h"
33 #include "libavutil/mem_internal.h"
34 #include "libavutil/thread.h"
35 #include "libavutil/tx.h"
36 #include "avcodec.h"
37 #include "codec_internal.h"
38 #include "decode.h"
39 #include "get_bits.h"
40 #include "put_bits.h"
41 #include "wmavoice_data.h"
42 #include "celp_filters.h"
43 #include "acelp_vectors.h"
44 #include "acelp_filters.h"
45 #include "lsp.h"
46 #include "sinewin.h"
47 
/* Decoder-wide size limits. All buffers in the decoding context are
 * dimensioned from these constants. */
#define MAX_BLOCKS           8   ///< maximum number of blocks per frame
#define MAX_LSPS             16  ///< maximum filter order
#define MAX_LSPS_ALIGN16     16  ///< same as #MAX_LSPS; needs to be multiple
                                 ///< of 16 for ASM input buffer alignment
#define MAX_FRAMES           3   ///< maximum number of frames per superframe
#define MAX_FRAMESIZE        160 ///< maximum number of samples per frame
#define MAX_SIGNAL_HISTORY   416 ///< maximum excitation signal history
#define MAX_SFRAMESIZE       (MAX_FRAMESIZE * MAX_FRAMES)
                                 ///< maximum number of samples per superframe
#define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size for frame data that
                                 ///< was split over two packets
#define VLC_NBITS            6   ///< number of bits to read per VLC iteration
60 
61 /**
62  * Frame type VLC coding.
63  */
64 static VLCElem frame_type_vlc[132];
65 
/**
 * Adaptive codebook types.
 */
enum {
    ACB_TYPE_NONE       = 0, ///< no adaptive codebook (only hardcoded fixed)
    ACB_TYPE_ASYMMETRIC = 1, ///< adaptive codebook with per-frame pitch, which
                             ///< we interpolate to get a per-sample pitch.
                             ///< Signal is generated using an asymmetric sinc
                             ///< window function
                             ///< @note see #wmavoice_ipol1_coeffs
    ACB_TYPE_HAMMING    = 2  ///< Per-block pitch with signal generation using
                             ///< a Hamming sinc window function
                             ///< @note see #wmavoice_ipol2_coeffs
};
80 
/**
 * Fixed codebook types.
 */
enum {
    FCB_TYPE_SILENCE    = 0, ///< comfort noise during silence
                             ///< generated from a hardcoded (fixed) codebook
                             ///< with per-frame (low) gain values
    FCB_TYPE_HARDCODED  = 1, ///< hardcoded (fixed) codebook with per-block
                             ///< gain values
    FCB_TYPE_AW_PULSES  = 2, ///< Pitch-adaptive window (AW) pulse signals,
                             ///< used in particular for low-bitrate streams
    FCB_TYPE_EXC_PULSES = 3, ///< Innovation (fixed) codebook pulse sets in
                             ///< combinations of either single pulses or
                             ///< pulse pairs
};
96 
97 /**
98  * Description of frame types.
99  */
100 static const struct frame_type_desc {
101  uint8_t n_blocks; ///< amount of blocks per frame (each block
102  ///< (contains 160/#n_blocks samples)
103  uint8_t log_n_blocks; ///< log2(#n_blocks)
104  uint8_t acb_type; ///< Adaptive codebook type (ACB_TYPE_*)
105  uint8_t fcb_type; ///< Fixed codebook type (FCB_TYPE_*)
106  uint8_t dbl_pulses; ///< how many pulse vectors have pulse pairs
107  ///< (rather than just one single pulse)
108  ///< only if #fcb_type == #FCB_TYPE_EXC_PULSES
109 } frame_descs[17] = {
110  { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0 },
111  { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0 },
127 };
128 
129 /**
130  * WMA Voice decoding context.
131  */
132 typedef struct WMAVoiceContext {
133  /**
134  * @name Global values specified in the stream header / extradata or used all over.
135  * @{
136  */
137  GetBitContext gb; ///< packet bitreader. During decoder init,
138  ///< it contains the extradata from the
139  ///< demuxer. During decoding, it contains
140  ///< packet data.
141  int8_t vbm_tree[25]; ///< converts VLC codes to frame type
142 
143  int spillover_bitsize; ///< number of bits used to specify
144  ///< #spillover_nbits in the packet header
145  ///< = ceil(log2(ctx->block_align << 3))
146  int history_nsamples; ///< number of samples in history for signal
147  ///< prediction (through ACB)
148 
149  /* postfilter specific values */
150  int do_apf; ///< whether to apply the averaged
151  ///< projection filter (APF)
152  int denoise_strength; ///< strength of denoising in Wiener filter
153  ///< [0-11]
154  int denoise_tilt_corr; ///< Whether to apply tilt correction to the
155  ///< Wiener filter coefficients (postfilter)
156  int dc_level; ///< Predicted amount of DC noise, based
157  ///< on which a DC removal filter is used
158 
159  int lsps; ///< number of LSPs per frame [10 or 16]
160  int lsp_q_mode; ///< defines quantizer defaults [0, 1]
161  int lsp_def_mode; ///< defines different sets of LSP defaults
162  ///< [0, 1]
163 
164  int min_pitch_val; ///< base value for pitch parsing code
165  int max_pitch_val; ///< max value + 1 for pitch parsing
166  int pitch_nbits; ///< number of bits used to specify the
167  ///< pitch value in the frame header
168  int block_pitch_nbits; ///< number of bits used to specify the
169  ///< first block's pitch value
170  int block_pitch_range; ///< range of the block pitch
171  int block_delta_pitch_nbits; ///< number of bits used to specify the
172  ///< delta pitch between this and the last
173  ///< block's pitch value, used in all but
174  ///< first block
175  int block_delta_pitch_hrange; ///< 1/2 range of the delta (full range is
176  ///< from -this to +this-1)
177  uint16_t block_conv_table[4]; ///< boundaries for block pitch unit/scale
178  ///< conversion
179 
180  /**
181  * @}
182  *
183  * @name Packet values specified in the packet header or related to a packet.
184  *
185  * A packet is considered to be a single unit of data provided to this
186  * decoder by the demuxer.
187  * @{
188  */
189  int spillover_nbits; ///< number of bits of the previous packet's
190  ///< last superframe preceding this
191  ///< packet's first full superframe (useful
192  ///< for re-synchronization also)
193  int has_residual_lsps; ///< if set, superframes contain one set of
194  ///< LSPs that cover all frames, encoded as
195  ///< independent and residual LSPs; if not
196  ///< set, each frame contains its own, fully
197  ///< independent, LSPs
198  int skip_bits_next; ///< number of bits to skip at the next call
199  ///< to #wmavoice_decode_packet() (since
200  ///< they're part of the previous superframe)
201 
203  ///< cache for superframe data split over
204  ///< multiple packets
205  int sframe_cache_size; ///< set to >0 if we have data from an
206  ///< (incomplete) superframe from a previous
207  ///< packet that spilled over in the current
208  ///< packet; specifies the amount of bits in
209  ///< #sframe_cache
210  PutBitContext pb; ///< bitstream writer for #sframe_cache
211 
212  /**
213  * @}
214  *
215  * @name Frame and superframe values
216  * Superframe and frame data - these can change from frame to frame,
217  * although some of them do in that case serve as a cache / history for
218  * the next frame or superframe.
219  * @{
220  */
221  double prev_lsps[MAX_LSPS]; ///< LSPs of the last frame of the previous
222  ///< superframe
223  int last_pitch_val; ///< pitch value of the previous frame
224  int last_acb_type; ///< frame type [0-2] of the previous frame
225  int pitch_diff_sh16; ///< ((cur_pitch_val - #last_pitch_val)
226  ///< << 16) / #MAX_FRAMESIZE
227  float silence_gain; ///< set for use in blocks if #ACB_TYPE_NONE
228 
229  int aw_idx_is_ext; ///< whether the AW index was encoded in
230  ///< 8 bits (instead of 6)
231  int aw_pulse_range; ///< the range over which #aw_pulse_set1()
232  ///< can apply the pulse, relative to the
233  ///< value in aw_first_pulse_off. The exact
234  ///< position of the first AW-pulse is within
235  ///< [pulse_off, pulse_off + this], and
236  ///< depends on bitstream values; [16 or 24]
237  int aw_n_pulses[2]; ///< number of AW-pulses in each block; note
238  ///< that this number can be negative (in
239  ///< which case it basically means "zero")
240  int aw_first_pulse_off[2]; ///< index of first sample to which to
241  ///< apply AW-pulses, or -0xff if unset
242  int aw_next_pulse_off_cache; ///< the position (relative to start of the
243  ///< second block) at which pulses should
244  ///< start to be positioned, serves as a
245  ///< cache for pitch-adaptive window pulses
246  ///< between blocks
247 
248  int frame_cntr; ///< current frame index [0 - 0xFFFE]; is
249  ///< only used for comfort noise in #pRNG()
250  int nb_superframes; ///< number of superframes in current packet
251  float gain_pred_err[6]; ///< cache for gain prediction
253  ///< cache of the signal of previous
254  ///< superframes, used as a history for
255  ///< signal generation
256  float synth_history[MAX_LSPS]; ///< see #excitation_history
257  /**
258  * @}
259  *
260  * @name Postfilter values
261  *
262  * Variables used for postfilter implementation, mostly history for
263  * smoothing and so on, and context variables for FFT/iFFT.
264  * @{
265  */
266  AVTXContext *rdft, *irdft; ///< contexts for FFT-calculation in the
267  av_tx_fn rdft_fn, irdft_fn; ///< postfilter (for denoise filter)
268  AVTXContext *dct, *dst; ///< contexts for phase shift (in Hilbert
269  av_tx_fn dct_fn, dst_fn; ///< transform, part of postfilter)
270  float sin[511], cos[511]; ///< 8-bit cosine/sine windows over [-pi,pi]
271  ///< range
272  float postfilter_agc; ///< gain control memory, used in
273  ///< #adaptive_gain_control()
274  float dcf_mem[2]; ///< DC filter history
276  ///< zero filter output (i.e. excitation)
277  ///< by postfilter
279  int denoise_filter_cache_size; ///< samples in #denoise_filter_cache
280  DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x82];
281  ///< aligned buffer for LPC tilting
283  ///< aligned buffer for denoise coefficients
285  ///< aligned buffer for postfilter speech
286  ///< synthesis
287  /**
288  * @}
289  */
291 
292 /**
293  * Set up the variable bit mode (VBM) tree from container extradata.
294  * @param gb bit I/O context.
295  * The bit context (s->gb) should be loaded with byte 23-46 of the
296  * container extradata (i.e. the ones containing the VBM tree).
297  * @param vbm_tree pointer to array to which the decoded VBM tree will be
298  * written.
299  * @return 0 on success, <0 on error.
300  */
301 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
302 {
303  int cntr[8] = { 0 }, n, res;
304 
305  memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
306  for (n = 0; n < 17; n++) {
307  res = get_bits(gb, 3);
308  if (cntr[res] > 3) // should be >= 3 + (res == 7))
309  return -1;
310  vbm_tree[res * 3 + cntr[res]++] = n;
311  }
312  return 0;
313 }
314 
316 {
317  static const uint8_t bits[] = {
318  2, 2, 2, 4, 4, 4,
319  6, 6, 6, 8, 8, 8,
320  10, 10, 10, 12, 12, 12,
321  14, 14, 14, 14
322  };
323 
326  1, NULL, 0, 0, 0, 0);
327 }
328 
330 {
332  int n;
333 
334  s->postfilter_agc = 0;
335  s->sframe_cache_size = 0;
336  s->skip_bits_next = 0;
337  for (n = 0; n < s->lsps; n++)
338  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
339  memset(s->excitation_history, 0,
340  sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
341  memset(s->synth_history, 0,
342  sizeof(*s->synth_history) * MAX_LSPS);
343  memset(s->gain_pred_err, 0,
344  sizeof(s->gain_pred_err));
345 
346  if (s->do_apf) {
347  memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
348  sizeof(*s->synth_filter_out_buf) * s->lsps);
349  memset(s->dcf_mem, 0,
350  sizeof(*s->dcf_mem) * 2);
351  memset(s->zero_exc_pf, 0,
352  sizeof(*s->zero_exc_pf) * s->history_nsamples);
353  memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
354  }
355 }
356 
357 /**
358  * Set up decoder with parameters from demuxer (extradata etc.).
359  */
361 {
362  static AVOnce init_static_once = AV_ONCE_INIT;
363  int n, flags, pitch_range, lsp16_flag, ret;
365 
366  ff_thread_once(&init_static_once, wmavoice_init_static_data);
367 
368  /**
369  * Extradata layout:
370  * - byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
371  * - byte 19-22: flags field (annoyingly in LE; see below for known
372  * values),
373  * - byte 23-46: variable bitmode tree (really just 17 * 3 bits,
374  * rest is 0).
375  */
376  if (ctx->extradata_size != 46) {
378  "Invalid extradata size %d (should be 46)\n",
379  ctx->extradata_size);
380  return AVERROR_INVALIDDATA;
381  }
382  if (ctx->block_align <= 0 || ctx->block_align > (1<<22)) {
383  av_log(ctx, AV_LOG_ERROR, "Invalid block alignment %d.\n", ctx->block_align);
384  return AVERROR_INVALIDDATA;
385  }
386 
387  flags = AV_RL32(ctx->extradata + 18);
388  s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
389  s->do_apf = flags & 0x1;
390  if (s->do_apf) {
391  float scale = 1.0f;
392 
393  ret = av_tx_init(&s->rdft, &s->rdft_fn, AV_TX_FLOAT_RDFT, 0, 1 << 7, &scale, 0);
394  if (ret < 0)
395  return ret;
396 
397  ret = av_tx_init(&s->irdft, &s->irdft_fn, AV_TX_FLOAT_RDFT, 1, 1 << 7, &scale, 0);
398  if (ret < 0)
399  return ret;
400 
401  scale = 1.0 / (1 << 6);
402  ret = av_tx_init(&s->dct, &s->dct_fn, AV_TX_FLOAT_DCT_I, 0, 1 << 6, &scale, 0);
403  if (ret < 0)
404  return ret;
405 
406  scale = 1.0 / (1 << 6);
407  ret = av_tx_init(&s->dst, &s->dst_fn, AV_TX_FLOAT_DST_I, 0, 1 << 6, &scale, 0);
408  if (ret < 0)
409  return ret;
410 
411  ff_sine_window_init(s->cos, 256);
412  memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
413  for (n = 0; n < 255; n++) {
414  s->sin[n] = -s->sin[510 - n];
415  s->cos[510 - n] = s->cos[n];
416  }
417  }
418  s->denoise_strength = (flags >> 2) & 0xF;
419  if (s->denoise_strength >= 12) {
421  "Invalid denoise filter strength %d (max=11)\n",
422  s->denoise_strength);
423  return AVERROR_INVALIDDATA;
424  }
425  s->denoise_tilt_corr = !!(flags & 0x40);
426  s->dc_level = (flags >> 7) & 0xF;
427  s->lsp_q_mode = !!(flags & 0x2000);
428  s->lsp_def_mode = !!(flags & 0x4000);
429  lsp16_flag = flags & 0x1000;
430  if (lsp16_flag) {
431  s->lsps = 16;
432  } else {
433  s->lsps = 10;
434  }
435  for (n = 0; n < s->lsps; n++)
436  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
437 
438  init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
439  if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
440  av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
441  return AVERROR_INVALIDDATA;
442  }
443 
444  if (ctx->sample_rate >= INT_MAX / (256 * 37))
445  return AVERROR_INVALIDDATA;
446 
447  s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
448  s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
449  pitch_range = s->max_pitch_val - s->min_pitch_val;
450  if (pitch_range <= 0) {
451  av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
452  return AVERROR_INVALIDDATA;
453  }
454  s->pitch_nbits = av_ceil_log2(pitch_range);
455  s->last_pitch_val = 40;
456  s->last_acb_type = ACB_TYPE_NONE;
457  s->history_nsamples = s->max_pitch_val + 8;
458 
459  if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
460  int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
461  max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
462 
464  "Unsupported samplerate %d (min=%d, max=%d)\n",
465  ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
466 
467  return AVERROR(ENOSYS);
468  }
469 
470  s->block_conv_table[0] = s->min_pitch_val;
471  s->block_conv_table[1] = (pitch_range * 25) >> 6;
472  s->block_conv_table[2] = (pitch_range * 44) >> 6;
473  s->block_conv_table[3] = s->max_pitch_val - 1;
474  s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
475  if (s->block_delta_pitch_hrange <= 0) {
476  av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
477  return AVERROR_INVALIDDATA;
478  }
479  s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
480  s->block_pitch_range = s->block_conv_table[2] +
481  s->block_conv_table[3] + 1 +
482  2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
483  s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
484 
485  av_channel_layout_uninit(&ctx->ch_layout);
487  ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
488 
489  return 0;
490 }
491 
492 /**
493  * @name Postfilter functions
494  * Postfilter functions (gain control, wiener denoise filter, DC filter,
495  * kalman smoothening, plus surrounding code to wrap it)
496  * @{
497  */
/**
 * Adaptive gain control (as used in postfilter).
 *
 * Identical to #ff_adaptive_gain_control() in acelp_vectors.c, except
 * that the energy here is calculated using sum(abs(...)), whereas the
 * other codecs (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).
 *
 * The per-sample gain is a first-order recursion:
 *   mem = alpha * mem + (1 - alpha) * sum|speech_synth| / sum|in|
 * and each output sample is the input sample scaled by the current mem.
 * If the input carries no energy (sum|in| == 0) the target term is 0, so
 * the gain simply decays by alpha per sample.
 *
 * @param out output buffer for filtered samples
 * @param in input buffer containing the samples as they are after the
 *           postfilter steps so far
 * @param speech_synth input buffer containing speech synth before postfilter
 * @param size input buffer size
 * @param alpha exponential filter factor
 * @param gain_mem pointer to filter memory (single float); read on entry
 *                 and updated with the final gain on return
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    /* "energy" here is the sum of absolute sample values */
    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* guard against division by zero on an all-zero input */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
535 
536 /**
537  * Kalman smoothing function.
538  *
539  * This function looks back pitch +/- 3 samples back into history to find
540  * the best fitting curve (that one giving the optimal gain of the two
541  * signals, i.e. the highest dot product between the two), and then
542  * uses that signal history to smoothen the output of the speech synthesis
543  * filter.
544  *
545  * @param s WMA Voice decoding context
546  * @param pitch pitch of the speech signal
547  * @param in input speech signal
548  * @param out output pointer for smoothened signal
549  * @param size input/output buffer size
550  *
551  * @returns -1 if no smoothening took place, e.g. because no optimal
552  * fit could be found, or 0 on success.
553  */
554 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
555  const float *in, float *out, int size)
556 {
557  int n;
558  float optimal_gain = 0, dot;
559  const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
560  *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
561  *best_hist_ptr = NULL;
562 
563  /* find best fitting point in history */
564  do {
565  dot = avpriv_scalarproduct_float_c(in, ptr, size);
566  if (dot > optimal_gain) {
567  optimal_gain = dot;
568  best_hist_ptr = ptr;
569  }
570  } while (--ptr >= end);
571 
572  if (optimal_gain <= 0)
573  return -1;
574  dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
575  if (dot <= 0) // would be 1.0
576  return -1;
577 
578  if (optimal_gain <= dot) {
579  dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
580  } else
581  dot = 0.625;
582 
583  /* actual smoothing */
584  for (n = 0; n < size; n++)
585  out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
586 
587  return 0;
588 }
589 
/**
 * Get the tilt factor of a formant filter from its transfer function
 * @see #tilt_factor() in amrnbdec.c, which does essentially the same,
 *      but somehow (??) it does a speech synthesis filter in the
 *      middle, which is missing here
 *
 * Computed as rh1 / rh0, where
 *   rh0 = 1 + sum(lpcs[i] * lpcs[i])                  for i in [0, n_lpcs)
 *   rh1 = lpcs[0] + sum(lpcs[i] * lpcs[i + 1])        for i in [0, n_lpcs - 1)
 *
 * @param lpcs LPC coefficients
 * @param n_lpcs Size of LPC buffer
 * @returns the tilt factor
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    float rh0, rh1;

    rh0 = 1.0     + avpriv_scalarproduct_float_c(lpcs,  lpcs,    n_lpcs);
    rh1 = lpcs[0] + avpriv_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);

    return rh1 / rh0;
}
609 
610 /**
611  * Derive denoise filter coefficients (in real domain) from the LPCs.
612  */
613 static void calc_input_response(WMAVoiceContext *s, float *lpcs_src,
614  int fcb_type, float *coeffs_dst, int remainder)
615 {
616  float last_coeff, min = 15.0, max = -15.0;
617  float irange, angle_mul, gain_mul, range, sq;
618  LOCAL_ALIGNED_32(float, coeffs, [0x82]);
619  LOCAL_ALIGNED_32(float, lpcs, [0x82]);
620  LOCAL_ALIGNED_32(float, lpcs_dct, [0x82]);
621  int n, idx;
622 
623  memcpy(coeffs, coeffs_dst, 0x82*sizeof(float));
624 
625  /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
626  s->rdft_fn(s->rdft, lpcs, lpcs_src, sizeof(float));
627 #define log_range(var, assign) do { \
628  float tmp = log10f(assign); var = tmp; \
629  max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
630  } while (0)
631  log_range(last_coeff, lpcs[64] * lpcs[64]);
632  for (n = 1; n < 64; n++)
633  log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
634  lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
635  log_range(lpcs[0], lpcs[0] * lpcs[0]);
636 #undef log_range
637  range = max - min;
638  lpcs[64] = last_coeff;
639 
640  /* Now, use this spectrum to pick out these frequencies with higher
641  * (relative) power/energy (which we then take to be "not noise"),
642  * and set up a table (still in lpc[]) of (relative) gains per frequency.
643  * These frequencies will be maintained, while others ("noise") will be
644  * decreased in the filter output. */
645  irange = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
646  gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
647  (5.0 / 14.7));
648  angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
649  for (n = 0; n <= 64; n++) {
650  float pwr;
651 
652  idx = lrint((max - lpcs[n]) * irange - 1);
653  idx = FFMAX(0, idx);
654  pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
655  lpcs[n] = angle_mul * pwr;
656 
657  /* 70.57 =~ 1/log10(1.0331663) */
658  idx = av_clipd((pwr * gain_mul - 0.0295) * 70.570526123, 0, INT_MAX / 2);
659 
660  if (idx > 127) { // fall back if index falls outside table range
661  coeffs[n] = wmavoice_energy_table[127] *
662  powf(1.0331663, idx - 127);
663  } else
664  coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
665  }
666 
667  /* calculate the Hilbert transform of the gains, which we do (since this
668  * is a sine input) by doing a phase shift (in theory, H(sin())=cos()).
669  * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
670  * "moment" of the LPCs in this filter. */
671  s->dct_fn(s->dct, lpcs_dct, lpcs, sizeof(float));
672  s->dst_fn(s->dst, lpcs, lpcs_dct, sizeof(float));
673 
674  /* Split out the coefficient indexes into phase/magnitude pairs */
675  idx = 255 + av_clip(lpcs[64], -255, 255);
676  coeffs[0] = coeffs[0] * s->cos[idx];
677  idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
678  last_coeff = coeffs[64] * s->cos[idx];
679  for (n = 63;; n--) {
680  idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
681  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
682  coeffs[n * 2] = coeffs[n] * s->cos[idx];
683 
684  if (!--n) break;
685 
686  idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
687  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
688  coeffs[n * 2] = coeffs[n] * s->cos[idx];
689  }
690  coeffs[64] = last_coeff;
691 
692  /* move into real domain */
693  s->irdft_fn(s->irdft, coeffs_dst, coeffs, sizeof(AVComplexFloat));
694 
695  /* tilt correction and normalize scale */
696  memset(&coeffs_dst[remainder], 0, sizeof(coeffs_dst[0]) * (128 - remainder));
697  if (s->denoise_tilt_corr) {
698  float tilt_mem = 0;
699 
700  coeffs_dst[remainder - 1] = 0;
701  ff_tilt_compensation(&tilt_mem,
702  -1.8 * tilt_factor(coeffs_dst, remainder - 1),
703  coeffs_dst, remainder);
704  }
705  sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs_dst, coeffs_dst,
706  remainder));
707  for (n = 0; n < remainder; n++)
708  coeffs_dst[n] *= sq;
709 }
710 
711 /**
712  * This function applies a Wiener filter on the (noisy) speech signal as
713  * a means to denoise it.
714  *
715  * - take RDFT of LPCs to get the power spectrum of the noise + speech;
716  * - using this power spectrum, calculate (for each frequency) the Wiener
717  * filter gain, which depends on the frequency power and desired level
718  * of noise subtraction (when set too high, this leads to artifacts)
719  * We can do this symmetrically over the X-axis (so 0-4kHz is the inverse
720  * of 4-8kHz);
721  * - by doing a phase shift, calculate the Hilbert transform of this array
722  * of per-frequency filter-gains to get the filtering coefficients;
723  * - smoothen/normalize/de-tilt these filter coefficients as desired;
724  * - take RDFT of noisy sound, apply the coefficients and take its IRDFT
725  * to get the denoised speech signal;
726  * - the leftover (i.e. output of the IRDFT on denoised speech data beyond
727  * the frame boundary) are saved and applied to subsequent frames by an
728  * overlap-add method (otherwise you get clicking-artifacts).
729  *
730  * @param s WMA Voice decoding context
731  * @param fcb_type Frame (codebook) type
732  * @param synth_pf input: the noisy speech signal, output: denoised speech
733  * data; should be 16-byte aligned (for ASM purposes)
734  * @param size size of the speech data
735  * @param lpcs LPCs used to synthesize this frame's speech data
736  */
737 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
738  float *synth_pf, int size,
739  const float *lpcs)
740 {
741  int remainder, lim, n;
742 
743  if (fcb_type != FCB_TYPE_SILENCE) {
744  LOCAL_ALIGNED_32(float, coeffs_f, [0x82]);
745  LOCAL_ALIGNED_32(float, synth_f, [0x82]);
746  float *tilted_lpcs = s->tilted_lpcs_pf,
747  *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
748 
749  tilted_lpcs[0] = 1.0;
750  memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
751  memset(&tilted_lpcs[s->lsps + 1], 0,
752  sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
753  ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
754  tilted_lpcs, s->lsps + 2);
755 
756  /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
757  * size is applied to the next frame. All input beyond this is zero,
758  * and thus all output beyond this will go towards zero, hence we can
759  * limit to min(size-1, 127-size) as a performance consideration. */
760  remainder = FFMIN(127 - size, size - 1);
761  calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
762 
763  /* apply coefficients (in frequency spectrum domain), i.e. complex
764  * number multiplication */
765  memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
766  s->rdft_fn(s->rdft, synth_f, synth_pf, sizeof(float));
767  s->rdft_fn(s->rdft, coeffs_f, coeffs, sizeof(float));
768  synth_f[0] *= coeffs_f[0];
769  synth_f[1] *= coeffs_f[1];
770  for (n = 1; n <= 64; n++) {
771  float v1 = synth_f[n * 2], v2 = synth_f[n * 2 + 1];
772  synth_f[n * 2] = v1 * coeffs_f[n * 2] - v2 * coeffs_f[n * 2 + 1];
773  synth_f[n * 2 + 1] = v2 * coeffs_f[n * 2] + v1 * coeffs_f[n * 2 + 1];
774  }
775  s->irdft_fn(s->irdft, synth_pf, synth_f, sizeof(AVComplexFloat));
776  }
777 
778  /* merge filter output with the history of previous runs */
779  if (s->denoise_filter_cache_size) {
780  lim = FFMIN(s->denoise_filter_cache_size, size);
781  for (n = 0; n < lim; n++)
782  synth_pf[n] += s->denoise_filter_cache[n];
783  s->denoise_filter_cache_size -= lim;
784  memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
785  sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
786  }
787 
788  /* move remainder of filter output into a cache for future runs */
789  if (fcb_type != FCB_TYPE_SILENCE) {
790  lim = FFMIN(remainder, s->denoise_filter_cache_size);
791  for (n = 0; n < lim; n++)
792  s->denoise_filter_cache[n] += synth_pf[size + n];
793  if (lim < remainder) {
794  memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
795  sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
796  s->denoise_filter_cache_size = remainder;
797  }
798  }
799 }
800 
801 /**
802  * Averaging projection filter, the postfilter used in WMAVoice.
803  *
804  * This uses the following steps:
805  * - A zero-synthesis filter (generate excitation from synth signal)
806  * - Kalman smoothing on excitation, based on pitch
807  * - Re-synthesized smoothened output
808  * - Iterative Wiener denoise filter
809  * - Adaptive gain filter
810  * - DC filter
811  *
812  * @param s WMAVoice decoding context
813  * @param synth Speech synthesis output (before postfilter)
814  * @param samples Output buffer for filtered samples
815  * @param size Buffer size of synth & samples
816  * @param lpcs Generated LPCs used for speech synthesis
817  * @param zero_exc_pf destination for zero synthesis filter (16-byte aligned)
818  * @param fcb_type Frame type (silence, hardcoded, AW-pulses or FCB-pulses)
819  * @param pitch Pitch of the input signal
820  */
821 static void postfilter(WMAVoiceContext *s, const float *synth,
822  float *samples, int size,
823  const float *lpcs, float *zero_exc_pf,
824  int fcb_type, int pitch)
825 {
826  float synth_filter_in_buf[MAX_FRAMESIZE / 2],
827  *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
828  *synth_filter_in = zero_exc_pf;
829 
830  av_assert0(size <= MAX_FRAMESIZE / 2);
831 
832  /* generate excitation from input signal */
833  ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
834 
835  if (fcb_type >= FCB_TYPE_AW_PULSES &&
836  !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
837  synth_filter_in = synth_filter_in_buf;
838 
839  /* re-synthesize speech after smoothening, and keep history */
840  ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
841  synth_filter_in, size, s->lsps);
842  memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
843  sizeof(synth_pf[0]) * s->lsps);
844 
845  wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
846 
847  adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
848  &s->postfilter_agc);
849 
850  if (s->dc_level > 8) {
851  /* remove ultra-low frequency DC noise / highpass filter;
852  * coefficients are identical to those used in SIPR decoding,
853  * and very closely resemble those used in AMR-NB decoding. */
855  (const float[2]) { -1.99997, 1.0 },
856  (const float[2]) { -1.9330735188, 0.93589198496 },
857  0.93980580475, s->dcf_mem, size);
858  }
859 }
860 /**
861  * @}
862  */
863 
/**
 * Dequantize LSPs
 *
 * The output is the sum over all stages of
 *   base_q[stage] + mul_q[stage] * table_stage[values[stage] * num + m]
 * for each coefficient m. The per-stage tables are stored back to back in
 * @p table; stage n occupies sizes[n] * num bytes.
 *
 * @param lsps output pointer to the array that will hold the LSPs
 * @param num number of LSPs to be dequantized
 * @param values quantized values, contains n_stages values
 * @param sizes range (i.e. max value) of each quantized value
 * @param n_stages number of dequantization runs
 * @param table dequantization table to be used
 * @param mul_q LSF multiplier
 * @param base_q base (lowest) LSF values
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    int n, m;

    memset(lsps, 0, num * sizeof(*lsps));
    for (n = 0; n < n_stages; n++) {
        /* row of this stage's table selected by the quantized value */
        const uint8_t *t_off = &table[values[n] * num];
        double base = base_q[n], mul = mul_q[n];

        for (m = 0; m < num; m++)
            lsps[m] += base + mul * t_off[m];

        /* advance to the start of the next stage's table */
        table += sizes[n] * num;
    }
}
895 
896 /**
897  * @name LSP dequantization routines
898  * LSP dequantization routines, for 10/16LSPs and independent/residual coding.
899  * lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits;
900  * lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.
901  * @{
902  */
903 /**
904  * Parse 10 independently-coded LSPs.
905  */
906 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
907 {
908  static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
909  static const double mul_lsf[4] = {
910  5.2187144800e-3, 1.4626986422e-3,
911  9.6179549166e-4, 1.1325736225e-3
912  };
913  static const double base_lsf[4] = {
914  M_PI * -2.15522e-1, M_PI * -6.1646e-2,
915  M_PI * -3.3486e-2, M_PI * -5.7408e-2
916  };
917  uint16_t v[4];
918 
919  v[0] = get_bits(gb, 8);
920  v[1] = get_bits(gb, 6);
921  v[2] = get_bits(gb, 5);
922  v[3] = get_bits(gb, 5);
923 
924  dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
925  mul_lsf, base_lsf);
926 }
927 
928 /**
929  * Parse 10 independently-coded LSPs, and then derive the tables to
930  * generate LSPs for the other frames from them (residual coding).
931  */
933  double *i_lsps, const double *old,
934  double *a1, double *a2, int q_mode)
935 {
936  static const uint16_t vec_sizes[3] = { 128, 64, 64 };
937  static const double mul_lsf[3] = {
938  2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
939  };
940  static const double base_lsf[3] = {
941  M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
942  };
943  const float (*ipol_tab)[2][10] = q_mode ?
945  uint16_t interpol, v[3];
946  int n;
947 
948  dequant_lsp10i(gb, i_lsps);
949 
950  interpol = get_bits(gb, 5);
951  v[0] = get_bits(gb, 7);
952  v[1] = get_bits(gb, 6);
953  v[2] = get_bits(gb, 6);
954 
955  for (n = 0; n < 10; n++) {
956  double delta = old[n] - i_lsps[n];
957  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
958  a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
959  }
960 
961  dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
962  mul_lsf, base_lsf);
963 }
964 
965 /**
966  * Parse 16 independently-coded LSPs.
967  */
968 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
969 {
970  static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
971  static const double mul_lsf[5] = {
972  3.3439586280e-3, 6.9908173703e-4,
973  3.3216608306e-3, 1.0334960326e-3,
974  3.1899104283e-3
975  };
976  static const double base_lsf[5] = {
977  M_PI * -1.27576e-1, M_PI * -2.4292e-2,
978  M_PI * -1.28094e-1, M_PI * -3.2128e-2,
979  M_PI * -1.29816e-1
980  };
981  uint16_t v[5];
982 
983  v[0] = get_bits(gb, 8);
984  v[1] = get_bits(gb, 6);
985  v[2] = get_bits(gb, 7);
986  v[3] = get_bits(gb, 6);
987  v[4] = get_bits(gb, 7);
988 
989  dequant_lsps( lsps, 5, v, vec_sizes, 2,
990  wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
991  dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
992  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
993  dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
994  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
995 }
996 
997 /**
998  * Parse 16 independently-coded LSPs, and then derive the tables to
999  * generate LSPs for the other frames from them (residual coding).
1000  */
1002  double *i_lsps, const double *old,
1003  double *a1, double *a2, int q_mode)
1004 {
1005  static const uint16_t vec_sizes[3] = { 128, 128, 128 };
1006  static const double mul_lsf[3] = {
1007  1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
1008  };
1009  static const double base_lsf[3] = {
1010  M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
1011  };
1012  const float (*ipol_tab)[2][16] = q_mode ?
1014  uint16_t interpol, v[3];
1015  int n;
1016 
1017  dequant_lsp16i(gb, i_lsps);
1018 
1019  interpol = get_bits(gb, 5);
1020  v[0] = get_bits(gb, 7);
1021  v[1] = get_bits(gb, 7);
1022  v[2] = get_bits(gb, 7);
1023 
1024  for (n = 0; n < 16; n++) {
1025  double delta = old[n] - i_lsps[n];
1026  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
1027  a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
1028  }
1029 
1030  dequant_lsps( a2, 10, v, vec_sizes, 1,
1031  wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
1032  dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
1033  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
1034  dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
1035  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
1036 }
1037 
1038 /**
1039  * @}
1040  * @name Pitch-adaptive window coding functions
1041  * The next few functions are for pitch-adaptive window coding.
1042  * @{
1043  */
1044 /**
1045  * Parse the offset of the first pitch-adaptive window pulses, and
1046  * the distribution of pulses between the two blocks in this frame.
1047  * @param s WMA Voice decoding context private data
1048  * @param gb bit I/O context
1049  * @param pitch pitch for each block in this frame
1050  */
1052  const int *pitch)
1053 {
1054  static const int16_t start_offset[94] = {
1055  -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
1056  13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
1057  27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
1058  45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
1059  69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
1060  93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
1061  117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
1062  141, 143, 145, 147, 149, 151, 153, 155, 157, 159
1063  };
1064  int bits, offset;
1065 
1066  /* position of pulse */
1067  s->aw_idx_is_ext = 0;
1068  if ((bits = get_bits(gb, 6)) >= 54) {
1069  s->aw_idx_is_ext = 1;
1070  bits += (bits - 54) * 3 + get_bits(gb, 2);
1071  }
1072 
1073  /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
1074  * the distribution of the pulses in each block contained in this frame. */
1075  s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
1076  for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
1077  s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
1078  s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
1079  offset += s->aw_n_pulses[0] * pitch[0];
1080  s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
1081  s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
1082 
1083  /* if continuing from a position before the block, reset position to
1084  * start of block (when corrected for the range over which it can be
1085  * spread in aw_pulse_set1()). */
1086  if (start_offset[bits] < MAX_FRAMESIZE / 2) {
1087  while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
1088  s->aw_first_pulse_off[1] -= pitch[1];
1089  if (start_offset[bits] < 0)
1090  while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
1091  s->aw_first_pulse_off[0] -= pitch[0];
1092  }
1093 }
1094 
1095 /**
1096  * Apply second set of pitch-adaptive window pulses.
1097  * @param s WMA Voice decoding context private data
1098  * @param gb bit I/O context
1099  * @param block_idx block index in frame [0, 1]
1100  * @param fcb structure containing fixed codebook vector info
1101  * @return -1 on error, 0 otherwise
1102  */
1104  int block_idx, AMRFixed *fcb)
1105 {
1106  uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
1107  uint16_t *use_mask = use_mask_mem + 2;
1108  /* in this function, idx is the index in the 80-bit (+ padding) use_mask
1109  * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
1110  * of idx are the position of the bit within a particular item in the
1111  * array (0 being the most significant bit, and 15 being the least
1112  * significant bit), and the remainder (>> 4) is the index in the
1113  * use_mask[]-array. This is faster and uses less memory than using a
1114  * 80-byte/80-int array. */
1115  int pulse_off = s->aw_first_pulse_off[block_idx],
1116  pulse_start, n, idx, range, aidx, start_off = 0;
1117 
1118  /* set offset of first pulse to within this block */
1119  if (s->aw_n_pulses[block_idx] > 0)
1120  while (pulse_off + s->aw_pulse_range < 1)
1121  pulse_off += fcb->pitch_lag;
1122 
1123  /* find range per pulse */
1124  if (s->aw_n_pulses[0] > 0) {
1125  if (block_idx == 0) {
1126  range = 32;
1127  } else /* block_idx = 1 */ {
1128  range = 8;
1129  if (s->aw_n_pulses[block_idx] > 0)
1130  pulse_off = s->aw_next_pulse_off_cache;
1131  }
1132  } else
1133  range = 16;
1134  pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
1135 
1136  /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
1137  * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
1138  * we exclude that range from being pulsed again in this function. */
1139  memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
1140  memset( use_mask, -1, 5 * sizeof(use_mask[0]));
1141  memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
1142  if (s->aw_n_pulses[block_idx] > 0)
1143  for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
1144  int excl_range = s->aw_pulse_range; // always 16 or 24
1145  uint16_t *use_mask_ptr = &use_mask[idx >> 4];
1146  int first_sh = 16 - (idx & 15);
1147  *use_mask_ptr++ &= 0xFFFFu << first_sh;
1148  excl_range -= first_sh;
1149  if (excl_range >= 16) {
1150  *use_mask_ptr++ = 0;
1151  *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
1152  } else
1153  *use_mask_ptr &= 0xFFFF >> excl_range;
1154  }
1155 
1156  /* find the 'aidx'th offset that is not excluded */
1157  aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
1158  for (n = 0; n <= aidx; pulse_start++) {
1159  for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
1160  if (idx >= MAX_FRAMESIZE / 2) { // find from zero
1161  if (use_mask[0]) idx = 0x0F;
1162  else if (use_mask[1]) idx = 0x1F;
1163  else if (use_mask[2]) idx = 0x2F;
1164  else if (use_mask[3]) idx = 0x3F;
1165  else if (use_mask[4]) idx = 0x4F;
1166  else return -1;
1167  idx -= av_log2_16bit(use_mask[idx >> 4]);
1168  }
1169  if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
1170  use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
1171  n++;
1172  start_off = idx;
1173  }
1174  }
1175 
1176  fcb->x[fcb->n] = start_off;
1177  fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
1178  fcb->n++;
1179 
1180  /* set offset for next block, relative to start of that block */
1181  n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
1182  s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
1183  return 0;
1184 }
1185 
1186 /**
1187  * Apply first set of pitch-adaptive window pulses.
1188  * @param s WMA Voice decoding context private data
1189  * @param gb bit I/O context
1190  * @param block_idx block index in frame [0, 1]
1191  * @param fcb storage location for fixed codebook pulse info
1192  */
1194  int block_idx, AMRFixed *fcb)
1195 {
1196  int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
1197  float v;
1198 
1199  if (s->aw_n_pulses[block_idx] > 0) {
1200  int n, v_mask, i_mask, sh, n_pulses;
1201 
1202  if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
1203  n_pulses = 3;
1204  v_mask = 8;
1205  i_mask = 7;
1206  sh = 4;
1207  } else { // 4 pulses, 1:sign + 2:index each
1208  n_pulses = 4;
1209  v_mask = 4;
1210  i_mask = 3;
1211  sh = 3;
1212  }
1213 
1214  for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
1215  fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
1216  fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
1217  s->aw_first_pulse_off[block_idx];
1218  while (fcb->x[fcb->n] < 0)
1219  fcb->x[fcb->n] += fcb->pitch_lag;
1220  if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
1221  fcb->n++;
1222  }
1223  } else {
1224  int num2 = (val & 0x1FF) >> 1, delta, idx;
1225 
1226  if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
1227  else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
1228  else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
1229  else { delta = 7; idx = num2 + 1 - 3 * 75; }
1230  v = (val & 0x200) ? -1.0 : 1.0;
1231 
1232  fcb->no_repeat_mask |= 3 << fcb->n;
1233  fcb->x[fcb->n] = idx - delta;
1234  fcb->y[fcb->n] = v;
1235  fcb->x[fcb->n + 1] = idx;
1236  fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
1237  fcb->n += 2;
1238  }
1239 }
1240 
/**
 * @}
 *
 * Generate a random number from frame_cntr and block_idx, which will live
 * in the range [0, 1000 - block_size] (so it can be used as an index in a
 * table of size 1000 of which you want to read block_size entries).
 *
 * @param frame_cntr current frame number
 * @param block_num current block index
 * @param block_size amount of entries we want to read from a table
 *                   that has 1000 entries; must be less than 1000, as
 *                   1000 - block_size is used as a divisor
 * @return a (non-)random number in the [0, 1000 - block_size] range.
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* array to simplify the calculation of z:
     * y = (x % 9) * 5 + 6;
     * z = (49995 * x) / y;
     * Since y only has 9 values, we can remove the division by using a
     * LUT and using FASTDIV-style divisions. For each of the 9 values
     * of y, we can rewrite z as:
     * z = x * (49995 / y) + x * ((49995 % y) / y)
     * In this table, each row represents one possible value of y, the
     * first number is 49995 / y, and the second is the FASTDIV variant
     * of 49995 % y / y. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 *  93368855U }  // y = 46
    };
    unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
    if (x >= 0xFFFF) x -= 0xFFFF;   // max value of x is 8*1877+0xFFFE=0x13AA6,
                                    // so this is effectively a modulo (%)
    /* from here on y holds x % 9, i.e. the row index into div_tbl, not
     * the y of the formula above */
    y = x - 9 * MULH(477218589, x); // x % 9
    z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
                                    // z = x * 49995 / (y * 5 + 6)
    return z % (1000 - block_size);
}
1285 
1286 /**
1287  * Parse hardcoded signal for a single block.
1288  * @note see #synth_block().
1289  */
1291  int block_idx, int size,
1292  const struct frame_type_desc *frame_desc,
1293  float *excitation)
1294 {
1295  float gain;
1296  int n, r_idx;
1297 
1299 
1300  /* Set the offset from which we start reading wmavoice_std_codebook */
1301  if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
1302  r_idx = pRNG(s->frame_cntr, block_idx, size);
1303  gain = s->silence_gain;
1304  } else /* FCB_TYPE_HARDCODED */ {
1305  r_idx = get_bits(gb, 8);
1306  gain = wmavoice_gain_universal[get_bits(gb, 6)];
1307  }
1308 
1309  /* Clear gain prediction parameters */
1310  memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
1311 
1312  /* Apply gain to hardcoded codebook and use that as excitation signal */
1313  for (n = 0; n < size; n++)
1314  excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
1315 }
1316 
1317 /**
1318  * Parse FCB/ACB signal for a single block.
1319  * @note see #synth_block().
1320  */
1322  int block_idx, int size,
1323  int block_pitch_sh2,
1324  const struct frame_type_desc *frame_desc,
1325  float *excitation)
1326 {
1327  static const float gain_coeff[6] = {
1328  0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
1329  };
1330  float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
1331  int n, idx, gain_weight;
1332  AMRFixed fcb;
1333 
1334  av_assert0(size <= MAX_FRAMESIZE / 2);
1335  memset(pulses, 0, sizeof(*pulses) * size);
1336 
1337  fcb.pitch_lag = block_pitch_sh2 >> 2;
1338  fcb.pitch_fac = 1.0;
1339  fcb.no_repeat_mask = 0;
1340  fcb.n = 0;
1341 
1342  /* For the other frame types, this is where we apply the innovation
1343  * (fixed) codebook pulses of the speech signal. */
1344  if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
1345  aw_pulse_set1(s, gb, block_idx, &fcb);
1346  if (aw_pulse_set2(s, gb, block_idx, &fcb)) {
1347  /* Conceal the block with silence and return.
1348  * Skip the correct amount of bits to read the next
1349  * block from the correct offset. */
1350  int r_idx = pRNG(s->frame_cntr, block_idx, size);
1351 
1352  for (n = 0; n < size; n++)
1353  excitation[n] =
1354  wmavoice_std_codebook[r_idx + n] * s->silence_gain;
1355  skip_bits(gb, 7 + 1);
1356  return;
1357  }
1358  } else /* FCB_TYPE_EXC_PULSES */ {
1359  int offset_nbits = 5 - frame_desc->log_n_blocks;
1360 
1361  fcb.no_repeat_mask = -1;
1362  /* similar to ff_decode_10_pulses_35bits(), but with single pulses
1363  * (instead of double) for a subset of pulses */
1364  for (n = 0; n < 5; n++) {
1365  float sign;
1366  int pos1, pos2;
1367 
1368  sign = get_bits1(gb) ? 1.0 : -1.0;
1369  pos1 = get_bits(gb, offset_nbits);
1370  fcb.x[fcb.n] = n + 5 * pos1;
1371  fcb.y[fcb.n++] = sign;
1372  if (n < frame_desc->dbl_pulses) {
1373  pos2 = get_bits(gb, offset_nbits);
1374  fcb.x[fcb.n] = n + 5 * pos2;
1375  fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
1376  }
1377  }
1378  }
1379  ff_set_fixed_vector(pulses, &fcb, 1.0, size);
1380 
1381  /* Calculate gain for adaptive & fixed codebook signal.
1382  * see ff_amr_set_fixed_gain(). */
1383  idx = get_bits(gb, 7);
1384  fcb_gain = expf(avpriv_scalarproduct_float_c(s->gain_pred_err,
1385  gain_coeff, 6) -
1386  5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
1387  acb_gain = wmavoice_gain_codebook_acb[idx];
1388  pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
1389  -2.9957322736 /* log(0.05) */,
1390  1.6094379124 /* log(5.0) */);
1391 
1392  gain_weight = 8 >> frame_desc->log_n_blocks;
1393  memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
1394  sizeof(*s->gain_pred_err) * (6 - gain_weight));
1395  for (n = 0; n < gain_weight; n++)
1396  s->gain_pred_err[n] = pred_err;
1397 
1398  /* Calculation of adaptive codebook */
1399  if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
1400  int len;
1401  for (n = 0; n < size; n += len) {
1402  int next_idx_sh16;
1403  int abs_idx = block_idx * size + n;
1404  int pitch_sh16 = (s->last_pitch_val << 16) +
1405  s->pitch_diff_sh16 * abs_idx;
1406  int pitch = (pitch_sh16 + 0x6FFF) >> 16;
1407  int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
1408  idx = idx_sh16 >> 16;
1409  if (s->pitch_diff_sh16) {
1410  if (s->pitch_diff_sh16 > 0) {
1411  next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
1412  } else
1413  next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
1414  len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
1415  1, size - n);
1416  } else
1417  len = size;
1418 
1419  ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
1421  idx, 9, len);
1422  }
1423  } else /* ACB_TYPE_HAMMING */ {
1424  int block_pitch = block_pitch_sh2 >> 2;
1425  idx = block_pitch_sh2 & 3;
1426  if (idx) {
1427  ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
1429  idx, 8, size);
1430  } else
1431  av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
1432  sizeof(float) * size);
1433  }
1434 
1435  /* Interpolate ACB/FCB and use as excitation signal */
1436  ff_weighted_vector_sumf(excitation, excitation, pulses,
1437  acb_gain, fcb_gain, size);
1438 }
1439 
1440 /**
1441  * Parse data in a single block.
1442  *
1443  * @param s WMA Voice decoding context private data
1444  * @param gb bit I/O context
1445  * @param block_idx index of the to-be-read block
1446  * @param size amount of samples to be read in this block
1447  * @param block_pitch_sh2 pitch for this block << 2
1448  * @param lsps LSPs for (the end of) this frame
1449  * @param prev_lsps LSPs for the last frame
1450  * @param frame_desc frame type descriptor
1451  * @param excitation target memory for the ACB+FCB interpolated signal
1452  * @param synth target memory for the speech synthesis filter output
1453  * @return 0 on success, <0 on error.
1454  */
1456  int block_idx, int size,
1457  int block_pitch_sh2,
1458  const double *lsps, const double *prev_lsps,
1459  const struct frame_type_desc *frame_desc,
1460  float *excitation, float *synth)
1461 {
1462  double i_lsps[MAX_LSPS];
1463  float lpcs[MAX_LSPS];
1464  float fac;
1465  int n;
1466 
1467  if (frame_desc->acb_type == ACB_TYPE_NONE)
1468  synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
1469  else
1470  synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
1471  frame_desc, excitation);
1472 
1473  /* convert interpolated LSPs to LPCs */
1474  fac = (block_idx + 0.5) / frame_desc->n_blocks;
1475  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1476  i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
1477  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1478 
1479  /* Speech synthesis */
1480  ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
1481 }
1482 
1483 /**
1484  * Synthesize output samples for a single frame.
1485  *
1486  * @param ctx WMA Voice decoder context
1487  * @param gb bit I/O context (s->gb or one for cross-packet superframes)
1488  * @param frame_idx Frame number within superframe [0-2]
1489  * @param samples pointer to output sample buffer, has space for at least 160
1490  * samples
1491  * @param lsps LSP array
1492  * @param prev_lsps array of previous frame's LSPs
1493  * @param excitation target buffer for excitation signal
1494  * @param synth target buffer for synthesized speech data
1495  * @return 0 on success, <0 on error.
1496  */
1497 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
1498  float *samples,
1499  const double *lsps, const double *prev_lsps,
1500  float *excitation, float *synth)
1501 {
1503  int n, n_blocks_x2, log_n_blocks_x2, av_uninit(cur_pitch_val);
1504  int pitch[MAX_BLOCKS], av_uninit(last_block_pitch);
1505 
1506  /* Parse frame type ("frame header"), see frame_descs */
1507  int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc, 6, 3)], block_nsamples;
1508 
1509  pitch[0] = INT_MAX;
1510 
1511  if (bd_idx < 0) {
1513  "Invalid frame type VLC code, skipping\n");
1514  return AVERROR_INVALIDDATA;
1515  }
1516 
1517  block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
1518 
1519  /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
1520  if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
1521  /* Pitch is provided per frame, which is interpreted as the pitch of
1522  * the last sample of the last block of this frame. We can interpolate
1523  * the pitch of other blocks (and even pitch-per-sample) by gradually
1524  * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
1525  n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
1526  log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
1527  cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
1528  cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
1529  if (s->last_acb_type == ACB_TYPE_NONE ||
1530  20 * abs(cur_pitch_val - s->last_pitch_val) >
1531  (cur_pitch_val + s->last_pitch_val))
1532  s->last_pitch_val = cur_pitch_val;
1533 
1534  /* pitch per block */
1535  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1536  int fac = n * 2 + 1;
1537 
1538  pitch[n] = (MUL16(fac, cur_pitch_val) +
1539  MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
1540  frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
1541  }
1542 
1543  /* "pitch-diff-per-sample" for calculation of pitch per sample */
1544  s->pitch_diff_sh16 =
1545  (cur_pitch_val - s->last_pitch_val) * (1 << 16) / MAX_FRAMESIZE;
1546  }
1547 
1548  /* Global gain (if silence) and pitch-adaptive window coordinates */
1549  switch (frame_descs[bd_idx].fcb_type) {
1550  case FCB_TYPE_SILENCE:
1551  s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
1552  break;
1553  case FCB_TYPE_AW_PULSES:
1554  aw_parse_coords(s, gb, pitch);
1555  break;
1556  }
1557 
1558  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1559  int bl_pitch_sh2;
1560 
1561  /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
1562  switch (frame_descs[bd_idx].acb_type) {
1563  case ACB_TYPE_HAMMING: {
1564  /* Pitch is given per block. Per-block pitches are encoded as an
1565  * absolute value for the first block, and then delta values
1566  * relative to this value) for all subsequent blocks. The scale of
1567  * this pitch value is semi-logarithmic compared to its use in the
1568  * decoder, so we convert it to normal scale also. */
1569  int block_pitch,
1570  t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
1571  t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
1572  t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
1573 
1574  if (n == 0) {
1575  block_pitch = get_bits(gb, s->block_pitch_nbits);
1576  } else
1577  block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
1578  get_bits(gb, s->block_delta_pitch_nbits);
1579  /* Convert last_ so that any next delta is within _range */
1580  last_block_pitch = av_clip(block_pitch,
1581  s->block_delta_pitch_hrange,
1582  s->block_pitch_range -
1583  s->block_delta_pitch_hrange);
1584 
1585  /* Convert semi-log-style scale back to normal scale */
1586  if (block_pitch < t1) {
1587  bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
1588  } else {
1589  block_pitch -= t1;
1590  if (block_pitch < t2) {
1591  bl_pitch_sh2 =
1592  (s->block_conv_table[1] << 2) + (block_pitch << 1);
1593  } else {
1594  block_pitch -= t2;
1595  if (block_pitch < t3) {
1596  bl_pitch_sh2 =
1597  (s->block_conv_table[2] + block_pitch) << 2;
1598  } else
1599  bl_pitch_sh2 = s->block_conv_table[3] << 2;
1600  }
1601  }
1602  pitch[n] = bl_pitch_sh2 >> 2;
1603  break;
1604  }
1605 
1606  case ACB_TYPE_ASYMMETRIC: {
1607  bl_pitch_sh2 = pitch[n] << 2;
1608  break;
1609  }
1610 
1611  default: // ACB_TYPE_NONE has no pitch
1612  bl_pitch_sh2 = 0;
1613  break;
1614  }
1615 
1616  synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
1617  lsps, prev_lsps, &frame_descs[bd_idx],
1618  &excitation[n * block_nsamples],
1619  &synth[n * block_nsamples]);
1620  }
1621 
1622  /* Averaging projection filter, if applicable. Else, just copy samples
1623  * from synthesis buffer */
1624  if (s->do_apf) {
1625  double i_lsps[MAX_LSPS];
1626  float lpcs[MAX_LSPS];
1627 
1628  if(frame_descs[bd_idx].fcb_type >= FCB_TYPE_AW_PULSES && pitch[0] == INT_MAX)
1629  return AVERROR_INVALIDDATA;
1630 
1631  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1632  i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
1633  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1634  postfilter(s, synth, samples, 80, lpcs,
1635  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
1636  frame_descs[bd_idx].fcb_type, pitch[0]);
1637 
1638  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1639  i_lsps[n] = cos(lsps[n]);
1640  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1641  postfilter(s, &synth[80], &samples[80], 80, lpcs,
1642  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
1643  frame_descs[bd_idx].fcb_type, pitch[0]);
1644  } else
1645  memcpy(samples, synth, 160 * sizeof(synth[0]));
1646 
1647  /* Cache values for next frame */
1648  s->frame_cntr++;
1649  if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
1650  s->last_acb_type = frame_descs[bd_idx].acb_type;
1651  switch (frame_descs[bd_idx].acb_type) {
1652  case ACB_TYPE_NONE:
1653  s->last_pitch_val = 0;
1654  break;
1655  case ACB_TYPE_ASYMMETRIC:
1656  s->last_pitch_val = cur_pitch_val;
1657  break;
1658  case ACB_TYPE_HAMMING:
1659  s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
1660  break;
1661  }
1662 
1663  return 0;
1664 }
1665 
1666 /**
1667  * Ensure minimum value for first item, maximum value for last value,
1668  * proper spacing between each value and proper ordering.
1669  *
1670  * @param lsps array of LSPs
1671  * @param num size of LSP array
1672  *
1673  * @note basically a double version of #ff_acelp_reorder_lsf(), might be
1674  * useful to put in a generic location later on. Parts are also
1675  * present in #ff_set_min_dist_lsf() + #ff_sort_nearly_sorted_floats(),
1676  * which is in float.
1677  */
1678 static void stabilize_lsps(double *lsps, int num)
1679 {
1680  int n, m, l;
1681 
1682  /* set minimum value for first, maximum value for last and minimum
1683  * spacing between LSF values.
1684  * Very similar to ff_set_min_dist_lsf(), but in double. */
1685  lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
1686  for (n = 1; n < num; n++)
1687  lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
1688  lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
1689 
1690  /* reorder (looks like one-time / non-recursed bubblesort).
1691  * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
1692  for (n = 1; n < num; n++) {
1693  if (lsps[n] < lsps[n - 1]) {
1694  for (m = 1; m < num; m++) {
1695  double tmp = lsps[m];
1696  for (l = m - 1; l >= 0; l--) {
1697  if (lsps[l] <= tmp) break;
1698  lsps[l + 1] = lsps[l];
1699  }
1700  lsps[l + 1] = tmp;
1701  }
1702  break;
1703  }
1704  }
1705 }
1706 
1707 /**
1708  * Synthesize output samples for a single superframe. If we have any data
1709  * cached in s->sframe_cache, that will be used instead of whatever is loaded
1710  * in s->gb.
1711  *
1712  * WMA Voice superframes contain 3 frames, each containing 160 audio samples,
1713  * to give a total of 480 samples per frame. See #synth_frame() for frame
1714  * parsing. In addition to 3 frames, superframes can also contain the LSPs
1715  * (if these are globally specified for all frames (residually); they can
1716  * also be specified individually per-frame. See the s->has_residual_lsps
1717  * option), and can specify the number of samples encoded in this superframe
1718  * (if less than 480), usually used to prevent blanks at track boundaries.
1719  *
1720  * @param ctx WMA Voice decoder context
1721  * @return 0 on success, <0 on error or 1 if there was not enough data to
1722  * fully parse the superframe
1723  */
/* NOTE(review): the listing numbers jump from 1723 to 1725 and from 1726 to
 * 1728, so the first line of this function's signature and one line at the
 * top of its body are absent from this extract. The code below uses `ctx`,
 * `frame` and `s`, which are presumably declared on those lines - confirm
 * against the complete file. */
1725  int *got_frame_ptr)
1726 {
1728  GetBitContext *gb = &s->gb, s_gb;
1729  int n, res, n_samples = MAX_SFRAMESIZE;
1730  double lsps[MAX_FRAMES][MAX_LSPS];
1731  const double *mean_lsf = s->lsps == 16 ?
1732  wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
1733  float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
1734  float synth[MAX_LSPS + MAX_SFRAMESIZE];
1735  float *samples;
1736 
/* Start from the state saved by the previous call: the first s->lsps entries
 * of synth[] and the first s->history_nsamples entries of excitation[] are
 * history; new samples are written after them. */
1737  memcpy(synth, s->synth_history,
1738  s->lsps * sizeof(*synth));
1739  memcpy(excitation, s->excitation_history,
1740  s->history_nsamples * sizeof(*excitation));
1741 
/* A non-empty superframe cache holds bits that were collected by the packet
 * decoder; read the superframe from the cache through the local reader s_gb
 * instead of from s->gb, and mark the cache as consumed. */
1742  if (s->sframe_cache_size > 0) {
1743  gb = &s_gb;
1744  init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
1745  s->sframe_cache_size = 0;
1746  }
1747 
1748  /* First bit is speech/music bit, it differentiates between WMAVoice
1749  * speech samples (the actual codec) and WMAVoice music samples, which
1750  * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
1751  * the wild yet. */
1752  if (!get_bits1(gb)) {
1753  avpriv_request_sample(ctx, "WMAPro-in-WMAVoice");
1754  return AVERROR_PATCHWELCOME;
1755  }
1756 
1757  /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
1758  if (get_bits1(gb)) {
1759  if ((n_samples = get_bits(gb, 12)) > MAX_SFRAMESIZE) {
/* NOTE(review): listing line 1760 is absent from this extract; the two lines
 * below look like the format string and arguments of a logging call whose
 * first line was dropped - confirm. */
1761  "Superframe encodes > %d samples (%d), not allowed\n",
1762  MAX_SFRAMESIZE, n_samples);
1763  return AVERROR_INVALIDDATA;
1764  }
1765  }
1766 
1767  /* Parse LSPs, if global for the superframe (can also be per-frame). */
1768  if (s->has_residual_lsps) {
1769  double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
1770 
/* The residual dequantizers receive the previous LSPs with the mean removed;
 * the mean is added back to all three frames' LSPs below. */
1771  for (n = 0; n < s->lsps; n++)
1772  prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
1773 
1774  if (s->lsps == 10) {
1775  dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1776  } else /* s->lsps == 16 */
1777  dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1778 
/* lsps[2] was filled by the dequantizer; lsps[0] and lsps[1] are derived
 * from the a1/a2 tables it produced. */
1779  for (n = 0; n < s->lsps; n++) {
1780  lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
1781  lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
1782  lsps[2][n] += mean_lsf[n];
1783  }
1784  for (n = 0; n < 3; n++)
1785  stabilize_lsps(lsps[n], s->lsps);
1786  }
1787 
1788  /* synth_superframe can run multiple times per packet
1789  * free potential previous frame */
/* NOTE(review): listing line 1790, the statement the comment above refers
 * to, is absent from this extract. */
1791 
1792  /* get output buffer */
/* The buffer is requested for a full superframe (MAX_SFRAMESIZE samples);
 * nb_samples is then set to the count signalled in the bitstream. */
1793  frame->nb_samples = MAX_SFRAMESIZE;
1794  if ((res = ff_get_buffer(ctx, frame, 0)) < 0)
1795  return res;
1796  frame->nb_samples = n_samples;
1797  samples = (float *)frame->data[0];
1798 
1799  /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
1800  for (n = 0; n < 3; n++) {
1801  if (!s->has_residual_lsps) {
1802  int m;
1803 
1804  if (s->lsps == 10) {
1805  dequant_lsp10i(gb, lsps[n]);
1806  } else /* s->lsps == 16 */
1807  dequant_lsp16i(gb, lsps[n]);
1808 
1809  for (m = 0; m < s->lsps; m++)
1810  lsps[n][m] += mean_lsf[m];
1811  stabilize_lsps(lsps[n], s->lsps);
1812  }
1813 
/* Frame n works at offset n * MAX_FRAMESIZE in the output, excitation and
 * synth buffers. Its "previous" LSPs are those of frame n - 1, or
 * s->prev_lsps for the first frame. Any non-zero result from synth_frame()
 * abandons the superframe without producing output. */
1814  if ((res = synth_frame(ctx, gb, n,
1815  &samples[n * MAX_FRAMESIZE],
1816  lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
1817  &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
1818  &synth[s->lsps + n * MAX_FRAMESIZE]))) {
1819  *got_frame_ptr = 0;
1820  return res;
1821  }
1822  }
1823 
1824  /* Statistics? FIXME - we don't check for length, a slight overrun
1825  * will be caught by internal buffer padding, and anything else
1826  * will be skipped, not read. */
1827  if (get_bits1(gb)) {
1828  res = get_bits(gb, 4);
1829  skip_bits(gb, 10 * (res + 1));
1830  }
1831 
/* A negative remainder means the superframe read past the end of its input. */
1832  if (get_bits_left(gb) < 0) {
/* NOTE(review): listing line 1833 is absent from this extract, so one
 * statement executed before this return is not visible here. */
1834  return AVERROR_INVALIDDATA;
1835  }
1836 
1837  *got_frame_ptr = 1;
1838 
1839  /* Update history */
/* The last s->lsps synth samples and the last s->history_nsamples excitation
 * samples written by this call become the history read at the top of the
 * next call. */
1840  memcpy(s->prev_lsps, lsps[2],
1841  s->lsps * sizeof(*s->prev_lsps));
1842  memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
1843  s->lsps * sizeof(*synth));
1844  memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
1845  s->history_nsamples * sizeof(*excitation));
/* zero_exc_pf is shifted within itself, hence memmove rather than memcpy. */
1846  if (s->do_apf)
1847  memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
1848  s->history_nsamples * sizeof(*s->zero_exc_pf));
1849 
1850  return 0;
1851 }
1852 
1853 /**
1854  * Parse the packet header at the start of each packet (input data to this
1855  * decoder).
1856  *
1857  * @param s WMA Voice decoding context private data
1858  * @return <0 on error, nb_superframes on success.
1859  */
1861 {
1862  GetBitContext *gb = &s->gb;
1863  unsigned int res, n_superframes = 0;
1864 
1865  skip_bits(gb, 4); // packet sequence number
1866  s->has_residual_lsps = get_bits1(gb);
1867  do {
1868  if (get_bits_left(gb) < 6 + s->spillover_bitsize)
1869  return AVERROR_INVALIDDATA;
1870 
1871  res = get_bits(gb, 6); // number of superframes per packet
1872  // (minus first one if there is spillover)
1873  n_superframes += res;
1874  } while (res == 0x3F);
1875  s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
1876 
1877  return get_bits_left(gb) >= 0 ? n_superframes : AVERROR_INVALIDDATA;
1878 }
1879 
1880 /**
1881  * Copy (unaligned) bits from gb/data/size to pb.
1882  *
1883  * @param pb target buffer to copy bits into
1884  * @param data source buffer to copy bits from
1885  * @param size size of the source data, in bytes
1886  * @param gb bit I/O context specifying the current position in the source.
1887  * data. This function might use this to align the bit position to
1888  * a whole-byte boundary before calling #ff_copy_bits() on aligned
1889  * source data
1890  * @param nbits the amount of bits to copy from source to target
1891  *
1892  * @note after calling this function, the current position in the input bit
1893  * I/O context is undefined.
1894  */
1895 static void copy_bits(PutBitContext *pb,
1896  const uint8_t *data, int size,
1897  GetBitContext *gb, int nbits)
1898 {
1899  int rmn_bytes, rmn_bits;
1900 
1901  rmn_bits = rmn_bytes = get_bits_left(gb);
1902  if (rmn_bits < nbits)
1903  return;
1904  if (nbits > put_bits_left(pb))
1905  return;
1906  rmn_bits &= 7; rmn_bytes >>= 3;
1907  if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
1908  put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
1909  ff_copy_bits(pb, data + size - rmn_bytes,
1910  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
1911 }
1912 
1913 /**
1914  * Packet decoding: a packet is anything that the (ASF) demuxer contains,
1915  * and we expect that the demuxer / application provides it to us as such
1916  * (else you'll probably get garbage as output). Every packet has a size of
1917  * ctx->block_align bytes, starts with a packet header (see
1918  * #parse_packet_header()), and then a series of superframes. Superframe
1919  * boundaries may exceed packets, i.e. superframes can split data over
1920  * multiple (two) packets.
1921  *
1922  * For more information about frames, see #synth_superframe().
1923  */
/* NOTE(review): the listing numbers jump from 1923 to 1925 and from 1926 to
 * 1928, so the first line of this function's signature and one line at the
 * top of its body are absent from this extract. The code below uses `ctx`,
 * `frame` and `s`, which are presumably declared on those lines - confirm
 * against the complete file. */
1925  int *got_frame_ptr, AVPacket *avpkt)
1926 {
1928  GetBitContext *gb = &s->gb;
1929  const uint8_t *buf = avpkt->data;
1930  uint8_t dummy[1];
1931  int size, res, pos;
1932 
1933  /* Packets are sometimes a multiple of ctx->block_align, with a packet
1934  * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
1935  * feeds us ASF packets, which may concatenate multiple "codec" packets
1936  * in a single "muxer" packet, so we artificially emulate that by
1937  * capping the packet size at ctx->block_align. */
/* The loop has an empty body: it subtracts block_align from size until size
 * is no larger than block_align. */
1938  for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
/* For an empty packet the reader is pointed at a local one-byte buffer
 * instead of avpkt->data (presumably so it is never given a NULL pointer -
 * confirm). */
1939  buf = size ? buf : dummy;
1940  res = init_get_bits8(&s->gb, buf, size);
1941  if (res < 0)
1942  return res;
1943 
1944  /* size == ctx->block_align is used to indicate whether we are dealing with
1945  * a new packet or a packet of which we already read the packet header
1946  * previously. */
/* The test below is also true for size == 0, which is handled separately
 * inside the branch. */
1947  if (!(size % ctx->block_align)) { // new packet header
1948  if (!size) {
/* Empty packet: there is no header to parse, so no spillover and no
 * superframes are announced. */
1949  s->spillover_nbits = 0;
1950  s->nb_superframes = 0;
1951  } else {
1952  if ((res = parse_packet_header(s)) < 0)
1953  return res;
1954  s->nb_superframes = res;
1955  }
1956 
1957  /* If the packet header specifies a s->spillover_nbits, then we want
1958  * to push out all data of the previous packet (+ spillover) before
1959  * continuing to parse new superframes in the current packet. */
1960  if (s->sframe_cache_size > 0) {
1961  int cnt = get_bits_count(gb);
/* Clamp the spillover length to the bits that remain in the packet. */
1962  if (cnt + s->spillover_nbits > avpkt->size * 8) {
1963  s->spillover_nbits = avpkt->size * 8 - cnt;
1964  }
/* Append the spillover bits to the cached bits and decode the completed
 * superframe from the cache. */
1965  copy_bits(&s->pb, buf, size, gb, s->spillover_nbits);
1966  flush_put_bits(&s->pb);
1967  s->sframe_cache_size += s->spillover_nbits;
1968  if ((res = synth_superframe(ctx, frame, got_frame_ptr)) == 0 &&
1969  *got_frame_ptr) {
/* Report the whole bytes consumed (header + spillover) and remember the
 * leftover bit offset so the next call can skip it. */
1970  cnt += s->spillover_nbits;
1971  s->skip_bits_next = cnt & 7;
1972  res = cnt >> 3;
1973  return res;
1974  } else
1975  skip_bits_long (gb, s->spillover_nbits - cnt +
1976  get_bits_count(gb)); // resync
1977  } else if (s->spillover_nbits) {
1978  skip_bits_long(gb, s->spillover_nbits); // resync
1979  }
1980  } else if (s->skip_bits_next)
1981  skip_bits(gb, s->skip_bits_next);
1982 
1983  /* Try parsing superframes in current packet */
1984  s->sframe_cache_size = 0;
1985  s->skip_bits_next = 0;
1986  pos = get_bits_left(gb);
/* Post-decrement: the comparison sees the count before it is decremented,
 * while the "else if" below sees the decremented value. With one superframe
 * left the count becomes 0 and control falls through to the caching branch. */
1987  if (s->nb_superframes-- == 0) {
1988  *got_frame_ptr = 0;
1989  return size;
1990  } else if (s->nb_superframes > 0) {
1991  if ((res = synth_superframe(ctx, frame, got_frame_ptr)) < 0) {
1992  return res;
1993  } else if (*got_frame_ptr) {
/* Return the whole bytes consumed so far; the sub-byte remainder is skipped
 * at the start of the next call. */
1994  int cnt = get_bits_count(gb);
1995  s->skip_bits_next = cnt & 7;
1996  res = cnt >> 3;
1997  return res;
1998  }
1999  } else if ((s->sframe_cache_size = pos) > 0) {
2000  /* ... cache it for spillover in next packet */
2001  init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
2002  copy_bits(&s->pb, buf, size, gb, s->sframe_cache_size);
2003  // FIXME bad - just copy bytes as whole and use the
2004  // skip_bits_next field
2005  }
2006 
2007  return size;
2008 }
2009 
/* NOTE(review): listing lines 2010 and 2012 are absent from this extract, so
 * the function header and the statement that introduces `s` are not visible
 * here. The decoder descriptor in this file installs wmavoice_decode_end as
 * its .close callback, so this body presumably belongs to that function -
 * confirm against the complete file. */
2011 {
2013 
/* The four transform contexts are released only when s->do_apf is set. */
2014  if (s->do_apf) {
2015  av_tx_uninit(&s->rdft);
2016  av_tx_uninit(&s->irdft);
2017  av_tx_uninit(&s->dct);
2018  av_tx_uninit(&s->dst);
2019  }
2020 
2021  return 0;
2022 }
2023 
/* Codec descriptor for the WMA Voice decoder.
 * NOTE(review): listing lines 2024, 2030, 2032 and 2037 are absent from this
 * extract: the opening line of the initializer, two entries between
 * .priv_data_size and .p.capabilities, and the remainder of the
 * .p.capabilities expression. Confirm them against the complete file. */
2025  .p.name = "wmavoice",
2026  CODEC_LONG_NAME("Windows Media Audio Voice"),
2027  .p.type = AVMEDIA_TYPE_AUDIO,
2028  .p.id = AV_CODEC_ID_WMAVOICE,
2029  .priv_data_size = sizeof(WMAVoiceContext),
2031  .close = wmavoice_decode_end,
2033  .p.capabilities =
/* AV_CODEC_CAP_SUBFRAMES is only OR-ed in when FF_API_SUBFRAMES is enabled. */
2034 #if FF_API_SUBFRAMES
2035  AV_CODEC_CAP_SUBFRAMES |
2036 #endif
2038  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
2039  .flush = wmavoice_flush,
2040 };
WMAVoiceContext::has_residual_lsps
int has_residual_lsps
if set, superframes contain one set of LSPs that cover all frames, encoded as independent and residua...
Definition: wmavoice.c:193
skip_bits_long
static void skip_bits_long(GetBitContext *s, int n)
Skips the specified number of bits.
Definition: get_bits.h:278
AMRFixed::x
int x[10]
Definition: acelp_vectors.h:55
wmavoice_std_codebook
static const float wmavoice_std_codebook[1000]
Definition: wmavoice_data.h:2585
interpol
static int interpol(MBContext *s, uint32_t *color, int x, int y, int linesize)
Definition: vsrc_mandelbrot.c:186
MAX_LSPS
#define MAX_LSPS
maximum filter order
Definition: wmavoice.c:49
WMAVoiceContext::aw_next_pulse_off_cache
int aw_next_pulse_off_cache
the position (relative to start of the second block) at which pulses should start to be positioned,...
Definition: wmavoice.c:242
WMAVoiceContext::max_pitch_val
int max_pitch_val
max value + 1 for pitch parsing
Definition: wmavoice.c:165
av_clip
#define av_clip
Definition: common.h:100
aw_pulse_set2
static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply second set of pitch-adaptive window pulses.
Definition: wmavoice.c:1103
FF_CODEC_CAP_INIT_CLEANUP
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: codec_internal.h:43
acelp_vectors.h
get_bits_left
static int get_bits_left(GetBitContext *gb)
Definition: get_bits.h:695
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
ACB_TYPE_ASYMMETRIC
@ ACB_TYPE_ASYMMETRIC
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
Definition: wmavoice.c:71
wmavoice_dq_lsp10i
static const uint8_t wmavoice_dq_lsp10i[0xf00]
Definition: wmavoice_data.h:33
mem_internal.h
WMAVoiceContext::tilted_lpcs_pf
float tilted_lpcs_pf[0x82]
aligned buffer for LPC tilting
Definition: wmavoice.c:280
out
FILE * out
Definition: movenc.c:55
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:251
thread.h
frame_descs
static const struct frame_type_desc frame_descs[17]
AVTXContext
Definition: tx_priv.h:235
wmavoice_dq_lsp16r3
static const uint8_t wmavoice_dq_lsp16r3[0x600]
Definition: wmavoice_data.h:1526
dequant_lsps
static void dequant_lsps(double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
Dequantize LSPs.
Definition: wmavoice.c:875
init_put_bits
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:62
WMAVoiceContext::excitation_history
float excitation_history[MAX_SIGNAL_HISTORY]
cache of the signal of previous superframes, used as a history for signal generation
Definition: wmavoice.c:252
get_bits_count
static int get_bits_count(const GetBitContext *s)
Definition: get_bits.h:266
av_log2_16bit
int av_log2_16bit(unsigned v)
Definition: intmath.c:31
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:389
put_bits
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:223
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
aw_pulse_set1
static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply first set of pitch-adaptive window pulses.
Definition: wmavoice.c:1193
ACB_TYPE_HAMMING
@ ACB_TYPE_HAMMING
Per-block pitch with signal generation using a Hamming sinc window function.
Definition: wmavoice.c:76
ff_acelp_apply_order_2_transfer_function
void ff_acelp_apply_order_2_transfer_function(float *out, const float *in, const float zero_coeffs[2], const float pole_coeffs[2], float gain, float mem[2], int n)
Apply an order 2 rational transfer function in-place.
Definition: acelp_filters.c:121
AVPacket::data
uint8_t * data
Definition: packet.h:539
pRNG
static int pRNG(int frame_cntr, int block_num, int block_size)
Generate a random number from frame_cntr and block_idx, which will live in the range [0,...
Definition: wmavoice.c:1254
ff_wmavoice_decoder
const FFCodec ff_wmavoice_decoder
Definition: wmavoice.c:2024
table
static const uint16_t table[]
Definition: prosumer.c:203
data
const char data[16]
Definition: mxf.c:149
WMAVoiceContext::silence_gain
float silence_gain
set for use in blocks if ACB_TYPE_NONE
Definition: wmavoice.c:227
expf
#define expf(x)
Definition: libm.h:283
WMAVoiceContext::denoise_filter_cache_size
int denoise_filter_cache_size
samples in denoise_filter_cache
Definition: wmavoice.c:279
wmavoice_denoise_power_table
static const float wmavoice_denoise_power_table[12][64]
LUT for f(x,y) = pow((y + 6.9) / 64, 0.025 * (x + 1)).
Definition: wmavoice_data.h:3064
wmavoice_gain_codebook_acb
static const float wmavoice_gain_codebook_acb[128]
Definition: wmavoice_data.h:2874
FFCodec
Definition: codec_internal.h:127
base
uint8_t base
Definition: vp3data.h:128
AVComplexFloat
Definition: tx.h:27
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
FCB_TYPE_AW_PULSES
@ FCB_TYPE_AW_PULSES
Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
Definition: wmavoice.c:90
ff_celp_lp_synthesis_filterf
void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP synthesis filter.
Definition: celp_filters.c:85
WMAVoiceContext::aw_idx_is_ext
int aw_idx_is_ext
whether the AW index was encoded in 8 bits (instead of 6)
Definition: wmavoice.c:229
init_get_bits
static int init_get_bits(GetBitContext *s, const uint8_t *buffer, int bit_size)
Initialize GetBitContext.
Definition: get_bits.h:514
av_tx_init
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...
Definition: tx.c:903
WMAVoiceContext::dc_level
int dc_level
Predicted amount of DC noise, based on which a DC removal filter is used.
Definition: wmavoice.c:156
wmavoice_dq_lsp16i1
static const uint8_t wmavoice_dq_lsp16i1[0x640]
Definition: wmavoice_data.h:420
WMAVoiceContext::block_conv_table
uint16_t block_conv_table[4]
boundaries for block pitch unit/scale conversion
Definition: wmavoice.c:177
frame_type_desc::log_n_blocks
uint8_t log_n_blocks
log2(n_blocks)
Definition: wmavoice.c:103
skip_bits
static void skip_bits(GetBitContext *s, int n)
Definition: get_bits.h:381
WMAVoiceContext::aw_pulse_range
int aw_pulse_range
the range over which aw_pulse_set1() can apply the pulse, relative to the value in aw_first_pulse_off...
Definition: wmavoice.c:231
get_bits
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:335
ff_copy_bits
void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
Copy the content of src to the bitstream.
Definition: bitstream.c:49
FFCodec::p
AVCodec p
The public AVCodec.
Definition: codec_internal.h:131
av_ceil_log2
#define av_ceil_log2
Definition: common.h:97
FCB_TYPE_HARDCODED
@ FCB_TYPE_HARDCODED
hardcoded (fixed) codebook with per-block gain values
Definition: wmavoice.c:88
AMRFixed::pitch_fac
float pitch_fac
Definition: acelp_vectors.h:59
dummy
int dummy
Definition: motion.c:66
GetBitContext
Definition: get_bits.h:108
MULH
#define MULH
Definition: mathops.h:42
wmavoice_flush
static av_cold void wmavoice_flush(AVCodecContext *ctx)
Definition: wmavoice.c:329
put_bits_left
static int put_bits_left(PutBitContext *s)
Definition: put_bits.h:125
frame_type_desc::n_blocks
uint8_t n_blocks
amount of blocks per frame (each block contains 160/n_blocks samples)
Definition: wmavoice.c:101
val
static double val(void *priv, double ch)
Definition: aeval.c:77
WMAVoiceContext::irdft_fn
av_tx_fn irdft_fn
postfilter (for denoise filter)
Definition: wmavoice.c:267
dequant_lsp10i
static void dequant_lsp10i(GetBitContext *gb, double *lsps)
Parse 10 independently-coded LSPs.
Definition: wmavoice.c:906
synth_block
static void synth_block(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
Parse data in a single block.
Definition: wmavoice.c:1455
MAX_SFRAMESIZE
#define MAX_SFRAMESIZE
maximum number of samples per superframe
Definition: wmavoice.c:55
wmavoice_gain_codebook_fcb
static const float wmavoice_gain_codebook_fcb[128]
Definition: wmavoice_data.h:2893
a2
static double a2(void *priv, double x, double y)
Definition: vf_xfade.c:2030
WMAVoiceContext::denoise_filter_cache
float denoise_filter_cache[MAX_FRAMESIZE]
Definition: wmavoice.c:278
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
WMAVoiceContext::sin
float sin[511]
Definition: wmavoice.c:270
calc_input_response
static void calc_input_response(WMAVoiceContext *s, float *lpcs_src, int fcb_type, float *coeffs_dst, int remainder)
Derive denoise filter coefficients (in real domain) from the LPCs.
Definition: wmavoice.c:613
AV_CODEC_ID_WMAVOICE
@ AV_CODEC_ID_WMAVOICE
Definition: codec_id.h:482
lrint
#define lrint
Definition: tablegen.h:53
MUL16
#define MUL16(ra, rb)
Definition: mathops.h:87
ff_thread_once
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:205
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:209
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
init_get_bits8
static int init_get_bits8(GetBitContext *s, const uint8_t *buffer, int byte_size)
Initialize GetBitContext.
Definition: get_bits.h:545
MAX_LSPS_ALIGN16
#define MAX_LSPS_ALIGN16
same as MAX_LSPS; needs to be multiple of 16 for ASM input buffer alignment
Definition: wmavoice.c:50
av_tx_fn
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:151
av_memcpy_backptr
void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
Overlapping memcpy() implementation.
Definition: mem.c:447
float
float
Definition: af_crystalizer.c:122
wmavoice_dq_lsp10r
static const uint8_t wmavoice_dq_lsp10r[0x1400]
Definition: wmavoice_data.h:749
FF_CODEC_DECODE_CB
#define FF_CODEC_DECODE_CB(func)
Definition: codec_internal.h:311
WMAVoiceContext::sframe_cache_size
int sframe_cache_size
set to >0 if we have data from an (incomplete) superframe from a previous packet that spilled over in...
Definition: wmavoice.c:205
WMAVoiceContext::dst
AVTXContext * dst
contexts for phase shift (in Hilbert
Definition: wmavoice.c:268
s
#define s(width, name)
Definition: cbs_vp9.c:198
WMAVoiceContext::lsp_q_mode
int lsp_q_mode
defines quantizer defaults [0, 1]
Definition: wmavoice.c:160
frame_type_desc::fcb_type
uint8_t fcb_type
Fixed codebook type (FCB_TYPE_*)
Definition: wmavoice.c:105
log_range
#define log_range(var, assign)
WMAVoiceContext::prev_lsps
double prev_lsps[MAX_LSPS]
LSPs of the last frame of the previous superframe.
Definition: wmavoice.c:221
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
WMAVoiceContext::aw_n_pulses
int aw_n_pulses[2]
number of AW-pulses in each block; note that this number can be negative (in which case it basically ...
Definition: wmavoice.c:237
AMRFixed
Sparse representation for the algebraic codebook (fixed) vector.
Definition: acelp_vectors.h:53
bits
uint8_t bits
Definition: vp3data.h:128
adaptive_gain_control
static void adaptive_gain_control(float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
Adaptive gain control (as used in postfilter).
Definition: wmavoice.c:513
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
wmavoice_lsp16_intercoeff_a
static const float wmavoice_lsp16_intercoeff_a[32][2][16]
Definition: wmavoice_data.h:2047
ctx
AVFormatContext * ctx
Definition: movenc.c:49
decode.h
FCB_TYPE_EXC_PULSES
@ FCB_TYPE_EXC_PULSES
Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.
Definition: wmavoice.c:92
get_bits.h
wmavoice_mean_lsf10
static const double wmavoice_mean_lsf10[2][10]
Definition: wmavoice_data.h:2565
WMAVoiceContext::spillover_nbits
int spillover_nbits
number of bits of the previous packet's last superframe preceding this packet's first full superframe...
Definition: wmavoice.c:189
UMULH
static av_always_inline unsigned UMULH(unsigned a, unsigned b)
Definition: mathops.h:67
AMRFixed::y
float y[10]
Definition: acelp_vectors.h:56
WMAVoiceContext::denoise_coeffs_pf
float denoise_coeffs_pf[0x82]
aligned buffer for denoise coefficients
Definition: wmavoice.c:282
wmavoice_gain_silence
static const float wmavoice_gain_silence[256]
Definition: wmavoice_data.h:2788
PutBitContext
Definition: put_bits.h:50
WMAVoiceContext::vbm_tree
int8_t vbm_tree[25]
converts VLC codes to frame type
Definition: wmavoice.c:141
CODEC_LONG_NAME
#define CODEC_LONG_NAME(str)
Definition: codec_internal.h:296
WMAVoiceContext::dct_fn
av_tx_fn dct_fn
Definition: wmavoice.c:269
wmavoice_dq_lsp16i3
static const uint8_t wmavoice_dq_lsp16i3[0x300]
Definition: wmavoice_data.h:682
if
if(ret)
Definition: filter_design.txt:179
AMRFixed::no_repeat_mask
int no_repeat_mask
Definition: acelp_vectors.h:57
postfilter
static void postfilter(WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
Averaging projection filter, the postfilter used in WMAVoice.
Definition: wmavoice.c:821
AV_ONCE_INIT
#define AV_ONCE_INIT
Definition: thread.h:203
NULL
#define NULL
Definition: coverity.c:32
sizes
static const int sizes[][2]
Definition: img2dec.c:60
WMAVoiceContext::history_nsamples
int history_nsamples
number of samples in history for signal prediction (through ACB)
Definition: wmavoice.c:146
WMAVoiceContext::synth_history
float synth_history[MAX_LSPS]
see excitation_history
Definition: wmavoice.c:256
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:156
AVERROR_PATCHWELCOME
#define AVERROR_PATCHWELCOME
Not yet implemented in FFmpeg, patches welcome.
Definition: error.h:64
last_coeff
static const uint8_t last_coeff[3]
Definition: qdm2data.h:187
WMAVoiceContext::denoise_strength
int denoise_strength
strength of denoising in Wiener filter [0-11]
Definition: wmavoice.c:152
MAX_SIGNAL_HISTORY
#define MAX_SIGNAL_HISTORY
maximum excitation signal history
Definition: wmavoice.c:54
WMAVoiceContext::sframe_cache
uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE+AV_INPUT_BUFFER_PADDING_SIZE]
cache for superframe data split over multiple packets
Definition: wmavoice.c:202
get_bits1
static unsigned int get_bits1(GetBitContext *s)
Definition: get_bits.h:388
dequant_lsp10r
static void dequant_lsp10r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:932
WMAVoiceContext::pitch_nbits
int pitch_nbits
number of bits used to specify the pitch value in the frame header
Definition: wmavoice.c:166
WMAVoiceContext::block_delta_pitch_nbits
int block_delta_pitch_nbits
number of bits used to specify the delta pitch between this and the last block's pitch value,...
Definition: wmavoice.c:171
kalman_smoothen
static int kalman_smoothen(WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
Kalman smoothing function.
Definition: wmavoice.c:554
WMAVoiceContext::skip_bits_next
int skip_bits_next
number of bits to skip at the next call to wmavoice_decode_packet() (since they're part of the previo...
Definition: wmavoice.c:198
sqrtf
static __device__ float sqrtf(float a)
Definition: cuda_runtime.h:184
abs
#define abs(x)
Definition: cuda_runtime.h:35
WMAVoiceContext::dst_fn
av_tx_fn dst_fn
transform, part of postfilter)
Definition: wmavoice.c:269
WMAVoiceContext::rdft
AVTXContext * rdft
Definition: wmavoice.c:266
celp_filters.h
MAX_FRAMESIZE
#define MAX_FRAMESIZE
maximum number of samples per frame
Definition: wmavoice.c:53
av_clipf
av_clipf
Definition: af_crystalizer.c:122
MAX_FRAMES
#define MAX_FRAMES
maximum number of frames per superframe
Definition: wmavoice.c:52
get_vlc2
static av_always_inline int get_vlc2(GetBitContext *s, const VLCElem *table, int bits, int max_depth)
Parse a vlc code.
Definition: get_bits.h:652
decode_vbmtree
static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
Set up the variable bit mode (VBM) tree from container extradata.
Definition: wmavoice.c:301
AVOnce
#define AVOnce
Definition: thread.h:202
aw_parse_coords
static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between th...
Definition: wmavoice.c:1051
wmavoice_init_static_data
static av_cold void wmavoice_init_static_data(void)
Definition: wmavoice.c:315
float_dsp.h
WMAVoiceContext::dcf_mem
float dcf_mem[2]
DC filter history.
Definition: wmavoice.c:274
ff_get_buffer
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
Get a buffer for a frame.
Definition: decode.c:1697
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:368
AV_CODEC_CAP_DR1
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() or get_encode_buffer() for allocating buffers and supports custom allocators.
Definition: codec.h:52
parse_packet_header
static int parse_packet_header(WMAVoiceContext *s)
Parse the packet header at the start of each packet (input data to this decoder).
Definition: wmavoice.c:1860
AV_TX_FLOAT_DCT_I
@ AV_TX_FLOAT_DCT_I
Discrete Cosine Transform I.
Definition: tx.h:116
AVPacket::size
int size
Definition: packet.h:540
powf
#define powf(x, y)
Definition: libm.h:50
AVChannelLayout
An AVChannelLayout holds information about the channel layout of audio data.
Definition: channel_layout.h:317
codec_internal.h
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:109
WMAVoiceContext::spillover_bitsize
int spillover_bitsize
number of bits used to specify spillover_nbits in the packet header = ceil(log2(ctx->block_align << 3...
Definition: wmavoice.c:143
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
WMAVoiceContext::pb
PutBitContext pb
bitstream writer for sframe_cache
Definition: wmavoice.c:210
WMAVoiceContext::last_pitch_val
int last_pitch_val
pitch value of the previous frame
Definition: wmavoice.c:223
size
int size
Definition: twinvq_data.h:10344
wiener_denoise
static void wiener_denoise(WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.
Definition: wmavoice.c:737
VLCElem
Definition: vlc.h:32
wmavoice_lsp10_intercoeff_b
static const float wmavoice_lsp10_intercoeff_b[32][2][10]
Definition: wmavoice_data.h:1852
range
enum AVColorRange range
Definition: mediacodec_wrapper.c:2594
dequant_lsp16i
static void dequant_lsp16i(GetBitContext *gb, double *lsps)
Parse 16 independently-coded LSPs.
Definition: wmavoice.c:968
wmavoice_dq_lsp16r1
static const uint8_t wmavoice_dq_lsp16r1[0x500]
Definition: wmavoice_data.h:1264
WMAVoiceContext::aw_first_pulse_off
int aw_first_pulse_off[2]
index of first sample to which to apply AW-pulses, or -0xff if unset
Definition: wmavoice.c:240
WMAVoiceContext::zero_exc_pf
float zero_exc_pf[MAX_SIGNAL_HISTORY+MAX_SFRAMESIZE]
zero filter output (i.e.
Definition: wmavoice.c:275
sinewin.h
wmavoice_dq_lsp16r2
static const uint8_t wmavoice_dq_lsp16r2[0x500]
Definition: wmavoice_data.h:1395
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
frame_type_desc
Description of frame types.
Definition: wmavoice.c:100
WMAVoiceContext::block_pitch_range
int block_pitch_range
range of the block pitch
Definition: wmavoice.c:170
stabilize_lsps
static void stabilize_lsps(double *lsps, int num)
Ensure minimum value for first item, maximum value for last value, proper spacing between each value ...
Definition: wmavoice.c:1678
M_PI
#define M_PI
Definition: mathematics.h:67
ff_tilt_compensation
void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
Apply tilt compensation filter, 1 - tilt * z-1.
Definition: acelp_filters.c:138
av_tx_uninit
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
Definition: tx.c:295
wmavoice_energy_table
static const float wmavoice_energy_table[128]
LUT for 1.071575641632 * pow(1.0331663, n - 127)
Definition: wmavoice_data.h:3026
ff_sine_window_init
void ff_sine_window_init(float *window, int n)
Generate a sine window.
Definition: sinewin_tablegen.h:59
wmavoice_decode_init
static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
Set up decoder with parameters from demuxer (extradata etc.).
Definition: wmavoice.c:360
WMAVoiceContext::block_delta_pitch_hrange
int block_delta_pitch_hrange
1/2 range of the delta (full range is from -this to +this-1)
Definition: wmavoice.c:175
wmavoice_ipol2_coeffs
static const float wmavoice_ipol2_coeffs[32]
Hamming-window sinc function (num = 32, x = [ 0, 31 ]): (0.54 + 0.46 * cos(2 * M_PI * x / (num - 1)))...
Definition: wmavoice_data.h:3012
WMAVoiceContext::pitch_diff_sh16
int pitch_diff_sh16
((cur_pitch_val - last_pitch_val) << 16) / MAX_FRAMESIZE
Definition: wmavoice.c:225
WMAVoiceContext::gain_pred_err
float gain_pred_err[6]
cache for gain prediction
Definition: wmavoice.c:251
WMAVoiceContext::rdft_fn
av_tx_fn rdft_fn
Definition: wmavoice.c:267
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
WMAVoiceContext::nb_superframes
int nb_superframes
number of superframes in current packet
Definition: wmavoice.c:250
WMAVoiceContext::cos
float cos[511]
8-bit cosine/sine windows over [-pi,pi] range
Definition: wmavoice.c:270
WMAVoiceContext::denoise_tilt_corr
int denoise_tilt_corr
Whether to apply tilt correction to the Wiener filter coefficients (postfilter)
Definition: wmavoice.c:154
delta
float delta
Definition: vorbis_enc_data.h:430
wmavoice_lsp16_intercoeff_b
static const float wmavoice_lsp16_intercoeff_b[32][2][16]
Definition: wmavoice_data.h:2306
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
av_frame_unref
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:622
acelp_filters.h
ff_weighted_vector_sumf
void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b, float weight_coeff_a, float weight_coeff_b, int length)
float implementation of weighted sum of two vectors.
Definition: acelp_vectors.c:182
WMAVoiceContext::lsp_def_mode
int lsp_def_mode
defines different sets of LSP defaults [0, 1]
Definition: wmavoice.c:161
wmavoice_gain_universal
static const float wmavoice_gain_universal[64]
Definition: wmavoice_data.h:2855
AVCodec::name
const char * name
Name of the codec implementation.
Definition: codec.h:194
len
int len
Definition: vorbis_enc_data.h:426
WMAVoiceContext::synth_filter_out_buf
float synth_filter_out_buf[0x80+MAX_LSPS_ALIGN16]
aligned buffer for postfilter speech synthesis
Definition: wmavoice.c:284
tilt_factor
static float tilt_factor(const float *lpcs, int n_lpcs)
Get the tilt factor of a formant filter from its transfer function.
Definition: wmavoice.c:600
VLC_NBITS
#define VLC_NBITS
number of bits to read per VLC iteration
Definition: wmavoice.c:59
wmavoice_data.h
Windows Media Voice (WMAVoice) tables.
avcodec.h
WMAVoiceContext::min_pitch_val
int min_pitch_val
base value for pitch parsing code
Definition: wmavoice.c:164
WMAVoiceContext::last_acb_type
int last_acb_type
frame type [0-2] of the previous frame
Definition: wmavoice.c:224
WMAVoiceContext::dct
AVTXContext * dct
Definition: wmavoice.c:268
av_uninit
#define av_uninit(x)
Definition: attributes.h:154
ret
ret
Definition: filter_design.txt:187
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
lsp.h
ff_celp_lp_zero_synthesis_filterf
void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP zero synthesis filter.
Definition: celp_filters.c:200
WMAVoiceContext::do_apf
int do_apf
whether to apply the averaged projection filter (APF)
Definition: wmavoice.c:150
pos
unsigned int pos
Definition: spdifenc.c:414
AMRFixed::n
int n
Definition: acelp_vectors.h:54
wmavoice_dq_lsp16i2
static const uint8_t wmavoice_dq_lsp16i2[0x3c0]
Definition: wmavoice_data.h:583
AV_INPUT_BUFFER_PADDING_SIZE
#define AV_INPUT_BUFFER_PADDING_SIZE
Definition: defs.h:40
wmavoice_mean_lsf16
static const double wmavoice_mean_lsf16[2][16]
Definition: wmavoice_data.h:2574
AV_RL32
uint64_t_TMPL AV_WL64 unsigned int_TMPL AV_RL32
Definition: bytestream.h:92
U
#define U(x)
Definition: vpx_arith.h:37
AV_TX_FLOAT_RDFT
@ AV_TX_FLOAT_RDFT
Real to complex and complex to real DFTs.
Definition: tx.h:90
wmavoice_decode_end
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
Definition: wmavoice.c:2010
WMAVoiceContext::lsps
int lsps
number of LSPs per frame [10 or 16]
Definition: wmavoice.c:159
AVCodecContext
main external API structure.
Definition: avcodec.h:451
wmavoice_decode_packet
static int wmavoice_decode_packet(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt)
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer...
Definition: wmavoice.c:1924
channel_layout.h
WMAVoiceContext::block_pitch_nbits
int block_pitch_nbits
number of bits used to specify the first block's pitch value
Definition: wmavoice.c:168
AV_TX_FLOAT_DST_I
@ AV_TX_FLOAT_DST_I
Discrete Sine Transform I.
Definition: tx.h:128
ACB_TYPE_NONE
@ ACB_TYPE_NONE
no adaptive codebook (only hardcoded fixed)
Definition: wmavoice.c:70
synth_superframe
static int synth_superframe(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr)
Synthesize output samples for a single superframe.
Definition: wmavoice.c:1724
av_channel_layout_uninit
void av_channel_layout_uninit(AVChannelLayout *channel_layout)
Free any allocated data in the channel layout and reset the channel count to 0.
Definition: channel_layout.c:440
WMAVoiceContext::frame_cntr
int frame_cntr
current frame index [0 - 0xFFFE]; is only used for comfort noise in pRNG()
Definition: wmavoice.c:248
wmavoice_ipol1_coeffs
static const float wmavoice_ipol1_coeffs[17 *9]
Definition: wmavoice_data.h:2960
values
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return values
Definition: filter_design.txt:263
ff_set_fixed_vector
void ff_set_fixed_vector(float *out, const AMRFixed *in, float scale, int size)
Add fixed vector to an array from a sparse representation.
Definition: acelp_vectors.c:224
mean_lsf
static const float mean_lsf[10]
Definition: siprdata.h:27
AV_CODEC_CAP_DELAY
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and correct output.
Definition: codec.h:76
samples
Filter the word “frame” indicates either a video frame or a group of audio samples
Definition: filter_design.txt:8
copy_bits
static void copy_bits(PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
Copy (unaligned) bits from gb/data/size to pb.
Definition: wmavoice.c:1895
avpriv_scalarproduct_float_c
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
Return the scalar product of two vectors of floats.
Definition: float_dsp.c:124
synth_frame
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
Synthesize output samples for a single frame.
Definition: wmavoice.c:1497
mem.h
M_LN10
#define M_LN10
Definition: mathematics.h:49
WMAVoiceContext::gb
GetBitContext gb
packet bitreader.
Definition: wmavoice.c:137
avpriv_request_sample
#define avpriv_request_sample(...)
Definition: tableprint_vlc.h:36
synth_block_fcb_acb
static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
Parse FCB/ACB signal for a single block.
Definition: wmavoice.c:1321
flush_put_bits
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:143
AV_CHANNEL_LAYOUT_MONO
#define AV_CHANNEL_LAYOUT_MONO
Definition: channel_layout.h:392
VLC_INIT_STATIC_TABLE_FROM_LENGTHS
#define VLC_INIT_STATIC_TABLE_FROM_LENGTHS(vlc_table, nb_bits, nb_codes, lens, lens_wrap, syms, syms_wrap, syms_size, offset, flags)
Definition: vlc.h:280
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:291
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
AVPacket
This structure stores compressed data.
Definition: packet.h:516
synth_block_hardcoded
static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
Parse hardcoded signal for a single block.
Definition: wmavoice.c:1290
SFRAME_CACHE_MAXSIZE
#define SFRAME_CACHE_MAXSIZE
maximum cache size for frame data that was split over two packets
Definition: wmavoice.c:57
frame_type_vlc
static VLCElem frame_type_vlc[132]
Frame type VLC coding.
Definition: wmavoice.c:64
AMRFixed::pitch_lag
int pitch_lag
Definition: acelp_vectors.h:58
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:482
WMAVoiceContext::irdft
AVTXContext * irdft
contexts for FFT-calculation in the postfilter (for denoise filter)
Definition: wmavoice.c:266
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
wmavoice_lsp10_intercoeff_a
static const float wmavoice_lsp10_intercoeff_a[32][2][10]
Definition: wmavoice_data.h:1657
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:61
a1
static double a1(void *priv, double x, double y)
Definition: vf_xfade.c:2029
dequant_lsp16r
static void dequant_lsp16r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:1001
frame_type_desc::dbl_pulses
uint8_t dbl_pulses
how many pulse vectors have pulse pairs (rather than just one single pulse); only if fcb_type == FCB_TYPE_EXC_PULSES
Definition: wmavoice.c:106
frame_type_desc::acb_type
uint8_t acb_type
Adaptive codebook type (ACB_TYPE_*)
Definition: wmavoice.c:104
MAX_BLOCKS
#define MAX_BLOCKS
maximum number of blocks per frame
Definition: wmavoice.c:48
ff_acelp_lspd2lpc
void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order)
Reconstruct LPC coefficients from the line spectral pair frequencies.
Definition: lsp.c:220
WMAVoiceContext::postfilter_agc
float postfilter_agc
gain control memory, used in adaptive_gain_control()
Definition: wmavoice.c:272
put_bits.h
pulses
static const int8_t pulses[4]
Number of non-zero pulses in the MP-MLQ excitation.
Definition: g723_1.h:260
ff_acelp_interpolatef
void ff_acelp_interpolatef(float *out, const float *in, const float *filter_coeffs, int precision, int frac_pos, int filter_length, int length)
Floating point version of ff_acelp_interpolate()
Definition: acelp_filters.c:80
AVFormatContext::priv_data
void * priv_data
Format private data.
Definition: avformat.h:1328
FCB_TYPE_SILENCE
@ FCB_TYPE_SILENCE
comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain values
Definition: wmavoice.c:85
AV_SAMPLE_FMT_FLT
@ AV_SAMPLE_FMT_FLT
float
Definition: samplefmt.h:60
av_clipd
av_clipd
Definition: af_crystalizer.c:132
tx.h
min
float min
Definition: vorbis_enc_data.h:429
WMAVoiceContext
WMA Voice decoding context.
Definition: wmavoice.c:132