FFmpeg
Data Structures | Macros | Enumerations | Functions | Variables
wmavoice.c File Reference

Windows Media Audio Voice compatible decoder. More...

#include <math.h>
#include "libavutil/channel_layout.h"
#include "libavutil/float_dsp.h"
#include "libavutil/mem.h"
#include "libavutil/mem_internal.h"
#include "libavutil/thread.h"
#include "libavutil/tx.h"
#include "avcodec.h"
#include "codec_internal.h"
#include "decode.h"
#include "get_bits.h"
#include "put_bits.h"
#include "wmavoice_data.h"
#include "celp_filters.h"
#include "acelp_vectors.h"
#include "acelp_filters.h"
#include "lsp.h"
#include "sinewin.h"

Go to the source code of this file.

Data Structures

struct  frame_type_desc
 Description of frame types. More...
 
struct  WMAVoiceContext
 WMA Voice decoding context. More...
 

Macros

#define MAX_BLOCKS   8
 maximum number of blocks per frame More...
 
#define MAX_LSPS   16
 maximum filter order More...
 
#define MAX_LSPS_ALIGN16   16
 same as MAX_LSPS; needs to be multiple of 16 for ASM input buffer alignment More...
 
#define MAX_FRAMES   3
 maximum number of frames per superframe More...
 
#define MAX_FRAMESIZE   160
 maximum number of samples per frame More...
 
#define MAX_SIGNAL_HISTORY   416
 maximum excitation signal history More...
 
#define MAX_SFRAMESIZE   (MAX_FRAMESIZE * MAX_FRAMES)
 maximum number of samples per superframe More...
 
#define SFRAME_CACHE_MAXSIZE   256
 maximum cache size for frame data that was split over two packets More...
 
#define VLC_NBITS   6
 number of bits to read per VLC iteration More...
 
#define log_range(var, assign)
 

Enumerations

enum  { ACB_TYPE_NONE = 0, ACB_TYPE_ASYMMETRIC = 1, ACB_TYPE_HAMMING = 2 }
 Adaptive codebook types. More...
 
enum  { FCB_TYPE_SILENCE = 0, FCB_TYPE_HARDCODED = 1, FCB_TYPE_AW_PULSES = 2, FCB_TYPE_EXC_PULSES = 3 }
 Fixed codebook types. More...
 

Functions

static av_cold int decode_vbmtree (GetBitContext *gb, int8_t vbm_tree[25])
 Set up the variable bit mode (VBM) tree from container extradata. More...
 
static av_cold void wmavoice_init_static_data (void)
 
static av_cold void wmavoice_flush (AVCodecContext *ctx)
 
static av_cold int wmavoice_decode_init (AVCodecContext *ctx)
 Set up decoder with parameters from demuxer (extradata etc.). More...
 
static void dequant_lsps (double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
 Dequantize LSPs. More...
 
static int pRNG (int frame_cntr, int block_num, int block_size)
 Generate a random number from frame_cntr and block_num, which will live in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries). More...
 
static void synth_block_hardcoded (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
 Parse hardcoded signal for a single block. More...
 
static void synth_block_fcb_acb (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
 Parse FCB/ACB signal for a single block. More...
 
static void synth_block (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
 Parse data in a single block. More...
 
static int synth_frame (AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
 Synthesize output samples for a single frame. More...
 
static void stabilize_lsps (double *lsps, int num)
 Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering. More...
 
static int synth_superframe (AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr)
 Synthesize output samples for a single superframe. More...
 
static int parse_packet_header (WMAVoiceContext *s)
 Parse the packet header at the start of each packet (input data to this decoder). More...
 
static void copy_bits (PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
 Copy (unaligned) bits from gb/data/size to pb. More...
 
static int wmavoice_decode_packet (AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt)
 Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output). More...
 
static av_cold int wmavoice_decode_end (AVCodecContext *ctx)
 
Postfilter functions

Postfilter functions (gain control, wiener denoise filter, DC filter, kalman smoothening, plus surrounding code to wrap it)

static void adaptive_gain_control (float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
 Adaptive gain control (as used in postfilter). More...
 
static int kalman_smoothen (WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
 Kalman smoothing function. More...
 
static float tilt_factor (const float *lpcs, int n_lpcs)
 Get the tilt factor of a formant filter from its transfer function. More...
 
static void calc_input_response (WMAVoiceContext *s, float *lpcs_src, int fcb_type, float *coeffs_dst, int remainder)
 Derive denoise filter coefficients (in real domain) from the LPCs. More...
 
static void wiener_denoise (WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
 This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it. More...
 
static void postfilter (WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
 Averaging projection filter, the postfilter used in WMAVoice. More...
 
LSP dequantization routines

LSP dequantization routines, for 10/16LSPs and independent/residual coding.

lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits; lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.

static void dequant_lsp10i (GetBitContext *gb, double *lsps)
 Parse 10 independently-coded LSPs. More...
 
static void dequant_lsp10r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
 Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding). More...
 
static void dequant_lsp16i (GetBitContext *gb, double *lsps)
 Parse 16 independently-coded LSPs. More...
 
static void dequant_lsp16r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
 Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding). More...
 
Pitch-adaptive window coding functions

The next few functions are for pitch-adaptive window coding.

static void aw_parse_coords (WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
 Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between the two blocks in this frame. More...
 
static int aw_pulse_set2 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
 Apply second set of pitch-adaptive window pulses. More...
 
static void aw_pulse_set1 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
 Apply first set of pitch-adaptive window pulses. More...
 

Variables

static VLCElem frame_type_vlc [132]
 Frame type VLC coding. More...
 
static const struct frame_type_desc frame_descs [17]
 
const FFCodec ff_wmavoice_decoder
 

Detailed Description

Windows Media Audio Voice compatible decoder.

Author
Ronald S. Bultje <rsbultje@gmail.com>

Definition in file wmavoice.c.

Macro Definition Documentation

◆ MAX_BLOCKS

#define MAX_BLOCKS   8

maximum number of blocks per frame

Definition at line 48 of file wmavoice.c.

◆ MAX_LSPS

#define MAX_LSPS   16

maximum filter order

Definition at line 49 of file wmavoice.c.

◆ MAX_LSPS_ALIGN16

#define MAX_LSPS_ALIGN16   16

same as MAX_LSPS; needs to be a multiple of 16 for ASM input buffer alignment

Definition at line 50 of file wmavoice.c.

◆ MAX_FRAMES

#define MAX_FRAMES   3

maximum number of frames per superframe

Definition at line 52 of file wmavoice.c.

◆ MAX_FRAMESIZE

#define MAX_FRAMESIZE   160

maximum number of samples per frame

Definition at line 53 of file wmavoice.c.

◆ MAX_SIGNAL_HISTORY

#define MAX_SIGNAL_HISTORY   416

maximum excitation signal history

Definition at line 54 of file wmavoice.c.

◆ MAX_SFRAMESIZE

#define MAX_SFRAMESIZE   (MAX_FRAMESIZE * MAX_FRAMES)

maximum number of samples per superframe

Definition at line 55 of file wmavoice.c.

◆ SFRAME_CACHE_MAXSIZE

#define SFRAME_CACHE_MAXSIZE   256

maximum cache size for frame data that was split over two packets

Definition at line 57 of file wmavoice.c.

◆ VLC_NBITS

#define VLC_NBITS   6

number of bits to read per VLC iteration

Definition at line 59 of file wmavoice.c.

◆ log_range

#define log_range (   var,
  assign 
)
Value:
do { \
float tmp = log10f(assign); var = tmp; \
max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
} while (0)

Enumeration Type Documentation

◆ anonymous enum

anonymous enum

Adaptive codebook types.

Enumerator
ACB_TYPE_NONE 

no adaptive codebook (only hardcoded fixed)

ACB_TYPE_ASYMMETRIC 

adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.

Signal is generated using an asymmetric sinc window function

Note
see wmavoice_ipol1_coeffs
ACB_TYPE_HAMMING 

Per-block pitch with signal generation using a Hamming sinc window function.

Note
see wmavoice_ipol2_coeffs

Definition at line 69 of file wmavoice.c.

◆ anonymous enum

anonymous enum

Fixed codebook types.

Enumerator
FCB_TYPE_SILENCE 

comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain values

FCB_TYPE_HARDCODED 

hardcoded (fixed) codebook with per-block gain values

FCB_TYPE_AW_PULSES 

Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.

FCB_TYPE_EXC_PULSES 

Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.

Definition at line 84 of file wmavoice.c.

Function Documentation

◆ decode_vbmtree()

static av_cold int decode_vbmtree ( GetBitContext *  gb,
int8_t  vbm_tree[25] 
)
static

Set up the variable bit mode (VBM) tree from container extradata.

Parameters
gb: bit I/O context. The bit context (s->gb) should be loaded with byte 23-46 of the container extradata (i.e. the ones containing the VBM tree).
vbm_tree: pointer to array to which the decoded VBM tree will be written.
Returns
0 on success, <0 on error.

Definition at line 301 of file wmavoice.c.

Referenced by wmavoice_decode_init().

◆ wmavoice_init_static_data()

static av_cold void wmavoice_init_static_data ( void  )
static

Definition at line 315 of file wmavoice.c.

Referenced by wmavoice_decode_init().

◆ wmavoice_flush()

static av_cold void wmavoice_flush ( AVCodecContext *  ctx)
static

Definition at line 329 of file wmavoice.c.

Referenced by synth_superframe().

◆ wmavoice_decode_init()

static av_cold int wmavoice_decode_init ( AVCodecContext *  ctx)
static

Set up decoder with parameters from demuxer (extradata etc.).

Extradata layout:

  • byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
  • byte 19-22: flags field (annoyingly in LE; see below for known values),
  • byte 23-46: variable bitmode tree (really just 17 * 3 bits, rest is 0).

Definition at line 360 of file wmavoice.c.

◆ adaptive_gain_control()

static void adaptive_gain_control ( float *  out,
const float *  in,
const float *  speech_synth,
int  size,
float  alpha,
float *  gain_mem
)
static

Adaptive gain control (as used in postfilter).

Identical to ff_adaptive_gain_control() in acelp_vectors.c, except that the energy here is calculated using sum(abs(...)), whereas the other codecs (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).

Parameters
out: output buffer for filtered samples
in: input buffer containing the samples as they are after the postfilter steps so far
speech_synth: input buffer containing speech synth before postfilter
size: input buffer size
alpha: exponential filter factor
gain_mem: pointer to filter memory (single float)

Definition at line 513 of file wmavoice.c.

Referenced by postfilter().

◆ kalman_smoothen()

static int kalman_smoothen ( WMAVoiceContext *  s,
int  pitch,
const float *  in,
float *  out,
int  size 
)
static

Kalman smoothing function.

This function looks pitch +/- 3 samples back into history to find the best fitting curve (the one giving the optimal gain of the two signals, i.e. the highest dot product between the two), and then uses that signal history to smoothen the output of the speech synthesis filter.

Parameters
s: WMA Voice decoding context
pitch: pitch of the speech signal
in: input speech signal
out: output pointer for smoothened signal
size: input/output buffer size
Returns
-1 if no smoothening took place, e.g. because no optimal fit could be found, or 0 on success.

Definition at line 554 of file wmavoice.c.

Referenced by postfilter().

◆ tilt_factor()

static float tilt_factor ( const float *  lpcs,
int  n_lpcs 
)
static

Get the tilt factor of a formant filter from its transfer function.

See also
tilt_factor() in amrnbdec.c, which does essentially the same, but somehow (??) it does a speech synthesis filter in the middle, which is missing here
Parameters
lpcs: LPC coefficients
n_lpcs: Size of LPC buffer
Returns
the tilt factor

Definition at line 600 of file wmavoice.c.

Referenced by calc_input_response(), and wiener_denoise().

◆ calc_input_response()

static void calc_input_response ( WMAVoiceContext *  s,
float *  lpcs_src,
int  fcb_type,
float *  coeffs_dst,
int  remainder 
)
static

Derive denoise filter coefficients (in real domain) from the LPCs.

Definition at line 613 of file wmavoice.c.

Referenced by wiener_denoise().

◆ wiener_denoise()

static void wiener_denoise ( WMAVoiceContext *  s,
int  fcb_type,
float *  synth_pf,
int  size,
const float *  lpcs
)
static

This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.

  • take RDFT of LPCs to get the power spectrum of the noise + speech;
  • using this power spectrum, calculate (for each frequency) the Wiener filter gain, which depends on the frequency power and desired level of noise subtraction (when set too high, this leads to artifacts). We can do this symmetrically over the X-axis (so 0-4kHz is the inverse of 4-8kHz);
  • by doing a phase shift, calculate the Hilbert transform of this array of per-frequency filter-gains to get the filtering coefficients;
  • smoothen/normalize/de-tilt these filter coefficients as desired;
  • take RDFT of noisy sound, apply the coefficients and take its IRDFT to get the denoised speech signal;
  • the leftovers (i.e. the output of the IRDFT on denoised speech data beyond the frame boundary) are saved and applied to subsequent frames by an overlap-add method (otherwise you get clicking artifacts).
Parameters
s: WMA Voice decoding context
fcb_type: Frame (codebook) type
synth_pf: input: the noisy speech signal, output: denoised speech data; should be 16-byte aligned (for ASM purposes)
size: size of the speech data
lpcs: LPCs used to synthesize this frame's speech data

Definition at line 737 of file wmavoice.c.

Referenced by postfilter().

◆ postfilter()

static void postfilter ( WMAVoiceContext *  s,
const float *  synth,
float *  samples,
int  size,
const float *  lpcs,
float *  zero_exc_pf,
int  fcb_type,
int  pitch 
)
static

Averaging projection filter, the postfilter used in WMAVoice.

This uses the following steps:

  • A zero-synthesis filter (generate excitation from synth signal)
  • Kalman smoothing on excitation, based on pitch
  • Re-synthesized smoothened output
  • Iterative Wiener denoise filter
  • Adaptive gain filter
  • DC filter
Parameters
s: WMAVoice decoding context
synth: Speech synthesis output (before postfilter)
samples: Output buffer for filtered samples
size: Buffer size of synth & samples
lpcs: Generated LPCs used for speech synthesis
zero_exc_pf: destination for zero synthesis filter (16-byte aligned)
fcb_type: Frame type (silence, hardcoded, AW-pulses or FCB-pulses)
pitch: Pitch of the input signal

Definition at line 821 of file wmavoice.c.

Referenced by synth_frame().

◆ dequant_lsps()

static void dequant_lsps ( double *  lsps,
int  num,
const uint16_t *  values,
const uint16_t *  sizes,
int  n_stages,
const uint8_t *  table,
const double *  mul_q,
const double *  base_q
)
static

Dequantize LSPs.

Parameters
lsps: output pointer to the array that will hold the LSPs
num: number of LSPs to be dequantized
values: quantized values, contains n_stages values
sizes: range (i.e. max value) of each quantized value
n_stages: number of dequantization runs
table: dequantization table to be used
mul_q: LSF multiplier
base_q: base (lowest) LSF values

Definition at line 875 of file wmavoice.c.

Referenced by dequant_lsp10i(), dequant_lsp10r(), dequant_lsp16i(), and dequant_lsp16r().

◆ dequant_lsp10i()

static void dequant_lsp10i ( GetBitContext *  gb,
double *  lsps
)
static

Parse 10 independently-coded LSPs.

Definition at line 906 of file wmavoice.c.

Referenced by dequant_lsp10r(), and synth_superframe().

◆ dequant_lsp10r()

static void dequant_lsp10r ( GetBitContext *  gb,
double *  i_lsps,
const double *  old,
double *  a1,
double *  a2,
int  q_mode 
)
static

Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).

Definition at line 932 of file wmavoice.c.

Referenced by synth_superframe().

◆ dequant_lsp16i()

static void dequant_lsp16i ( GetBitContext *  gb,
double *  lsps
)
static

Parse 16 independently-coded LSPs.

Definition at line 968 of file wmavoice.c.

Referenced by dequant_lsp16r(), and synth_superframe().

◆ dequant_lsp16r()

static void dequant_lsp16r ( GetBitContext *  gb,
double *  i_lsps,
const double *  old,
double *  a1,
double *  a2,
int  q_mode 
)
static

Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).

Definition at line 1001 of file wmavoice.c.

Referenced by synth_superframe().

◆ aw_parse_coords()

static void aw_parse_coords ( WMAVoiceContext *  s,
GetBitContext *  gb,
const int *  pitch 
)
static

Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between the two blocks in this frame.

Parameters
s: WMA Voice decoding context private data
gb: bit I/O context
pitch: pitch for each block in this frame

Definition at line 1051 of file wmavoice.c.

Referenced by synth_frame().

◆ aw_pulse_set2()

static int aw_pulse_set2 ( WMAVoiceContext *  s,
GetBitContext *  gb,
int  block_idx,
AMRFixed *  fcb
)
static

Apply second set of pitch-adaptive window pulses.

Parameters
s: WMA Voice decoding context private data
gb: bit I/O context
block_idx: block index in frame [0, 1]
fcb: structure containing fixed codebook vector info
Returns
-1 on error, 0 otherwise

Definition at line 1103 of file wmavoice.c.

Referenced by synth_block_fcb_acb().

◆ aw_pulse_set1()

static void aw_pulse_set1 ( WMAVoiceContext *  s,
GetBitContext *  gb,
int  block_idx,
AMRFixed *  fcb
)
static

Apply first set of pitch-adaptive window pulses.

Parameters
s: WMA Voice decoding context private data
gb: bit I/O context
block_idx: block index in frame [0, 1]
fcb: storage location for fixed codebook pulse info

Definition at line 1193 of file wmavoice.c.

Referenced by synth_block_fcb_acb().

◆ pRNG()

static int pRNG ( int  frame_cntr,
int  block_num,
int  block_size 
)
static

Generate a random number from frame_cntr and block_num, which will live in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries).

Parameters
frame_cntr: current frame number
block_num: current block index
block_size: amount of entries we want to read from a table that has 1000 entries
Returns
a (non-)random number in the [0, 1000 - block_size] range.

Definition at line 1254 of file wmavoice.c.

Referenced by synth_block_fcb_acb(), and synth_block_hardcoded().

◆ synth_block_hardcoded()

static void synth_block_hardcoded ( WMAVoiceContext *  s,
GetBitContext *  gb,
int  block_idx,
int  size,
const struct frame_type_desc *  frame_desc,
float *  excitation
)
static

Parse hardcoded signal for a single block.

Note
see synth_block().

Definition at line 1290 of file wmavoice.c.

Referenced by synth_block().

◆ synth_block_fcb_acb()

static void synth_block_fcb_acb ( WMAVoiceContext *  s,
GetBitContext *  gb,
int  block_idx,
int  size,
int  block_pitch_sh2,
const struct frame_type_desc *  frame_desc,
float *  excitation
)
static

Parse FCB/ACB signal for a single block.

Note
see synth_block().

Definition at line 1321 of file wmavoice.c.

Referenced by synth_block().

◆ synth_block()

static void synth_block ( WMAVoiceContext *  s,
GetBitContext *  gb,
int  block_idx,
int  size,
int  block_pitch_sh2,
const double *  lsps,
const double *  prev_lsps,
const struct frame_type_desc *  frame_desc,
float *  excitation,
float *  synth
)
static

Parse data in a single block.

Parameters
s: WMA Voice decoding context private data
gb: bit I/O context
block_idx: index of the to-be-read block
size: amount of samples to be read in this block
block_pitch_sh2: pitch for this block << 2
lsps: LSPs for (the end of) this frame
prev_lsps: LSPs for the last frame
frame_desc: frame type descriptor
excitation: target memory for the ACB+FCB interpolated signal
synth: target memory for the speech synthesis filter output
Returns
nothing (the function is declared static void, so no status code is returned).

Definition at line 1455 of file wmavoice.c.

Referenced by synth_frame().

◆ synth_frame()

static int synth_frame ( AVCodecContext *  ctx,
GetBitContext *  gb,
int  frame_idx,
float *  samples,
const double *  lsps,
const double *  prev_lsps,
float *  excitation,
float *  synth
)
static

Synthesize output samples for a single frame.

Parameters
ctx: WMA Voice decoder context
gb: bit I/O context (s->gb or one for cross-packet superframes)
frame_idx: Frame number within superframe [0-2]
samples: pointer to output sample buffer, has space for at least 160 samples
lsps: LSP array
prev_lsps: array of previous frame's LSPs
excitation: target buffer for excitation signal
synth: target buffer for synthesized speech data
Returns
0 on success, <0 on error.

Definition at line 1497 of file wmavoice.c.

Referenced by synth_superframe().

◆ stabilize_lsps()

static void stabilize_lsps ( double *  lsps,
int  num 
)
static

Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering.

Parameters
lsps: array of LSPs
num: size of LSP array
Note
basically a double version of ff_acelp_reorder_lsf(), might be useful to put in a generic location later on. Parts are also present in ff_set_min_dist_lsf() + ff_sort_nearly_sorted_floats(), which is in float.

Definition at line 1678 of file wmavoice.c.

Referenced by synth_superframe().

◆ synth_superframe()

static int synth_superframe ( AVCodecContext *  ctx,
AVFrame *  frame,
int *  got_frame_ptr 
)
static

Synthesize output samples for a single superframe.

If we have any data cached in s->sframe_cache, that will be used instead of whatever is loaded in s->gb.

WMA Voice superframes contain 3 frames, each containing 160 audio samples, to give a total of 480 samples per superframe. See synth_frame() for frame parsing. In addition to the 3 frames, superframes can also contain the LSPs (if these are globally specified for all frames, i.e. residually coded; they can also be specified individually per frame, see the s->has_residual_lsps option), and can specify the number of samples encoded in this superframe (if less than 480), usually used to prevent blanks at track boundaries.

Parameters
ctx: WMA Voice decoder context
Returns
0 on success, <0 on error or 1 if there was not enough data to fully parse the superframe

Definition at line 1724 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

◆ parse_packet_header()

static int parse_packet_header ( WMAVoiceContext *  s)
static

Parse the packet header at the start of each packet (input data to this decoder).

Parameters
s: WMA Voice decoding context private data
Returns
<0 on error, nb_superframes on success.

Definition at line 1860 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

◆ copy_bits()

static void copy_bits ( PutBitContext *  pb,
const uint8_t *  data,
int  size,
GetBitContext *  gb,
int  nbits 
)
static

Copy (unaligned) bits from gb/data/size to pb.

Parameters
pb: target buffer to copy bits into
data: source buffer to copy bits from
size: size of the source data, in bytes
gb: bit I/O context specifying the current position in the source data. This function might use this to align the bit position to a whole-byte boundary before calling ff_copy_bits() on aligned source data
nbits: the amount of bits to copy from source to target
Note
after calling this function, the current position in the input bit I/O context is undefined.

Definition at line 1895 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

◆ wmavoice_decode_packet()

static int wmavoice_decode_packet ( AVCodecContext *  ctx,
AVFrame *  frame,
int *  got_frame_ptr,
AVPacket *  avpkt
)
static

Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output).

Every packet has a size of ctx->block_align bytes, starts with a packet header (see parse_packet_header()), and then a series of superframes. Superframe boundaries may exceed packets, i.e. superframes can split data over multiple (two) packets.

For more information about frames, see synth_superframe().

Definition at line 1924 of file wmavoice.c.

◆ wmavoice_decode_end()

static av_cold int wmavoice_decode_end ( AVCodecContext *  ctx)
static

Definition at line 2010 of file wmavoice.c.

Variable Documentation

◆ frame_type_vlc

VLCElem frame_type_vlc[132]
static

Frame type VLC coding.

Definition at line 64 of file wmavoice.c.

Referenced by synth_frame(), and wmavoice_init_static_data().

◆ frame_descs

const struct frame_type_desc frame_descs[17]
static

◆ ff_wmavoice_decoder

const FFCodec ff_wmavoice_decoder
Initial value:
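= {
.p.name = "wmavoice",
CODEC_LONG_NAME("Windows Media Audio Voice"),
.p.type = AVMEDIA_TYPE_AUDIO,
.p.id = AV_CODEC_ID_WMAVOICE,
.priv_data_size = sizeof(WMAVoiceContext),
.init = wmavoice_decode_init,
.close = wmavoice_decode_end,
FF_CODEC_DECODE_CB(wmavoice_decode_packet),
.p.capabilities =
AV_CODEC_CAP_SUBFRAMES |
AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
.flush = wmavoice_flush,
}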

Definition at line 2024 of file wmavoice.c.

FF_CODEC_CAP_INIT_CLEANUP
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: codec_internal.h:43
FCB_TYPE_EXC_PULSES
@ FCB_TYPE_EXC_PULSES
Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.
Definition: wmavoice.c:92
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
log10f
#define log10f(x)
Definition: libm.h:414
wmavoice_flush
static av_cold void wmavoice_flush(AVCodecContext *ctx)
Definition: wmavoice.c:329
FCB_TYPE_AW_PULSES
@ FCB_TYPE_AW_PULSES
Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
Definition: wmavoice.c:90
AV_CODEC_ID_WMAVOICE
@ AV_CODEC_ID_WMAVOICE
Definition: codec_id.h:484
FF_CODEC_DECODE_CB
#define FF_CODEC_DECODE_CB(func)
Definition: codec_internal.h:311
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
ACB_TYPE_ASYMMETRIC
@ ACB_TYPE_ASYMMETRIC
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
Definition: wmavoice.c:71
FCB_TYPE_SILENCE
@ FCB_TYPE_SILENCE
comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain va...
Definition: wmavoice.c:85
CODEC_LONG_NAME
#define CODEC_LONG_NAME(str)
Definition: codec_internal.h:296
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:368
AV_CODEC_CAP_DR1
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() or get_encode_buffer() for allocating buffers and supports custom allocators.
Definition: codec.h:52
FCB_TYPE_HARDCODED
@ FCB_TYPE_HARDCODED
hardcoded (fixed) codebook with per-block gain values
Definition: wmavoice.c:88
wmavoice_decode_init
static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
Set up decoder with parameters from demuxer (extradata etc.).
Definition: wmavoice.c:360
ACB_TYPE_HAMMING
@ ACB_TYPE_HAMMING
Per-block pitch with signal generation using a Hamming sinc window function.
Definition: wmavoice.c:76
assign
#define assign(var, type, n)
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
ACB_TYPE_NONE
@ ACB_TYPE_NONE
no adaptive codebook (only hardcoded fixed)
Definition: wmavoice.c:70
wmavoice_decode_end
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
Definition: wmavoice.c:2010
wmavoice_decode_packet
static int wmavoice_decode_packet(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt)
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer...
Definition: wmavoice.c:1924
AV_CODEC_CAP_DELAY
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
Definition: codec.h:76
min
float min
Definition: vorbis_enc_data.h:429
WMAVoiceContext
WMA Voice decoding context.
Definition: wmavoice.c:132