FFmpeg
Main Page
Related Pages
Modules
Data Structures
Files
Examples
File List
Globals
All
Data Structures
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
Pages
libavcodec
aac.h
Go to the documentation of this file.
1
/*
 * AAC definitions and structures
 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22
23
/**
 * @file
 * AAC definitions and structures
 * @author Oded Shimon ( ods15 ods15 dyndns org )
 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
 */
29
30
#ifndef AVCODEC_AAC_H
31
#define AVCODEC_AAC_H
32
33
#include "
libavutil/float_dsp.h
"
34
#include "
avcodec.h
"
35
#include "
dsputil.h
"
36
#include "
fft.h
"
37
#include "
mpeg4audio.h
"
38
#include "
sbr.h
"
39
#include "
fmtconvert.h
"
40
41
#include <stdint.h>
42
43
/* Array-dimension constants shared by the structures declared in this header. */
#define MAX_CHANNELS 64     ///< length of DynamicRangeControl.exclude_mask and AACContext.output_element
#define MAX_ELEM_ID 16      ///< second dimension of AACContext.che / tag_che_map; OutputConfiguration.layout_map has MAX_ELEM_ID*4 rows

#define TNS_MAX_ORDER 20    ///< innermost dimension of TemporalNoiseShaping.coef
#define MAX_LTP_LONG_SFB 40 ///< length of LongTermPrediction.used
48
49
/**
 * Element type identifiers.
 *
 * No enumerator has an explicit value, so they number 0..7 in declaration
 * order. ChannelCoupling.type stores one of these to say whether a coupled
 * element is an SCE or a CPE.
 *
 * NOTE(review): the abbreviations presumably follow the AAC bitstream syntax
 * element names (single channel, channel pair, coupling channel, LFE, data
 * stream, program config, fill, end) - confirm against the specification.
 */
enum RawDataBlockType {
    TYPE_SCE,
    TYPE_CPE,
    TYPE_CCE,
    TYPE_LFE,
    TYPE_DSE,
    TYPE_PCE,
    TYPE_FIL,
    TYPE_END,
};
59
60
/**
 * Extension payload identifiers.
 *
 * The first three enumerators take the implicit values 0, 1 and 2; the
 * remaining ones are pinned to explicit values.
 */
enum ExtensionPayloadID {
    EXT_FILL,
    EXT_FILL_DATA,
    EXT_DATA_ELEMENT,
    EXT_DYNAMIC_RANGE = 0xb,
    EXT_SBR_DATA      = 0xd,
    EXT_SBR_DATA_CRC  = 0xe,
};
68
69
/**
 * Window sequence identifiers, numbered 0..3 in declaration order.
 * Stored in IndividualChannelStream.window_sequence.
 */
enum WindowSequence {
    ONLY_LONG_SEQUENCE,
    LONG_START_SEQUENCE,
    EIGHT_SHORT_SEQUENCE,
    LONG_STOP_SEQUENCE,
};
75
76
/**
 * Band type identifiers.
 *
 * Only the values that carry a special meaning are named; every enumerator
 * has an explicit value, so the gaps between them are intentional.
 */
enum BandType {
    ZERO_BT        = 0,     ///< Scalefactors and spectral data are all zero.
    FIRST_PAIR_BT  = 5,     ///< This and later band types encode two values (rather than four) with one code word.
    ESC_BT         = 11,    ///< Spectral data are coded with an escape sequence.
    NOISE_BT       = 13,    ///< Spectral data are scaled white noise not coded in the bitstream.
    INTENSITY_BT2  = 14,    ///< Scalefactor data are intensity stereo positions.
    INTENSITY_BT   = 15,    ///< Scalefactor data are intensity stereo positions.
};
84
85
/**
 * Test bits 1 and 3 (mask 10, binary 1010) of (x - 1).
 *
 * Evaluates to a non-zero int when either bit is set and to 0 otherwise;
 * the result is the masked value, not a normalized 0/1 flag.
 * The argument is parenthesized so that expressions built from operators
 * with lower precedence than '-' (e.g. `a | b`, `c ? d : e`) are evaluated
 * as a whole before the subtraction. x is evaluated exactly once.
 *
 * NOTE(review): x is presumably a codebook / band type index - confirm
 * against callers.
 */
#define IS_CODEBOOK_UNSIGNED(x) (((x) - 1) & 10)
86
87
/**
 * Channel position categories, with explicit values 0..5.
 */
enum ChannelPosition {
    AAC_CHANNEL_OFF   = 0,
    AAC_CHANNEL_FRONT = 1,
    AAC_CHANNEL_SIDE  = 2,
    AAC_CHANNEL_BACK  = 3,
    AAC_CHANNEL_LFE   = 4,
    AAC_CHANNEL_CC    = 5,
};
95
96
/**
 * The point during decoding at which channel coupling is applied.
 *
 * BEFORE_TNS and BETWEEN_TNS_AND_IMDCT take the implicit values 0 and 1;
 * AFTER_IMDCT is explicitly 3, so the value 2 has no enumerator.
 */
enum CouplingPoint {
    BEFORE_TNS,
    BETWEEN_TNS_AND_IMDCT,
    AFTER_IMDCT = 3,
};
104
105
/**
 * Output configuration status
 *
 * Enumerators are numbered 0..4 in declaration order.
 */
enum OCStatus {
    OC_NONE,        ///< Output unconfigured
    OC_TRIAL_PCE,   ///< Output configuration under trial specified by an inband PCE
    OC_TRIAL_FRAME, ///< Output configuration under trial specified by a frame header
    OC_GLOBAL_HDR,  ///< Output configuration set in a global header but not yet locked
    OC_LOCKED,      ///< Output configuration locked in place
};
115
116
typedef
struct
OutputConfiguration
{
117
MPEG4AudioConfig
m4ac
;
118
uint8_t
layout_map
[
MAX_ELEM_ID
*4][3];
119
int
layout_map_tags
;
120
int
channels
;
121
uint64_t
channel_layout
;
122
enum
OCStatus
status
;
123
}
OutputConfiguration
;
124
125
/**
 * Predictor State
 *
 * Six float state variables; SingleChannelElement holds MAX_PREDICTORS of
 * these in its predictor_state array.
 */
typedef struct PredictorState {
    float cor0;
    float cor1;
    float var0;
    float var1;
    float r0;
    float r1;
} PredictorState;
136
137
#define MAX_PREDICTORS 672      ///< length of SingleChannelElement.predictor_state

#define SCALE_DIV_512    36     ///< scalefactor difference that corresponds to scale difference in 512 times
#define SCALE_ONE_POS   140     ///< scalefactor index that corresponds to scale=1.0
#define SCALE_MAX_POS   255     ///< scalefactor index maximum value
#define SCALE_MAX_DIFF   60     ///< maximum scalefactor difference allowed by standard
#define SCALE_DIFF_ZERO  60     ///< codebook index corresponding to zero scalefactor indices difference
144
145
/**
146
* Long Term Prediction
147
*/
148
typedef
struct
LongTermPrediction
{
149
int8_t
present
;
150
int16_t
lag
;
151
float
coef
;
152
int8_t
used
[
MAX_LTP_LONG_SFB
];
153
}
LongTermPrediction
;
154
155
/**
156
* Individual Channel Stream
157
*/
158
typedef
struct
IndividualChannelStream
{
159
uint8_t
max_sfb
;
///< number of scalefactor bands per group
160
enum
WindowSequence
window_sequence
[2];
161
uint8_t
use_kb_window
[2];
///< If set, use Kaiser-Bessel window, otherwise use a sinus window.
162
int
num_window_groups
;
163
uint8_t
group_len
[8];
164
LongTermPrediction
ltp
;
165
const
uint16_t *
swb_offset
;
///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window
166
const
uint8_t
*
swb_sizes
;
///< table of scalefactor band sizes for a particular window
167
int
num_swb
;
///< number of scalefactor window bands
168
int
num_windows
;
169
int
tns_max_bands
;
170
int
predictor_present
;
171
int
predictor_initialized
;
172
int
predictor_reset_group
;
173
uint8_t
prediction_used
[41];
174
}
IndividualChannelStream
;
175
176
/**
177
* Temporal Noise Shaping
178
*/
179
typedef
struct
TemporalNoiseShaping
{
180
int
present
;
181
int
n_filt
[8];
182
int
length
[8][4];
183
int
direction
[8][4];
184
int
order
[8][4];
185
float
coef
[8][4][
TNS_MAX_ORDER
];
186
}
TemporalNoiseShaping
;
187
188
/**
189
* Dynamic Range Control - decoded from the bitstream but not processed further.
190
*/
191
typedef
struct
DynamicRangeControl
{
192
int
pce_instance_tag
;
///< Indicates with which program the DRC info is associated.
193
int
dyn_rng_sgn
[17];
///< DRC sign information; 0 - positive, 1 - negative
194
int
dyn_rng_ctl
[17];
///< DRC magnitude information
195
int
exclude_mask
[
MAX_CHANNELS
];
///< Channels to be excluded from DRC processing.
196
int
band_incr
;
///< Number of DRC bands greater than 1 having DRC info.
197
int
interpolation_scheme
;
///< Indicates the interpolation scheme used in the SBR QMF domain.
198
int
band_top
[17];
///< Indicates the top of the i-th DRC band in units of 4 spectral lines.
199
int
prog_ref_level
;
/**< A reference level for the long-term program audio level for all
200
* channels combined.
201
*/
202
}
DynamicRangeControl
;
203
204
/**
 * Pulse data: a pulse count, a start value and up to four position /
 * amplitude pairs.
 *
 * NOTE(review): presumably only the first num_pulse entries of pos[] and
 * amp[] are meaningful - confirm against the code that fills this struct.
 */
typedef struct Pulse {
    int num_pulse;
    int start;
    int pos[4];
    int amp[4];
} Pulse;
210
211
/**
212
* coupling parameters
213
*/
214
typedef
struct
ChannelCoupling
{
215
enum
CouplingPoint
coupling_point
;
///< The point during decoding at which coupling is applied.
216
int
num_coupled
;
///< number of target elements
217
enum
RawDataBlockType
type
[8];
///< Type of channel element to be coupled - SCE or CPE.
218
int
id_select
[8];
///< element id
219
int
ch_select
[8];
/**< [0] shared list of gains; [1] list of gains for right channel;
220
* [2] list of gains for left channel; [3] lists of gains for both channels
221
*/
222
float
gain
[16][120];
223
}
ChannelCoupling
;
224
225
/**
226
* Single Channel Element - used for both SCE and LFE elements.
227
*/
228
typedef
struct
SingleChannelElement
{
229
IndividualChannelStream
ics
;
230
TemporalNoiseShaping
tns
;
231
Pulse
pulse
;
232
enum
BandType
band_type
[128];
///< band types
233
int
band_type_run_end
[120];
///< band type run end points
234
float
sf
[120];
///< scalefactors
235
int
sf_idx
[128];
///< scalefactor indices (used by encoder)
236
uint8_t
zeroes
[128];
///< band is not coded (used by encoder)
237
DECLARE_ALIGNED
(32,
float
,
coeffs
)[1024];
///< coefficients for IMDCT
238
DECLARE_ALIGNED
(32,
float
,
saved
)[1024];
///< overlap
239
DECLARE_ALIGNED
(32,
float
,
ret_buf
)[2048];
///< PCM output buffer
240
DECLARE_ALIGNED
(16,
float
,
ltp_state
)[3072];
///< time signal for LTP
241
PredictorState
predictor_state
[
MAX_PREDICTORS
];
242
float
*
ret
;
///< PCM output
243
}
SingleChannelElement
;
244
245
/**
246
* channel element - generic struct for SCE/CPE/CCE/LFE
247
*/
248
typedef
struct
ChannelElement
{
249
// CPE specific
250
int
common_window
;
///< Set if channels share a common 'IndividualChannelStream' in bitstream.
251
int
ms_mode
;
///< Signals mid/side stereo flags coding mode (used by encoder)
252
uint8_t
ms_mask
[128];
///< Set if mid/side stereo is used for each scalefactor window band
253
// shared
254
SingleChannelElement
ch
[2];
255
// CCE specific
256
ChannelCoupling
coup
;
257
SpectralBandReplication
sbr
;
258
}
ChannelElement
;
259
260
/**
261
* main AAC context
262
*/
263
typedef
struct
AACContext
{
264
AVClass
*
class
;
265
AVCodecContext
*
avctx
;
266
AVFrame
frame
;
267
268
int
is_saved
;
///< Set if elements have stored overlap from previous frame.
269
DynamicRangeControl
che_drc
;
270
271
/**
272
* @name Channel element related data
273
* @{
274
*/
275
ChannelElement
*
che
[4][
MAX_ELEM_ID
];
276
ChannelElement
*
tag_che_map
[4][
MAX_ELEM_ID
];
277
int
tags_mapped
;
278
/** @} */
279
280
/**
281
* @name temporary aligned temporary buffers
282
* (We do not want to have these on the stack.)
283
* @{
284
*/
285
DECLARE_ALIGNED
(32,
float
,
buf_mdct
)[1024];
286
/** @} */
287
288
/**
289
* @name Computed / set up during initialization
290
* @{
291
*/
292
FFTContext
mdct
;
293
FFTContext
mdct_small
;
294
FFTContext
mdct_ltp
;
295
DSPContext
dsp
;
296
FmtConvertContext
fmt_conv
;
297
AVFloatDSPContext
fdsp
;
298
int
random_state
;
299
/** @} */
300
301
/**
302
* @name Members used for output
303
* @{
304
*/
305
SingleChannelElement
*
output_element
[
MAX_CHANNELS
];
///< Points to each SingleChannelElement
306
/** @} */
307
308
309
/**
310
* @name Japanese DTV specific extension
311
* @{
312
*/
313
int
force_dmono_mode
;
///< 0->not dmono, 1->use first channel, 2->use second channel
314
int
dmono_mode
;
///< 0->not dmono, 1->use first channel, 2->use second channel
315
/** @} */
316
317
DECLARE_ALIGNED
(32,
float
,
temp
)[128];
318
319
OutputConfiguration
oc
[2];
320
int
warned_num_aac_frames
;
321
}
AACContext
;
322
323
#endif
/* AVCODEC_AAC_H */
Generated on Sat May 25 2013 03:58:30 for FFmpeg by Doxygen 1.8.2