FFmpeg
aacpsy.c
Go to the documentation of this file.
1 /*
2  * AAC encoder psychoacoustic model
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder psychoacoustic model
25  */
26 
27 #include "libavutil/attributes.h"
28 #include "libavutil/ffmath.h"
29 
30 #include "avcodec.h"
31 #include "aactab.h"
32 #include "psymodel.h"
33 
34 /***********************************
35  * TODOs:
36  * try other bitrate controlling mechanism (maybe use ratecontrol.c?)
37  * control quality for quality-based output
38  **********************************/
39 
40 /**
41  * constants for 3GPP AAC psychoacoustic model
42  * @{
43  */
/* Threshold spreading factors */
#define PSY_3GPP_THR_SPREAD_HI   1.5f /* low-to-hi threshold spreading (15 dB/Bark) */
#define PSY_3GPP_THR_SPREAD_LOW  3.0f /* hi-to-low threshold spreading (30 dB/Bark) */

/* Energy spreading factors */
#define PSY_3GPP_EN_SPREAD_HI_L1 2.0f /* low-to-hi, long block, > 22kbps/channel (20dB/Bark) */
#define PSY_3GPP_EN_SPREAD_HI_L2 1.5f /* low-to-hi, long block, <= 22kbps/channel (15dB/Bark) */
#define PSY_3GPP_EN_SPREAD_HI_S  1.5f /* low-to-hi, short block (15 dB/Bark) */
#define PSY_3GPP_EN_SPREAD_LOW_L 3.0f /* hi-to-low, long block (30dB/Bark) */
#define PSY_3GPP_EN_SPREAD_LOW_S 2.0f /* hi-to-low, short block (20dB/Bark) */

/* Pre-echo control: lower bound factor and level factor applied to thresholds */
#define PSY_3GPP_RPEMIN          0.01f
#define PSY_3GPP_RPELEV          2.0f

/* Constants of the perceptual entropy formula */
#define PSY_3GPP_C1              3.0f        /* log2(8) */
#define PSY_3GPP_C2              1.3219281f  /* log2(2.5) */
#define PSY_3GPP_C3              0.55935729f /* 1 - C2 / C1 */

/* Clipping bounds for the per-band minimal SNR */
#define PSY_SNR_1DB              7.9432821e-1f /* -1dB */
#define PSY_SNR_25DB             3.1622776e-3f /* -25dB */

/* Bit demand: bit-save curve (_L for long windows, _S for short windows) */
#define PSY_3GPP_SAVE_SLOPE_L    -0.46666667f
#define PSY_3GPP_SAVE_SLOPE_S    -0.36363637f
#define PSY_3GPP_SAVE_ADD_L      -0.84285712f
#define PSY_3GPP_SAVE_ADD_S      -0.75f

/* Bit demand: bit-spend curve */
#define PSY_3GPP_SPEND_SLOPE_L   0.66666669f
#define PSY_3GPP_SPEND_SLOPE_S   0.81818181f
#define PSY_3GPP_SPEND_ADD_L     -0.35f
#define PSY_3GPP_SPEND_ADD_S     -0.26111111f

/* Bit demand: clipping range of the relative reservoir fill level */
#define PSY_3GPP_CLIP_LO_L       0.2f
#define PSY_3GPP_CLIP_LO_S       0.2f
#define PSY_3GPP_CLIP_HI_L       0.95f
#define PSY_3GPP_CLIP_HI_S       0.75f

/* Hole avoidance thresholds for long and short windows */
#define PSY_3GPP_AH_THR_LONG     0.5f
#define PSY_3GPP_AH_THR_SHORT    0.63f

/* Weight of the old minimum PE when the minimum is slowly forgotten */
#define PSY_PE_FORGET_SLOPE      511
84 
85 enum {
89 };
90 
/* Conversion between a bit count and perceptual entropy (fixed factor 1.18) */
#define PSY_3GPP_BITS_TO_PE(bits) ((bits) * 1.18f)
#define PSY_3GPP_PE_TO_BITS(pe)   ((pe) / 1.18f)

/* LAME psy model constants */
#define PSY_LAME_FIR_LEN       21   ///< LAME psy model FIR order
#define AAC_BLOCK_SIZE_LONG    1024 ///< long block size
#define AAC_BLOCK_SIZE_SHORT   128  ///< short block size
#define AAC_NUM_BLOCKS_SHORT   8    ///< number of blocks in a short sequence
#define PSY_LAME_NUM_SUBBLOCKS 3    ///< Number of sub-blocks in each short block
100 
101 /**
102  * @}
103  */
104 
/**
 * Per-band state of the 3GPP TS26.403-inspired psychoacoustic model.
 */
typedef struct AacPsyBand {
    /** band energy */
    float energy;
    /** energy threshold */
    float thr;
    /** threshold in quiet */
    float thr_quiet;
    /** number of non-zero spectral lines */
    float nz_lines;
    /** number of active spectral lines */
    float active_lines;
    /** perceptual entropy */
    float pe;
    /** constant part of the PE calculation */
    float pe_const;
    /** normalization factor for linearization */
    float norm_fac;
    /** hole avoidance flag */
    int   avoid_holes;
} AacPsyBand;
119 
120 /**
121  * single/pair channel context for psychoacoustic model
122  */
123 typedef struct AacPsyChannel{
124  AacPsyBand band[128]; ///< bands information
125  AacPsyBand prev_band[128]; ///< bands information from the previous frame
126 
127  float win_energy; ///< sliding average of channel energy
128  float iir_state[2]; ///< hi-pass IIR filter state
129  uint8_t next_grouping; ///< stored grouping scheme for the next frame (in case of 8 short window sequence)
130  enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame
131  /* LAME psy model specific members */
132  float attack_threshold; ///< attack threshold for this channel
134  int prev_attack; ///< attack value for the last short block in the previous sequence
136 
/**
 * psychoacoustic model frame type-dependent coefficients
 *
 * One instance describes one band. The same layout is used for the long
 * frame and the short frame coefficient tables.
 *
 * In both spreading arrays index 0 is the factor applied to thresholds and
 * index 1 is the factor applied to energies.
 */
typedef struct AacPsyCoeffs {
    float ath;           ///< absolute threshold of hearing of the band
    float barks;         ///< Bark value of the band
    float spread_low[2]; ///< spreading factors for hi-to-low spreading (from the band above into this one)
    float spread_hi [2]; ///< spreading factors for low-to-hi spreading (from the band below into this one)
    float min_snr;       ///< minimal SNR
} AacPsyCoeffs;
147 
148 /**
149  * 3GPP TS26.403-inspired psychoacoustic model specific data
150  */
151 typedef struct AacPsyContext{
152  int chan_bitrate; ///< bitrate per channel
153  int frame_bits; ///< average bits per frame
154  int fill_level; ///< bit reservoir fill level
155  struct {
156  float min; ///< minimum allowed PE for bit factor calculation
157  float max; ///< maximum allowed PE for bit factor calculation
158  float previous; ///< allowed PE of the previous frame
159  float correction; ///< PE correction factor
160  } pe;
163  float global_quality; ///< normalized global quality taken from avctx
165 
/**
 * One row of a LAME psy model preset table.
 */
typedef struct PsyLamePreset {
    /**
     * Quality to map the rest of the values to.
     * This is overloaded to be both kbps per channel in ABR mode, and
     * requested quality in constant quality mode.
     */
    int   quality;
    /** short threshold for L, R, and M channels */
    float st_lrm;
} PsyLamePreset;
176 
177 /**
178  * LAME psy model preset table for ABR
179  */
180 static const PsyLamePreset psy_abr_map[] = {
181 /* TODO: Tuning. These were taken from LAME. */
182 /* kbps/ch st_lrm */
183  { 8, 6.60},
184  { 16, 6.60},
185  { 24, 6.60},
186  { 32, 6.60},
187  { 40, 6.60},
188  { 48, 6.60},
189  { 56, 6.60},
190  { 64, 6.40},
191  { 80, 6.00},
192  { 96, 5.60},
193  {112, 5.20},
194  {128, 5.20},
195  {160, 5.20}
196 };
197 
198 /**
199 * LAME psy model preset table for constant quality
200 */
201 static const PsyLamePreset psy_vbr_map[] = {
202 /* vbr_q st_lrm */
203  { 0, 4.20},
204  { 1, 4.20},
205  { 2, 4.20},
206  { 3, 4.20},
207  { 4, 4.20},
208  { 5, 4.20},
209  { 6, 4.20},
210  { 7, 4.20},
211  { 8, 4.20},
212  { 9, 4.20},
213  {10, 4.20}
214 };
215 
/**
 * LAME psy model FIR coefficient table.
 *
 * Ten coefficients, each written as a base value multiplied by two.
 */
static const float psy_fir_coeffs[] = {
    -8.65163e-18 * 2,
    -0.00851586  * 2,
    -6.74764e-18 * 2,
     0.0209036   * 2,
    -3.36639e-17 * 2,
    -0.0438162   * 2,
    -1.54175e-17 * 2,
     0.0931738   * 2,
    -5.52212e-17 * 2,
    -0.313819    * 2
};
224 
225 #if ARCH_MIPS
226 # include "mips/aacpsy_mips.h"
227 #endif /* ARCH_MIPS */
228 
229 /**
230  * Calculate the ABR attack threshold from the above LAME psymodel table.
231  */
233 {
234  /* Assume max bitrate to start with */
235  int lower_range = 12, upper_range = 12;
236  int lower_range_kbps = psy_abr_map[12].quality;
237  int upper_range_kbps = psy_abr_map[12].quality;
238  int i;
239 
240  /* Determine which bitrates the value specified falls between.
241  * If the loop ends without breaking our above assumption of 320kbps was correct.
242  */
243  for (i = 1; i < 13; i++) {
245  upper_range = i;
246  upper_range_kbps = psy_abr_map[i ].quality;
247  lower_range = i - 1;
248  lower_range_kbps = psy_abr_map[i - 1].quality;
249  break; /* Upper range found */
250  }
251  }
252 
253  /* Determine which range the value specified is closer to */
254  if ((upper_range_kbps - bitrate) > (bitrate - lower_range_kbps))
255  return psy_abr_map[lower_range].st_lrm;
256  return psy_abr_map[upper_range].st_lrm;
257 }
258 
259 /**
260  * LAME psy model specific initialization
261  */
263 {
264  int i, j;
265 
266  for (i = 0; i < avctx->channels; i++) {
267  AacPsyChannel *pch = &ctx->ch[i];
268 
269  if (avctx->flags & AV_CODEC_FLAG_QSCALE)
271  else
272  pch->attack_threshold = lame_calc_attack_threshold(avctx->bit_rate / avctx->channels / 1000);
273 
274  for (j = 0; j < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; j++)
275  pch->prev_energy_subshort[j] = 10.0f;
276  }
277 }
278 
279 /**
280  * Calculate Bark value for given line.
281  */
282 static av_cold float calc_bark(float f)
283 {
284  return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500.0f));
285 }
286 
287 #define ATH_ADD 4
288 /**
289  * Calculate ATH value for given frequency.
290  * Borrowed from Lame.
291  */
292 static av_cold float ath(float f, float add)
293 {
294  f /= 1000.0f;
295  return 3.64 * pow(f, -0.8)
296  - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4))
297  + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7))
298  + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;
299 }
300 
302  AacPsyContext *pctx;
303  float bark;
304  int i, j, g, start;
305  float prev, minscale, minath, minsnr, pe_min;
306  int chan_bitrate = ctx->avctx->bit_rate / ((ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : ctx->avctx->channels);
307 
308  const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
309  const float num_bark = calc_bark((float)bandwidth);
310 
311  if (bandwidth <= 0)
312  return AVERROR(EINVAL);
313 
314  ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
315  if (!ctx->model_priv_data)
316  return AVERROR(ENOMEM);
317  pctx = ctx->model_priv_data;
318  pctx->global_quality = (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) * 0.01f;
319 
320  if (ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) {
321  /* Use the target average bitrate to compute spread parameters */
322  chan_bitrate = (int)(chan_bitrate / 120.0 * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120));
323  }
324 
325  pctx->chan_bitrate = chan_bitrate;
326  pctx->frame_bits = FFMIN(2560, chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate);
327  pctx->pe.min = 8.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
328  pctx->pe.max = 12.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
329  ctx->bitres.size = 6144 - pctx->frame_bits;
330  ctx->bitres.size -= ctx->bitres.size % 8;
331  pctx->fill_level = ctx->bitres.size;
332  minath = ath(3410 - 0.733 * ATH_ADD, ATH_ADD);
333  for (j = 0; j < 2; j++) {
334  AacPsyCoeffs *coeffs = pctx->psy_coef[j];
335  const uint8_t *band_sizes = ctx->bands[j];
336  float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f);
337  float avg_chan_bits = chan_bitrate * (j ? 128.0f : 1024.0f) / ctx->avctx->sample_rate;
338  /* reference encoder uses 2.4% here instead of 60% like the spec says */
339  float bark_pe = 0.024f * PSY_3GPP_BITS_TO_PE(avg_chan_bits) / num_bark;
340  float en_spread_low = j ? PSY_3GPP_EN_SPREAD_LOW_S : PSY_3GPP_EN_SPREAD_LOW_L;
341  /* High energy spreading for long blocks <= 22kbps/channel and short blocks are the same. */
342  float en_spread_hi = (j || (chan_bitrate <= 22.0f)) ? PSY_3GPP_EN_SPREAD_HI_S : PSY_3GPP_EN_SPREAD_HI_L1;
343 
344  i = 0;
345  prev = 0.0;
346  for (g = 0; g < ctx->num_bands[j]; g++) {
347  i += band_sizes[g];
348  bark = calc_bark((i-1) * line_to_frequency);
349  coeffs[g].barks = (bark + prev) / 2.0;
350  prev = bark;
351  }
352  for (g = 0; g < ctx->num_bands[j] - 1; g++) {
353  AacPsyCoeffs *coeff = &coeffs[g];
354  float bark_width = coeffs[g+1].barks - coeffs->barks;
355  coeff->spread_low[0] = ff_exp10(-bark_width * PSY_3GPP_THR_SPREAD_LOW);
356  coeff->spread_hi [0] = ff_exp10(-bark_width * PSY_3GPP_THR_SPREAD_HI);
357  coeff->spread_low[1] = ff_exp10(-bark_width * en_spread_low);
358  coeff->spread_hi [1] = ff_exp10(-bark_width * en_spread_hi);
359  pe_min = bark_pe * bark_width;
360  minsnr = exp2(pe_min / band_sizes[g]) - 1.5f;
361  coeff->min_snr = av_clipf(1.0f / minsnr, PSY_SNR_25DB, PSY_SNR_1DB);
362  }
363  start = 0;
364  for (g = 0; g < ctx->num_bands[j]; g++) {
365  minscale = ath(start * line_to_frequency, ATH_ADD);
366  for (i = 1; i < band_sizes[g]; i++)
367  minscale = FFMIN(minscale, ath((start + i) * line_to_frequency, ATH_ADD));
368  coeffs[g].ath = minscale - minath;
369  start += band_sizes[g];
370  }
371  }
372 
373  pctx->ch = av_mallocz_array(ctx->avctx->channels, sizeof(AacPsyChannel));
374  if (!pctx->ch) {
375  av_freep(&ctx->model_priv_data);
376  return AVERROR(ENOMEM);
377  }
378 
379  lame_window_init(pctx, ctx->avctx);
380 
381  return 0;
382 }
383 
/**
 * IIR filter used in block switching decision.
 *
 * @param in    new input sample
 * @param state filter memory: state[0] holds the previous input sample and
 *              state[1] the previous output; both are updated by the call
 * @return filtered sample
 */
static float iir_filter(int in, float state[2])
{
    const float out = 0.7548f * (in - state[0]) + 0.5095f * state[1];

    state[0] = in;
    state[1] = out;

    return out;
}
396 
/**
 * Window grouping patterns, one byte per entry.
 * Each bit describes one window: 0 - new group, 1 - group continues.
 */
static const uint8_t window_grouping[9] = {
    0xB6, /* [0] */
    0x6C, /* [1] */
    0xD8, /* [2] */
    0xB2, /* [3] */
    0x66, /* [4] */
    0xC6, /* [5] */
    0x96, /* [6] */
    0x36, /* [7] */
    0x36  /* [8] */
};
403 
404 /**
405  * Tell encoder which window types to use.
406  * @see 3GPP TS26.403 5.4.1 "Blockswitching"
407  */
409  const int16_t *audio,
410  const int16_t *la,
411  int channel, int prev_type)
412 {
413  int i, j;
414  int br = ((AacPsyContext*)ctx->model_priv_data)->chan_bitrate;
415  int attack_ratio = br <= 16000 ? 18 : 10;
416  AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
417  AacPsyChannel *pch = &pctx->ch[channel];
418  uint8_t grouping = 0;
419  int next_type = pch->next_window_seq;
420  FFPsyWindowInfo wi = { { 0 } };
421 
422  if (la) {
423  float s[8], v;
424  int switch_to_eight = 0;
425  float sum = 0.0, sum2 = 0.0;
426  int attack_n = 0;
427  int stay_short = 0;
428  for (i = 0; i < 8; i++) {
429  for (j = 0; j < 128; j++) {
430  v = iir_filter(la[i*128+j], pch->iir_state);
431  sum += v*v;
432  }
433  s[i] = sum;
434  sum2 += sum;
435  }
436  for (i = 0; i < 8; i++) {
437  if (s[i] > pch->win_energy * attack_ratio) {
438  attack_n = i + 1;
439  switch_to_eight = 1;
440  break;
441  }
442  }
443  pch->win_energy = pch->win_energy*7/8 + sum2/64;
444 
445  wi.window_type[1] = prev_type;
446  switch (prev_type) {
447  case ONLY_LONG_SEQUENCE:
448  wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
449  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
450  break;
451  case LONG_START_SEQUENCE:
452  wi.window_type[0] = EIGHT_SHORT_SEQUENCE;
453  grouping = pch->next_grouping;
454  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
455  break;
456  case LONG_STOP_SEQUENCE:
457  wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
458  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
459  break;
461  stay_short = next_type == EIGHT_SHORT_SEQUENCE || switch_to_eight;
462  wi.window_type[0] = stay_short ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
463  grouping = next_type == EIGHT_SHORT_SEQUENCE ? pch->next_grouping : 0;
464  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
465  break;
466  }
467 
468  pch->next_grouping = window_grouping[attack_n];
469  pch->next_window_seq = next_type;
470  } else {
471  for (i = 0; i < 3; i++)
472  wi.window_type[i] = prev_type;
473  grouping = (prev_type == EIGHT_SHORT_SEQUENCE) ? window_grouping[0] : 0;
474  }
475 
476  wi.window_shape = 1;
477  if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
478  wi.num_windows = 1;
479  wi.grouping[0] = 1;
480  } else {
481  int lastgrp = 0;
482  wi.num_windows = 8;
483  for (i = 0; i < 8; i++) {
484  if (!((grouping >> i) & 1))
485  lastgrp = i;
486  wi.grouping[lastgrp]++;
487  }
488  }
489 
490  return wi;
491 }
492 
493 /* 5.6.1.2 "Calculation of Bit Demand" */
494 static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size,
495  int short_window)
496 {
497  const float bitsave_slope = short_window ? PSY_3GPP_SAVE_SLOPE_S : PSY_3GPP_SAVE_SLOPE_L;
498  const float bitsave_add = short_window ? PSY_3GPP_SAVE_ADD_S : PSY_3GPP_SAVE_ADD_L;
499  const float bitspend_slope = short_window ? PSY_3GPP_SPEND_SLOPE_S : PSY_3GPP_SPEND_SLOPE_L;
500  const float bitspend_add = short_window ? PSY_3GPP_SPEND_ADD_S : PSY_3GPP_SPEND_ADD_L;
501  const float clip_low = short_window ? PSY_3GPP_CLIP_LO_S : PSY_3GPP_CLIP_LO_L;
502  const float clip_high = short_window ? PSY_3GPP_CLIP_HI_S : PSY_3GPP_CLIP_HI_L;
503  float clipped_pe, bit_save, bit_spend, bit_factor, fill_level, forgetful_min_pe;
504 
505  ctx->fill_level += ctx->frame_bits - bits;
506  ctx->fill_level = av_clip(ctx->fill_level, 0, size);
507  fill_level = av_clipf((float)ctx->fill_level / size, clip_low, clip_high);
508  clipped_pe = av_clipf(pe, ctx->pe.min, ctx->pe.max);
509  bit_save = (fill_level + bitsave_add) * bitsave_slope;
510  assert(bit_save <= 0.3f && bit_save >= -0.05000001f);
511  bit_spend = (fill_level + bitspend_add) * bitspend_slope;
512  assert(bit_spend <= 0.5f && bit_spend >= -0.1f);
513  /* The bit factor graph in the spec is obviously incorrect.
514  * bit_spend + ((bit_spend - bit_spend))...
515  * The reference encoder subtracts everything from 1, but also seems incorrect.
516  * 1 - bit_save + ((bit_spend + bit_save))...
517  * Hopefully below is correct.
518  */
519  bit_factor = 1.0f - bit_save + ((bit_spend - bit_save) / (ctx->pe.max - ctx->pe.min)) * (clipped_pe - ctx->pe.min);
520  /* NOTE: The reference encoder attempts to center pe max/min around the current pe.
521  * Here we do that by slowly forgetting pe.min when pe stays in a range that makes
522  * it unlikely (ie: above the mean)
523  */
524  ctx->pe.max = FFMAX(pe, ctx->pe.max);
525  forgetful_min_pe = ((ctx->pe.min * PSY_PE_FORGET_SLOPE)
526  + FFMAX(ctx->pe.min, pe * (pe / ctx->pe.max))) / (PSY_PE_FORGET_SLOPE + 1);
527  ctx->pe.min = FFMIN(pe, forgetful_min_pe);
528 
529  /* NOTE: allocate a minimum of 1/8th average frame bits, to avoid
530  * reservoir starvation from producing zero-bit frames
531  */
532  return FFMIN(
533  ctx->frame_bits * bit_factor,
534  FFMAX(ctx->frame_bits + size - bits, ctx->frame_bits / 8));
535 }
536 
537 static float calc_pe_3gpp(AacPsyBand *band)
538 {
539  float pe, a;
540 
541  band->pe = 0.0f;
542  band->pe_const = 0.0f;
543  band->active_lines = 0.0f;
544  if (band->energy > band->thr) {
545  a = log2f(band->energy);
546  pe = a - log2f(band->thr);
547  band->active_lines = band->nz_lines;
548  if (pe < PSY_3GPP_C1) {
549  pe = pe * PSY_3GPP_C3 + PSY_3GPP_C2;
550  a = a * PSY_3GPP_C3 + PSY_3GPP_C2;
551  band->active_lines *= PSY_3GPP_C3;
552  }
553  band->pe = pe * band->nz_lines;
554  band->pe_const = a * band->nz_lines;
555  }
556 
557  return band->pe;
558 }
559 
560 static float calc_reduction_3gpp(float a, float desired_pe, float pe,
561  float active_lines)
562 {
563  float thr_avg, reduction;
564 
565  if(active_lines == 0.0)
566  return 0;
567 
568  thr_avg = exp2f((a - pe) / (4.0f * active_lines));
569  reduction = exp2f((a - desired_pe) / (4.0f * active_lines)) - thr_avg;
570 
571  return FFMAX(reduction, 0.0f);
572 }
573 
/**
 * Apply a reduction value to a band threshold.
 *
 * Only bands whose energy exceeds the current threshold are changed: the
 * reduction is added to the fourth root of the threshold and the sum is
 * raised back to the fourth power.
 *
 * NOTE(review): this listing jumps from original line 591 to 593, so a
 * statement inside the inner if may be missing here - confirm against the
 * upstream file before relying on this block.
 *
 * @param band      band to process; reads thr, energy and avoid_holes
 * @param min_snr   minimal SNR of the band
 * @param reduction reduction value to apply
 * @return the new threshold, or band->thr unchanged when energy <= thr
 */
574 static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr,
575  float reduction)
576 {
577  float thr = band->thr;
578 
579  if (band->energy > thr) {
     /* fourth root, add the reduction, then back to the fourth power */
580  thr = sqrtf(thr);
581  thr = sqrtf(thr) + reduction;
582  thr *= thr;
583  thr *= thr;
584 
585  /* This deviates from the 3GPP spec to match the reference encoder.
586  * It performs min(thr_reduced, max(thr, energy/min_snr)) only for bands
587  * that have hole avoidance on (active or inactive). It always reduces the
588  * threshold of bands with hole avoidance off.
589  */
590  if (thr > band->energy * min_snr && band->avoid_holes != PSY_3GPP_AH_NONE) {
591  thr = FFMAX(band->thr, band->energy * min_snr);
593  }
594  }
595 
596  return thr;
597 }
598 
599 #ifndef calc_thr_3gpp
600 static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch,
601  const uint8_t *band_sizes, const float *coefs, const int cutoff)
602 {
603  int i, w, g;
604  int start = 0, wstart = 0;
605  for (w = 0; w < wi->num_windows*16; w += 16) {
606  wstart = 0;
607  for (g = 0; g < num_bands; g++) {
608  AacPsyBand *band = &pch->band[w+g];
609 
610  float form_factor = 0.0f;
611  float Temp;
612  band->energy = 0.0f;
613  if (wstart < cutoff) {
614  for (i = 0; i < band_sizes[g]; i++) {
615  band->energy += coefs[start+i] * coefs[start+i];
616  form_factor += sqrtf(fabs(coefs[start+i]));
617  }
618  }
619  Temp = band->energy > 0 ? sqrtf((float)band_sizes[g] / band->energy) : 0;
620  band->thr = band->energy * 0.001258925f;
621  band->nz_lines = form_factor * sqrtf(Temp);
622 
623  start += band_sizes[g];
624  wstart += band_sizes[g];
625  }
626  }
627 }
628 #endif /* calc_thr_3gpp */
629 
630 #ifndef psy_hp_filter
631 static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
632 {
633  int i, j;
634  for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {
635  float sum1, sum2;
636  sum1 = firbuf[i + (PSY_LAME_FIR_LEN - 1) / 2];
637  sum2 = 0.0;
638  for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) {
639  sum1 += psy_fir_coeffs[j] * (firbuf[i + j] + firbuf[i + PSY_LAME_FIR_LEN - j]);
640  sum2 += psy_fir_coeffs[j + 1] * (firbuf[i + j + 1] + firbuf[i + PSY_LAME_FIR_LEN - j - 1]);
641  }
642  /* NOTE: The LAME psymodel expects it's input in the range -32768 to 32768.
643  * Tuning this for normalized floats would be difficult. */
644  hpfsmpl[i] = (sum1 + sum2) * 32768.0f;
645  }
646 }
647 #endif /* psy_hp_filter */
648 
649 /**
650  * Calculate band thresholds as suggested in 3GPP TS26.403
651  */
653  const float *coefs, const FFPsyWindowInfo *wi)
654 {
655  AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
656  AacPsyChannel *pch = &pctx->ch[channel];
657  int i, w, g;
658  float desired_bits, desired_pe, delta_pe, reduction= NAN, spread_en[128] = {0};
659  float a = 0.0f, active_lines = 0.0f, norm_fac = 0.0f;
660  float pe = pctx->chan_bitrate > 32000 ? 0.0f : FFMAX(50.0f, 100.0f - pctx->chan_bitrate * 100.0f / 32000.0f);
661  const int num_bands = ctx->num_bands[wi->num_windows == 8];
662  const uint8_t *band_sizes = ctx->bands[wi->num_windows == 8];
663  AacPsyCoeffs *coeffs = pctx->psy_coef[wi->num_windows == 8];
664  const float avoid_hole_thr = wi->num_windows == 8 ? PSY_3GPP_AH_THR_SHORT : PSY_3GPP_AH_THR_LONG;
665  const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
666  const int cutoff = bandwidth * 2048 / wi->num_windows / ctx->avctx->sample_rate;
667 
668  //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"
669  calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs, cutoff);
670 
671  //modify thresholds and energies - spread, threshold in quiet, pre-echo control
672  for (w = 0; w < wi->num_windows*16; w += 16) {
673  AacPsyBand *bands = &pch->band[w];
674 
675  /* 5.4.2.3 "Spreading" & 5.4.3 "Spread Energy Calculation" */
676  spread_en[0] = bands[0].energy;
677  for (g = 1; g < num_bands; g++) {
678  bands[g].thr = FFMAX(bands[g].thr, bands[g-1].thr * coeffs[g].spread_hi[0]);
679  spread_en[w+g] = FFMAX(bands[g].energy, spread_en[w+g-1] * coeffs[g].spread_hi[1]);
680  }
681  for (g = num_bands - 2; g >= 0; g--) {
682  bands[g].thr = FFMAX(bands[g].thr, bands[g+1].thr * coeffs[g].spread_low[0]);
683  spread_en[w+g] = FFMAX(spread_en[w+g], spread_en[w+g+1] * coeffs[g].spread_low[1]);
684  }
685  //5.4.2.4 "Threshold in quiet"
686  for (g = 0; g < num_bands; g++) {
687  AacPsyBand *band = &bands[g];
688 
689  band->thr_quiet = band->thr = FFMAX(band->thr, coeffs[g].ath);
690  //5.4.2.5 "Pre-echo control"
691  if (!(wi->window_type[0] == LONG_STOP_SEQUENCE || (!w && wi->window_type[1] == LONG_START_SEQUENCE)))
692  band->thr = FFMAX(PSY_3GPP_RPEMIN*band->thr, FFMIN(band->thr,
693  PSY_3GPP_RPELEV*pch->prev_band[w+g].thr_quiet));
694 
695  /* 5.6.1.3.1 "Preparatory steps of the perceptual entropy calculation" */
696  pe += calc_pe_3gpp(band);
697  a += band->pe_const;
698  active_lines += band->active_lines;
699 
700  /* 5.6.1.3.3 "Selection of the bands for avoidance of holes" */
701  if (spread_en[w+g] * avoid_hole_thr > band->energy || coeffs[g].min_snr > 1.0f)
703  else
705  }
706  }
707 
708  /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */
709  ctx->ch[channel].entropy = pe;
710  if (ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) {
711  /* (2.5 * 120) achieves almost transparent rate, and we want to give
712  * ample room downwards, so we make that equivalent to QSCALE=2.4
713  */
714  desired_pe = pe * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) / (2 * 2.5f * 120.0f);
715  desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
716  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
717 
718  /* PE slope smoothing */
719  if (ctx->bitres.bits > 0) {
720  desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
721  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
722  }
723 
724  pctx->pe.max = FFMAX(pe, pctx->pe.max);
725  pctx->pe.min = FFMIN(pe, pctx->pe.min);
726  } else {
727  desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
728  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
729 
730  /* NOTE: PE correction is kept simple. During initial testing it had very
731  * little effect on the final bitrate. Probably a good idea to come
732  * back and do more testing later.
733  */
734  if (ctx->bitres.bits > 0)
735  desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits),
736  0.85f, 1.15f);
737  }
738  pctx->pe.previous = PSY_3GPP_BITS_TO_PE(desired_bits);
739  ctx->bitres.alloc = desired_bits;
740 
741  if (desired_pe < pe) {
742  /* 5.6.1.3.4 "First Estimation of the reduction value" */
743  for (w = 0; w < wi->num_windows*16; w += 16) {
744  reduction = calc_reduction_3gpp(a, desired_pe, pe, active_lines);
745  pe = 0.0f;
746  a = 0.0f;
747  active_lines = 0.0f;
748  for (g = 0; g < num_bands; g++) {
749  AacPsyBand *band = &pch->band[w+g];
750 
751  band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction);
752  /* recalculate PE */
753  pe += calc_pe_3gpp(band);
754  a += band->pe_const;
755  active_lines += band->active_lines;
756  }
757  }
758 
759  /* 5.6.1.3.5 "Second Estimation of the reduction value" */
760  for (i = 0; i < 2; i++) {
761  float pe_no_ah = 0.0f, desired_pe_no_ah;
762  active_lines = a = 0.0f;
763  for (w = 0; w < wi->num_windows*16; w += 16) {
764  for (g = 0; g < num_bands; g++) {
765  AacPsyBand *band = &pch->band[w+g];
766 
767  if (band->avoid_holes != PSY_3GPP_AH_ACTIVE) {
768  pe_no_ah += band->pe;
769  a += band->pe_const;
770  active_lines += band->active_lines;
771  }
772  }
773  }
774  desired_pe_no_ah = FFMAX(desired_pe - (pe - pe_no_ah), 0.0f);
775  if (active_lines > 0.0f)
776  reduction = calc_reduction_3gpp(a, desired_pe_no_ah, pe_no_ah, active_lines);
777 
778  pe = 0.0f;
779  for (w = 0; w < wi->num_windows*16; w += 16) {
780  for (g = 0; g < num_bands; g++) {
781  AacPsyBand *band = &pch->band[w+g];
782 
783  if (active_lines > 0.0f)
784  band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction);
785  pe += calc_pe_3gpp(band);
786  if (band->thr > 0.0f)
787  band->norm_fac = band->active_lines / band->thr;
788  else
789  band->norm_fac = 0.0f;
790  norm_fac += band->norm_fac;
791  }
792  }
793  delta_pe = desired_pe - pe;
794  if (fabs(delta_pe) > 0.05f * desired_pe)
795  break;
796  }
797 
798  if (pe < 1.15f * desired_pe) {
799  /* 6.6.1.3.6 "Final threshold modification by linearization" */
800  norm_fac = norm_fac ? 1.0f / norm_fac : 0;
801  for (w = 0; w < wi->num_windows*16; w += 16) {
802  for (g = 0; g < num_bands; g++) {
803  AacPsyBand *band = &pch->band[w+g];
804 
805  if (band->active_lines > 0.5f) {
806  float delta_sfb_pe = band->norm_fac * norm_fac * delta_pe;
807  float thr = band->thr;
808 
809  thr *= exp2f(delta_sfb_pe / band->active_lines);
810  if (thr > coeffs[g].min_snr * band->energy && band->avoid_holes == PSY_3GPP_AH_INACTIVE)
811  thr = FFMAX(band->thr, coeffs[g].min_snr * band->energy);
812  band->thr = thr;
813  }
814  }
815  }
816  } else {
817  /* 5.6.1.3.7 "Further perceptual entropy reduction" */
818  g = num_bands;
819  while (pe > desired_pe && g--) {
820  for (w = 0; w < wi->num_windows*16; w+= 16) {
821  AacPsyBand *band = &pch->band[w+g];
822  if (band->avoid_holes != PSY_3GPP_AH_NONE && coeffs[g].min_snr < PSY_SNR_1DB) {
823  coeffs[g].min_snr = PSY_SNR_1DB;
824  band->thr = band->energy * PSY_SNR_1DB;
825  pe += band->active_lines * 1.5f - band->pe;
826  }
827  }
828  }
829  /* TODO: allow more holes (unused without mid/side) */
830  }
831  }
832 
833  for (w = 0; w < wi->num_windows*16; w += 16) {
834  for (g = 0; g < num_bands; g++) {
835  AacPsyBand *band = &pch->band[w+g];
836  FFPsyBand *psy_band = &ctx->ch[channel].psy_bands[w+g];
837 
838  psy_band->threshold = band->thr;
839  psy_band->energy = band->energy;
840  psy_band->spread = band->active_lines * 2.0f / band_sizes[g];
841  psy_band->bits = PSY_3GPP_PE_TO_BITS(band->pe);
842  }
843  }
844 
845  memcpy(pch->prev_band, pch->band, sizeof(pch->band));
846 }
847 
849  const float **coeffs, const FFPsyWindowInfo *wi)
850 {
851  int ch;
853 
854  for (ch = 0; ch < group->num_ch; ch++)
855  psy_3gpp_analyze_channel(ctx, channel + ch, coeffs[ch], &wi[ch]);
856 }
857 
859 {
861  av_freep(&pctx->ch);
862  av_freep(&apc->model_priv_data);
863 }
864 
865 static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
866 {
867  int blocktype = ONLY_LONG_SEQUENCE;
868  if (uselongblock) {
869  if (ctx->next_window_seq == EIGHT_SHORT_SEQUENCE)
870  blocktype = LONG_STOP_SEQUENCE;
871  } else {
872  blocktype = EIGHT_SHORT_SEQUENCE;
873  if (ctx->next_window_seq == ONLY_LONG_SEQUENCE)
874  ctx->next_window_seq = LONG_START_SEQUENCE;
875  if (ctx->next_window_seq == LONG_STOP_SEQUENCE)
876  ctx->next_window_seq = EIGHT_SHORT_SEQUENCE;
877  }
878 
879  wi->window_type[0] = ctx->next_window_seq;
880  ctx->next_window_seq = blocktype;
881 }
882 
883 static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
884  const float *la, int channel, int prev_type)
885 {
886  AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
887  AacPsyChannel *pch = &pctx->ch[channel];
888  int grouping = 0;
889  int uselongblock = 1;
890  int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
891  int i;
892  FFPsyWindowInfo wi = { { 0 } };
893 
894  if (la) {
895  float hpfsmpl[AAC_BLOCK_SIZE_LONG];
896  const float *pf = hpfsmpl;
897  float attack_intensity[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
898  float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
899  float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
900  const float *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN);
901  int att_sum = 0;
902 
903  /* LAME comment: apply high pass filter of fs/4 */
904  psy_hp_filter(firbuf, hpfsmpl, psy_fir_coeffs);
905 
906  /* Calculate the energies of each sub-shortblock */
907  for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
908  energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
909  assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)] > 0);
910  attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)];
911  energy_short[0] += energy_subshort[i];
912  }
913 
914  for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) {
915  const float *const pfe = pf + AAC_BLOCK_SIZE_LONG / (AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS);
916  float p = 1.0f;
917  for (; pf < pfe; pf++)
918  p = FFMAX(p, fabsf(*pf));
919  pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS] = p;
920  energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
921  /* NOTE: The indexes below are [i + 3 - 2] in the LAME source.
922  * Obviously the 3 and 2 have some significance, or this would be just [i + 1]
923  * (which is what we use here). What the 3 stands for is ambiguous, as it is both
924  * number of short blocks, and the number of sub-short blocks.
925  * It seems that LAME is comparing each sub-block to sub-block + 1 in the
926  * previous block.
927  */
928  if (p > energy_subshort[i + 1])
929  p = p / energy_subshort[i + 1];
930  else if (energy_subshort[i + 1] > p * 10.0f)
931  p = energy_subshort[i + 1] / (p * 10.0f);
932  else
933  p = 0.0;
934  attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
935  }
936 
937  /* compare energy between sub-short blocks */
938  for (i = 0; i < (AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS; i++)
939  if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS])
940  if (attack_intensity[i] > pch->attack_threshold)
941  attacks[i / PSY_LAME_NUM_SUBBLOCKS] = (i % PSY_LAME_NUM_SUBBLOCKS) + 1;
942 
943  /* should have energy change between short blocks, in order to avoid periodic signals */
944  /* Good samples to show the effect are Trumpet test songs */
945  /* GB: tuned (1) to avoid too many short blocks for test sample TRUMPET */
946  /* RH: tuned (2) to let enough short blocks through for test sample FSOL and SNAPS */
947  for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) {
948  const float u = energy_short[i - 1];
949  const float v = energy_short[i];
950  const float m = FFMAX(u, v);
951  if (m < 40000) { /* (2) */
952  if (u < 1.7f * v && v < 1.7f * u) { /* (1) */
953  if (i == 1 && attacks[0] < attacks[i])
954  attacks[0] = 0;
955  attacks[i] = 0;
956  }
957  }
958  att_sum += attacks[i];
959  }
960 
961  if (attacks[0] <= pch->prev_attack)
962  attacks[0] = 0;
963 
964  att_sum += attacks[0];
965  /* 3 below indicates the previous attack happened in the last sub-block of the previous sequence */
966  if (pch->prev_attack == 3 || att_sum) {
967  uselongblock = 0;
968 
969  for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++)
970  if (attacks[i] && attacks[i-1])
971  attacks[i] = 0;
972  }
973  } else {
974  /* We have no lookahead info, so just use same type as the previous sequence. */
975  uselongblock = !(prev_type == EIGHT_SHORT_SEQUENCE);
976  }
977 
978  lame_apply_block_type(pch, &wi, uselongblock);
979 
980  wi.window_type[1] = prev_type;
981  if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
982 
983  wi.num_windows = 1;
984  wi.grouping[0] = 1;
985  if (wi.window_type[0] == LONG_START_SEQUENCE)
986  wi.window_shape = 0;
987  else
988  wi.window_shape = 1;
989 
990  } else {
991  int lastgrp = 0;
992 
993  wi.num_windows = 8;
994  wi.window_shape = 0;
995  for (i = 0; i < 8; i++) {
996  if (!((pch->next_grouping >> i) & 1))
997  lastgrp = i;
998  wi.grouping[lastgrp]++;
999  }
1000  }
1001 
1002  /* Determine grouping, based on the location of the first attack, and save for
1003  * the next frame.
1004  * FIXME: Move this to analysis.
1005  * TODO: Tune groupings depending on attack location
1006  * TODO: Handle more than one attack in a group
1007  */
1008  for (i = 0; i < 9; i++) {
1009  if (attacks[i]) {
1010  grouping = i;
1011  break;
1012  }
1013  }
1014  pch->next_grouping = window_grouping[grouping];
1015 
1016  pch->prev_attack = attacks[8];
1017 
1018  return wi;
1019 }
1020 
1022 {
1023  .name = "3GPP TS 26.403-inspired model",
1024  .init = psy_3gpp_init,
1025  .window = psy_lame_window,
1026  .analyze = psy_3gpp_analyze,
1027  .end = psy_3gpp_end,
1028 };
AacPsyCoeffs::spread_low
float spread_low[2]
spreading factor for low-to-high threshold spreading in long frame
Definition: aacpsy.c:143
ff_exp10
static av_always_inline double ff_exp10(double x)
Compute 10^x for floating point values.
Definition: ffmath.h:42
av_clip
#define av_clip
Definition: common.h:122
psy_3gpp_init
static av_cold int psy_3gpp_init(FFPsyContext *ctx)
Definition: aacpsy.c:301
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
psy_3gpp_window
static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type)
Tell encoder which window types to use.
Definition: aacpsy.c:408
lame_calc_attack_threshold
static float lame_calc_attack_threshold(int bitrate)
Calculate the ABR attack threshold from the above LAME psymodel table.
Definition: aacpsy.c:232
FFPsyModel::name
const char * name
Definition: psymodel.h:115
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:264
PSY_PE_FORGET_SLOPE
#define PSY_PE_FORGET_SLOPE
Definition: aacpsy.c:83
psy_lame_window
static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
Definition: aacpsy.c:883
log2f
#define log2f(x)
Definition: libm.h:409
AacPsyBand::thr
float thr
energy threshold
Definition: aacpsy.c:110
calc_thr_3gpp
static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch, const uint8_t *band_sizes, const float *coefs, const int cutoff)
Definition: aacpsy.c:600
PSY_3GPP_PE_TO_BITS
#define PSY_3GPP_PE_TO_BITS(bits)
Definition: aacpsy.c:92
AV_CODEC_FLAG_QSCALE
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:275
calc_bark
static av_cold float calc_bark(float f)
Calculate Bark value for given line.
Definition: aacpsy.c:282
AacPsyBand::nz_lines
float nz_lines
number of non-zero spectral lines
Definition: aacpsy.c:112
av_unused
#define av_unused
Definition: attributes.h:131
state
static struct @321 state
PSY_3GPP_CLIP_LO_S
#define PSY_3GPP_CLIP_LO_S
Definition: aacpsy.c:76
w
uint8_t w
Definition: llviddspenc.c:39
PSY_3GPP_AH_THR_LONG
#define PSY_3GPP_AH_THR_LONG
Definition: aacpsy.c:80
av_mallocz_array
void * av_mallocz_array(size_t nmemb, size_t size)
Definition: mem.c:190
FFPsyWindowInfo::window_shape
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:79
PSY_SNR_1DB
#define PSY_SNR_1DB
Definition: aacpsy.c:64
calc_pe_3gpp
static float calc_pe_3gpp(AacPsyBand *band)
Definition: aacpsy.c:537
AacPsyContext::min
float min
minimum allowed PE for bit factor calculation
Definition: aacpsy.c:156
PSY_3GPP_SPEND_SLOPE_L
#define PSY_3GPP_SPEND_SLOPE_L
Definition: aacpsy.c:71
PSY_3GPP_THR_SPREAD_HI
#define PSY_3GPP_THR_SPREAD_HI
constants for 3GPP AAC psychoacoustic model
Definition: aacpsy.c:44
AacPsyContext::fill_level
int fill_level
bit reservoir fill level
Definition: aacpsy.c:154
AacPsyCoeffs::spread_hi
float spread_hi[2]
spreading factor for high-to-low threshold spreading in long frame
Definition: aacpsy.c:144
quality
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about quality
Definition: rate_distortion.txt:12
lame_apply_block_type
static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
Definition: aacpsy.c:865
AacPsyCoeffs
psychoacoustic model frame type-dependent coefficients
Definition: aacpsy.c:140
lame_window_init
static av_cold void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx)
LAME psy model specific initialization.
Definition: aacpsy.c:262
PsyLamePreset::st_lrm
float st_lrm
short threshold for L, R, and M channels
Definition: aacpsy.c:174
PSY_3GPP_EN_SPREAD_HI_S
#define PSY_3GPP_EN_SPREAD_HI_S
Definition: aacpsy.c:51
PSY_3GPP_SPEND_ADD_L
#define PSY_3GPP_SPEND_ADD_L
Definition: aacpsy.c:73
AVCodecContext::flags
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:616
AacPsyCoeffs::barks
float barks
Bark value for each spectral band in long frame.
Definition: aacpsy.c:142
AacPsyChannel::prev_energy_subshort
float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT *PSY_LAME_NUM_SUBBLOCKS]
Definition: aacpsy.c:133
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
FFPsyWindowInfo
windowing related information
Definition: psymodel.h:77
ATH_ADD
#define ATH_ADD
Definition: aacpsy.c:287
AVFormatContext::bit_rate
int64_t bit_rate
Total stream bitrate in bit/s, 0 if not available.
Definition: avformat.h:1354
AacPsyContext::previous
float previous
allowed PE of the previous frame
Definition: aacpsy.c:158
ff_aac_psy_model
const FFPsyModel ff_aac_psy_model
Definition: aacpsy.c:1021
AacPsyContext::ch
AacPsyChannel * ch
Definition: aacpsy.c:162
av_cold
#define av_cold
Definition: attributes.h:90
FFPsyChannelGroup::num_ch
uint8_t num_ch
number of channels in this group
Definition: psymodel.h:70
PsyLamePreset
LAME psy model preset struct.
Definition: aacpsy.c:169
PSY_3GPP_CLIP_HI_S
#define PSY_3GPP_CLIP_HI_S
Definition: aacpsy.c:78
s
#define s(width, name)
Definition: cbs_vp9.c:257
AacPsyBand
information for single band used by 3GPP TS26.403-inspired psychoacoustic model
Definition: aacpsy.c:108
AVCodecContext::global_quality
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:602
AVFormatContext::flags
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1363
g
const char * g
Definition: vf_curves.c:117
EIGHT_SHORT_SEQUENCE
@ EIGHT_SHORT_SEQUENCE
Definition: aac.h:79
PSY_3GPP_AH_ACTIVE
@ PSY_3GPP_AH_ACTIVE
Definition: aacpsy.c:88
PsyLamePreset::quality
int quality
Quality to map the rest of the values to.
Definition: aacpsy.c:170
AacPsyBand::pe_const
float pe_const
constant part of the PE calculation
Definition: aacpsy.c:115
bits
uint8_t bits
Definition: vp3data.h:141
AacPsyContext
3GPP TS26.403-inspired psychoacoustic model specific data
Definition: aacpsy.c:151
AacPsyCoeffs::min_snr
float min_snr
minimal SNR
Definition: aacpsy.c:145
ctx
AVFormatContext * ctx
Definition: movenc.c:48
exp2f
#define exp2f(x)
Definition: libm.h:293
calc_reduction_3gpp
static float calc_reduction_3gpp(float a, float desired_pe, float pe, float active_lines)
Definition: aacpsy.c:560
window_grouping
static const uint8_t window_grouping[9]
window grouping information stored as bits (0 - new group, 1 - group continues)
Definition: aacpsy.c:400
aacpsy_mips.h
AAC_BLOCK_SIZE_SHORT
#define AAC_BLOCK_SIZE_SHORT
short block size
Definition: aacpsy.c:97
bands
static const float bands[]
Definition: af_superequalizer.c:56
ath
static av_cold float ath(float f, float add)
Calculate ATH value for given frequency.
Definition: aacpsy.c:292
calc_bit_demand
static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size, int short_window)
Definition: aacpsy.c:494
NAN
#define NAN
Definition: mathematics.h:64
f
#define f(width, name)
Definition: cbs_vp9.c:255
PSY_3GPP_AH_THR_SHORT
#define PSY_3GPP_AH_THR_SHORT
Definition: aacpsy.c:81
psy_hp_filter
static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
Definition: aacpsy.c:631
iir_filter
static float iir_filter(int in, float state[2])
IIR filter used in block switching decision.
Definition: aacpsy.c:387
psy_vbr_map
static const PsyLamePreset psy_vbr_map[]
LAME psy model preset table for constant quality.
Definition: aacpsy.c:201
AAC_CUTOFF
#define AAC_CUTOFF(s)
Definition: psymodel.h:41
FFPsyWindowInfo::window_type
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
Definition: psymodel.h:78
FFPsyBand::bits
int bits
Definition: psymodel.h:51
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
PSY_3GPP_RPEMIN
#define PSY_3GPP_RPEMIN
Definition: aacpsy.c:57
psy_abr_map
static const PsyLamePreset psy_abr_map[]
LAME psy model preset table for ABR.
Definition: aacpsy.c:180
PSY_3GPP_AH_INACTIVE
@ PSY_3GPP_AH_INACTIVE
Definition: aacpsy.c:87
PSY_3GPP_C1
#define PSY_3GPP_C1
Definition: aacpsy.c:60
AVCodecContext::bit_rate
int64_t bit_rate
the average bitrate
Definition: avcodec.h:586
av_clipf
#define av_clipf
Definition: common.h:170
psy_3gpp_end
static av_cold void psy_3gpp_end(FFPsyContext *apc)
Definition: aacpsy.c:858
PSY_3GPP_BITS_TO_PE
#define PSY_3GPP_BITS_TO_PE(bits)
Definition: aacpsy.c:91
FFPsyBand
single band psychoacoustic information
Definition: psymodel.h:50
aactab.h
FFPsyWindowInfo::grouping
int grouping[8]
window grouping (for e.g. AAC)
Definition: psymodel.h:81
AacPsyContext::max
float max
maximum allowed PE for bit factor calculation
Definition: aacpsy.c:157
exp
int8_t exp
Definition: eval.c:72
AacPsyChannel::iir_state
float iir_state[2]
hi-pass IIR filter state
Definition: aacpsy.c:128
AacPsyContext::psy_coef
AacPsyCoeffs psy_coef[2][64]
Definition: aacpsy.c:161
AacPsyBand::thr_quiet
float thr_quiet
threshold in quiet
Definition: aacpsy.c:111
AAC_BLOCK_SIZE_LONG
#define AAC_BLOCK_SIZE_LONG
long block size
Definition: aacpsy.c:96
ONLY_LONG_SEQUENCE
@ ONLY_LONG_SEQUENCE
Definition: aac.h:77
AacPsyChannel::band
AacPsyBand band[128]
bands information
Definition: aacpsy.c:124
FFMAX
#define FFMAX(a, b)
Definition: common.h:103
size
int size
Definition: twinvq_data.h:10344
calc_reduced_thr_3gpp
static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr, float reduction)
Definition: aacpsy.c:574
AacPsyCoeffs::ath
float ath
absolute threshold of hearing per bands
Definition: aacpsy.c:141
AacPsyBand::active_lines
float active_lines
number of active spectral lines
Definition: aacpsy.c:113
AAC_NUM_BLOCKS_SHORT
#define AAC_NUM_BLOCKS_SHORT
number of blocks in a short sequence
Definition: aacpsy.c:98
PSY_LAME_FIR_LEN
#define PSY_LAME_FIR_LEN
LAME psy model FIR order.
Definition: aacpsy.c:95
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
FFMIN
#define FFMIN(a, b)
Definition: common.h:105
PSY_3GPP_CLIP_LO_L
#define PSY_3GPP_CLIP_LO_L
Definition: aacpsy.c:75
attributes.h
AacPsyBand::avoid_holes
int avoid_holes
hole avoidance flag
Definition: aacpsy.c:117
PSY_3GPP_THR_SPREAD_LOW
#define PSY_3GPP_THR_SPREAD_LOW
Definition: aacpsy.c:45
bitrate
int64_t bitrate
Definition: h264_levels.c:131
PSY_3GPP_SAVE_ADD_S
#define PSY_3GPP_SAVE_ADD_S
Definition: aacpsy.c:70
PSY_3GPP_SPEND_ADD_S
#define PSY_3GPP_SPEND_ADD_S
Definition: aacpsy.c:74
AVCodecContext::channels
int channels
number of audio channels
Definition: avcodec.h:1197
in
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
Definition: audio_convert.c:326
AacPsyContext::pe
struct AacPsyContext::@9 pe
psy_fir_coeffs
static const float psy_fir_coeffs[]
LAME psy model FIR coefficient table.
Definition: aacpsy.c:219
AacPsyChannel::attack_threshold
float attack_threshold
attack threshold for this channel
Definition: aacpsy.c:132
i
int i
Definition: input.c:407
AacPsyBand::norm_fac
float norm_fac
normalization factor for linearization
Definition: aacpsy.c:116
FFPsyBand::threshold
float threshold
Definition: psymodel.h:53
PSY_3GPP_CLIP_HI_L
#define PSY_3GPP_CLIP_HI_L
Definition: aacpsy.c:77
LONG_STOP_SEQUENCE
@ LONG_STOP_SEQUENCE
Definition: aac.h:80
atanf
#define atanf(x)
Definition: libm.h:40
exp2
#define exp2(x)
Definition: libm.h:288
PSY_3GPP_RPELEV
#define PSY_3GPP_RPELEV
Definition: aacpsy.c:58
uint8_t
uint8_t
Definition: audio_convert.c:194
AacPsyBand::pe
float pe
perceptual entropy
Definition: aacpsy.c:114
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:237
AacPsyBand::energy
float energy
band energy
Definition: aacpsy.c:109
avcodec.h
FFPsyChannelGroup
psychoacoustic information for an arbitrary group of channels
Definition: psymodel.h:68
AacPsyChannel::next_window_seq
enum WindowSequence next_window_seq
window sequence to be used in the next frame
Definition: aacpsy.c:130
AacPsyChannel::win_energy
float win_energy
sliding average of channel energy
Definition: aacpsy.c:127
ret
ret
Definition: filter_design.txt:187
AacPsyChannel
single/pair channel context for psychoacoustic model
Definition: aacpsy.c:123
AacPsyContext::correction
float correction
PE correction factor.
Definition: aacpsy.c:159
FFPsyContext::model_priv_data
void * model_priv_data
psychoacoustic model implementation private data
Definition: psymodel.h:108
LONG_START_SEQUENCE
@ LONG_START_SEQUENCE
Definition: aac.h:78
PSY_3GPP_SAVE_SLOPE_S
#define PSY_3GPP_SAVE_SLOPE_S
Definition: aacpsy.c:68
PSY_3GPP_EN_SPREAD_HI_L1
#define PSY_3GPP_EN_SPREAD_HI_L1
Definition: aacpsy.c:47
AacPsyChannel::next_grouping
uint8_t next_grouping
stored grouping scheme for the next frame (in case of 8 short window sequence)
Definition: aacpsy.c:129
FFPsyBand::energy
float energy
Definition: psymodel.h:52
AVCodecContext
main external API structure.
Definition: avcodec.h:536
PSY_LAME_NUM_SUBBLOCKS
#define PSY_LAME_NUM_SUBBLOCKS
Number of sub-blocks in each short block.
Definition: aacpsy.c:99
PSY_SNR_25DB
#define PSY_SNR_25DB
Definition: aacpsy.c:65
AacPsyContext::global_quality
float global_quality
normalized global quality taken from avctx
Definition: aacpsy.c:163
psy_3gpp_analyze_channel
static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, const float *coefs, const FFPsyWindowInfo *wi)
Calculate band thresholds as suggested in 3GPP TS26.403.
Definition: aacpsy.c:652
FFPsyModel
codec-specific psychoacoustic model implementation
Definition: psymodel.h:114
AacPsyContext::frame_bits
int frame_bits
average bits per frame
Definition: aacpsy.c:153
ffmath.h
ff_psy_find_group
FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)
Determine what group a channel belongs to.
Definition: psymodel.c:73
psy_3gpp_analyze
static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
Definition: aacpsy.c:848
PSY_3GPP_C3
#define PSY_3GPP_C3
Definition: aacpsy.c:62
add
static float add(float src0, float src1)
Definition: dnn_backend_native_layer_mathbinary.c:36
PSY_3GPP_EN_SPREAD_LOW_L
#define PSY_3GPP_EN_SPREAD_LOW_L
Definition: aacpsy.c:53
PSY_3GPP_AH_NONE
@ PSY_3GPP_AH_NONE
Definition: aacpsy.c:86
AacPsyContext::chan_bitrate
int chan_bitrate
bitrate per channel
Definition: aacpsy.c:152
PSY_3GPP_SAVE_SLOPE_L
#define PSY_3GPP_SAVE_SLOPE_L
Definition: aacpsy.c:67
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
PSY_3GPP_C2
#define PSY_3GPP_C2
Definition: aacpsy.c:61
coeff
static const double coeff[2][5]
Definition: vf_owdenoise.c:73
PSY_3GPP_SPEND_SLOPE_S
#define PSY_3GPP_SPEND_SLOPE_S
Definition: aacpsy.c:72
WindowSequence
WindowSequence
Definition: aac.h:76
FFPsyBand::spread
float spread
Definition: psymodel.h:54
FF_QP2LAMBDA
#define FF_QP2LAMBDA
factor to convert from H.263 QP to lambda
Definition: avutil.h:227
int
int
Definition: ffmpeg_filter.c:170
PSY_3GPP_EN_SPREAD_LOW_S
#define PSY_3GPP_EN_SPREAD_LOW_S
Definition: aacpsy.c:55
AacPsyChannel::prev_attack
int prev_attack
attack value for the last short block in the previous sequence
Definition: aacpsy.c:134
FFPsyContext
context used by psychoacoustic model
Definition: psymodel.h:89
AacPsyChannel::prev_band
AacPsyBand prev_band[128]
bands information from the previous frame
Definition: aacpsy.c:125
psymodel.h
channel
channel
Definition: ebur128.h:39
FFPsyWindowInfo::num_windows
int num_windows
number of windows in a frame
Definition: psymodel.h:80
PSY_3GPP_SAVE_ADD_L
#define PSY_3GPP_SAVE_ADD_L
Definition: aacpsy.c:69