FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
g722enc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) CMU 1993 Computer Science, Speech Group
3  * Chengxiang Lu and Alex Hauptmann
4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5  * Copyright (c) 2009 Kenan Gillet
6  * Copyright (c) 2010 Martin Storsjo
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * G.722 ADPCM audio encoder
28  */
29 
30 #include "libavutil/avassert.h"
31 #include "avcodec.h"
32 #include "internal.h"
33 #include "g722.h"
34 #include "libavutil/common.h"
35 
36 #define FREEZE_INTERVAL 128
37 
38 /* This is an arbitrary value. Allowing insanely large values leads to strange
39  problems, so we limit it to a reasonable value */
40 #define MAX_FRAME_SIZE 32768
41 
42 /* We clip the value of avctx->trellis to prevent data type overflows and
43  undefined behavior. Using larger values is insanely slow anyway. */
44 #define MIN_TRELLIS 0
45 #define MAX_TRELLIS 16
46 
48 {
49  G722Context *c = avctx->priv_data;
50  int i;
51  for (i = 0; i < 2; i++) {
52  av_freep(&c->paths[i]);
53  av_freep(&c->node_buf[i]);
54  av_freep(&c->nodep_buf[i]);
55  }
56  return 0;
57 }
58 
60 {
61  G722Context *c = avctx->priv_data;
62  int ret;
63 
64  if (avctx->channels != 1) {
65  av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
66  return AVERROR_INVALIDDATA;
67  }
68 
69  c->band[0].scale_factor = 8;
70  c->band[1].scale_factor = 2;
71  c->prev_samples_pos = 22;
72 
73  if (avctx->trellis) {
74  int frontier = 1 << avctx->trellis;
75  int max_paths = frontier * FREEZE_INTERVAL;
76  int i;
77  for (i = 0; i < 2; i++) {
78  c->paths[i] = av_mallocz_array(max_paths, sizeof(**c->paths));
79  c->node_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->node_buf));
80  c->nodep_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->nodep_buf));
81  if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
82  ret = AVERROR(ENOMEM);
83  goto error;
84  }
85  }
86  }
87 
88  if (avctx->frame_size) {
89  /* validate frame size */
90  if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
91  int new_frame_size;
92 
93  if (avctx->frame_size == 1)
94  new_frame_size = 2;
95  else if (avctx->frame_size > MAX_FRAME_SIZE)
96  new_frame_size = MAX_FRAME_SIZE;
97  else
98  new_frame_size = avctx->frame_size - 1;
99 
100  av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
101  "allowed. Using %d instead of %d\n", new_frame_size,
102  avctx->frame_size);
103  avctx->frame_size = new_frame_size;
104  }
105  } else {
106  /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
107  a common packet size for VoIP applications */
108  avctx->frame_size = 320;
109  }
110  avctx->initial_padding = 22;
111 
112  if (avctx->trellis) {
113  /* validate trellis */
114  if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
115  int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
116  av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
117  "allowed. Using %d instead of %d\n", new_trellis,
118  avctx->trellis);
119  avctx->trellis = new_trellis;
120  }
121  }
122 
123  ff_g722dsp_init(&c->dsp);
124 
125  return 0;
126 error:
127  g722_encode_close(avctx);
128  return ret;
129 }
130 
/* Low-band decision thresholds used by encode_low(), which compares them
 * (multiplied by the band's scale factor) against the scaled magnitude of the
 * prediction error. Only the first 29 entries carry values; the array is
 * declared with 33 elements, so the remaining four are zero-initialised. */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,
     190,  233,  276,  323,
     370,  422,  473,  530,
     587,  650,  714,  786,
     858,  940, 1023, 1121,
    1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557,
    2919
};
137 
138 static inline void filter_samples(G722Context *c, const int16_t *samples,
139  int *xlow, int *xhigh)
140 {
141  int xout[2];
142  c->prev_samples[c->prev_samples_pos++] = samples[0];
143  c->prev_samples[c->prev_samples_pos++] = samples[1];
144  c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
145  *xlow = xout[0] + xout[1] >> 14;
146  *xhigh = xout[0] - xout[1] >> 14;
148  memmove(c->prev_samples,
149  c->prev_samples + c->prev_samples_pos - 22,
150  22 * sizeof(c->prev_samples[0]));
151  c->prev_samples_pos = 22;
152  }
153 }
154 
155 static inline int encode_high(const struct G722Band *state, int xhigh)
156 {
157  int diff = av_clip_int16(xhigh - state->s_predictor);
158  int pred = 141 * state->scale_factor >> 8;
159  /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
160  return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
161 }
162 
163 static inline int encode_low(const struct G722Band* state, int xlow)
164 {
165  int diff = av_clip_int16(xlow - state->s_predictor);
166  /* = diff >= 0 ? diff : -(diff + 1) */
167  int limit = diff ^ (diff >> (sizeof(diff)*8-1));
168  int i = 0;
169  limit = limit + 1 << 10;
170  if (limit > low_quant[8] * state->scale_factor)
171  i = 9;
172  while (i < 29 && limit > low_quant[i] * state->scale_factor)
173  i++;
174  return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
175 }
176 
177 static void g722_encode_trellis(G722Context *c, int trellis,
178  uint8_t *dst, int nb_samples,
179  const int16_t *samples)
180 {
181  int i, j, k;
182  int frontier = 1 << trellis;
183  struct TrellisNode **nodes[2];
184  struct TrellisNode **nodes_next[2];
185  int pathn[2] = {0, 0}, froze = -1;
186  struct TrellisPath *p[2];
187 
188  for (i = 0; i < 2; i++) {
189  nodes[i] = c->nodep_buf[i];
190  nodes_next[i] = c->nodep_buf[i] + frontier;
191  memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
192  nodes[i][0] = c->node_buf[i] + frontier;
193  nodes[i][0]->ssd = 0;
194  nodes[i][0]->path = 0;
195  nodes[i][0]->state = c->band[i];
196  }
197 
198  for (i = 0; i < nb_samples >> 1; i++) {
199  int xlow, xhigh;
200  struct TrellisNode *next[2];
201  int heap_pos[2] = {0, 0};
202 
203  for (j = 0; j < 2; j++) {
204  next[j] = c->node_buf[j] + frontier*(i & 1);
205  memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
206  }
207 
208  filter_samples(c, &samples[2*i], &xlow, &xhigh);
209 
210  for (j = 0; j < frontier && nodes[0][j]; j++) {
211  /* Only k >> 2 affects the future adaptive state, therefore testing
212  * small steps that don't change k >> 2 is useless, the original
213  * value from encode_low is better than them. Since we step k
214  * in steps of 4, make sure range is a multiple of 4, so that
215  * we don't miss the original value from encode_low. */
216  int range = j < frontier/2 ? 4 : 0;
217  struct TrellisNode *cur_node = nodes[0][j];
218 
219  int ilow = encode_low(&cur_node->state, xlow);
220 
221  for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
222  int decoded, dec_diff, pos;
223  uint32_t ssd;
224  struct TrellisNode* node;
225 
226  if (k < 0)
227  continue;
228 
229  decoded = av_clip_intp2((cur_node->state.scale_factor *
230  ff_g722_low_inv_quant6[k] >> 10)
231  + cur_node->state.s_predictor, 14);
232  dec_diff = xlow - decoded;
233 
234 #define STORE_NODE(index, UPDATE, VALUE)\
235  ssd = cur_node->ssd + dec_diff*dec_diff;\
236  /* Check for wraparound. Using 64 bit ssd counters would \
237  * be simpler, but is slower on x86 32 bit. */\
238  if (ssd < cur_node->ssd)\
239  continue;\
240  if (heap_pos[index] < frontier) {\
241  pos = heap_pos[index]++;\
242  av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
243  node = nodes_next[index][pos] = next[index]++;\
244  node->path = pathn[index]++;\
245  } else {\
246  /* Try to replace one of the leaf nodes with the new \
247  * one, but not always testing the same leaf position */\
248  pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
249  if (ssd >= nodes_next[index][pos]->ssd)\
250  continue;\
251  heap_pos[index]++;\
252  node = nodes_next[index][pos];\
253  }\
254  node->ssd = ssd;\
255  node->state = cur_node->state;\
256  UPDATE;\
257  c->paths[index][node->path].value = VALUE;\
258  c->paths[index][node->path].prev = cur_node->path;\
259  /* Sift the newly inserted node up in the heap to restore \
260  * the heap property */\
261  while (pos > 0) {\
262  int parent = (pos - 1) >> 1;\
263  if (nodes_next[index][parent]->ssd <= ssd)\
264  break;\
265  FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
266  nodes_next[index][pos]);\
267  pos = parent;\
268  }
269  STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
270  }
271  }
272 
273  for (j = 0; j < frontier && nodes[1][j]; j++) {
274  int ihigh;
275  struct TrellisNode *cur_node = nodes[1][j];
276 
277  /* We don't try to get any initial guess for ihigh via
278  * encode_high - since there's only 4 possible values, test
279  * them all. Testing all of these gives a much, much larger
280  * gain than testing a larger range around ilow. */
281  for (ihigh = 0; ihigh < 4; ihigh++) {
282  int dhigh, decoded, dec_diff, pos;
283  uint32_t ssd;
284  struct TrellisNode* node;
285 
286  dhigh = cur_node->state.scale_factor *
287  ff_g722_high_inv_quant[ihigh] >> 10;
288  decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
289  dec_diff = xhigh - decoded;
290 
291  STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
292  }
293  }
294 
295  for (j = 0; j < 2; j++) {
296  FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
297 
298  if (nodes[j][0]->ssd > (1 << 16)) {
299  for (k = 1; k < frontier && nodes[j][k]; k++)
300  nodes[j][k]->ssd -= nodes[j][0]->ssd;
301  nodes[j][0]->ssd = 0;
302  }
303  }
304 
305  if (i == froze + FREEZE_INTERVAL) {
306  p[0] = &c->paths[0][nodes[0][0]->path];
307  p[1] = &c->paths[1][nodes[1][0]->path];
308  for (j = i; j > froze; j--) {
309  dst[j] = p[1]->value << 6 | p[0]->value;
310  p[0] = &c->paths[0][p[0]->prev];
311  p[1] = &c->paths[1][p[1]->prev];
312  }
313  froze = i;
314  pathn[0] = pathn[1] = 0;
315  memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
316  memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
317  }
318  }
319 
320  p[0] = &c->paths[0][nodes[0][0]->path];
321  p[1] = &c->paths[1][nodes[1][0]->path];
322  for (j = i; j > froze; j--) {
323  dst[j] = p[1]->value << 6 | p[0]->value;
324  p[0] = &c->paths[0][p[0]->prev];
325  p[1] = &c->paths[1][p[1]->prev];
326  }
327  c->band[0] = nodes[0][0]->state;
328  c->band[1] = nodes[1][0]->state;
329 }
330 
332  const int16_t *samples)
333 {
334  int xlow, xhigh, ilow, ihigh;
335  filter_samples(c, samples, &xlow, &xhigh);
336  ihigh = encode_high(&c->band[1], xhigh);
337  ilow = encode_low (&c->band[0], xlow);
339  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
340  ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
341  *dst = ihigh << 6 | ilow;
342 }
343 
345  uint8_t *dst, int nb_samples,
346  const int16_t *samples)
347 {
348  int i;
349  for (i = 0; i < nb_samples; i += 2)
350  encode_byte(c, dst++, &samples[i]);
351 }
352 
353 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
354  const AVFrame *frame, int *got_packet_ptr)
355 {
356  G722Context *c = avctx->priv_data;
357  const int16_t *samples = (const int16_t *)frame->data[0];
358  int nb_samples, out_size, ret;
359 
360  out_size = (frame->nb_samples + 1) / 2;
361  if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0)
362  return ret;
363 
364  nb_samples = frame->nb_samples - (frame->nb_samples & 1);
365 
366  if (avctx->trellis)
367  g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
368  else
369  g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
370 
371  /* handle last frame with odd frame_size */
372  if (nb_samples < frame->nb_samples) {
373  int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
374  encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
375  }
376 
377  if (frame->pts != AV_NOPTS_VALUE)
378  avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
379  *got_packet_ptr = 1;
380  return 0;
381 }
382 
384  .name = "g722",
385  .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
386  .type = AVMEDIA_TYPE_AUDIO,
388  .priv_data_size = sizeof(G722Context),
390  .close = g722_encode_close,
391  .encode2 = g722_encode_frame,
392  .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
393  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
395 };