FFmpeg
af_atempo.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012 Pavel Koshevoy <pkoshevoy at gmail dot com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * tempo scaling audio filter -- an implementation of WSOLA algorithm
24  *
25  * Based on MIT licensed yaeAudioTempoFilter.h and yaeAudioFragment.h
26  * from Apprentice Video player by Pavel Koshevoy.
27  * https://sourceforge.net/projects/apprenticevideo/
28  *
29  * An explanation of SOLA algorithm is available at
30  * http://www.surina.net/article/time-and-pitch-scaling.html
31  *
32  * WSOLA is very similar to SOLA, only one major difference exists between
33  * these algorithms. SOLA shifts audio fragments along the output stream,
34  * whereas WSOLA shifts audio fragments along the input stream.
35  *
36  * The advantage of WSOLA algorithm is that the overlap region size is
37  * always the same, therefore the blending function is constant and
38  * can be precomputed.
39  */
40 
41 #include <float.h>
42 #include "libavutil/avassert.h"
43 #include "libavutil/avstring.h"
45 #include "libavutil/eval.h"
46 #include "libavutil/opt.h"
47 #include "libavutil/samplefmt.h"
48 #include "libavutil/tx.h"
49 #include "avfilter.h"
50 #include "audio.h"
51 #include "internal.h"
52 
53 /**
54  * A fragment of audio waveform
55  */
56 typedef struct AudioFragment {
57  // index of the first sample of this fragment in the overall waveform;
58  // 0: input sample position
59  // 1: output sample position
60  int64_t position[2];
61 
62  // original packed multi-channel samples:
63  uint8_t *data;
64 
65  // number of samples in this fragment:
66  int nsamples;
67 
68  // rDFT transform of the down-mixed mono fragment, used for
69  // fast waveform alignment via correlation in frequency domain:
70  float *xdat_in;
71  float *xdat;
73 
74 /**
75  * Filter state machine states
76  */
77 typedef enum {
83 } FilterState;
84 
85 /**
86  * Filter state machine
87  */
88 typedef struct ATempoContext {
89  const AVClass *class;
90 
91  // ring-buffer of input samples, necessary because sometimes
92  // input fragment position may be adjusted backwards:
93  uint8_t *buffer;
94 
95  // ring-buffer maximum capacity, expressed in sample rate time base:
96  int ring;
97 
98  // ring-buffer house keeping:
99  int size;
100  int head;
101  int tail;
102 
103  // 0: input sample position corresponding to the ring buffer tail
104  // 1: output sample position
105  int64_t position[2];
106 
107  // first input timestamp, all other timestamps are offset by this one
108  int64_t start_pts;
109 
110  // sample format:
112 
113  // number of channels:
114  int channels;
115 
116  // row of bytes to skip from one sample to next, across multiple channels;
117  // stride = (number-of-channels * bits-per-sample-per-channel) / 8
118  int stride;
119 
120  // fragment window size, power-of-two integer:
121  int window;
122 
123  // Hann window coefficients, for feathering
124  // (blending) the overlapping fragment region:
125  float *hann;
126 
127  // tempo scaling factor:
128  double tempo;
129 
130  // a snapshot of previous fragment input and output position values
131  // captured when the tempo scale factor was set most recently:
132  int64_t origin[2];
133 
134  // current/previous fragment ring-buffer:
136 
137  // current fragment index:
138  uint64_t nfrag;
139 
140  // current state:
142 
143  // for fast correlation calculation in frequency domain:
148  float *correlation;
149 
150  // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
152  uint8_t *dst;
153  uint8_t *dst_end;
154  uint64_t nsamples_in;
155  uint64_t nsamples_out;
156 } ATempoContext;
157 
158 #define YAE_ATEMPO_MIN 0.5
159 #define YAE_ATEMPO_MAX 100.0
160 
161 #define OFFSET(x) offsetof(ATempoContext, x)
162 
163 static const AVOption atempo_options[] = {
164  { "tempo", "set tempo scale factor",
165  OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 },
169  { NULL }
170 };
171 
172 AVFILTER_DEFINE_CLASS(atempo);
173 
175 {
176  return &atempo->frag[atempo->nfrag % 2];
177 }
178 
180 {
181  return &atempo->frag[(atempo->nfrag + 1) % 2];
182 }
183 
184 /**
185  * Reset filter to initial state, do not deallocate existing local buffers.
186  */
187 static void yae_clear(ATempoContext *atempo)
188 {
189  atempo->size = 0;
190  atempo->head = 0;
191  atempo->tail = 0;
192 
193  atempo->nfrag = 0;
194  atempo->state = YAE_LOAD_FRAGMENT;
195  atempo->start_pts = AV_NOPTS_VALUE;
196 
197  atempo->position[0] = 0;
198  atempo->position[1] = 0;
199 
200  atempo->origin[0] = 0;
201  atempo->origin[1] = 0;
202 
203  atempo->frag[0].position[0] = 0;
204  atempo->frag[0].position[1] = 0;
205  atempo->frag[0].nsamples = 0;
206 
207  atempo->frag[1].position[0] = 0;
208  atempo->frag[1].position[1] = 0;
209  atempo->frag[1].nsamples = 0;
210 
211  // shift left position of 1st fragment by half a window
212  // so that no re-normalization would be required for
213  // the left half of the 1st fragment:
214  atempo->frag[0].position[0] = -(int64_t)(atempo->window / 2);
215  atempo->frag[0].position[1] = -(int64_t)(atempo->window / 2);
216 
217  av_frame_free(&atempo->dst_buffer);
218  atempo->dst = NULL;
219  atempo->dst_end = NULL;
220 
221  atempo->nsamples_in = 0;
222  atempo->nsamples_out = 0;
223 }
224 
225 /**
226  * Reset filter to initial state and deallocate all buffers.
227  */
228 static void yae_release_buffers(ATempoContext *atempo)
229 {
230  yae_clear(atempo);
231 
232  av_freep(&atempo->frag[0].data);
233  av_freep(&atempo->frag[1].data);
234  av_freep(&atempo->frag[0].xdat_in);
235  av_freep(&atempo->frag[1].xdat_in);
236  av_freep(&atempo->frag[0].xdat);
237  av_freep(&atempo->frag[1].xdat);
238 
239  av_freep(&atempo->buffer);
240  av_freep(&atempo->hann);
241  av_freep(&atempo->correlation_in);
242  av_freep(&atempo->correlation);
243 
244  av_tx_uninit(&atempo->real_to_complex);
245  av_tx_uninit(&atempo->complex_to_real);
246 }
247 
/*
 * Replace the buffer held in `field` with a fresh av_calloc() allocation of
 * `field_size` bytes. av_realloc is not aligned enough; fortunately, the old
 * contents do not need to be preserved, so the buffer is simply freed first.
 *
 * NOTE: this macro contains hidden control flow. It expects a local variable
 * named `atempo` in the enclosing function and, when the allocation fails,
 * releases all filter buffers and makes the enclosing function return
 * AVERROR(ENOMEM).
 */
#define RE_MALLOC_OR_FAIL(field, field_size)                    \
    do {                                                        \
        av_freep(&field);                                       \
        if (!(field = av_calloc(field_size, 1))) {              \
            yae_release_buffers(atempo);                        \
            return AVERROR(ENOMEM);                             \
        }                                                       \
    } while (0)
259 
260 /**
261  * Prepare filter for processing audio data of given format,
262  * sample rate and number of channels.
263  */
264 static int yae_reset(ATempoContext *atempo,
265  enum AVSampleFormat format,
266  int sample_rate,
267  int channels)
268 {
269  const int sample_size = av_get_bytes_per_sample(format);
270  uint32_t nlevels = 0;
271  float scale = 1.f, iscale = 1.f;
272  uint32_t pot;
273  int i;
274 
275  atempo->format = format;
276  atempo->channels = channels;
277  atempo->stride = sample_size * channels;
278 
279  // pick a segment window size:
280  atempo->window = sample_rate / 24;
281 
282  // adjust window size to be a power-of-two integer:
283  nlevels = av_log2(atempo->window);
284  pot = 1 << nlevels;
285  av_assert0(pot <= atempo->window);
286 
287  if (pot < atempo->window) {
288  atempo->window = pot * 2;
289  nlevels++;
290  }
291 
292  // initialize audio fragment buffers:
293  RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * atempo->stride);
294  RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * atempo->stride);
295  RE_MALLOC_OR_FAIL(atempo->frag[0].xdat_in, (atempo->window + 1) * sizeof(AVComplexFloat));
296  RE_MALLOC_OR_FAIL(atempo->frag[1].xdat_in, (atempo->window + 1) * sizeof(AVComplexFloat));
297  RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, (atempo->window + 1) * sizeof(AVComplexFloat));
298  RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, (atempo->window + 1) * sizeof(AVComplexFloat));
299 
300  // initialize rDFT contexts:
301  av_tx_uninit(&atempo->real_to_complex);
302  av_tx_uninit(&atempo->complex_to_real);
303 
304  av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn, AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0);
305  if (!atempo->real_to_complex) {
306  yae_release_buffers(atempo);
307  return AVERROR(ENOMEM);
308  }
309 
310  av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn, AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0);
311  if (!atempo->complex_to_real) {
312  yae_release_buffers(atempo);
313  return AVERROR(ENOMEM);
314  }
315 
316  RE_MALLOC_OR_FAIL(atempo->correlation_in, (atempo->window + 1) * sizeof(AVComplexFloat));
317  RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(AVComplexFloat));
318 
319  atempo->ring = atempo->window * 3;
320  RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);
321 
322  // initialize the Hann window function:
323  RE_MALLOC_OR_FAIL(atempo->hann, atempo->window * sizeof(float));
324 
325  for (i = 0; i < atempo->window; i++) {
326  double t = (double)i / (double)(atempo->window - 1);
327  double h = 0.5 * (1.0 - cos(2.0 * M_PI * t));
328  atempo->hann[i] = (float)h;
329  }
330 
331  yae_clear(atempo);
332  return 0;
333 }
334 
336 {
337  const AudioFragment *prev;
338  ATempoContext *atempo = ctx->priv;
339 
340  prev = yae_prev_frag(atempo);
341  atempo->origin[0] = prev->position[0] + atempo->window / 2;
342  atempo->origin[1] = prev->position[1] + atempo->window / 2;
343  return 0;
344 }
345 
346 /**
347  * A helper macro for initializing complex data buffer with scalar data
348  * of a given type.
 *
 * The macro is expanded inside a function that provides these names:
 *   - src    : byte cursor over the packed samples of the fragment; it is
 *              advanced past every sample that is read,
 *   - frag   : the fragment whose xdat_in buffer receives the result,
 *   - atempo : the filter context (only the channel count is read).
 *
 * It consumes frag->nsamples * atempo->channels samples of scalar_type and
 * writes exactly one float per multi-channel sample into frag->xdat_in:
 *   - with a single channel every sample is simply converted to float;
 *   - with several channels the value of the channel with the largest
 *     magnitude is kept. Magnitudes are capped at scalar_max for the
 *     comparison only, the stored value is the uncapped signed sample,
 *     and on a tie the earlier channel wins (strict '<' comparison).
 *
 * @param scalar_type  C type of one stored sample
 * @param scalar_max   cap applied to magnitudes while comparing channels
349  */
350 #define yae_init_xdat(scalar_type, scalar_max) \
351  do { \
352  const uint8_t *src_end = src + \
353  frag->nsamples * atempo->channels * sizeof(scalar_type); \
354  \
355  float *xdat = frag->xdat_in; \
356  scalar_type tmp; \
357  \
358  if (atempo->channels == 1) { \
359  for (; src < src_end; xdat++) { \
360  tmp = *(const scalar_type *)src; \
361  src += sizeof(scalar_type); \
362  \
363  *xdat = (float)tmp; \
364  } \
365  } else { \
366  float s, max, ti, si; \
367  int i; \
368  \
369  for (; src < src_end; xdat++) { \
370  tmp = *(const scalar_type *)src; \
371  src += sizeof(scalar_type); \
372  \
373  max = (float)tmp; \
374  s = FFMIN((float)scalar_max, \
375  (float)fabsf(max)); \
376  \
377  for (i = 1; i < atempo->channels; i++) { \
378  tmp = *(const scalar_type *)src; \
379  src += sizeof(scalar_type); \
380  \
381  ti = (float)tmp; \
382  si = FFMIN((float)scalar_max, \
383  (float)fabsf(ti)); \
384  \
385  if (s < si) { \
386  s = si; \
387  max = ti; \
388  } \
389  } \
390  \
391  *xdat = max; \
392  } \
393  } \
394  } while (0)
395 
396 /**
397  * Initialize complex data buffer of a given audio fragment
398  * with down-mixed mono data of appropriate scalar type.
399  */
400 static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
401 {
402  // shortcuts:
403  const uint8_t *src = frag->data;
404 
405  // init complex data buffer used for FFT and Correlation:
406  memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * (atempo->window + 1));
407 
408  if (atempo->format == AV_SAMPLE_FMT_U8) {
409  yae_init_xdat(uint8_t, 127);
410  } else if (atempo->format == AV_SAMPLE_FMT_S16) {
411  yae_init_xdat(int16_t, 32767);
412  } else if (atempo->format == AV_SAMPLE_FMT_S32) {
413  yae_init_xdat(int, 2147483647);
414  } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
415  yae_init_xdat(float, 1);
416  } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
417  yae_init_xdat(double, 1);
418  }
419 }
420 
421 /**
422  * Populate the internal data buffer on as-needed basis.
423  *
424  * @return
425  * 0 if requested data was already available or was successfully loaded,
426  * AVERROR(EAGAIN) if more input data is required.
427  */
428 static int yae_load_data(ATempoContext *atempo,
429  const uint8_t **src_ref,
430  const uint8_t *src_end,
431  int64_t stop_here)
432 {
433  // shortcut:
434  const uint8_t *src = *src_ref;
435  const int read_size = stop_here - atempo->position[0];
436 
437  if (stop_here <= atempo->position[0]) {
438  return 0;
439  }
440 
441  // samples are not expected to be skipped, unless tempo is greater than 2:
442  av_assert0(read_size <= atempo->ring || atempo->tempo > 2.0);
443 
444  while (atempo->position[0] < stop_here && src < src_end) {
445  int src_samples = (src_end - src) / atempo->stride;
446 
447  // load data piece-wise, in order to avoid complicating the logic:
448  int nsamples = FFMIN(read_size, src_samples);
449  int na;
450  int nb;
451 
452  nsamples = FFMIN(nsamples, atempo->ring);
453  na = FFMIN(nsamples, atempo->ring - atempo->tail);
454  nb = FFMIN(nsamples - na, atempo->ring);
455 
456  if (na) {
457  uint8_t *a = atempo->buffer + atempo->tail * atempo->stride;
458  memcpy(a, src, na * atempo->stride);
459 
460  src += na * atempo->stride;
461  atempo->position[0] += na;
462 
463  atempo->size = FFMIN(atempo->size + na, atempo->ring);
464  atempo->tail = (atempo->tail + na) % atempo->ring;
465  atempo->head =
466  atempo->size < atempo->ring ?
467  atempo->tail - atempo->size :
468  atempo->tail;
469  }
470 
471  if (nb) {
472  uint8_t *b = atempo->buffer;
473  memcpy(b, src, nb * atempo->stride);
474 
475  src += nb * atempo->stride;
476  atempo->position[0] += nb;
477 
478  atempo->size = FFMIN(atempo->size + nb, atempo->ring);
479  atempo->tail = (atempo->tail + nb) % atempo->ring;
480  atempo->head =
481  atempo->size < atempo->ring ?
482  atempo->tail - atempo->size :
483  atempo->tail;
484  }
485  }
486 
487  // pass back the updated source buffer pointer:
488  *src_ref = src;
489 
490  // sanity check:
491  av_assert0(atempo->position[0] <= stop_here);
492 
493  return atempo->position[0] == stop_here ? 0 : AVERROR(EAGAIN);
494 }
495 
496 /**
497  * Populate current audio fragment data buffer.
498  *
499  * @return
500  * 0 when the fragment is ready,
501  * AVERROR(EAGAIN) if more input data is required.
502  */
503 static int yae_load_frag(ATempoContext *atempo,
504  const uint8_t **src_ref,
505  const uint8_t *src_end)
506 {
507  // shortcuts:
508  AudioFragment *frag = yae_curr_frag(atempo);
509  uint8_t *dst;
510  int64_t missing, start, zeros;
511  uint32_t nsamples;
512  const uint8_t *a, *b;
513  int i0, i1, n0, n1, na, nb;
514 
515  int64_t stop_here = frag->position[0] + atempo->window;
516  if (src_ref && yae_load_data(atempo, src_ref, src_end, stop_here) != 0) {
517  return AVERROR(EAGAIN);
518  }
519 
520  // calculate the number of samples we don't have:
521  missing =
522  stop_here > atempo->position[0] ?
523  stop_here - atempo->position[0] : 0;
524 
525  nsamples =
526  missing < (int64_t)atempo->window ?
527  (uint32_t)(atempo->window - missing) : 0;
528 
529  // setup the output buffer:
530  frag->nsamples = nsamples;
531  dst = frag->data;
532 
533  start = atempo->position[0] - atempo->size;
534  zeros = 0;
535 
536  if (frag->position[0] < start) {
537  // what we don't have we substitute with zeros:
538  zeros = FFMIN(start - frag->position[0], (int64_t)nsamples);
539  av_assert0(zeros != nsamples);
540 
541  memset(dst, 0, zeros * atempo->stride);
542  dst += zeros * atempo->stride;
543  }
544 
545  if (zeros == nsamples) {
546  return 0;
547  }
548 
549  // get the remaining data from the ring buffer:
550  na = (atempo->head < atempo->tail ?
551  atempo->tail - atempo->head :
552  atempo->ring - atempo->head);
553 
554  nb = atempo->head < atempo->tail ? 0 : atempo->tail;
555 
556  // sanity check:
557  av_assert0(nsamples <= zeros + na + nb);
558 
559  a = atempo->buffer + atempo->head * atempo->stride;
560  b = atempo->buffer;
561 
562  i0 = frag->position[0] + zeros - start;
563  i1 = i0 < na ? 0 : i0 - na;
564 
565  n0 = i0 < na ? FFMIN(na - i0, (int)(nsamples - zeros)) : 0;
566  n1 = nsamples - zeros - n0;
567 
568  if (n0) {
569  memcpy(dst, a + i0 * atempo->stride, n0 * atempo->stride);
570  dst += n0 * atempo->stride;
571  }
572 
573  if (n1) {
574  memcpy(dst, b + i1 * atempo->stride, n1 * atempo->stride);
575  }
576 
577  return 0;
578 }
579 
580 /**
581  * Prepare for loading next audio fragment.
582  */
584 {
585  const double fragment_step = atempo->tempo * (double)(atempo->window / 2);
586 
587  const AudioFragment *prev;
588  AudioFragment *frag;
589 
590  atempo->nfrag++;
591  prev = yae_prev_frag(atempo);
592  frag = yae_curr_frag(atempo);
593 
594  frag->position[0] = prev->position[0] + (int64_t)fragment_step;
595  frag->position[1] = prev->position[1] + atempo->window / 2;
596  frag->nsamples = 0;
597 }
598 
599 /**
600  * Calculate cross-correlation via rDFT.
601  *
602  * Multiply two vectors of complex numbers (result of real_to_complex rDFT)
603  * and transform back via complex_to_real rDFT.
604  */
605 static void yae_xcorr_via_rdft(float *xcorr_in,
606  float *xcorr,
607  AVTXContext *complex_to_real,
608  av_tx_fn c2r_fn,
609  const AVComplexFloat *xa,
610  const AVComplexFloat *xb,
611  const int window)
612 {
613  AVComplexFloat *xc = (AVComplexFloat *)xcorr_in;
614  int i;
615 
616  for (i = 0; i <= window; i++, xa++, xb++, xc++) {
617  xc->re = (xa->re * xb->re + xa->im * xb->im);
618  xc->im = (xa->im * xb->re - xa->re * xb->im);
619  }
620 
621  // apply inverse rDFT:
622  c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(*xc));
623 }
624 
625 /**
626  * Calculate alignment offset for given fragment
627  * relative to the previous fragment.
628  *
629  * @return alignment offset of current fragment relative to previous.
630  */
631 static int yae_align(AudioFragment *frag,
632  const AudioFragment *prev,
633  const int window,
634  const int delta_max,
635  const int drift,
636  float *correlation_in,
637  float *correlation,
638  AVTXContext *complex_to_real,
639  av_tx_fn c2r_fn)
640 {
641  int best_offset = -drift;
642  float best_metric = -FLT_MAX;
643  float *xcorr;
644 
645  int i0;
646  int i1;
647  int i;
648 
649  yae_xcorr_via_rdft(correlation_in,
650  correlation,
651  complex_to_real,
652  c2r_fn,
653  (const AVComplexFloat *)prev->xdat,
654  (const AVComplexFloat *)frag->xdat,
655  window);
656 
657  // identify search window boundaries:
658  i0 = FFMAX(window / 2 - delta_max - drift, 0);
659  i0 = FFMIN(i0, window);
660 
661  i1 = FFMIN(window / 2 + delta_max - drift, window - window / 16);
662  i1 = FFMAX(i1, 0);
663 
664  // identify cross-correlation peaks within search window:
665  xcorr = correlation + i0;
666 
667  for (i = i0; i < i1; i++, xcorr++) {
668  float metric = *xcorr;
669 
670  // normalize:
671  float drifti = (float)(drift + i);
672  metric *= drifti * (float)(i - i0) * (float)(i1 - i);
673 
674  if (metric > best_metric) {
675  best_metric = metric;
676  best_offset = i - window / 2;
677  }
678  }
679 
680  return best_offset;
681 }
682 
683 /**
684  * Adjust current fragment position for better alignment
685  * with previous fragment.
686  *
687  * @return alignment correction.
688  */
690 {
691  const AudioFragment *prev = yae_prev_frag(atempo);
692  AudioFragment *frag = yae_curr_frag(atempo);
693 
694  const double prev_output_position =
695  (double)(prev->position[1] - atempo->origin[1] + atempo->window / 2) *
696  atempo->tempo;
697 
698  const double ideal_output_position =
699  (double)(prev->position[0] - atempo->origin[0] + atempo->window / 2);
700 
701  const int drift = (int)(prev_output_position - ideal_output_position);
702 
703  const int delta_max = atempo->window / 2;
704  const int correction = yae_align(frag,
705  prev,
706  atempo->window,
707  delta_max,
708  drift,
709  atempo->correlation_in,
710  atempo->correlation,
711  atempo->complex_to_real,
712  atempo->c2r_fn);
713 
714  if (correction) {
715  // adjust fragment position:
716  frag->position[0] -= correction;
717 
718  // clear so that the fragment can be reloaded:
719  frag->nsamples = 0;
720  }
721 
722  return correction;
723 }
724 
725 /**
726  * A helper macro for blending the overlap region of previous
727  * and current audio fragment.
 *
 * The macro is expanded inside a function that provides these names:
 *   - a, b     : byte pointers to the first overlapping sample of the
 *                previous and of the current fragment,
 *   - wa, wb   : pointers to the blending weights applied to a and b;
 *                both are advanced once per multi-channel sample,
 *   - dst      : output cursor; on exit it points just past the last
 *                value that was written,
 *   - dst_end  : end of the output buffer,
 *   - overlap  : number of multi-channel samples to blend,
 *   - frag     : the current fragment,
 *   - atempo   : the filter context; atempo->position[1] is incremented
 *                for every multi-channel sample that is written.
 *
 * The loop stops as soon as overlap samples were produced or the output
 * buffer is full, whichever comes first. Every channel value is computed
 * in float as a * wa + b * wb and converted back to scalar_type; where
 * frag->position[0] + i is negative the sample of the previous fragment
 * is copied unchanged instead.
 *
 * @param scalar_type  C type of one stored sample
728  */
729 #define yae_blend(scalar_type) \
730  do { \
731  const scalar_type *aaa = (const scalar_type *)a; \
732  const scalar_type *bbb = (const scalar_type *)b; \
733  \
734  scalar_type *out = (scalar_type *)dst; \
735  scalar_type *out_end = (scalar_type *)dst_end; \
736  int64_t i; \
737  \
738  for (i = 0; i < overlap && out < out_end; \
739  i++, atempo->position[1]++, wa++, wb++) { \
740  float w0 = *wa; \
741  float w1 = *wb; \
742  int j; \
743  \
744  for (j = 0; j < atempo->channels; \
745  j++, aaa++, bbb++, out++) { \
746  float t0 = (float)*aaa; \
747  float t1 = (float)*bbb; \
748  \
749  *out = \
750  frag->position[0] + i < 0 ? \
751  *aaa : \
752  (scalar_type)(t0 * w0 + t1 * w1); \
753  } \
754  } \
755  dst = (uint8_t *)out; \
756  } while (0)
757 
758 /**
759  * Blend the overlap region of previous and current audio fragment
760  * and output the results to the given destination buffer.
761  *
762  * @return
763  * 0 if the overlap region was completely stored in the dst buffer,
764  * AVERROR(EAGAIN) if more destination buffer space is required.
765  */
766 static int yae_overlap_add(ATempoContext *atempo,
767  uint8_t **dst_ref,
768  uint8_t *dst_end)
769 {
770  // shortcuts:
771  const AudioFragment *prev = yae_prev_frag(atempo);
772  const AudioFragment *frag = yae_curr_frag(atempo);
773 
774  const int64_t start_here = FFMAX(atempo->position[1],
775  frag->position[1]);
776 
777  const int64_t stop_here = FFMIN(prev->position[1] + prev->nsamples,
778  frag->position[1] + frag->nsamples);
779 
780  const int64_t overlap = stop_here - start_here;
781 
782  const int64_t ia = start_here - prev->position[1];
783  const int64_t ib = start_here - frag->position[1];
784 
785  const float *wa = atempo->hann + ia;
786  const float *wb = atempo->hann + ib;
787 
788  const uint8_t *a = prev->data + ia * atempo->stride;
789  const uint8_t *b = frag->data + ib * atempo->stride;
790 
791  uint8_t *dst = *dst_ref;
792 
793  av_assert0(start_here <= stop_here &&
794  frag->position[1] <= start_here &&
795  overlap <= frag->nsamples);
796 
797  if (atempo->format == AV_SAMPLE_FMT_U8) {
798  yae_blend(uint8_t);
799  } else if (atempo->format == AV_SAMPLE_FMT_S16) {
800  yae_blend(int16_t);
801  } else if (atempo->format == AV_SAMPLE_FMT_S32) {
802  yae_blend(int);
803  } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
804  yae_blend(float);
805  } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
806  yae_blend(double);
807  }
808 
809  // pass-back the updated destination buffer pointer:
810  *dst_ref = dst;
811 
812  return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
813 }
814 
815 /**
816  * Feed as much data to the filter as it is able to consume
817  * and receive as much processed data in the destination buffer
818  * as it is able to produce or store.
819  */
820 static void
822  const uint8_t **src_ref,
823  const uint8_t *src_end,
824  uint8_t **dst_ref,
825  uint8_t *dst_end)
826 {
827  while (1) {
828  if (atempo->state == YAE_LOAD_FRAGMENT) {
829  // load additional data for the current fragment:
830  if (yae_load_frag(atempo, src_ref, src_end) != 0) {
831  break;
832  }
833 
834  // down-mix to mono:
835  yae_downmix(atempo, yae_curr_frag(atempo));
836 
837  // apply rDFT:
838  atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
839 
840  // must load the second fragment before alignment can start:
841  if (!atempo->nfrag) {
842  yae_advance_to_next_frag(atempo);
843  continue;
844  }
845 
846  atempo->state = YAE_ADJUST_POSITION;
847  }
848 
849  if (atempo->state == YAE_ADJUST_POSITION) {
850  // adjust position for better alignment:
851  if (yae_adjust_position(atempo)) {
852  // reload the fragment at the corrected position, so that the
853  // Hann window blending would not require normalization:
854  atempo->state = YAE_RELOAD_FRAGMENT;
855  } else {
856  atempo->state = YAE_OUTPUT_OVERLAP_ADD;
857  }
858  }
859 
860  if (atempo->state == YAE_RELOAD_FRAGMENT) {
861  // load additional data if necessary due to position adjustment:
862  if (yae_load_frag(atempo, src_ref, src_end) != 0) {
863  break;
864  }
865 
866  // down-mix to mono:
867  yae_downmix(atempo, yae_curr_frag(atempo));
868 
869  // apply rDFT:
870  atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
871 
872  atempo->state = YAE_OUTPUT_OVERLAP_ADD;
873  }
874 
875  if (atempo->state == YAE_OUTPUT_OVERLAP_ADD) {
876  // overlap-add and output the result:
877  if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
878  break;
879  }
880 
881  // advance to the next fragment, repeat:
882  yae_advance_to_next_frag(atempo);
883  atempo->state = YAE_LOAD_FRAGMENT;
884  }
885  }
886 }
887 
888 /**
889  * Flush any buffered data from the filter.
890  *
891  * @return
892  * 0 if all data was completely stored in the dst buffer,
893  * AVERROR(EAGAIN) if more destination buffer space is required.
894  */
895 static int yae_flush(ATempoContext *atempo,
896  uint8_t **dst_ref,
897  uint8_t *dst_end)
898 {
899  AudioFragment *frag = yae_curr_frag(atempo);
900  int64_t overlap_end;
901  int64_t start_here;
902  int64_t stop_here;
903  int64_t offset;
904 
905  const uint8_t *src;
906  uint8_t *dst;
907 
908  int src_size;
909  int dst_size;
910  int nbytes;
911 
912  atempo->state = YAE_FLUSH_OUTPUT;
913 
914  if (!atempo->nfrag) {
915  // there is nothing to flush:
916  return 0;
917  }
918 
919  if (atempo->position[0] == frag->position[0] + frag->nsamples &&
920  atempo->position[1] == frag->position[1] + frag->nsamples) {
921  // the current fragment is already flushed:
922  return 0;
923  }
924 
925  if (frag->position[0] + frag->nsamples < atempo->position[0]) {
926  // finish loading the current (possibly partial) fragment:
927  yae_load_frag(atempo, NULL, NULL);
928 
929  if (atempo->nfrag) {
930  // down-mix to mono:
931  yae_downmix(atempo, frag);
932 
933  // apply rDFT:
934  atempo->r2c_fn(atempo->real_to_complex, frag->xdat, frag->xdat_in, sizeof(float));
935 
936  // align current fragment to previous fragment:
937  if (yae_adjust_position(atempo)) {
938  // reload the current fragment due to adjusted position:
939  yae_load_frag(atempo, NULL, NULL);
940  }
941  }
942  }
943 
944  // flush the overlap region:
945  overlap_end = frag->position[1] + FFMIN(atempo->window / 2,
946  frag->nsamples);
947 
948  while (atempo->position[1] < overlap_end) {
949  if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
950  return AVERROR(EAGAIN);
951  }
952  }
953 
954  // check whether all of the input samples have been consumed:
955  if (frag->position[0] + frag->nsamples < atempo->position[0]) {
956  yae_advance_to_next_frag(atempo);
957  return AVERROR(EAGAIN);
958  }
959 
960  // flush the remainder of the current fragment:
961  start_here = FFMAX(atempo->position[1], overlap_end);
962  stop_here = frag->position[1] + frag->nsamples;
963  offset = start_here - frag->position[1];
964  av_assert0(start_here <= stop_here && frag->position[1] <= start_here);
965 
966  src = frag->data + offset * atempo->stride;
967  dst = (uint8_t *)*dst_ref;
968 
969  src_size = (int)(stop_here - start_here) * atempo->stride;
970  dst_size = dst_end - dst;
971  nbytes = FFMIN(src_size, dst_size);
972 
973  memcpy(dst, src, nbytes);
974  dst += nbytes;
975 
976  atempo->position[1] += (nbytes / atempo->stride);
977 
978  // pass-back the updated destination buffer pointer:
979  *dst_ref = (uint8_t *)dst;
980 
981  return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
982 }
983 
985 {
986  ATempoContext *atempo = ctx->priv;
987  atempo->format = AV_SAMPLE_FMT_NONE;
988  atempo->state = YAE_LOAD_FRAGMENT;
989  return 0;
990 }
991 
993 {
994  ATempoContext *atempo = ctx->priv;
995  yae_release_buffers(atempo);
996 }
997 
998  // WSOLA necessitates an internal sliding window ring buffer
999  // for incoming audio stream.
1000  //
1001  // Planar sample formats are too cumbersome to store in a ring buffer,
1002  // therefore planar sample formats are not supported.
1003  //
1004  static const enum AVSampleFormat sample_fmts[] = {
1011  };
1012 
1014 {
1015  AVFilterContext *ctx = inlink->dst;
1016  ATempoContext *atempo = ctx->priv;
1017 
1018  enum AVSampleFormat format = inlink->format;
1019  int sample_rate = (int)inlink->sample_rate;
1020 
1021  return yae_reset(atempo, format, sample_rate, inlink->ch_layout.nb_channels);
1022 }
1023 
1024 static int push_samples(ATempoContext *atempo,
1025  AVFilterLink *outlink,
1026  int n_out)
1027 {
1028  int ret;
1029 
1030  atempo->dst_buffer->sample_rate = outlink->sample_rate;
1031  atempo->dst_buffer->nb_samples = n_out;
1032 
1033  // adjust the PTS:
1034  atempo->dst_buffer->pts = atempo->start_pts +
1035  av_rescale_q(atempo->nsamples_out,
1036  (AVRational){ 1, outlink->sample_rate },
1037  outlink->time_base);
1038 
1039  ret = ff_filter_frame(outlink, atempo->dst_buffer);
1040  atempo->dst_buffer = NULL;
1041  atempo->dst = NULL;
1042  atempo->dst_end = NULL;
1043  if (ret < 0)
1044  return ret;
1045 
1046  atempo->nsamples_out += n_out;
1047  return 0;
1048 }
1049 
1050 static int filter_frame(AVFilterLink *inlink, AVFrame *src_buffer)
1051 {
1052  AVFilterContext *ctx = inlink->dst;
1053  ATempoContext *atempo = ctx->priv;
1054  AVFilterLink *outlink = ctx->outputs[0];
1055 
1056  int ret = 0;
1057  int n_in = src_buffer->nb_samples;
1058  int n_out = (int)(0.5 + ((double)n_in) / atempo->tempo);
1059 
1060  const uint8_t *src = src_buffer->data[0];
1061  const uint8_t *src_end = src + n_in * atempo->stride;
1062 
1063  if (atempo->start_pts == AV_NOPTS_VALUE)
1064  atempo->start_pts = av_rescale_q(src_buffer->pts,
1065  inlink->time_base,
1066  outlink->time_base);
1067 
1068  while (src < src_end) {
1069  if (!atempo->dst_buffer) {
1070  atempo->dst_buffer = ff_get_audio_buffer(outlink, n_out);
1071  if (!atempo->dst_buffer) {
1072  av_frame_free(&src_buffer);
1073  return AVERROR(ENOMEM);
1074  }
1075  av_frame_copy_props(atempo->dst_buffer, src_buffer);
1076 
1077  atempo->dst = atempo->dst_buffer->data[0];
1078  atempo->dst_end = atempo->dst + n_out * atempo->stride;
1079  }
1080 
1081  yae_apply(atempo, &src, src_end, &atempo->dst, atempo->dst_end);
1082 
1083  if (atempo->dst == atempo->dst_end) {
1084  int n_samples = ((atempo->dst - atempo->dst_buffer->data[0]) /
1085  atempo->stride);
1086  ret = push_samples(atempo, outlink, n_samples);
1087  if (ret < 0)
1088  goto end;
1089  }
1090  }
1091 
1092  atempo->nsamples_in += n_in;
1093 end:
1094  av_frame_free(&src_buffer);
1095  return ret;
1096 }
1097 
1098 static int request_frame(AVFilterLink *outlink)
1099 {
1100  AVFilterContext *ctx = outlink->src;
1101  ATempoContext *atempo = ctx->priv;
1102  int ret;
1103 
1104  ret = ff_request_frame(ctx->inputs[0]);
1105 
1106  if (ret == AVERROR_EOF) {
1107  // flush the filter:
1108  int n_max = atempo->ring;
1109  int n_out;
1110  int err = AVERROR(EAGAIN);
1111 
1112  while (err == AVERROR(EAGAIN)) {
1113  if (!atempo->dst_buffer) {
1114  atempo->dst_buffer = ff_get_audio_buffer(outlink, n_max);
1115  if (!atempo->dst_buffer)
1116  return AVERROR(ENOMEM);
1117 
1118  atempo->dst = atempo->dst_buffer->data[0];
1119  atempo->dst_end = atempo->dst + n_max * atempo->stride;
1120  }
1121 
1122  err = yae_flush(atempo, &atempo->dst, atempo->dst_end);
1123 
1124  n_out = ((atempo->dst - atempo->dst_buffer->data[0]) /
1125  atempo->stride);
1126 
1127  if (n_out) {
1128  ret = push_samples(atempo, outlink, n_out);
1129  if (ret < 0)
1130  return ret;
1131  }
1132  }
1133 
1134  av_frame_free(&atempo->dst_buffer);
1135  atempo->dst = NULL;
1136  atempo->dst_end = NULL;
1137 
1138  return AVERROR_EOF;
1139  }
1140 
1141  return ret;
1142 }
1143 
1145  const char *cmd,
1146  const char *arg,
1147  char *res,
1148  int res_len,
1149  int flags)
1150 {
1151  int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags);
1152 
1153  if (ret < 0)
1154  return ret;
1155 
1156  return yae_update(ctx);
1157 }
1158 
1159 static const AVFilterPad atempo_inputs[] = {
1160  {
1161  .name = "default",
1162  .type = AVMEDIA_TYPE_AUDIO,
1163  .filter_frame = filter_frame,
1164  .config_props = config_props,
1165  },
1166 };
1167 
1168 static const AVFilterPad atempo_outputs[] = {
1169  {
1170  .name = "default",
1171  .request_frame = request_frame,
1172  .type = AVMEDIA_TYPE_AUDIO,
1173  },
1174 };
1175 
1177  .name = "atempo",
1178  .description = NULL_IF_CONFIG_SMALL("Adjust audio tempo."),
1179  .init = init,
1180  .uninit = uninit,
1181  .process_command = process_command,
1182  .priv_size = sizeof(ATempoContext),
1183  .priv_class = &atempo_class,
1187 };
ff_get_audio_buffer
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
Definition: audio.c:100
yae_update
static int yae_update(AVFilterContext *ctx)
Definition: af_atempo.c:335
ATempoContext::stride
int stride
Definition: af_atempo.c:118
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
push_samples
static int push_samples(ATempoContext *atempo, AVFilterLink *outlink, int n_out)
Definition: af_atempo.c:1024
ATempoContext::channels
int channels
Definition: af_atempo.c:114
config_props
static int config_props(AVFilterLink *inlink)
Definition: af_atempo.c:1013
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:969
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:57
ATempoContext::size
int size
Definition: af_atempo.c:99
AVTXContext
Definition: tx_priv.h:228
ATempoContext::dst_end
uint8_t * dst_end
Definition: af_atempo.c:153
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
yae_downmix
static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
Initialize complex data buffer of a given audio fragment with down-mixed mono data of appropriate sca...
Definition: af_atempo.c:400
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:99
YAE_FLUSH_OUTPUT
@ YAE_FLUSH_OUTPUT
Definition: af_atempo.c:82
yae_load_data
static int yae_load_data(ATempoContext *atempo, const uint8_t **src_ref, const uint8_t *src_end, int64_t stop_here)
Populate the internal data buffer on as-needed basis.
Definition: af_atempo.c:428
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:330
AVFrame::pts
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:437
AVOption
AVOption.
Definition: opt.h:251
b
#define b
Definition: input.c:41
ATempoContext::position
int64_t position[2]
Definition: af_atempo.c:105
YAE_RELOAD_FRAGMENT
@ YAE_RELOAD_FRAGMENT
Definition: af_atempo.c:80
ff_request_frame
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
Definition: avfilter.c:415
AV_OPT_FLAG_RUNTIME_PARAM
#define AV_OPT_FLAG_RUNTIME_PARAM
a generic parameter which can be set by the user at runtime
Definition: opt.h:296
ATempoContext::nsamples_out
uint64_t nsamples_out
Definition: af_atempo.c:155
float.h
AVComplexFloat
Definition: tx.h:27
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:165
AudioFragment::xdat_in
float * xdat_in
Definition: af_atempo.c:70
OFFSET
#define OFFSET(x)
Definition: af_atempo.c:161
ATempoContext::frag
AudioFragment frag[2]
Definition: af_atempo.c:135
AV_OPT_FLAG_FILTERING_PARAM
#define AV_OPT_FLAG_FILTERING_PARAM
a generic parameter which can be set by the user for filtering
Definition: opt.h:297
ATempoContext::tail
int tail
Definition: af_atempo.c:101
sample_rate
sample_rate
Definition: ffmpeg_filter.c:156
av_tx_init
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...
Definition: tx.c:883
init
static av_cold int init(AVFilterContext *ctx)
Definition: af_atempo.c:984
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:351
ATempoContext::c2r_fn
av_tx_fn c2r_fn
Definition: af_atempo.c:146
ATempoContext
Filter state machine.
Definition: af_atempo.c:88
AVComplexFloat::im
float im
Definition: tx.h:28
window
static SDL_Window * window
Definition: ffplay.c:365
ATempoContext::complex_to_real
AVTXContext * complex_to_real
Definition: af_atempo.c:145
YAE_ADJUST_POSITION
@ YAE_ADJUST_POSITION
Definition: af_atempo.c:79
samplefmt.h
ATempoContext::state
FilterState state
Definition: af_atempo.c:141
scale
static av_always_inline float scale(float x, float s)
Definition: vf_v360.c:1389
ATempoContext::origin
int64_t origin[2]
Definition: af_atempo.c:132
atempo_outputs
static const AVFilterPad atempo_outputs[]
Definition: af_atempo.c:1168
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:49
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
yae_apply
static void yae_apply(ATempoContext *atempo, const uint8_t **src_ref, const uint8_t *src_end, uint8_t **dst_ref, uint8_t *dst_end)
Feed as much data to the filter as it is able to consume and receive as much processed data in the de...
Definition: af_atempo.c:821
av_tx_fn
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:127
float
float
Definition: af_crystalizer.c:122
ATempoContext::dst
uint8_t * dst
Definition: af_atempo.c:152
format
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample format(the sample packing is implied by the sample format) and sample rate. The lists are not just lists
yae_init_xdat
#define yae_init_xdat(scalar_type, scalar_max)
A helper macro for initializing complex data buffer with scalar data of a given type.
Definition: af_atempo.c:350
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:227
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
YAE_ATEMPO_MAX
#define YAE_ATEMPO_MAX
Definition: af_atempo.c:159
YAE_OUTPUT_OVERLAP_ADD
@ YAE_OUTPUT_OVERLAP_ADD
Definition: af_atempo.c:81
ctx
AVFormatContext * ctx
Definition: movenc.c:48
channels
channels
Definition: aptx.h:31
av_rescale_q
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:142
ff_af_atempo
const AVFilter ff_af_atempo
Definition: af_atempo.c:1176
ATempoContext::buffer
uint8_t * buffer
Definition: af_atempo.c:93
ATempoContext::ring
int ring
Definition: af_atempo.c:96
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:194
AV_OPT_FLAG_AUDIO_PARAM
#define AV_OPT_FLAG_AUDIO_PARAM
Definition: opt.h:283
RE_MALLOC_OR_FAIL
#define RE_MALLOC_OR_FAIL(field, field_size)
Definition: af_atempo.c:250
arg
const char * arg
Definition: jacosubdec.c:67
ATempoContext::tempo
double tempo
Definition: af_atempo.c:128
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:594
AudioFragment::position
int64_t position[2]
Definition: af_atempo.c:60
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
ATempoContext::head
int head
Definition: af_atempo.c:100
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(atempo)
filter_frame
static int filter_frame(AVFilterLink *inlink, AVFrame *src_buffer)
Definition: af_atempo.c:1050
yae_xcorr_via_rdft
static void yae_xcorr_via_rdft(float *xcorr_in, float *xcorr, AVTXContext *complex_to_real, av_tx_fn c2r_fn, const AVComplexFloat *xa, const AVComplexFloat *xb, const int window)
Calculate cross-correlation via rDFT.
Definition: af_atempo.c:605
double
double
Definition: af_crystalizer.c:132
yae_curr_frag
static AudioFragment * yae_curr_frag(ATempoContext *atempo)
Definition: af_atempo.c:174
yae_reset
static int yae_reset(ATempoContext *atempo, enum AVSampleFormat format, int sample_rate, int channels)
Prepare filter for processing audio data of given format, sample rate and number of channels.
Definition: af_atempo.c:264
process_command
static int process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Definition: af_atempo.c:1144
yae_overlap_add
static int yae_overlap_add(ATempoContext *atempo, uint8_t **dst_ref, uint8_t *dst_end)
Blend the overlap region of previous and current audio fragment and output the results to the given d...
Definition: af_atempo.c:766
ATempoContext::nsamples_in
uint64_t nsamples_in
Definition: af_atempo.c:154
eval.h
yae_load_frag
static int yae_load_frag(ATempoContext *atempo, const uint8_t **src_ref, const uint8_t *src_end)
Populate current audio fragment data buffer.
Definition: af_atempo.c:503
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:115
yae_prev_frag
static AudioFragment * yae_prev_frag(ATempoContext *atempo)
Definition: af_atempo.c:179
AVFrame::sample_rate
int sample_rate
Sample rate of the audio data.
Definition: frame.h:516
AV_SAMPLE_FMT_NONE
@ AV_SAMPLE_FMT_NONE
Definition: samplefmt.h:56
AVComplexFloat::re
float re
Definition: tx.h:28
AV_NOPTS_VALUE
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:248
FILTER_SAMPLEFMTS_ARRAY
#define FILTER_SAMPLEFMTS_ARRAY(array)
Definition: internal.h:177
ff_filter_process_command
int ff_filter_process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Generic processing of user supplied commands that are set in the same way as the filter options.
Definition: avfilter.c:842
ATempoContext::hann
float * hann
Definition: af_atempo.c:125
AudioFragment::nsamples
int nsamples
Definition: af_atempo.c:66
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
AudioFragment::data
uint8_t * data
Definition: af_atempo.c:63
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
yae_blend
#define yae_blend(scalar_type)
A helper macro for blending the overlap region of previous and current audio fragment.
Definition: af_atempo.c:729
M_PI
#define M_PI
Definition: mathematics.h:52
av_tx_uninit
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
Definition: tx.c:294
internal.h
AVFrame::nb_samples
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:410
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
av_get_bytes_per_sample
int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt)
Return number of bytes per sample.
Definition: samplefmt.c:108
YAE_ATEMPO_MIN
#define YAE_ATEMPO_MIN
Definition: af_atempo.c:158
AV_SAMPLE_FMT_U8
@ AV_SAMPLE_FMT_U8
unsigned 8 bits
Definition: samplefmt.h:57
yae_advance_to_next_frag
static void yae_advance_to_next_frag(ATempoContext *atempo)
Prepare for loading next audio fragment.
Definition: af_atempo.c:583
ATempoContext::dst_buffer
AVFrame * dst_buffer
Definition: af_atempo.c:151
sample_fmts
static enum AVSampleFormat sample_fmts[]
Definition: af_atempo.c:1004
yae_clear
static void yae_clear(ATempoContext *atempo)
Reset filter to initial state, do not deallocate existing local buffers.
Definition: af_atempo.c:187
ATempoContext::r2c_fn
av_tx_fn r2c_fn
Definition: af_atempo.c:146
AVSampleFormat
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:55
ATempoContext::window
int window
Definition: af_atempo.c:121
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AV_SAMPLE_FMT_S16
@ AV_SAMPLE_FMT_S16
signed 16 bits
Definition: samplefmt.h:58
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:55
atempo_inputs
static const AVFilterPad atempo_inputs[]
Definition: af_atempo.c:1159
yae_flush
static int yae_flush(ATempoContext *atempo, uint8_t **dst_ref, uint8_t *dst_end)
Flush any buffered data from the filter.
Definition: af_atempo.c:895
AVFilter
Filter definition.
Definition: avfilter.h:161
ret
ret
Definition: filter_design.txt:187
atempo_options
static const AVOption atempo_options[]
Definition: af_atempo.c:163
ATempoContext::correlation_in
float * correlation_in
Definition: af_atempo.c:147
ATempoContext::nfrag
uint64_t nfrag
Definition: af_atempo.c:138
request_frame
static int request_frame(AVFilterLink *outlink)
Definition: af_atempo.c:1098
ATempoContext::format
enum AVSampleFormat format
Definition: af_atempo.c:111
AV_TX_FLOAT_RDFT
@ AV_TX_FLOAT_RDFT
Real to complex and complex to real DFTs.
Definition: tx.h:90
ATempoContext::real_to_complex
AVTXContext * real_to_complex
Definition: af_atempo.c:144
channel_layout.h
yae_adjust_position
static int yae_adjust_position(ATempoContext *atempo)
Adjust current fragment position for better alignment with previous fragment.
Definition: af_atempo.c:689
avfilter.h
FilterState
FilterState
Filter state machine states.
Definition: af_atempo.c:77
ATempoContext::correlation
float * correlation
Definition: af_atempo.c:148
correlation
static void correlation(int32_t *corr, int32_t *ener, const int16_t *buffer, int16_t lag, int16_t blen, int16_t srange, int16_t scale)
Definition: ilbcdec.c:917
AVFilterContext
An instance of a filter.
Definition: avfilter.h:392
audio.h
ib
#define ib(width, name)
Definition: cbs_h2645.c:271
yae_align
static int yae_align(AudioFragment *frag, const AudioFragment *prev, const int window, const int delta_max, const int drift, float *correlation_in, float *correlation, AVTXContext *complex_to_real, av_tx_fn c2r_fn)
Calculate alignment offset for given fragment relative to the previous fragment.
Definition: af_atempo.c:631
ATempoContext::start_pts
int64_t start_pts
Definition: af_atempo.c:108
yae_release_buffers
static void yae_release_buffers(ATempoContext *atempo)
Reset filter to initial state and deallocate all buffers.
Definition: af_atempo.c:228
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:195
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_atempo.c:992
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
h
h
Definition: vp9dsp_template.c:2038
AV_SAMPLE_FMT_DBL
@ AV_SAMPLE_FMT_DBL
double
Definition: samplefmt.h:61
avstring.h
int
int
Definition: ffmpeg_filter.c:156
AV_SAMPLE_FMT_S32
@ AV_SAMPLE_FMT_S32
signed 32 bits
Definition: samplefmt.h:59
AudioFragment
A fragment of audio waveform.
Definition: af_atempo.c:56
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
AV_SAMPLE_FMT_FLT
@ AV_SAMPLE_FMT_FLT
float
Definition: samplefmt.h:60
YAE_LOAD_FRAGMENT
@ YAE_LOAD_FRAGMENT
Definition: af_atempo.c:78
AudioFragment::xdat
float * xdat
Definition: af_atempo.c:71
tx.h