Go to the documentation of this file.
103 #define OFFSET(x) offsetof(SilenceRemoveContext, x)
104 #define AF AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
133 new_sum -= *
s->window_current;
136 return new_sum /
s->window_size;
141 s->sum -= *
s->window_current;
143 s->sum += *
s->window_current;
146 if (
s->window_current >=
s->window_end)
147 s->window_current =
s->window;
155 new_sum -= *
s->window_current;
158 return sqrt(new_sum /
s->window_size);
163 s->sum -= *
s->window_current;
165 s->sum += *
s->window_current;
168 if (
s->window_current >=
s->window_end)
169 s->window_current =
s->window;
176 if (
s->stop_periods < 0) {
177 s->stop_periods = -
s->stop_periods;
181 switch (
s->detection) {
197 memset(
s->window, 0,
s->window_size *
sizeof(*
s->window));
199 s->window_current =
s->window;
200 s->window_end =
s->window +
s->window_size;
227 sizeof(*
s->start_holdoff) *
229 if (!
s->start_holdoff)
233 sizeof(*
s->start_silence_hold) *
235 if (!
s->start_silence_hold)
238 s->start_holdoff_offset = 0;
239 s->start_holdoff_end = 0;
240 s->start_found_periods = 0;
243 sizeof(*
s->stop_holdoff) *
245 if (!
s->stop_holdoff)
249 sizeof(*
s->stop_silence_hold) *
251 if (!
s->stop_silence_hold)
254 s->stop_holdoff_offset = 0;
255 s->stop_holdoff_end = 0;
256 s->stop_found_periods = 0;
258 if (
s->start_periods)
268 int *nb_samples_written,
int *
ret,
int flush_silence)
272 if (*nb_samples_written) {
273 out->nb_samples = *nb_samples_written / outlink->
channels;
275 out->pts =
s->next_pts;
283 *nb_samples_written = 0;
288 if (
s->stop_silence_end <= 0 || !flush_silence)
297 if (
s->stop_silence_offset <
s->stop_silence_end) {
298 memcpy(silence->
data[0],
299 &
s->stop_silence_hold[
s->stop_silence_offset],
300 (
s->stop_silence_end -
s->stop_silence_offset) *
sizeof(
double));
303 if (
s->stop_silence_offset > 0) {
304 memcpy(silence->
data[0] + (
s->stop_silence_end -
s->stop_silence_offset) *
sizeof(
double),
305 &
s->stop_silence_hold[0],
306 s->stop_silence_offset *
sizeof(
double));
309 s->stop_silence_offset = 0;
310 s->stop_silence_end = 0;
312 silence->
pts =
s->next_pts;
325 int i, j, threshold,
ret = 0;
326 int nbs, nb_samples_read, nb_samples_written;
327 double *obuf, *ibuf = (
double *)
in->data[0];
330 nb_samples_read = nb_samples_written = 0;
333 s->next_pts =
in->pts;
338 nbs =
in->nb_samples - nb_samples_read / outlink->
channels;
342 for (
i = 0;
i < nbs;
i++) {
343 if (
s->start_mode ==
T_ANY) {
345 for (j = 0; j < outlink->
channels; j++) {
346 threshold |=
s->compute(
s, ibuf[j]) >
s->start_threshold;
350 for (j = 0; j < outlink->
channels; j++) {
351 threshold &=
s->compute(
s, ibuf[j]) >
s->start_threshold;
356 for (j = 0; j < outlink->
channels; j++) {
358 s->start_holdoff[
s->start_holdoff_end++] = *ibuf++;
360 nb_samples_read += outlink->
channels;
362 if (
s->start_holdoff_end >=
s->start_duration * outlink->
channels) {
363 if (++
s->start_found_periods >=
s->start_periods) {
365 goto silence_trim_flush;
368 s->start_holdoff_offset = 0;
369 s->start_holdoff_end = 0;
370 s->start_silence_offset = 0;
371 s->start_silence_end = 0;
374 s->start_holdoff_end = 0;
376 for (j = 0; j < outlink->
channels; j++) {
377 s->update(
s, ibuf[j]);
378 if (
s->start_silence) {
379 s->start_silence_hold[
s->start_silence_offset++] = ibuf[j];
380 s->start_silence_end =
FFMIN(
s->start_silence_end + 1, outlink->
channels *
s->start_silence);
381 if (
s->start_silence_offset >= outlink->
channels *
s->start_silence) {
382 s->start_silence_offset = 0;
388 nb_samples_read += outlink->
channels;
395 nbs =
s->start_holdoff_end -
s->start_holdoff_offset;
406 if (
s->start_silence_end > 0) {
407 if (
s->start_silence_offset <
s->start_silence_end) {
409 &
s->start_silence_hold[
s->start_silence_offset],
410 (
s->start_silence_end -
s->start_silence_offset) *
sizeof(
double));
413 if (
s->start_silence_offset > 0) {
414 memcpy(
out->data[0] + (
s->start_silence_end -
s->start_silence_offset) *
sizeof(
double),
415 &
s->start_silence_hold[0],
416 s->start_silence_offset *
sizeof(
double));
420 memcpy(
out->data[0] +
s->start_silence_end *
sizeof(
double),
421 &
s->start_holdoff[
s->start_holdoff_offset],
422 nbs *
sizeof(
double));
424 out->pts =
s->next_pts;
429 s->start_holdoff_offset += nbs;
433 if (
s->start_holdoff_offset ==
s->start_holdoff_end) {
434 s->start_holdoff_offset = 0;
435 s->start_holdoff_end = 0;
436 s->start_silence_offset = 0;
437 s->start_silence_end = 0;
445 nbs =
in->nb_samples - nb_samples_read / outlink->
channels;
454 obuf = (
double *)
out->data[0];
456 if (
s->stop_periods) {
457 for (
i = 0;
i < nbs;
i++) {
458 if (
s->stop_mode ==
T_ANY) {
460 for (j = 0; j < outlink->
channels; j++) {
461 threshold |=
s->compute(
s, ibuf[j]) >
s->stop_threshold;
465 for (j = 0; j < outlink->
channels; j++) {
466 threshold &=
s->compute(
s, ibuf[j]) >
s->stop_threshold;
470 if (threshold &&
s->stop_holdoff_end && !
s->stop_silence) {
473 goto silence_copy_flush;
474 }
else if (threshold) {
475 for (j = 0; j < outlink->
channels; j++) {
479 nb_samples_read += outlink->
channels;
480 nb_samples_written += outlink->
channels;
481 }
else if (!threshold) {
482 for (j = 0; j < outlink->
channels; j++) {
484 if (
s->stop_silence) {
485 s->stop_silence_hold[
s->stop_silence_offset++] = *ibuf;
486 s->stop_silence_end =
FFMIN(
s->stop_silence_end + 1, outlink->
channels *
s->stop_silence);
487 if (
s->stop_silence_offset >= outlink->
channels *
s->stop_silence) {
488 s->stop_silence_offset = 0;
492 s->stop_holdoff[
s->stop_holdoff_end++] = *ibuf++;
494 nb_samples_read += outlink->
channels;
496 if (
s->stop_holdoff_end >=
s->stop_duration * outlink->
channels) {
497 if (++
s->stop_found_periods >=
s->stop_periods) {
498 s->stop_holdoff_offset = 0;
499 s->stop_holdoff_end = 0;
506 s->stop_found_periods = 0;
507 s->start_found_periods = 0;
508 s->start_holdoff_offset = 0;
509 s->start_holdoff_end = 0;
510 s->start_silence_offset = 0;
511 s->start_silence_end = 0;
520 goto silence_copy_flush;
526 memcpy(obuf, ibuf,
sizeof(
double) * nbs * outlink->
channels);
528 out->pts =
s->next_pts;
539 nbs =
s->stop_holdoff_end -
s->stop_holdoff_offset;
550 memcpy(
out->data[0], &
s->stop_holdoff[
s->stop_holdoff_offset],
551 nbs *
sizeof(
double));
552 s->stop_holdoff_offset += nbs;
554 out->pts =
s->next_pts;
561 if (
s->stop_holdoff_offset ==
s->stop_holdoff_end) {
562 s->stop_holdoff_offset = 0;
563 s->stop_holdoff_end = 0;
564 s->stop_silence_offset = 0;
565 s->stop_silence_end = 0;
589 int nbs =
s->stop_holdoff_end -
s->stop_holdoff_offset;
597 memcpy(
frame->data[0], &
s->stop_holdoff[
s->stop_holdoff_offset],
598 nbs *
sizeof(
double));
672 .
name =
"silenceremove",
675 .priv_class = &silenceremove_class,
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
A list of supported channel layouts.
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
static double compute_rms(SilenceRemoveContext *s, double sample)
size_t stop_silence_offset
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
static enum AVSampleFormat sample_fmts[]
enum MovChannelLayoutTag * layouts
#define AVERROR_EOF
End of file.
static av_cold int init(AVFilterContext *ctx)
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g. extended_data.
size_t start_holdoff_offset
This structure describes decoded (raw) audio or video data.
static int request_frame(AVFilterLink *outlink)
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
static const AVFilterPad silenceremove_inputs[]
int64_t start_duration_opt
const char * name
Filter name.
static double compute_peak(SilenceRemoveContext *s, double sample)
int64_t start_silence_opt
A link between two filters.
AVFILTER_DEFINE_CLASS(silenceremove)
int channels
Number of channels.
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
static void update_peak(SilenceRemoveContext *s, double sample)
double(* compute)(struct SilenceRemoveContext *s, double sample)
A filter pad used for either input or output.
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
static const AVFilterPad outputs[]
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
static int query_formats(AVFilterContext *ctx)
Describe the class of an AVClass context structure.
static __device__ float fabs(float a)
Rational number (pair of numerator and denominator).
size_t start_silence_offset
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter's request_frame method or the application If a filter has several inputs
static const AVOption silenceremove_options[]
static av_cold void uninit(AVFilterContext *ctx)
static void update_rms(SilenceRemoveContext *s, double sample)
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
static int config_input(AVFilterLink *inlink)
#define AV_NOPTS_VALUE
Undefined timestamp value.
size_t stop_holdoff_offset
AVFilterContext * src
source filter
static void clear_window(SilenceRemoveContext *s)
static void flush(SilenceRemoveContext *s, AVFrame *out, AVFilterLink *outlink, int *nb_samples_written, int *ret, int flush_silence)
double * start_silence_hold
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
int nb_samples
number of audio samples (per channel) described by this frame
#define AV_TIME_BASE
Internal time base represented as integer.
#define av_malloc_array(a, b)
AVSampleFormat
Audio sample formats.
AVFilter ff_af_silenceremove
const char * name
Pad name.
int64_t av_rescale(int64_t a, int64_t b, int64_t c)
Rescale a 64-bit integer with rounding to nearest.
double * stop_silence_hold
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter's request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
void(* update)(struct SilenceRemoveContext *s, double sample)
AVRational time_base
Define the time base used by the PTS of the frames/samples which will pass through this link.
int64_t stop_duration_opt
static const AVFilterPad silenceremove_outputs[]
@ AV_SAMPLE_FMT_DBL
double