FFmpeg: libavfilter/af_amix.c Source File

00001 /*
00002  * Audio Mix Filter
00003  * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00031 #include "libavutil/audioconvert.h"
00032 #include "libavutil/audio_fifo.h"
00033 #include "libavutil/avassert.h"
00034 #include "libavutil/avstring.h"
00035 #include "libavutil/common.h"
00036 #include "libavutil/float_dsp.h"
00037 #include "libavutil/mathematics.h"
00038 #include "libavutil/opt.h"
00039 #include "libavutil/samplefmt.h"
00040 
00041 #include "audio.h"
00042 #include "avfilter.h"
00043 #include "formats.h"
00044 #include "internal.h"
00045 
/**
 * Per-input state values stored in MixContext.input_state[].
 */
enum {
    INPUT_OFF      = 0, /**< input returned EOF and its FIFO has been drained */
    INPUT_ON       = 1, /**< input is mixed into the output (initial state)   */
    INPUT_INACTIVE = 2  /**< not referenced in this file; NOTE(review): presumably
                             reserved for a temporarily silent input - confirm  */
};

/**
 * Values of the "duration" option (MixContext.duration_mode).
 */
enum {
    DURATION_LONGEST  = 0, /**< keep mixing until every input is off          */
    DURATION_SHORTEST = 1, /**< stop as soon as any input is off              */
    DURATION_FIRST    = 2  /**< stop as soon as the first input is off        */
};
00053 
00054 
typedef struct FrameInfo FrameInfo;

/**
 * One node of a FrameList: the size and timestamp of a single queued frame.
 */
struct FrameInfo {
    int        nb_samples; /**< samples of this frame not yet consumed        */
    int64_t    pts;        /**< timestamp of the first remaining sample       */
    FrameInfo *next;       /**< following node, or NULL for the last one      */
};
00060 
00069 typedef struct FrameList {
00070     int nb_frames;
00071     int nb_samples;
00072     FrameInfo *list;
00073     FrameInfo *end;
00074 } FrameList;
00075 
00076 static void frame_list_clear(FrameList *frame_list)
00077 {
00078     if (frame_list) {
00079         while (frame_list->list) {
00080             FrameInfo *info = frame_list->list;
00081             frame_list->list = info->next;
00082             av_free(info);
00083         }
00084         frame_list->nb_frames  = 0;
00085         frame_list->nb_samples = 0;
00086         frame_list->end        = NULL;
00087     }
00088 }
00089 
00090 static int frame_list_next_frame_size(FrameList *frame_list)
00091 {
00092     if (!frame_list->list)
00093         return 0;
00094     return frame_list->list->nb_samples;
00095 }
00096 
00097 static int64_t frame_list_next_pts(FrameList *frame_list)
00098 {
00099     if (!frame_list->list)
00100         return AV_NOPTS_VALUE;
00101     return frame_list->list->pts;
00102 }
00103 
00104 static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
00105 {
00106     if (nb_samples >= frame_list->nb_samples) {
00107         frame_list_clear(frame_list);
00108     } else {
00109         int samples = nb_samples;
00110         while (samples > 0) {
00111             FrameInfo *info = frame_list->list;
00112             av_assert0(info != NULL);
00113             if (info->nb_samples <= samples) {
00114                 samples -= info->nb_samples;
00115                 frame_list->list = info->next;
00116                 if (!frame_list->list)
00117                     frame_list->end = NULL;
00118                 frame_list->nb_frames--;
00119                 frame_list->nb_samples -= info->nb_samples;
00120                 av_free(info);
00121             } else {
00122                 info->nb_samples       -= samples;
00123                 info->pts              += samples;
00124                 frame_list->nb_samples -= samples;
00125                 samples = 0;
00126             }
00127         }
00128     }
00129 }
00130 
00131 static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
00132 {
00133     FrameInfo *info = av_malloc(sizeof(*info));
00134     if (!info)
00135         return AVERROR(ENOMEM);
00136     info->nb_samples = nb_samples;
00137     info->pts        = pts;
00138     info->next       = NULL;
00139 
00140     if (!frame_list->list) {
00141         frame_list->list = info;
00142         frame_list->end  = info;
00143     } else {
00144         av_assert0(frame_list->end != NULL);
00145         frame_list->end->next = info;
00146         frame_list->end       = info;
00147     }
00148     frame_list->nb_frames++;
00149     frame_list->nb_samples += nb_samples;
00150 
00151     return 0;
00152 }
00153 
00154 
00155 typedef struct MixContext {
00156     const AVClass *class;       
00157     AVFloatDSPContext fdsp;
00158 
00159     int nb_inputs;              
00160     int active_inputs;          
00161     int duration_mode;          
00162     float dropout_transition;   
00164     int nb_channels;            
00165     int sample_rate;            
00166     int planar;
00167     AVAudioFifo **fifos;        
00168     uint8_t *input_state;       
00169     float *input_scale;         
00170     float scale_norm;           
00171     int64_t next_pts;           
00172     FrameList *frame_list;      
00173 } MixContext;
00174 
00175 #define OFFSET(x) offsetof(MixContext, x)
00176 #define A AV_OPT_FLAG_AUDIO_PARAM
00177 #define F AV_OPT_FLAG_FILTERING_PARAM
00178 static const AVOption amix_options[] = {
00179     { "inputs", "Number of inputs.",
00180             OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 32, A|F },
00181     { "duration", "How to determine the end-of-stream.",
00182             OFFSET(duration_mode), AV_OPT_TYPE_INT, { .i64 = DURATION_LONGEST }, 0,  2, A|F, "duration" },
00183         { "longest",  "Duration of longest input.",  0, AV_OPT_TYPE_CONST, { .i64 = DURATION_LONGEST  }, INT_MIN, INT_MAX, A|F, "duration" },
00184         { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_SHORTEST }, INT_MIN, INT_MAX, A|F, "duration" },
00185         { "first",    "Duration of first input.",    0, AV_OPT_TYPE_CONST, { .i64 = DURATION_FIRST    }, INT_MIN, INT_MAX, A|F, "duration" },
00186     { "dropout_transition", "Transition time, in seconds, for volume "
00187                             "renormalization when an input stream ends.",
00188             OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A|F },
00189     { NULL },
00190 };
00191 
00192 AVFILTER_DEFINE_CLASS(amix);
00193 
00201 static void calculate_scales(MixContext *s, int nb_samples)
00202 {
00203     int i;
00204 
00205     if (s->scale_norm > s->active_inputs) {
00206         s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
00207         s->scale_norm = FFMAX(s->scale_norm, s->active_inputs);
00208     }
00209 
00210     for (i = 0; i < s->nb_inputs; i++) {
00211         if (s->input_state[i] == INPUT_ON)
00212             s->input_scale[i] = 1.0f / s->scale_norm;
00213         else
00214             s->input_scale[i] = 0.0f;
00215     }
00216 }
00217 
00218 static int config_output(AVFilterLink *outlink)
00219 {
00220     AVFilterContext *ctx = outlink->src;
00221     MixContext *s      = ctx->priv;
00222     int i;
00223     char buf[64];
00224 
00225     s->planar          = av_sample_fmt_is_planar(outlink->format);
00226     s->sample_rate     = outlink->sample_rate;
00227     outlink->time_base = (AVRational){ 1, outlink->sample_rate };
00228     s->next_pts        = AV_NOPTS_VALUE;
00229 
00230     s->frame_list = av_mallocz(sizeof(*s->frame_list));
00231     if (!s->frame_list)
00232         return AVERROR(ENOMEM);
00233 
00234     s->fifos = av_mallocz(s->nb_inputs * sizeof(*s->fifos));
00235     if (!s->fifos)
00236         return AVERROR(ENOMEM);
00237 
00238     s->nb_channels = av_get_channel_layout_nb_channels(outlink->channel_layout);
00239     for (i = 0; i < s->nb_inputs; i++) {
00240         s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
00241         if (!s->fifos[i])
00242             return AVERROR(ENOMEM);
00243     }
00244 
00245     s->input_state = av_malloc(s->nb_inputs);
00246     if (!s->input_state)
00247         return AVERROR(ENOMEM);
00248     memset(s->input_state, INPUT_ON, s->nb_inputs);
00249     s->active_inputs = s->nb_inputs;
00250 
00251     s->input_scale = av_mallocz(s->nb_inputs * sizeof(*s->input_scale));
00252     if (!s->input_scale)
00253         return AVERROR(ENOMEM);
00254     s->scale_norm = s->active_inputs;
00255     calculate_scales(s, 0);
00256 
00257     av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);
00258 
00259     av_log(ctx, AV_LOG_VERBOSE,
00260            "inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs,
00261            av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);
00262 
00263     return 0;
00264 }
00265 
00269 static int output_frame(AVFilterLink *outlink, int nb_samples)
00270 {
00271     AVFilterContext *ctx = outlink->src;
00272     MixContext      *s = ctx->priv;
00273     AVFilterBufferRef *out_buf, *in_buf;
00274     int i;
00275 
00276     calculate_scales(s, nb_samples);
00277 
00278     out_buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
00279     if (!out_buf)
00280         return AVERROR(ENOMEM);
00281 
00282     in_buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
00283     if (!in_buf)
00284         return AVERROR(ENOMEM);
00285 
00286     for (i = 0; i < s->nb_inputs; i++) {
00287         if (s->input_state[i] == INPUT_ON) {
00288             int planes, plane_size, p;
00289 
00290             av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
00291                                nb_samples);
00292 
00293             planes     = s->planar ? s->nb_channels : 1;
00294             plane_size = nb_samples * (s->planar ? 1 : s->nb_channels);
00295             plane_size = FFALIGN(plane_size, 16);
00296 
00297             for (p = 0; p < planes; p++) {
00298                 s->fdsp.vector_fmac_scalar((float *)out_buf->extended_data[p],
00299                                            (float *) in_buf->extended_data[p],
00300                                            s->input_scale[i], plane_size);
00301             }
00302         }
00303     }
00304     avfilter_unref_buffer(in_buf);
00305 
00306     out_buf->pts = s->next_pts;
00307     if (s->next_pts != AV_NOPTS_VALUE)
00308         s->next_pts += nb_samples;
00309 
00310     return ff_filter_samples(outlink, out_buf);
00311 }
00312 
00317 static int get_available_samples(MixContext *s)
00318 {
00319     int i;
00320     int available_samples = INT_MAX;
00321 
00322     av_assert0(s->nb_inputs > 1);
00323 
00324     for (i = 1; i < s->nb_inputs; i++) {
00325         int nb_samples;
00326         if (s->input_state[i] == INPUT_OFF)
00327             continue;
00328         nb_samples = av_audio_fifo_size(s->fifos[i]);
00329         available_samples = FFMIN(available_samples, nb_samples);
00330     }
00331     if (available_samples == INT_MAX)
00332         return 0;
00333     return available_samples;
00334 }
00335 
00339 static int request_samples(AVFilterContext *ctx, int min_samples)
00340 {
00341     MixContext *s = ctx->priv;
00342     int i, ret;
00343 
00344     av_assert0(s->nb_inputs > 1);
00345 
00346     for (i = 1; i < s->nb_inputs; i++) {
00347         ret = 0;
00348         if (s->input_state[i] == INPUT_OFF)
00349             continue;
00350         while (!ret && av_audio_fifo_size(s->fifos[i]) < min_samples)
00351             ret = ff_request_frame(ctx->inputs[i]);
00352         if (ret == AVERROR_EOF) {
00353             if (av_audio_fifo_size(s->fifos[i]) == 0) {
00354                 s->input_state[i] = INPUT_OFF;
00355                 continue;
00356             }
00357         } else if (ret < 0)
00358             return ret;
00359     }
00360     return 0;
00361 }
00362 
00369 static int calc_active_inputs(MixContext *s)
00370 {
00371     int i;
00372     int active_inputs = 0;
00373     for (i = 0; i < s->nb_inputs; i++)
00374         active_inputs += !!(s->input_state[i] != INPUT_OFF);
00375     s->active_inputs = active_inputs;
00376 
00377     if (!active_inputs ||
00378         (s->duration_mode == DURATION_FIRST && s->input_state[0] == INPUT_OFF) ||
00379         (s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
00380         return AVERROR_EOF;
00381     return 0;
00382 }
00383 
00384 static int request_frame(AVFilterLink *outlink)
00385 {
00386     AVFilterContext *ctx = outlink->src;
00387     MixContext      *s = ctx->priv;
00388     int ret;
00389     int wanted_samples, available_samples;
00390 
00391     ret = calc_active_inputs(s);
00392     if (ret < 0)
00393         return ret;
00394 
00395     if (s->input_state[0] == INPUT_OFF) {
00396         ret = request_samples(ctx, 1);
00397         if (ret < 0)
00398             return ret;
00399 
00400         ret = calc_active_inputs(s);
00401         if (ret < 0)
00402             return ret;
00403 
00404         available_samples = get_available_samples(s);
00405         if (!available_samples)
00406             return AVERROR(EAGAIN);
00407 
00408         return output_frame(outlink, available_samples);
00409     }
00410 
00411     if (s->frame_list->nb_frames == 0) {
00412         ret = ff_request_frame(ctx->inputs[0]);
00413         if (ret == AVERROR_EOF) {
00414             s->input_state[0] = INPUT_OFF;
00415             if (s->nb_inputs == 1)
00416                 return AVERROR_EOF;
00417             else
00418                 return AVERROR(EAGAIN);
00419         } else if (ret < 0)
00420             return ret;
00421     }
00422     av_assert0(s->frame_list->nb_frames > 0);
00423 
00424     wanted_samples = frame_list_next_frame_size(s->frame_list);
00425 
00426     if (s->active_inputs > 1) {
00427         ret = request_samples(ctx, wanted_samples);
00428         if (ret < 0)
00429             return ret;
00430 
00431         ret = calc_active_inputs(s);
00432         if (ret < 0)
00433             return ret;
00434     }
00435 
00436     if (s->active_inputs > 1) {
00437         available_samples = get_available_samples(s);
00438         if (!available_samples)
00439             return AVERROR(EAGAIN);
00440         available_samples = FFMIN(available_samples, wanted_samples);
00441     } else {
00442         available_samples = wanted_samples;
00443     }
00444 
00445     s->next_pts = frame_list_next_pts(s->frame_list);
00446     frame_list_remove_samples(s->frame_list, available_samples);
00447 
00448     return output_frame(outlink, available_samples);
00449 }
00450 
00451 static int filter_samples(AVFilterLink *inlink, AVFilterBufferRef *buf)
00452 {
00453     AVFilterContext  *ctx = inlink->dst;
00454     MixContext       *s = ctx->priv;
00455     AVFilterLink *outlink = ctx->outputs[0];
00456     int i, ret = 0;
00457 
00458     for (i = 0; i < ctx->nb_inputs; i++)
00459         if (ctx->inputs[i] == inlink)
00460             break;
00461     if (i >= ctx->nb_inputs) {
00462         av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
00463         ret = AVERROR(EINVAL);
00464         goto fail;
00465     }
00466 
00467     if (i == 0) {
00468         int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
00469                                    outlink->time_base);
00470         ret = frame_list_add_frame(s->frame_list, buf->audio->nb_samples, pts);
00471         if (ret < 0)
00472             goto fail;
00473     }
00474 
00475     ret = av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
00476                               buf->audio->nb_samples);
00477 
00478 fail:
00479     avfilter_unref_buffer(buf);
00480 
00481     return ret;
00482 }
00483 
00484 static int init(AVFilterContext *ctx, const char *args)
00485 {
00486     MixContext *s = ctx->priv;
00487     int i, ret;
00488 
00489     s->class = &amix_class;
00490     av_opt_set_defaults(s);
00491 
00492     if ((ret = av_set_options_string(s, args, "=", ":")) < 0)
00493         return ret;
00494     av_opt_free(s);
00495 
00496     for (i = 0; i < s->nb_inputs; i++) {
00497         char name[32];
00498         AVFilterPad pad = { 0 };
00499 
00500         snprintf(name, sizeof(name), "input%d", i);
00501         pad.type           = AVMEDIA_TYPE_AUDIO;
00502         pad.name           = av_strdup(name);
00503         pad.filter_samples = filter_samples;
00504 
00505         ff_insert_inpad(ctx, i, &pad);
00506     }
00507 
00508     avpriv_float_dsp_init(&s->fdsp, 0);
00509 
00510     return 0;
00511 }
00512 
00513 static void uninit(AVFilterContext *ctx)
00514 {
00515     int i;
00516     MixContext *s = ctx->priv;
00517 
00518     if (s->fifos) {
00519         for (i = 0; i < s->nb_inputs; i++)
00520             av_audio_fifo_free(s->fifos[i]);
00521         av_freep(&s->fifos);
00522     }
00523     frame_list_clear(s->frame_list);
00524     av_freep(&s->frame_list);
00525     av_freep(&s->input_state);
00526     av_freep(&s->input_scale);
00527 
00528     for (i = 0; i < ctx->nb_inputs; i++)
00529         av_freep(&ctx->input_pads[i].name);
00530 }
00531 
00532 static int query_formats(AVFilterContext *ctx)
00533 {
00534     AVFilterFormats *formats = NULL;
00535     ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
00536     ff_add_format(&formats, AV_SAMPLE_FMT_FLTP);
00537     ff_set_common_formats(ctx, formats);
00538     ff_set_common_channel_layouts(ctx, ff_all_channel_layouts());
00539     ff_set_common_samplerates(ctx, ff_all_samplerates());
00540     return 0;
00541 }
00542 
00543 AVFilter avfilter_af_amix = {
00544     .name          = "amix",
00545     .description   = NULL_IF_CONFIG_SMALL("Audio mixing."),
00546     .priv_size     = sizeof(MixContext),
00547 
00548     .init           = init,
00549     .uninit         = uninit,
00550     .query_formats  = query_formats,
00551 
00552     .inputs    = NULL,
00553     .outputs   = (const AVFilterPad[]) {{ .name          = "default",
00554                                           .type          = AVMEDIA_TYPE_AUDIO,
00555                                           .config_props  = config_output,
00556                                           .request_frame = request_frame },
00557                                         { .name = NULL}},
00558     .priv_class = &amix_class,
00559 };