00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00031 #include "libavutil/audioconvert.h"
00032 #include "libavutil/audio_fifo.h"
00033 #include "libavutil/avassert.h"
00034 #include "libavutil/avstring.h"
00035 #include "libavutil/common.h"
00036 #include "libavutil/float_dsp.h"
00037 #include "libavutil/mathematics.h"
00038 #include "libavutil/opt.h"
00039 #include "libavutil/samplefmt.h"
00040
00041 #include "audio.h"
00042 #include "avfilter.h"
00043 #include "formats.h"
00044 #include "internal.h"
00045
/*
 * Per-input state values stored in MixContext.input_state[].
 *
 * INPUT_ON is the initial state of every input; an input is switched to
 * INPUT_OFF once it has reported EOF and has no buffered samples left.
 * INPUT_INACTIVE is defined but not referenced by the code in this file.
 */
enum {
    INPUT_OFF      = 0,
    INPUT_ON       = 1,
    INPUT_INACTIVE = 2,
};

/*
 * Values accepted by the "duration" option (MixContext.duration_mode);
 * they select which input(s) decide when the mixed output ends.
 */
enum {
    DURATION_LONGEST  = 0,
    DURATION_SHORTEST = 1,
    DURATION_FIRST    = 2,
};
00053
00054
/**
 * Bookkeeping record for one queued frame: how many samples it still
 * holds and the timestamp of its first remaining sample. Records are
 * chained into a singly linked list through `next`.
 */
typedef struct FrameInfo {
    int               nb_samples; /* samples remaining in this record       */
    int64_t           pts;        /* timestamp of the first remaining sample */
    struct FrameInfo *next;       /* following record, NULL for the last one */
} FrameInfo;
00060
00069 typedef struct FrameList {
00070 int nb_frames;
00071 int nb_samples;
00072 FrameInfo *list;
00073 FrameInfo *end;
00074 } FrameList;
00075
00076 static void frame_list_clear(FrameList *frame_list)
00077 {
00078 if (frame_list) {
00079 while (frame_list->list) {
00080 FrameInfo *info = frame_list->list;
00081 frame_list->list = info->next;
00082 av_free(info);
00083 }
00084 frame_list->nb_frames = 0;
00085 frame_list->nb_samples = 0;
00086 frame_list->end = NULL;
00087 }
00088 }
00089
00090 static int frame_list_next_frame_size(FrameList *frame_list)
00091 {
00092 if (!frame_list->list)
00093 return 0;
00094 return frame_list->list->nb_samples;
00095 }
00096
00097 static int64_t frame_list_next_pts(FrameList *frame_list)
00098 {
00099 if (!frame_list->list)
00100 return AV_NOPTS_VALUE;
00101 return frame_list->list->pts;
00102 }
00103
00104 static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
00105 {
00106 if (nb_samples >= frame_list->nb_samples) {
00107 frame_list_clear(frame_list);
00108 } else {
00109 int samples = nb_samples;
00110 while (samples > 0) {
00111 FrameInfo *info = frame_list->list;
00112 av_assert0(info != NULL);
00113 if (info->nb_samples <= samples) {
00114 samples -= info->nb_samples;
00115 frame_list->list = info->next;
00116 if (!frame_list->list)
00117 frame_list->end = NULL;
00118 frame_list->nb_frames--;
00119 frame_list->nb_samples -= info->nb_samples;
00120 av_free(info);
00121 } else {
00122 info->nb_samples -= samples;
00123 info->pts += samples;
00124 frame_list->nb_samples -= samples;
00125 samples = 0;
00126 }
00127 }
00128 }
00129 }
00130
00131 static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
00132 {
00133 FrameInfo *info = av_malloc(sizeof(*info));
00134 if (!info)
00135 return AVERROR(ENOMEM);
00136 info->nb_samples = nb_samples;
00137 info->pts = pts;
00138 info->next = NULL;
00139
00140 if (!frame_list->list) {
00141 frame_list->list = info;
00142 frame_list->end = info;
00143 } else {
00144 av_assert0(frame_list->end != NULL);
00145 frame_list->end->next = info;
00146 frame_list->end = info;
00147 }
00148 frame_list->nb_frames++;
00149 frame_list->nb_samples += nb_samples;
00150
00151 return 0;
00152 }
00153
00154
00155 typedef struct MixContext {
00156 const AVClass *class;
00157 AVFloatDSPContext fdsp;
00158
00159 int nb_inputs;
00160 int active_inputs;
00161 int duration_mode;
00162 float dropout_transition;
00164 int nb_channels;
00165 int sample_rate;
00166 int planar;
00167 AVAudioFifo **fifos;
00168 uint8_t *input_state;
00169 float *input_scale;
00170 float scale_norm;
00171 int64_t next_pts;
00172 FrameList *frame_list;
00173 } MixContext;
00174
00175 #define OFFSET(x) offsetof(MixContext, x)
00176 #define A AV_OPT_FLAG_AUDIO_PARAM
00177 #define F AV_OPT_FLAG_FILTERING_PARAM
00178 static const AVOption amix_options[] = {
00179 { "inputs", "Number of inputs.",
00180 OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 32, A|F },
00181 { "duration", "How to determine the end-of-stream.",
00182 OFFSET(duration_mode), AV_OPT_TYPE_INT, { .i64 = DURATION_LONGEST }, 0, 2, A|F, "duration" },
00183 { "longest", "Duration of longest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_LONGEST }, INT_MIN, INT_MAX, A|F, "duration" },
00184 { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_SHORTEST }, INT_MIN, INT_MAX, A|F, "duration" },
00185 { "first", "Duration of first input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_FIRST }, INT_MIN, INT_MAX, A|F, "duration" },
00186 { "dropout_transition", "Transition time, in seconds, for volume "
00187 "renormalization when an input stream ends.",
00188 OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A|F },
00189 { NULL },
00190 };
00191
00192 AVFILTER_DEFINE_CLASS(amix);
00193
00201 static void calculate_scales(MixContext *s, int nb_samples)
00202 {
00203 int i;
00204
00205 if (s->scale_norm > s->active_inputs) {
00206 s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
00207 s->scale_norm = FFMAX(s->scale_norm, s->active_inputs);
00208 }
00209
00210 for (i = 0; i < s->nb_inputs; i++) {
00211 if (s->input_state[i] == INPUT_ON)
00212 s->input_scale[i] = 1.0f / s->scale_norm;
00213 else
00214 s->input_scale[i] = 0.0f;
00215 }
00216 }
00217
00218 static int config_output(AVFilterLink *outlink)
00219 {
00220 AVFilterContext *ctx = outlink->src;
00221 MixContext *s = ctx->priv;
00222 int i;
00223 char buf[64];
00224
00225 s->planar = av_sample_fmt_is_planar(outlink->format);
00226 s->sample_rate = outlink->sample_rate;
00227 outlink->time_base = (AVRational){ 1, outlink->sample_rate };
00228 s->next_pts = AV_NOPTS_VALUE;
00229
00230 s->frame_list = av_mallocz(sizeof(*s->frame_list));
00231 if (!s->frame_list)
00232 return AVERROR(ENOMEM);
00233
00234 s->fifos = av_mallocz(s->nb_inputs * sizeof(*s->fifos));
00235 if (!s->fifos)
00236 return AVERROR(ENOMEM);
00237
00238 s->nb_channels = av_get_channel_layout_nb_channels(outlink->channel_layout);
00239 for (i = 0; i < s->nb_inputs; i++) {
00240 s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
00241 if (!s->fifos[i])
00242 return AVERROR(ENOMEM);
00243 }
00244
00245 s->input_state = av_malloc(s->nb_inputs);
00246 if (!s->input_state)
00247 return AVERROR(ENOMEM);
00248 memset(s->input_state, INPUT_ON, s->nb_inputs);
00249 s->active_inputs = s->nb_inputs;
00250
00251 s->input_scale = av_mallocz(s->nb_inputs * sizeof(*s->input_scale));
00252 if (!s->input_scale)
00253 return AVERROR(ENOMEM);
00254 s->scale_norm = s->active_inputs;
00255 calculate_scales(s, 0);
00256
00257 av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);
00258
00259 av_log(ctx, AV_LOG_VERBOSE,
00260 "inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs,
00261 av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);
00262
00263 return 0;
00264 }
00265
00269 static int output_frame(AVFilterLink *outlink, int nb_samples)
00270 {
00271 AVFilterContext *ctx = outlink->src;
00272 MixContext *s = ctx->priv;
00273 AVFilterBufferRef *out_buf, *in_buf;
00274 int i;
00275
00276 calculate_scales(s, nb_samples);
00277
00278 out_buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
00279 if (!out_buf)
00280 return AVERROR(ENOMEM);
00281
00282 in_buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
00283 if (!in_buf)
00284 return AVERROR(ENOMEM);
00285
00286 for (i = 0; i < s->nb_inputs; i++) {
00287 if (s->input_state[i] == INPUT_ON) {
00288 int planes, plane_size, p;
00289
00290 av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
00291 nb_samples);
00292
00293 planes = s->planar ? s->nb_channels : 1;
00294 plane_size = nb_samples * (s->planar ? 1 : s->nb_channels);
00295 plane_size = FFALIGN(plane_size, 16);
00296
00297 for (p = 0; p < planes; p++) {
00298 s->fdsp.vector_fmac_scalar((float *)out_buf->extended_data[p],
00299 (float *) in_buf->extended_data[p],
00300 s->input_scale[i], plane_size);
00301 }
00302 }
00303 }
00304 avfilter_unref_buffer(in_buf);
00305
00306 out_buf->pts = s->next_pts;
00307 if (s->next_pts != AV_NOPTS_VALUE)
00308 s->next_pts += nb_samples;
00309
00310 return ff_filter_samples(outlink, out_buf);
00311 }
00312
00317 static int get_available_samples(MixContext *s)
00318 {
00319 int i;
00320 int available_samples = INT_MAX;
00321
00322 av_assert0(s->nb_inputs > 1);
00323
00324 for (i = 1; i < s->nb_inputs; i++) {
00325 int nb_samples;
00326 if (s->input_state[i] == INPUT_OFF)
00327 continue;
00328 nb_samples = av_audio_fifo_size(s->fifos[i]);
00329 available_samples = FFMIN(available_samples, nb_samples);
00330 }
00331 if (available_samples == INT_MAX)
00332 return 0;
00333 return available_samples;
00334 }
00335
00339 static int request_samples(AVFilterContext *ctx, int min_samples)
00340 {
00341 MixContext *s = ctx->priv;
00342 int i, ret;
00343
00344 av_assert0(s->nb_inputs > 1);
00345
00346 for (i = 1; i < s->nb_inputs; i++) {
00347 ret = 0;
00348 if (s->input_state[i] == INPUT_OFF)
00349 continue;
00350 while (!ret && av_audio_fifo_size(s->fifos[i]) < min_samples)
00351 ret = ff_request_frame(ctx->inputs[i]);
00352 if (ret == AVERROR_EOF) {
00353 if (av_audio_fifo_size(s->fifos[i]) == 0) {
00354 s->input_state[i] = INPUT_OFF;
00355 continue;
00356 }
00357 } else if (ret < 0)
00358 return ret;
00359 }
00360 return 0;
00361 }
00362
00369 static int calc_active_inputs(MixContext *s)
00370 {
00371 int i;
00372 int active_inputs = 0;
00373 for (i = 0; i < s->nb_inputs; i++)
00374 active_inputs += !!(s->input_state[i] != INPUT_OFF);
00375 s->active_inputs = active_inputs;
00376
00377 if (!active_inputs ||
00378 (s->duration_mode == DURATION_FIRST && s->input_state[0] == INPUT_OFF) ||
00379 (s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
00380 return AVERROR_EOF;
00381 return 0;
00382 }
00383
00384 static int request_frame(AVFilterLink *outlink)
00385 {
00386 AVFilterContext *ctx = outlink->src;
00387 MixContext *s = ctx->priv;
00388 int ret;
00389 int wanted_samples, available_samples;
00390
00391 ret = calc_active_inputs(s);
00392 if (ret < 0)
00393 return ret;
00394
00395 if (s->input_state[0] == INPUT_OFF) {
00396 ret = request_samples(ctx, 1);
00397 if (ret < 0)
00398 return ret;
00399
00400 ret = calc_active_inputs(s);
00401 if (ret < 0)
00402 return ret;
00403
00404 available_samples = get_available_samples(s);
00405 if (!available_samples)
00406 return AVERROR(EAGAIN);
00407
00408 return output_frame(outlink, available_samples);
00409 }
00410
00411 if (s->frame_list->nb_frames == 0) {
00412 ret = ff_request_frame(ctx->inputs[0]);
00413 if (ret == AVERROR_EOF) {
00414 s->input_state[0] = INPUT_OFF;
00415 if (s->nb_inputs == 1)
00416 return AVERROR_EOF;
00417 else
00418 return AVERROR(EAGAIN);
00419 } else if (ret < 0)
00420 return ret;
00421 }
00422 av_assert0(s->frame_list->nb_frames > 0);
00423
00424 wanted_samples = frame_list_next_frame_size(s->frame_list);
00425
00426 if (s->active_inputs > 1) {
00427 ret = request_samples(ctx, wanted_samples);
00428 if (ret < 0)
00429 return ret;
00430
00431 ret = calc_active_inputs(s);
00432 if (ret < 0)
00433 return ret;
00434 }
00435
00436 if (s->active_inputs > 1) {
00437 available_samples = get_available_samples(s);
00438 if (!available_samples)
00439 return AVERROR(EAGAIN);
00440 available_samples = FFMIN(available_samples, wanted_samples);
00441 } else {
00442 available_samples = wanted_samples;
00443 }
00444
00445 s->next_pts = frame_list_next_pts(s->frame_list);
00446 frame_list_remove_samples(s->frame_list, available_samples);
00447
00448 return output_frame(outlink, available_samples);
00449 }
00450
00451 static int filter_samples(AVFilterLink *inlink, AVFilterBufferRef *buf)
00452 {
00453 AVFilterContext *ctx = inlink->dst;
00454 MixContext *s = ctx->priv;
00455 AVFilterLink *outlink = ctx->outputs[0];
00456 int i, ret = 0;
00457
00458 for (i = 0; i < ctx->nb_inputs; i++)
00459 if (ctx->inputs[i] == inlink)
00460 break;
00461 if (i >= ctx->nb_inputs) {
00462 av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
00463 ret = AVERROR(EINVAL);
00464 goto fail;
00465 }
00466
00467 if (i == 0) {
00468 int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
00469 outlink->time_base);
00470 ret = frame_list_add_frame(s->frame_list, buf->audio->nb_samples, pts);
00471 if (ret < 0)
00472 goto fail;
00473 }
00474
00475 ret = av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
00476 buf->audio->nb_samples);
00477
00478 fail:
00479 avfilter_unref_buffer(buf);
00480
00481 return ret;
00482 }
00483
00484 static int init(AVFilterContext *ctx, const char *args)
00485 {
00486 MixContext *s = ctx->priv;
00487 int i, ret;
00488
00489 s->class = &amix_class;
00490 av_opt_set_defaults(s);
00491
00492 if ((ret = av_set_options_string(s, args, "=", ":")) < 0)
00493 return ret;
00494 av_opt_free(s);
00495
00496 for (i = 0; i < s->nb_inputs; i++) {
00497 char name[32];
00498 AVFilterPad pad = { 0 };
00499
00500 snprintf(name, sizeof(name), "input%d", i);
00501 pad.type = AVMEDIA_TYPE_AUDIO;
00502 pad.name = av_strdup(name);
00503 pad.filter_samples = filter_samples;
00504
00505 ff_insert_inpad(ctx, i, &pad);
00506 }
00507
00508 avpriv_float_dsp_init(&s->fdsp, 0);
00509
00510 return 0;
00511 }
00512
00513 static void uninit(AVFilterContext *ctx)
00514 {
00515 int i;
00516 MixContext *s = ctx->priv;
00517
00518 if (s->fifos) {
00519 for (i = 0; i < s->nb_inputs; i++)
00520 av_audio_fifo_free(s->fifos[i]);
00521 av_freep(&s->fifos);
00522 }
00523 frame_list_clear(s->frame_list);
00524 av_freep(&s->frame_list);
00525 av_freep(&s->input_state);
00526 av_freep(&s->input_scale);
00527
00528 for (i = 0; i < ctx->nb_inputs; i++)
00529 av_freep(&ctx->input_pads[i].name);
00530 }
00531
00532 static int query_formats(AVFilterContext *ctx)
00533 {
00534 AVFilterFormats *formats = NULL;
00535 ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
00536 ff_add_format(&formats, AV_SAMPLE_FMT_FLTP);
00537 ff_set_common_formats(ctx, formats);
00538 ff_set_common_channel_layouts(ctx, ff_all_channel_layouts());
00539 ff_set_common_samplerates(ctx, ff_all_samplerates());
00540 return 0;
00541 }
00542
00543 AVFilter avfilter_af_amix = {
00544 .name = "amix",
00545 .description = NULL_IF_CONFIG_SMALL("Audio mixing."),
00546 .priv_size = sizeof(MixContext),
00547
00548 .init = init,
00549 .uninit = uninit,
00550 .query_formats = query_formats,
00551
00552 .inputs = NULL,
00553 .outputs = (const AVFilterPad[]) {{ .name = "default",
00554 .type = AVMEDIA_TYPE_AUDIO,
00555 .config_props = config_output,
00556 .request_frame = request_frame },
00557 { .name = NULL}},
00558 .priv_class = &amix_class,
00559 };