00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00067 #include <speex/speex.h>
00068 #include <speex/speex_header.h>
00069 #include <speex/speex_stereo.h>
00070
00071 #include "libavutil/audioconvert.h"
00072 #include "libavutil/common.h"
00073 #include "libavutil/opt.h"
00074 #include "avcodec.h"
00075 #include "internal.h"
00076 #include "audio_frame_queue.h"
00077
00078 typedef struct {
00079 AVClass *class;
00080 SpeexBits bits;
00081 SpeexHeader header;
00082 void *enc_state;
00083 int frames_per_packet;
00084 float vbr_quality;
00085 int cbr_quality;
00086 int abr;
00087 int pkt_frame_count;
00088 AudioFrameQueue afq;
00089 } LibSpeexEncContext;
00090
00091 static av_cold void print_enc_params(AVCodecContext *avctx,
00092 LibSpeexEncContext *s)
00093 {
00094 const char *mode_str = "unknown";
00095
00096 av_log(avctx, AV_LOG_DEBUG, "channels: %d\n", avctx->channels);
00097 switch (s->header.mode) {
00098 case SPEEX_MODEID_NB: mode_str = "narrowband"; break;
00099 case SPEEX_MODEID_WB: mode_str = "wideband"; break;
00100 case SPEEX_MODEID_UWB: mode_str = "ultra-wideband"; break;
00101 }
00102 av_log(avctx, AV_LOG_DEBUG, "mode: %s\n", mode_str);
00103 if (s->header.vbr) {
00104 av_log(avctx, AV_LOG_DEBUG, "rate control: VBR\n");
00105 av_log(avctx, AV_LOG_DEBUG, " quality: %f\n", s->vbr_quality);
00106 } else if (s->abr) {
00107 av_log(avctx, AV_LOG_DEBUG, "rate control: ABR\n");
00108 av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
00109 } else {
00110 av_log(avctx, AV_LOG_DEBUG, "rate control: CBR\n");
00111 av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
00112 }
00113 av_log(avctx, AV_LOG_DEBUG, "complexity: %d\n",
00114 avctx->compression_level);
00115 av_log(avctx, AV_LOG_DEBUG, "frame size: %d samples\n",
00116 avctx->frame_size);
00117 av_log(avctx, AV_LOG_DEBUG, "frames per packet: %d\n",
00118 s->frames_per_packet);
00119 av_log(avctx, AV_LOG_DEBUG, "packet size: %d\n",
00120 avctx->frame_size * s->frames_per_packet);
00121 }
00122
00123 static av_cold int encode_init(AVCodecContext *avctx)
00124 {
00125 LibSpeexEncContext *s = avctx->priv_data;
00126 const SpeexMode *mode;
00127 uint8_t *header_data;
00128 int header_size;
00129 int32_t complexity;
00130
00131
00132 if (avctx->channels < 1 || avctx->channels > 2) {
00133 av_log(avctx, AV_LOG_ERROR, "Invalid channels (%d). Only stereo and "
00134 "mono are supported\n", avctx->channels);
00135 return AVERROR(EINVAL);
00136 }
00137
00138
00139 switch (avctx->sample_rate) {
00140 case 8000: mode = &speex_nb_mode; break;
00141 case 16000: mode = &speex_wb_mode; break;
00142 case 32000: mode = &speex_uwb_mode; break;
00143 default:
00144 av_log(avctx, AV_LOG_ERROR, "Sample rate of %d Hz is not supported. "
00145 "Resample to 8, 16, or 32 kHz.\n", avctx->sample_rate);
00146 return AVERROR(EINVAL);
00147 }
00148
00149
00150 s->enc_state = speex_encoder_init(mode);
00151 if (!s->enc_state) {
00152 av_log(avctx, AV_LOG_ERROR, "Error initializing libspeex\n");
00153 return -1;
00154 }
00155 speex_init_header(&s->header, avctx->sample_rate, avctx->channels, mode);
00156
00157
00158 if (avctx->flags & CODEC_FLAG_QSCALE) {
00159
00160 s->header.vbr = 1;
00161 speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR, &s->header.vbr);
00162 s->vbr_quality = av_clipf(avctx->global_quality / (float)FF_QP2LAMBDA,
00163 0.0f, 10.0f);
00164 speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR_QUALITY, &s->vbr_quality);
00165 } else {
00166 s->header.bitrate = avctx->bit_rate;
00167 if (avctx->bit_rate > 0) {
00168
00169 if (s->abr) {
00170 speex_encoder_ctl(s->enc_state, SPEEX_SET_ABR,
00171 &s->header.bitrate);
00172 speex_encoder_ctl(s->enc_state, SPEEX_GET_ABR,
00173 &s->header.bitrate);
00174 } else {
00175 speex_encoder_ctl(s->enc_state, SPEEX_SET_BITRATE,
00176 &s->header.bitrate);
00177 speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
00178 &s->header.bitrate);
00179 }
00180 } else {
00181
00182 speex_encoder_ctl(s->enc_state, SPEEX_SET_QUALITY,
00183 &s->cbr_quality);
00184 speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
00185 &s->header.bitrate);
00186 }
00187
00188
00189 avctx->bit_rate = s->header.bitrate + (avctx->channels == 2 ? 800 : 0);
00190 }
00191
00192
00193 if (avctx->compression_level > FF_COMPRESSION_DEFAULT) {
00194 complexity = av_clip(avctx->compression_level, 0, 10);
00195 speex_encoder_ctl(s->enc_state, SPEEX_SET_COMPLEXITY, &complexity);
00196 }
00197 speex_encoder_ctl(s->enc_state, SPEEX_GET_COMPLEXITY, &complexity);
00198 avctx->compression_level = complexity;
00199
00200
00201 avctx->frame_size = s->header.frame_size;
00202 s->header.frames_per_packet = s->frames_per_packet;
00203
00204
00205 speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->delay);
00206 ff_af_queue_init(avctx, &s->afq);
00207
00208
00209
00210
00211 header_data = speex_header_to_packet(&s->header, &header_size);
00212
00213
00214 avctx->extradata = av_malloc(header_size + FF_INPUT_BUFFER_PADDING_SIZE);
00215 if (!avctx->extradata) {
00216 speex_header_free(header_data);
00217 speex_encoder_destroy(s->enc_state);
00218 av_log(avctx, AV_LOG_ERROR, "memory allocation error\n");
00219 return AVERROR(ENOMEM);
00220 }
00221 #if FF_API_OLD_ENCODE_AUDIO
00222 avctx->coded_frame = avcodec_alloc_frame();
00223 if (!avctx->coded_frame) {
00224 av_freep(&avctx->extradata);
00225 speex_header_free(header_data);
00226 speex_encoder_destroy(s->enc_state);
00227 av_log(avctx, AV_LOG_ERROR, "memory allocation error\n");
00228 return AVERROR(ENOMEM);
00229 }
00230 #endif
00231
00232
00233 memcpy(avctx->extradata, header_data, header_size);
00234 avctx->extradata_size = header_size;
00235 speex_header_free(header_data);
00236
00237
00238 speex_bits_init(&s->bits);
00239
00240 print_enc_params(avctx, s);
00241 return 0;
00242 }
00243
00244 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
00245 const AVFrame *frame, int *got_packet_ptr)
00246 {
00247 LibSpeexEncContext *s = avctx->priv_data;
00248 int16_t *samples = frame ? (int16_t *)frame->data[0] : NULL;
00249 int ret;
00250
00251 if (samples) {
00252
00253 if (avctx->channels == 2)
00254 speex_encode_stereo_int(samples, s->header.frame_size, &s->bits);
00255 speex_encode_int(s->enc_state, samples, &s->bits);
00256 s->pkt_frame_count++;
00257 if ((ret = ff_af_queue_add(&s->afq, frame) < 0))
00258 return ret;
00259 } else {
00260
00261 if (!s->pkt_frame_count)
00262 return 0;
00263
00264 while (s->pkt_frame_count < s->frames_per_packet) {
00265 speex_bits_pack(&s->bits, 15, 5);
00266 s->pkt_frame_count++;
00267 }
00268 }
00269
00270
00271 if (s->pkt_frame_count == s->frames_per_packet) {
00272 s->pkt_frame_count = 0;
00273 if ((ret = ff_alloc_packet2(avctx, avpkt, speex_bits_nbytes(&s->bits))))
00274 return ret;
00275 ret = speex_bits_write(&s->bits, avpkt->data, avpkt->size);
00276 speex_bits_reset(&s->bits);
00277
00278
00279 ff_af_queue_remove(&s->afq, s->frames_per_packet * avctx->frame_size,
00280 &avpkt->pts, &avpkt->duration);
00281
00282 avpkt->size = ret;
00283 *got_packet_ptr = 1;
00284 return 0;
00285 }
00286 return 0;
00287 }
00288
00289 static av_cold int encode_close(AVCodecContext *avctx)
00290 {
00291 LibSpeexEncContext *s = avctx->priv_data;
00292
00293 speex_bits_destroy(&s->bits);
00294 speex_encoder_destroy(s->enc_state);
00295
00296 ff_af_queue_close(&s->afq);
00297 #if FF_API_OLD_ENCODE_AUDIO
00298 av_freep(&avctx->coded_frame);
00299 #endif
00300 av_freep(&avctx->extradata);
00301
00302 return 0;
00303 }
00304
00305 #define OFFSET(x) offsetof(LibSpeexEncContext, x)
00306 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
00307 static const AVOption options[] = {
00308 { "abr", "Use average bit rate", OFFSET(abr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
00309 { "cbr_quality", "Set quality value (0 to 10) for CBR", OFFSET(cbr_quality), AV_OPT_TYPE_INT, { .i64 = 8 }, 0, 10, AE },
00310 { "frames_per_packet", "Number of frames to encode in each packet", OFFSET(frames_per_packet), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 8, AE },
00311 { NULL },
00312 };
00313
00314 static const AVClass class = {
00315 .class_name = "libspeex",
00316 .item_name = av_default_item_name,
00317 .option = options,
00318 .version = LIBAVUTIL_VERSION_INT,
00319 };
00320
00321 static const AVCodecDefault defaults[] = {
00322 { "b", "0" },
00323 { "compression_level", "3" },
00324 { NULL },
00325 };
00326
00327 AVCodec ff_libspeex_encoder = {
00328 .name = "libspeex",
00329 .type = AVMEDIA_TYPE_AUDIO,
00330 .id = AV_CODEC_ID_SPEEX,
00331 .priv_data_size = sizeof(LibSpeexEncContext),
00332 .init = encode_init,
00333 .encode2 = encode_frame,
00334 .close = encode_close,
00335 .capabilities = CODEC_CAP_DELAY,
00336 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
00337 AV_SAMPLE_FMT_NONE },
00338 .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO,
00339 AV_CH_LAYOUT_STEREO,
00340 0 },
00341 .supported_samplerates = (const int[]){ 8000, 16000, 32000, 0 },
00342 .long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"),
00343 .priv_class = &class,
00344 .defaults = defaults,
00345 };