00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00067 #include <speex/speex.h>
00068 #include <speex/speex_header.h>
00069 #include <speex/speex_stereo.h>
00070 #include "libavutil/opt.h"
00071 #include "avcodec.h"
00072 #include "internal.h"
00073 #include "audio_frame_queue.h"
00074
00075 typedef struct {
00076 AVClass *class;
00077 SpeexBits bits;
00078 SpeexHeader header;
00079 void *enc_state;
00080 int frames_per_packet;
00081 float vbr_quality;
00082 int cbr_quality;
00083 int abr;
00084 int pkt_frame_count;
00085 AudioFrameQueue afq;
00086 } LibSpeexEncContext;
00087
00088 static av_cold void print_enc_params(AVCodecContext *avctx,
00089 LibSpeexEncContext *s)
00090 {
00091 const char *mode_str = "unknown";
00092
00093 av_log(avctx, AV_LOG_DEBUG, "channels: %d\n", avctx->channels);
00094 switch (s->header.mode) {
00095 case SPEEX_MODEID_NB: mode_str = "narrowband"; break;
00096 case SPEEX_MODEID_WB: mode_str = "wideband"; break;
00097 case SPEEX_MODEID_UWB: mode_str = "ultra-wideband"; break;
00098 }
00099 av_log(avctx, AV_LOG_DEBUG, "mode: %s\n", mode_str);
00100 if (s->header.vbr) {
00101 av_log(avctx, AV_LOG_DEBUG, "rate control: VBR\n");
00102 av_log(avctx, AV_LOG_DEBUG, " quality: %f\n", s->vbr_quality);
00103 } else if (s->abr) {
00104 av_log(avctx, AV_LOG_DEBUG, "rate control: ABR\n");
00105 av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
00106 } else {
00107 av_log(avctx, AV_LOG_DEBUG, "rate control: CBR\n");
00108 av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
00109 }
00110 av_log(avctx, AV_LOG_DEBUG, "complexity: %d\n",
00111 avctx->compression_level);
00112 av_log(avctx, AV_LOG_DEBUG, "frame size: %d samples\n",
00113 avctx->frame_size);
00114 av_log(avctx, AV_LOG_DEBUG, "frames per packet: %d\n",
00115 s->frames_per_packet);
00116 av_log(avctx, AV_LOG_DEBUG, "packet size: %d\n",
00117 avctx->frame_size * s->frames_per_packet);
00118 }
00119
00120 static av_cold int encode_init(AVCodecContext *avctx)
00121 {
00122 LibSpeexEncContext *s = avctx->priv_data;
00123 const SpeexMode *mode;
00124 uint8_t *header_data;
00125 int header_size;
00126 int32_t complexity;
00127
00128
00129 if (avctx->channels < 1 || avctx->channels > 2) {
00130 av_log(avctx, AV_LOG_ERROR, "Invalid channels (%d). Only stereo and "
00131 "mono are supported\n", avctx->channels);
00132 return AVERROR(EINVAL);
00133 }
00134
00135
00136 switch (avctx->sample_rate) {
00137 case 8000: mode = &speex_nb_mode; break;
00138 case 16000: mode = &speex_wb_mode; break;
00139 case 32000: mode = &speex_uwb_mode; break;
00140 default:
00141 av_log(avctx, AV_LOG_ERROR, "Sample rate of %d Hz is not supported. "
00142 "Resample to 8, 16, or 32 kHz.\n", avctx->sample_rate);
00143 return AVERROR(EINVAL);
00144 }
00145
00146
00147 s->enc_state = speex_encoder_init(mode);
00148 if (!s->enc_state) {
00149 av_log(avctx, AV_LOG_ERROR, "Error initializing libspeex\n");
00150 return -1;
00151 }
00152 speex_init_header(&s->header, avctx->sample_rate, avctx->channels, mode);
00153
00154
00155 if (avctx->flags & CODEC_FLAG_QSCALE) {
00156
00157 s->header.vbr = 1;
00158 speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR, &s->header.vbr);
00159 s->vbr_quality = av_clipf(avctx->global_quality / (float)FF_QP2LAMBDA,
00160 0.0f, 10.0f);
00161 speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR_QUALITY, &s->vbr_quality);
00162 } else {
00163 s->header.bitrate = avctx->bit_rate;
00164 if (avctx->bit_rate > 0) {
00165
00166 if (s->abr) {
00167 speex_encoder_ctl(s->enc_state, SPEEX_SET_ABR,
00168 &s->header.bitrate);
00169 speex_encoder_ctl(s->enc_state, SPEEX_GET_ABR,
00170 &s->header.bitrate);
00171 } else {
00172 speex_encoder_ctl(s->enc_state, SPEEX_SET_BITRATE,
00173 &s->header.bitrate);
00174 speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
00175 &s->header.bitrate);
00176 }
00177 } else {
00178
00179 speex_encoder_ctl(s->enc_state, SPEEX_SET_QUALITY,
00180 &s->cbr_quality);
00181 speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
00182 &s->header.bitrate);
00183 }
00184
00185
00186 avctx->bit_rate = s->header.bitrate + (avctx->channels == 2 ? 800 : 0);
00187 }
00188
00189
00190 if (avctx->compression_level > FF_COMPRESSION_DEFAULT) {
00191 complexity = av_clip(avctx->compression_level, 0, 10);
00192 speex_encoder_ctl(s->enc_state, SPEEX_SET_COMPLEXITY, &complexity);
00193 }
00194 speex_encoder_ctl(s->enc_state, SPEEX_GET_COMPLEXITY, &complexity);
00195 avctx->compression_level = complexity;
00196
00197
00198 avctx->frame_size = s->header.frame_size;
00199 s->header.frames_per_packet = s->frames_per_packet;
00200
00201
00202 speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->delay);
00203 ff_af_queue_init(avctx, &s->afq);
00204
00205
00206
00207
00208 header_data = speex_header_to_packet(&s->header, &header_size);
00209
00210
00211 avctx->extradata = av_malloc(header_size + FF_INPUT_BUFFER_PADDING_SIZE);
00212 if (!avctx->extradata) {
00213 speex_header_free(header_data);
00214 speex_encoder_destroy(s->enc_state);
00215 av_log(avctx, AV_LOG_ERROR, "memory allocation error\n");
00216 return AVERROR(ENOMEM);
00217 }
00218 #if FF_API_OLD_ENCODE_AUDIO
00219 avctx->coded_frame = avcodec_alloc_frame();
00220 if (!avctx->coded_frame) {
00221 av_freep(&avctx->extradata);
00222 speex_header_free(header_data);
00223 speex_encoder_destroy(s->enc_state);
00224 av_log(avctx, AV_LOG_ERROR, "memory allocation error\n");
00225 return AVERROR(ENOMEM);
00226 }
00227 #endif
00228
00229
00230 memcpy(avctx->extradata, header_data, header_size);
00231 avctx->extradata_size = header_size;
00232 speex_header_free(header_data);
00233
00234
00235 speex_bits_init(&s->bits);
00236
00237 print_enc_params(avctx, s);
00238 return 0;
00239 }
00240
00241 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
00242 const AVFrame *frame, int *got_packet_ptr)
00243 {
00244 LibSpeexEncContext *s = avctx->priv_data;
00245 int16_t *samples = frame ? (int16_t *)frame->data[0] : NULL;
00246 int ret;
00247
00248 if (samples) {
00249
00250 if (avctx->channels == 2)
00251 speex_encode_stereo_int(samples, s->header.frame_size, &s->bits);
00252 speex_encode_int(s->enc_state, samples, &s->bits);
00253 s->pkt_frame_count++;
00254 if ((ret = ff_af_queue_add(&s->afq, frame) < 0))
00255 return ret;
00256 } else {
00257
00258 if (!s->pkt_frame_count)
00259 return 0;
00260
00261 while (s->pkt_frame_count < s->frames_per_packet) {
00262 speex_bits_pack(&s->bits, 15, 5);
00263 s->pkt_frame_count++;
00264 }
00265 }
00266
00267
00268 if (s->pkt_frame_count == s->frames_per_packet) {
00269 s->pkt_frame_count = 0;
00270 if ((ret = ff_alloc_packet2(avctx, avpkt, speex_bits_nbytes(&s->bits))))
00271 return ret;
00272 ret = speex_bits_write(&s->bits, avpkt->data, avpkt->size);
00273 speex_bits_reset(&s->bits);
00274
00275
00276 ff_af_queue_remove(&s->afq, s->frames_per_packet * avctx->frame_size,
00277 &avpkt->pts, &avpkt->duration);
00278
00279 avpkt->size = ret;
00280 *got_packet_ptr = 1;
00281 return 0;
00282 }
00283 return 0;
00284 }
00285
00286 static av_cold int encode_close(AVCodecContext *avctx)
00287 {
00288 LibSpeexEncContext *s = avctx->priv_data;
00289
00290 speex_bits_destroy(&s->bits);
00291 speex_encoder_destroy(s->enc_state);
00292
00293 ff_af_queue_close(&s->afq);
00294 #if FF_API_OLD_ENCODE_AUDIO
00295 av_freep(&avctx->coded_frame);
00296 #endif
00297 av_freep(&avctx->extradata);
00298
00299 return 0;
00300 }
00301
00302 #define OFFSET(x) offsetof(LibSpeexEncContext, x)
00303 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
00304 static const AVOption options[] = {
00305 { "abr", "Use average bit rate", OFFSET(abr), AV_OPT_TYPE_INT, { 0 }, 0, 1, AE },
00306 { "cbr_quality", "Set quality value (0 to 10) for CBR", OFFSET(cbr_quality), AV_OPT_TYPE_INT, { 8 }, 0, 10, AE },
00307 { "frames_per_packet", "Number of frames to encode in each packet", OFFSET(frames_per_packet), AV_OPT_TYPE_INT, { 1 }, 1, 8, AE },
00308 { NULL },
00309 };
00310
00311 static const AVClass class = {
00312 .class_name = "libspeex",
00313 .item_name = av_default_item_name,
00314 .option = options,
00315 .version = LIBAVUTIL_VERSION_INT,
00316 };
00317
00318 static const AVCodecDefault defaults[] = {
00319 { "b", "0" },
00320 { "compression_level", "3" },
00321 { NULL },
00322 };
00323
00324 AVCodec ff_libspeex_encoder = {
00325 .name = "libspeex",
00326 .type = AVMEDIA_TYPE_AUDIO,
00327 .id = CODEC_ID_SPEEX,
00328 .priv_data_size = sizeof(LibSpeexEncContext),
00329 .init = encode_init,
00330 .encode2 = encode_frame,
00331 .close = encode_close,
00332 .capabilities = CODEC_CAP_DELAY,
00333 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
00334 AV_SAMPLE_FMT_NONE },
00335 .long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"),
00336 .priv_class = &class,
00337 .defaults = defaults,
00338 };