00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "libavutil/x86_cpu.h"
00023 #include "dsputil_mmx.h"
00024 #include "libavcodec/ac3dsp.h"
00025
/*
 * Prototypes for the SIMD routines installed by ff_ac3dsp_init_x86().
 * They are only declared here; the definitions live outside this file.
 * The suffix of each name is the instruction-set variant it is selected for.
 */

/* ac3_exponent_min variants */
void ff_ac3_exponent_min_mmx(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
void ff_ac3_exponent_min_sse2(uint8_t *exp, int num_reuse_blocks, int nb_coefs);

/* ac3_max_msb_abs_int16 variants */
int ff_ac3_max_msb_abs_int16_mmx(const int16_t *src, int len);
int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
int ff_ac3_max_msb_abs_int16_sse2(const int16_t *src, int len);
int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);

/* ac3_lshift_int16 variants */
void ff_ac3_lshift_int16_mmx(int16_t *src, unsigned int len, unsigned int shift);
void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift);

/* ac3_rshift_int32 variants */
void ff_ac3_rshift_int32_mmx(int32_t *src, unsigned int len, unsigned int shift);
void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift);

/* float_to_fixed24 variants */
void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int len);
void ff_float_to_fixed24_sse(int32_t *dst, const float *src, unsigned int len);
void ff_float_to_fixed24_sse2(int32_t *dst, const float *src, unsigned int len);

/* compute_mantissa_size variant */
int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);

/* extract_exponents variants */
void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_ac3_extract_exponents_sse2(uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
00050
00051 av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
00052 {
00053 #if HAVE_YASM
00054 int mm_flags = av_get_cpu_flags();
00055
00056 if (mm_flags & AV_CPU_FLAG_MMX) {
00057 c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
00058 c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
00059 c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
00060 c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
00061 }
00062 if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
00063 c->extract_exponents = ff_ac3_extract_exponents_3dnow;
00064 if (!bit_exact) {
00065 c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
00066 }
00067 }
00068 if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
00069 c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
00070 c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
00071 }
00072 if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
00073 c->float_to_fixed24 = ff_float_to_fixed24_sse;
00074 }
00075 if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
00076 c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
00077 c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
00078 c->float_to_fixed24 = ff_float_to_fixed24_sse2;
00079 c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
00080 c->extract_exponents = ff_ac3_extract_exponents_sse2;
00081 if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
00082 c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
00083 c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
00084 }
00085 }
00086 if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
00087 c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
00088 if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
00089 c->extract_exponents = ff_ac3_extract_exponents_ssse3;
00090 }
00091 }
00092 #endif
00093 }