/*
 * DECL_INIT_FN(basis, interleave): expands to a static av_cold init function
 * whose name is formed by token pasting as b<basis>_i<interleave>.
 *
 * The visible part of the expansion calls ff_tx_init_tabs_float(len), then
 * returns ff_tx_gen_ptwo_revtab(s, opts) when cd->max_len == 2, and otherwise
 * returns the result of ff_tx_gen_split_radix_parity_revtab(s, len, inv,
 * opts, ...).
 *
 * NOTE(review): several continuation lines of this macro are not present in
 * this excerpt (the rest of the parameter list, the braces, and the trailing
 * arguments of the last call), so how basis and interleave are used inside
 * the body cannot be confirmed from here.
 */
70 #define DECL_INIT_FN(basis, interleave) \
71 static av_cold int b ##basis## _i ##interleave(AVTXContext *s, \
72 const FFTXCodelet *cd, \
74 FFTXCodeletOptions *opts, \
78 ff_tx_init_tabs_float(len); \
79 if (cd->max_len == 2) \
80 return ff_tx_gen_ptwo_revtab(s, opts); \
82 return ff_tx_gen_split_radix_parity_revtab(s, len, inv, opts, \
/* Rearrange the first 15 entries of s->map, reading from a snapshot (tmp) of
 * their original order.
 * NOTE(review): the statements that advance cnt and close each loop body are
 * not present in this excerpt; cnt is presumably incremented after every
 * store - confirm against the full file. */
111 int cnt = 0,
tmp[15];
/* Snapshot the 15 entries so the stores below read unmodified values. */
114 memcpy(
tmp,
s->map, 15*
sizeof(*
tmp));
/* Pass 1: source indices 1, 4, 7, 10, 13. */
115 for (
int i = 1;
i < 15;
i += 3) {
116 s->map[cnt] =
tmp[
i];
/* Pass 2: source indices 2, 5, 8, 11, 14. */
119 for (
int i = 2;
i < 15;
i += 3) {
120 s->map[cnt] =
tmp[
i];
/* Pass 3: source indices 0, 3, 6, 9, 12. */
123 for (
int i = 0;
i < 15;
i += 3) {
124 s->map[cnt] =
tmp[
i];
/* Move the 4 entries at [6..9] up by one slot to [7..10]; memmove is needed
 * because source and destination overlap. */
127 memmove(&
s->map[7], &
s->map[6], 4*
sizeof(
int));
/* Move the 4 entries at [1..4] up by two slots to [3..6]; these ranges
 * overlap as well. */
128 memmove(&
s->map[3], &
s->map[1], 4*
sizeof(
int));
138 int len,
int inv,
const void *
scale)
/* NOTE(review): the function name, the leading parameters and the lines
 * between the statements below are not present in this excerpt. */
/* Read the caller-supplied scale through a SCALE_TYPE pointer into scale_d. */
143 s->scale_d = *((SCALE_TYPE *)
scale);
/* Copy the same value into scale_f; any conversion depends on the field
 * types, which are not visible here. */
144 s->scale_f =
s->scale_d;
/* Copy the first len/2 entries of s->sub->map into s->map. */
159 memcpy(
s->map,
s->sub->map, (
len >> 1)*
sizeof(*
s->map));
/* Fill the second half of s->map so that entry (len/2 + s->map[i]) holds i,
 * i.e. it maps each value stored in the first half back to its index.
 * Assumes those values lie in [0, len/2) and that s->map holds at least len
 * entries - confirm against the allocation, which is not shown here. */
161 for (
int i = 0;
i < (
len >> 1);
i++)
162 s->map[(
len >> 1) +
s->map[
i]] =
i;
187 sub_len, inv,
scale)))
/* NOTE(review): the two lines above are the tail of a call whose head is not
 * present in this excerpt. */
/* Process s->map in consecutive groups of 15 entries, one group per k, with
 * s->sub[0].len groups in total. Each group is rearranged using tmp as a
 * snapshot of its original order.
 * NOTE(review): the declarations of tmp and cnt, the per-group reset of cnt,
 * its increments and the loop closers are not visible here; cnt is presumably
 * reset to 0 for every k and incremented after each store - confirm. */
200 for (
int k = 0; k <
s->sub[0].len; k++) {
/* Snapshot the 15 entries of group k. */
202 memcpy(
tmp, &
s->map[k*15], 15*
sizeof(*
tmp));
/* Pass 1: source indices 1, 4, 7, 10, 13. */
203 for (
int i = 1;
i < 15;
i += 3) {
204 s->map[k*15 + cnt] =
tmp[
i];
/* Pass 2: source indices 2, 5, 8, 11, 14. */
207 for (
int i = 2;
i < 15;
i += 3) {
208 s->map[k*15 + cnt] =
tmp[
i];
/* Pass 3: source indices 0, 3, 6, 9, 12. */
211 for (
int i = 0;
i < 15;
i += 3) {
212 s->map[k*15 + cnt] =
tmp[
i];
/* Within the group, move the 4 entries at [6..9] up by one slot to [7..10];
 * memmove because the ranges overlap. */
215 memmove(&
s->map[k*15 + 7], &
s->map[k*15 + 6], 4*
sizeof(
int));
/* Within the group, move the 4 entries at [1..4] up by two slots to [3..6]. */
216 memmove(&
s->map[k*15 + 3], &
s->map[k*15 + 1], 4*
sizeof(
int));
/* Overwrite slots 1 and 2 of the group with the original entries 2 and 0. */
217 s->map[k*15 + 1] =
tmp[2];
218 s->map[k*15 + 2] =
tmp[0];
/*
 * Codelet table entries for the SSE2/SSE3/AVX/FMA3 FFT variants, built with
 * TX_DEF (macro definition not visible in this excerpt).
 *
 * Pattern visible in the entries below: for a given transform size and
 * instruction set, the *_asm variant and the FF_TX_PRESHUFFLE variant carry a
 * seventh argument that is 64 higher than the plain variant (128 -> 192,
 * 256 -> 320, 288 -> 352). fft4_inv_asm is the exception: it uses 128 with a
 * NULL eighth argument, like fft4_inv.
 *
 * NOTE(review): the seventh argument is presumably a priority and the eighth
 * an init callback (NULL, b8_i0 or b8_i2) - confirm against TX_DEF. Several
 * entries are cut off after the instruction-set arguments in this excerpt, so
 * their flag arguments are not shown.
 */
231 TX_DEF(fft2, FFT, 2, 2, 2, 0, 128,
NULL, sse3, SSE3,
AV_TX_INPLACE, 0),
232 TX_DEF(fft2_asm, FFT, 2, 2, 2, 0, 192, b8_i0, sse3, SSE3,
234 TX_DEF(fft2, FFT, 2, 2, 2, 0, 192, b8_i0, sse3, SSE3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE, 0),
235 TX_DEF(fft4_fwd, FFT, 4, 4, 2, 0, 128,
NULL, sse2, SSE2,
AV_TX_INPLACE |
FF_TX_FORWARD_ONLY, 0),
236 TX_DEF(fft4_fwd_asm, FFT, 4, 4, 2, 0, 192, b8_i0, sse2, SSE2,
238 TX_DEF(fft4_inv_asm, FFT, 4, 4, 2, 0, 128,
NULL, sse2, SSE2,
240 TX_DEF(fft4_fwd, FFT, 4, 4, 2, 0, 192, b8_i0, sse2, SSE2,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE, 0),
241 TX_DEF(fft4_inv, FFT, 4, 4, 2, 0, 128,
NULL, sse2, SSE2,
AV_TX_INPLACE |
FF_TX_INVERSE_ONLY, 0),
242 TX_DEF(
fft8, FFT, 8, 8, 2, 0, 128, b8_i0, sse3, SSE3,
AV_TX_INPLACE, 0),
243 TX_DEF(fft8_asm, FFT, 8, 8, 2, 0, 192, b8_i0, sse3, SSE3,
245 TX_DEF(fft8_ns, FFT, 8, 8, 2, 0, 192, b8_i0, sse3, SSE3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE, 0),
246 TX_DEF(
fft8, FFT, 8, 8, 2, 0, 256, b8_i0, avx, AVX,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
247 TX_DEF(fft8_asm, FFT, 8, 8, 2, 0, 320, b8_i0, avx, AVX,
249 TX_DEF(fft8_ns, FFT, 8, 8, 2, 0, 320, b8_i0, avx, AVX,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
251 TX_DEF(
fft16, FFT, 16, 16, 2, 0, 256, b8_i2, avx, AVX,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
252 TX_DEF(fft16_asm, FFT, 16, 16, 2, 0, 320, b8_i2, avx, AVX,
254 TX_DEF(fft16_ns, FFT, 16, 16, 2, 0, 320, b8_i2, avx, AVX,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
256 TX_DEF(
fft16, FFT, 16, 16, 2, 0, 288, b8_i2, fma3, FMA3,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
257 TX_DEF(fft16_asm, FFT, 16, 16, 2, 0, 352, b8_i2, fma3, FMA3,
259 TX_DEF(fft16_ns, FFT, 16, 16, 2, 0, 352, b8_i2, fma3, FMA3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
263 TX_DEF(fft32, FFT, 32, 32, 2, 0, 256, b8_i2, avx, AVX,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
264 TX_DEF(fft32_asm, FFT, 32, 32, 2, 0, 320, b8_i2, avx, AVX,
266 TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 320, b8_i2, avx, AVX,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
268 TX_DEF(fft32, FFT, 32, 32, 2, 0, 288, b8_i2, fma3, FMA3,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
269 TX_DEF(fft32_asm, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3,
271 TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
273 TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 256, b8_i2, avx, AVX, 0,
AV_CPU_FLAG_AVXSLOW),
274 TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX,
276 TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
278 TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 288, b8_i2, fma3, FMA3, 0,
AV_CPU_FLAG_AVXSLOW),
279 TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3, FMA3,
281 TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3, FMA3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
/*
 * AVX2 entries, compiled only when HAVE_AVX2_EXTERNAL is non-zero (the
 * matching #endif is not present in this excerpt).
 *
 * Visible here: the 15-point entries use factor_init, the fft_pfa_15xM
 * entries use fft_pfa_init, the fft_sr entries use b8_i2 and mdct_inv uses
 * m_inv_init. The *_asm and *_ns variants carry 384 as seventh argument
 * where the plain variants carry 320.
 *
 * NOTE(review): the third and fourth arguments (e.g. 60 and
 * TX_LEN_UNLIMITED) are presumably the minimum and maximum supported lengths;
 * confirm against the TX_DEF definition. Most entries are cut off before
 * their flag arguments, and the last entry runs past the end of the excerpt.
 */
284 #if HAVE_AVX2_EXTERNAL
285 TX_DEF(
fft15, FFT, 15, 15, 15, 0, 320,
factor_init, avx2, AVX2,
287 TX_DEF(fft15_ns, FFT, 15, 15, 15, 0, 384,
factor_init, avx2, AVX2,
290 TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 320, b8_i2, avx2, AVX2, 0,
292 TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 384, b8_i2, avx2, AVX2,
294 TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 384, b8_i2, avx2, AVX2,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
297 TX_DEF(fft_pfa_15xM, FFT, 60,
TX_LEN_UNLIMITED, 15, 2, 320,
fft_pfa_init, avx2, AVX2,
299 TX_DEF(fft_pfa_15xM_asm, FFT, 60,
TX_LEN_UNLIMITED, 15, 2, 384,
fft_pfa_init, avx2, AVX2,
301 TX_DEF(fft_pfa_15xM_ns, FFT, 60,
TX_LEN_UNLIMITED, 15, 2, 384,
fft_pfa_init, avx2, AVX2,
304 TX_DEF(mdct_inv, MDCT, 16,
TX_LEN_UNLIMITED, 2,
TX_FACTOR_ANY, 384,
m_inv_init, avx2, AVX2,