Go to the documentation of this file.
/*
 * Declares the storage for one lookup table: an array of `size` TXSample
 * elements whose identifier is TX_TAB(ff_tx_tab_<name>), passed through
 * DECLARE_ALIGNED with an alignment argument of 32.
 * The macro does not end in a semicolon; the invoking site supplies it.
 */
29 #define TABLE_DEF(name, size) \
30 DECLARE_ALIGNED(32, TXSample, TX_TAB(ff_tx_tab_ ##name))[size]
32 #define SR_POW2_TABLES \
/*
 * SR_TABLE variant that emits table storage: for a given len it declares
 * ff_tx_tab_<len> with len/4 + 1 entries through TABLE_DEF, terminated by
 * a semicolon.
 * NOTE(review): presumably expanded once per entry of SR_POW2_TABLES, whose
 * body is not visible in this listing - confirm.
 */
53 #define SR_TABLE(len) \
54 TABLE_DEF(len, len/4 + 1);
69 #define SR_TABLE(len) \
70 static av_cold void TX_TAB(ff_tx_init_tab_ ##len)(void) \
72 double freq = 2*M_PI/len; \
73 TXSample *tab = TX_TAB(ff_tx_tab_ ##len); \
75 for (int i = 0; i < len/4; i++) \
76 *tab++ = RESCALE(cos(i*freq)); \
/*
 * SR_TABLE variant that expands to the identifier of the per-length table
 * init function, TX_TAB(ff_tx_init_tab_<len>), followed by a comma, i.e. one
 * element of a comma-separated list.
 * NOTE(review): presumably used to fill an array of init function pointers -
 * the enclosing initializer is not visible here, confirm.
 */
84 #define SR_TABLE(len) TX_TAB(ff_tx_init_tab_ ##len),
/*
 * SR_TABLE variant that ignores len and expands to AV_ONCE_INIT followed by
 * a comma, i.e. one element of a comma-separated list.
 * NOTE(review): presumably used to fill an array of once-control objects, one
 * per table - the enclosing initializer is not visible here, confirm.
 */
90 #define SR_TABLE(len) AV_ONCE_INIT,
98 TX_TAB(ff_tx_tab_53)[0] = RESCALE(cos(2 *
M_PI / 5));
99 TX_TAB(ff_tx_tab_53)[1] = RESCALE(cos(2 *
M_PI / 5));
100 TX_TAB(ff_tx_tab_53)[2] = RESCALE(cos(2 *
M_PI / 10));
101 TX_TAB(ff_tx_tab_53)[3] = RESCALE(cos(2 *
M_PI / 10));
102 TX_TAB(ff_tx_tab_53)[4] = RESCALE(sin(2 *
M_PI / 5));
103 TX_TAB(ff_tx_tab_53)[5] = RESCALE(sin(2 *
M_PI / 5));
104 TX_TAB(ff_tx_tab_53)[6] = RESCALE(sin(2 *
M_PI / 10));
105 TX_TAB(ff_tx_tab_53)[7] = RESCALE(sin(2 *
M_PI / 10));
108 TX_TAB(ff_tx_tab_53)[ 8] = RESCALE(cos(2 *
M_PI / 12));
109 TX_TAB(ff_tx_tab_53)[ 9] = RESCALE(cos(2 *
M_PI / 12));
110 TX_TAB(ff_tx_tab_53)[10] = RESCALE(cos(2 *
M_PI / 6));
111 TX_TAB(ff_tx_tab_53)[11] = RESCALE(cos(8 *
M_PI / 6));
116 TX_TAB(ff_tx_tab_7)[0] = RESCALE(cos(2 *
M_PI / 7));
117 TX_TAB(ff_tx_tab_7)[1] = RESCALE(sin(2 *
M_PI / 7));
118 TX_TAB(ff_tx_tab_7)[2] = RESCALE(sin(2 *
M_PI / 28));
119 TX_TAB(ff_tx_tab_7)[3] = RESCALE(cos(2 *
M_PI / 28));
120 TX_TAB(ff_tx_tab_7)[4] = RESCALE(cos(2 *
M_PI / 14));
121 TX_TAB(ff_tx_tab_7)[5] = RESCALE(sin(2 *
M_PI / 14));
126 TX_TAB(ff_tx_tab_9)[0] = RESCALE(cos(2 *
M_PI / 3));
127 TX_TAB(ff_tx_tab_9)[1] = RESCALE(sin(2 *
M_PI / 3));
128 TX_TAB(ff_tx_tab_9)[2] = RESCALE(cos(2 *
M_PI / 9));
129 TX_TAB(ff_tx_tab_9)[3] = RESCALE(sin(2 *
M_PI / 9));
130 TX_TAB(ff_tx_tab_9)[4] = RESCALE(cos(2 *
M_PI / 36));
131 TX_TAB(ff_tx_tab_9)[5] = RESCALE(sin(2 *
M_PI / 36));
132 TX_TAB(ff_tx_tab_9)[6] = TX_TAB(ff_tx_tab_9)[2] + TX_TAB(ff_tx_tab_9)[5];
133 TX_TAB(ff_tx_tab_9)[7] = TX_TAB(ff_tx_tab_9)[3] - TX_TAB(ff_tx_tab_9)[4];
152 int idx = factor_2 - 3;
153 for (
int i = 0;
i <= idx;
i++)
181 const TXSample *
tab = TX_TAB(ff_tx_tab_53);
187 BF(
tmp[1].re,
tmp[2].im, in[1].im, in[2].im);
188 BF(
tmp[1].im,
tmp[2].re, in[1].re, in[2].re);
197 out[1*
stride].re =
tmp[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
198 out[1*
stride].im =
tmp[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
199 out[2*
stride].re =
tmp[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
200 out[2*
stride].im =
tmp[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
215 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
216 static av_always_inline void NAME(TXComplex *out, TXComplex *in, \
219 TXComplex dc, z0[4], t[6]; \
220 const TXSample *tab = TX_TAB(ff_tx_tab_53); \
223 BF(t[1].im, t[0].re, in[1].re, in[4].re); \
224 BF(t[1].re, t[0].im, in[1].im, in[4].im); \
225 BF(t[3].im, t[2].re, in[2].re, in[3].re); \
226 BF(t[3].re, t[2].im, in[2].im, in[3].im); \
228 out[D0*stride].re = dc.re + (TXUSample)t[0].re + t[2].re; \
229 out[D0*stride].im = dc.im + (TXUSample)t[0].im + t[2].im; \
231 SMUL(t[4].re, t[0].re, tab[0], tab[2], t[2].re, t[0].re); \
232 SMUL(t[4].im, t[0].im, tab[0], tab[2], t[2].im, t[0].im); \
233 CMUL(t[5].re, t[1].re, tab[4], tab[6], t[3].re, t[1].re); \
234 CMUL(t[5].im, t[1].im, tab[4], tab[6], t[3].im, t[1].im); \
236 BF(z0[0].re, z0[3].re, t[0].re, t[1].re); \
237 BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
238 BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
239 BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
241 out[D1*stride].re = dc.re + (TXUSample)z0[3].re; \
242 out[D1*stride].im = dc.im + (TXUSample)z0[0].im; \
243 out[D2*stride].re = dc.re + (TXUSample)z0[2].re; \
244 out[D2*stride].im = dc.im + (TXUSample)z0[1].im; \
245 out[D3*stride].re = dc.re + (TXUSample)z0[1].re; \
246 out[D3*stride].im = dc.im + (TXUSample)z0[2].im; \
247 out[D4*stride].re = dc.re + (TXUSample)z0[0].re; \
248 out[D4*stride].im = dc.im + (TXUSample)z0[3].im; \
266 BF(t[1].re, t[0].re, in[1].re, in[6].re);
267 BF(t[1].im, t[0].im, in[1].im, in[6].im);
268 BF(t[3].re, t[2].re, in[2].re, in[5].re);
269 BF(t[3].im, t[2].im, in[2].im, in[5].im);
270 BF(t[5].re, t[4].re, in[3].re, in[4].re);
271 BF(t[5].im, t[4].im, in[3].im, in[4].im);
273 out[0*
stride].re =
dc.re + t[0].re + t[2].re + t[4].re;
274 out[0*
stride].im =
dc.im + t[0].im + t[2].im + t[4].im;
305 z[0].re =
tab[0].re*t[0].re -
tab[2].re*t[4].re -
tab[1].re*t[2].re;
306 z[1].re =
tab[0].re*t[4].re -
tab[1].re*t[0].re -
tab[2].re*t[2].re;
307 z[2].re =
tab[0].re*t[2].re -
tab[2].re*t[0].re -
tab[1].re*t[4].re;
308 z[0].im =
tab[0].re*t[0].im -
tab[1].re*t[2].im -
tab[2].re*t[4].im;
309 z[1].im =
tab[0].re*t[4].im -
tab[1].re*t[0].im -
tab[2].re*t[2].im;
310 z[2].im =
tab[0].re*t[2].im -
tab[2].re*t[0].im -
tab[1].re*t[4].im;
315 t[0].re =
tab[2].im*t[1].im +
tab[1].im*t[5].im -
tab[0].im*t[3].im;
316 t[2].re =
tab[0].im*t[5].im +
tab[2].im*t[3].im -
tab[1].im*t[1].im;
317 t[4].re =
tab[2].im*t[5].im +
tab[1].im*t[3].im +
tab[0].im*t[1].im;
318 t[0].im =
tab[0].im*t[1].re +
tab[1].im*t[3].re +
tab[2].im*t[5].re;
319 t[2].im =
tab[2].im*t[3].re +
tab[0].im*t[5].re -
tab[1].im*t[1].re;
320 t[4].im =
tab[2].im*t[1].re +
tab[1].im*t[5].re -
tab[0].im*t[3].re;
323 BF(t[1].re, z[0].re, z[0].re, t[4].re);
324 BF(t[3].re, z[1].re, z[1].re, t[2].re);
325 BF(t[5].re, z[2].re, z[2].re, t[0].re);
326 BF(t[1].im, z[0].im, z[0].im, t[0].im);
327 BF(t[3].im, z[1].im, z[1].im, t[2].im);
328 BF(t[5].im, z[2].im, z[2].im, t[4].im);
354 BF(t[1].re, t[0].re, in[1].re, in[8].re);
355 BF(t[1].im, t[0].im, in[1].im, in[8].im);
356 BF(t[3].re, t[2].re, in[2].re, in[7].re);
357 BF(t[3].im, t[2].im, in[2].im, in[7].im);
358 BF(t[5].re, t[4].re, in[3].re, in[6].re);
359 BF(t[5].im, t[4].im, in[3].im, in[6].im);
360 BF(t[7].re, t[6].re, in[4].re, in[5].re);
361 BF(t[7].im, t[6].im, in[4].im, in[5].im);
363 w[0].re = t[0].re - t[6].re;
364 w[0].im = t[0].im - t[6].im;
365 w[1].re = t[2].re - t[6].re;
366 w[1].im = t[2].im - t[6].im;
367 w[2].re = t[1].re - t[7].re;
368 w[2].im = t[1].im - t[7].im;
369 w[3].re = t[3].re + t[7].re;
370 w[3].im = t[3].im + t[7].im;
372 z[0].re =
dc.re + t[4].re;
373 z[0].im =
dc.im + t[4].im;
375 z[1].re = t[0].re + t[2].re + t[6].re;
376 z[1].im = t[0].im + t[2].im + t[6].im;
382 mtmp[0] = t[1].re - t[3].re + t[7].re;
383 mtmp[1] = t[1].im - t[3].im + t[7].im;
393 x[3].re = z[0].re + (
int32_t)mtmp[0];
394 x[3].im = z[0].im + (
int32_t)mtmp[1];
395 z[0].re = in[0].re + (
int32_t)mtmp[2];
396 z[0].im = in[0].im + (
int32_t)mtmp[3];
420 y[3].re =
tab[0].im*(t[1].re - t[3].re + t[7].re);
421 y[3].im =
tab[0].im*(t[1].im - t[3].im + t[7].im);
423 x[3].re = z[0].re +
tab[0].re*z[1].re;
424 x[3].im = z[0].im +
tab[0].re*z[1].im;
425 z[0].re =
dc.re +
tab[0].re*t[4].re;
426 z[0].im =
dc.im +
tab[0].re*t[4].im;
428 x[1].re =
tab[1].re*
w[0].re +
tab[2].im*
w[1].re;
429 x[1].im =
tab[1].re*
w[0].im +
tab[2].im*
w[1].im;
430 x[2].re =
tab[2].im*
w[0].re -
tab[3].re*
w[1].re;
431 x[2].im =
tab[2].im*
w[0].im -
tab[3].re*
w[1].im;
432 y[1].re =
tab[1].im*
w[2].re +
tab[2].re*
w[3].re;
433 y[1].im =
tab[1].im*
w[2].im +
tab[2].re*
w[3].im;
434 y[2].re =
tab[2].re*
w[2].re -
tab[3].im*
w[3].re;
435 y[2].im =
tab[2].re*
w[2].im -
tab[3].im*
w[3].im;
437 y[0].re =
tab[0].im*t[5].re;
438 y[0].im =
tab[0].im*t[5].im;
441 x[4].re = x[1].re + x[2].re;
442 x[4].im = x[1].im + x[2].im;
444 y[4].re = y[1].re - y[2].re;
445 y[4].im = y[1].im - y[2].im;
446 x[1].re = z[0].re + x[1].re;
447 x[1].im = z[0].im + x[1].im;
448 y[1].re = y[0].re + y[1].re;
449 y[1].im = y[0].im + y[1].im;
450 x[2].re = z[0].re + x[2].re;
451 x[2].im = z[0].im + x[2].im;
452 y[2].re = y[2].re - y[0].re;
453 y[2].im = y[2].im - y[0].im;
454 x[4].re = z[0].re - x[4].re;
455 x[4].im = z[0].im - x[4].im;
456 y[4].re = y[0].re - y[4].re;
457 y[4].im = y[0].im - y[4].im;
474 for (
int i = 0;
i < 5;
i++)
500 #define DECL_FACTOR_S(n) \
501 static void TX_NAME(ff_tx_fft##n)(AVTXContext *s, void *dst, \
502 void *src, ptrdiff_t stride) \
504 fft##n((TXComplex *)dst, (TXComplex *)src, stride / sizeof(TXComplex)); \
506 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \
507 .name = TX_NAME_STR("fft" #n "_ns"), \
508 .function = TX_NAME(ff_tx_fft##n), \
509 .type = TX_TYPE(FFT), \
510 .flags = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
511 AV_TX_UNALIGNED | FF_TX_PRESHUFFLE, \
516 .init = TX_NAME(ff_tx_fft_factor_init), \
517 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
518 .prio = FF_TX_PRIO_BASE, \
521 #define DECL_FACTOR_F(n) \
523 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_fwd_def) = { \
524 .name = TX_NAME_STR("fft" #n "_fwd"), \
525 .function = TX_NAME(ff_tx_fft##n), \
526 .type = TX_TYPE(FFT), \
527 .flags = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
528 AV_TX_UNALIGNED | FF_TX_FORWARD_ONLY, \
533 .init = TX_NAME(ff_tx_fft_factor_init), \
534 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
535 .prio = FF_TX_PRIO_BASE, \
544 #define BUTTERFLIES(a0, a1, a2, a3) \
550 BF(t3, t5, t5, t1); \
551 BF(a2.re, a0.re, r0, t5); \
552 BF(a3.im, a1.im, i1, t3); \
553 BF(t4, t6, t2, t6); \
554 BF(a3.re, a1.re, r1, t4); \
555 BF(a2.im, a0.im, i0, t6); \
558 #define TRANSFORM(a0, a1, a2, a3, wre, wim) \
560 CMUL(t1, t2, a2.re, a2.im, wre, -wim); \
561 CMUL(t5, t6, a3.re, a3.im, wre, wim); \
562 BUTTERFLIES(a0, a1, a2, a3); \
567 const TXSample *cos,
int len)
572 const TXSample *wim = cos + o1 - 7;
573 TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
575 for (
int i = 0;
i <
len;
i += 4) {
576 TRANSFORM(z[0], z[o1 + 0], z[o2 + 0], z[o3 + 0], cos[0], wim[7]);
577 TRANSFORM(z[2], z[o1 + 2], z[o2 + 2], z[o3 + 2], cos[2], wim[5]);
578 TRANSFORM(z[4], z[o1 + 4], z[o2 + 4], z[o3 + 4], cos[4], wim[3]);
579 TRANSFORM(z[6], z[o1 + 6], z[o2 + 6], z[o3 + 6], cos[6], wim[1]);
581 TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], cos[1], wim[6]);
582 TRANSFORM(z[3], z[o1 + 3], z[o2 + 3], z[o3 + 3], cos[3], wim[4]);
583 TRANSFORM(z[5], z[o1 + 5], z[o2 + 5], z[o3 + 5], cos[5], wim[2]);
584 TRANSFORM(z[7], z[o1 + 7], z[o2 + 7], z[o3 + 7], cos[7], wim[0]);
603 #define DECL_SR_CODELET_DEF(n) \
604 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \
605 .name = TX_NAME_STR("fft" #n "_ns"), \
606 .function = TX_NAME(ff_tx_fft##n##_ns), \
607 .type = TX_TYPE(FFT), \
608 .flags = FF_TX_OUT_OF_PLACE | AV_TX_INPLACE | \
609 AV_TX_UNALIGNED | FF_TX_PRESHUFFLE, \
614 .init = TX_NAME(ff_tx_fft_sr_codelet_init), \
615 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
616 .prio = FF_TX_PRIO_BASE, \
619 #define DECL_SR_CODELET(n, n2, n4) \
620 static void TX_NAME(ff_tx_fft##n##_ns)(AVTXContext *s, void *_dst, \
621 void *_src, ptrdiff_t stride) \
623 TXComplex *src = _src; \
624 TXComplex *dst = _dst; \
625 const TXSample *cos = TX_TAB(ff_tx_tab_##n); \
627 TX_NAME(ff_tx_fft##n2##_ns)(s, dst, src, stride); \
628 TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*2, src + n4*2, stride); \
629 TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*3, src + n4*3, stride); \
630 TX_NAME(ff_tx_fft_sr_combine)(dst, cos, n4 >> 1); \
633 DECL_SR_CODELET_DEF(n)
652 TXSample t1, t2, t3, t4, t5, t6, t7, t8;
669 TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
670 const TXSample cos = TX_TAB(ff_tx_tab_8)[1];
688 const TXSample *cos = TX_TAB(ff_tx_tab_16);
690 TXUSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
691 TXSample cos_16_1 = cos[1];
692 TXSample cos_16_2 = cos[2];
693 TXSample cos_16_3 = cos[3];
777 int *
map =
s->sub[0].map;
782 for (
int i = 0;
i <
len;
i++)
785 s->fn[0](&
s->sub[0], dst2, dst1,
stride);
794 const int *
map =
s->sub->map;
795 const int *inplace_idx =
s->map;
796 int src_idx, dst_idx;
798 src_idx = *inplace_idx++;
801 dst_idx =
map[src_idx];
804 dst_idx =
map[dst_idx];
805 }
while (dst_idx != src_idx);
807 }
while ((src_idx = *inplace_idx++));
813 .
name = TX_NAME_STR(
"fft"),
827 .
name = TX_NAME_STR(
"fft_inplace_small"),
841 .
name = TX_NAME_STR(
"fft_inplace"),
866 for (
int i = 0;
i <
len;
i++) {
867 for (
int j = 0; j <
len; j++) {
868 const double factor = phase*
i*j;
884 const int n =
s->len;
885 double phase =
s->inv ? 2.0*
M_PI/n : -2.0*
M_PI/n;
889 for (
int i = 0;
i < n;
i++) {
891 for (
int j = 0; j < n; j++) {
892 const double factor = phase*
i*j;
911 const int n =
s->len;
915 for (
int i = 0;
i < n;
i++) {
917 for (
int j = 0; j < n; j++) {
929 .
name = TX_NAME_STR(
"fft_naive_small"),
943 .
name = TX_NAME_STR(
"fft_naive"),
965 size_t extra_tmp_len = 0;
972 for (
int i = 0;
i <
ret;
i++) {
973 int len1 = len_list[
i];
974 int len2 =
len / len1;
977 if (len2 & (len2 - 1))
992 }
else if (
ret < 0) {
1013 }
else if (
ret < 0) {
1020 }
else if (
ret < 0) {
1040 s->sub[0].len,
s->sub[1].len)))
1047 tmp = (
int *)
s->tmp;
1048 for (
int k = 0; k <
len; k +=
s->sub[0].len) {
1049 memcpy(
tmp, &
s->map[k],
s->sub[0].len*
sizeof(*
tmp));
1050 for (
int i = 0;
i <
s->sub[0].len;
i++)
1051 s->map[k +
i] =
tmp[
s->sub[0].map[
i]];
1056 extra_tmp_len =
len;
1058 extra_tmp_len =
s->sub[0].len;
1060 if (extra_tmp_len && !(
s->exp =
av_malloc(extra_tmp_len*
sizeof(*
s->exp))))
1067 void *_in, ptrdiff_t
stride)
1069 const int n =
s->sub[0].len, m =
s->sub[1].len, l =
s->len;
1070 const int *in_map =
s->map, *out_map = in_map + l;
1071 const int *sub_map =
s->sub[1].map;
1077 for (
int i = 0;
i < m;
i++) {
1078 for (
int j = 0; j < n; j++)
1079 s->exp[j] = in[in_map[
i*n + j]];
1080 s->fn[0](&
s->sub[0], &
s->tmp[sub_map[
i]],
s->exp, m*
sizeof(
TXComplex));
1083 for (
int i = 0;
i < n;
i++)
1084 s->fn[1](&
s->sub[1], &tmp1[m*
i], &
s->tmp[m*
i],
sizeof(
TXComplex));
1086 for (
int i = 0;
i < l;
i++)
1091 void *_in, ptrdiff_t
stride)
1093 const int n =
s->sub[0].len, m =
s->sub[1].len, l =
s->len;
1094 const int *in_map =
s->map, *out_map = in_map + l;
1095 const int *sub_map =
s->sub[1].map;
1101 for (
int i = 0;
i < m;
i++)
1102 s->fn[0](&
s->sub[0], &
s->tmp[sub_map[
i]], &in[
i*n], m*
sizeof(
TXComplex));
1104 for (
int i = 0;
i < n;
i++)
1105 s->fn[1](&
s->sub[1], &tmp1[m*
i], &
s->tmp[m*
i],
sizeof(
TXComplex));
1107 for (
int i = 0;
i < l;
i++)
1112 .
name = TX_NAME_STR(
"fft_pfa"),
1126 .
name = TX_NAME_STR(
"fft_pfa_ns"),
1147 s->scale_d = *((SCALE_TYPE *)
scale);
1148 s->scale_f =
s->scale_d;
1157 double scale =
s->scale_d;
1159 const double phase =
M_PI/(4.0*
len);
1163 for (
int i = 0;
i <
len;
i++) {
1165 for (
int j = 0; j <
len*2; j++) {
1166 int a = (2*j + 1 +
len) * (2*
i + 1);
1167 sum += UNSCALE(
src[j]) * cos(
a * phase);
1178 double scale =
s->scale_d;
1179 int len =
s->len >> 1;
1181 const double phase =
M_PI/(4.0*len2);
1185 for (
int i = 0;
i <
len;
i++) {
1188 double i_d = phase * (4*
len - 2*
i - 1);
1189 double i_u = phase * (3*len2 + 2*
i + 1);
1190 for (
int j = 0; j < len2; j++) {
1191 double a = (2 * j + 1);
1192 double a_d = cos(
a * i_d);
1193 double a_u = cos(
a * i_u);
1204 .
name = TX_NAME_STR(
"mdct_naive_fwd"),
1218 .
name = TX_NAME_STR(
"mdct_naive_inv"),
1243 s->scale_d = *((SCALE_TYPE *)
scale);
1244 s->scale_f =
s->scale_d;
1264 memcpy(
s->map,
s->sub->map, (
len >> 1)*
sizeof(*
s->map));
1266 for (
int i = 0; i < len >> 1;
i++)
1275 for (
int i = 0;
i < (
s->len >> 1);
i++)
1286 const int len2 =
s->len >> 1;
1287 const int len4 =
s->len >> 2;
1288 const int len3 = len2 * 3;
1289 const int *sub_map =
s->map;
1293 for (
int i = 0;
i < len2;
i++) {
1295 const int idx = sub_map[
i];
1297 tmp.re = FOLD(-
src[ len2 + k],
src[1*len2 - 1 - k]);
1298 tmp.im = FOLD(-
src[ len3 + k], -
src[1*len3 - 1 - k]);
1300 tmp.re = FOLD(-
src[ len2 + k], -
src[5*len2 - 1 - k]);
1301 tmp.im = FOLD(
src[-len2 + k], -
src[1*len3 - 1 - k]);
1303 CMUL(z[idx].im, z[idx].re,
tmp.re,
tmp.im,
exp[
i].re,
exp[
i].im);
1308 for (
int i = 0;
i < len4;
i++) {
1309 const int i0 = len4 +
i, i1 = len4 -
i - 1;
1324 const TXSample *
src =
_src, *in1, *in2;
1325 const int len2 =
s->len >> 1;
1326 const int len4 =
s->len >> 2;
1327 const int *sub_map =
s->map;
1333 for (
int i = 0;
i < len2;
i++) {
1342 for (
int i = 0;
i < len4;
i++) {
1343 const int i0 = len4 +
i, i1 = len4 -
i - 1;
1347 CMUL(z[i1].re, z[i0].im,
src1.re,
src1.im,
exp[i1].im,
exp[i1].re);
1348 CMUL(z[i0].re, z[i1].im,
src0.re,
src0.im,
exp[i0].im,
exp[i0].re);
1353 .
name = TX_NAME_STR(
"mdct_fwd"),
1367 .
name = TX_NAME_STR(
"mdct_inv"),
1389 s->scale_d = *((SCALE_TYPE *)
scale);
1390 s->scale_f =
s->scale_d;
1403 int len =
s->len << 1;
1404 int len2 =
len >> 1;
1405 int len4 =
len >> 2;
1412 for (
int i = 0;
i < len4;
i++) {
1419 .
name = TX_NAME_STR(
"mdct_inv_full"),
1444 sub_len =
len / cd->factors[0];
1446 s->scale_d = *((SCALE_TYPE *)
scale);
1447 s->scale_f =
s->scale_d;
1454 sub_len, inv,
scale)))
1461 if (cd->factors[0] == 15)
1468 for (
int i = 0;
i <
len;
i++)
1479 #define DECL_COMP_IMDCT(N) \
1480 static void TX_NAME(ff_tx_mdct_pfa_##N##xM_inv)(AVTXContext *s, void *_dst, \
1481 void *_src, ptrdiff_t stride) \
1483 TXComplex fft##N##in[N]; \
1484 TXComplex *z = _dst, *exp = s->exp; \
1485 const TXSample *src = _src, *in1, *in2; \
1486 const int len4 = s->len >> 2; \
1487 const int len2 = s->len >> 1; \
1488 const int m = s->sub->len; \
1489 const int *in_map = s->map, *out_map = in_map + N*m; \
1490 const int *sub_map = s->sub->map; \
1492 stride /= sizeof(*src); \
1494 in2 = src + ((N*m*2) - 1) * stride; \
1496 for (int i = 0; i < len2; i += N) { \
1497 for (int j = 0; j < N; j++) { \
1498 const int k = in_map[j]; \
1499 TXComplex tmp = { in2[-k*stride], in1[k*stride] }; \
1500 CMUL3(fft##N##in[j], tmp, exp[j]); \
1502 fft##N(s->tmp + *(sub_map++), fft##N##in, m); \
1507 for (int i = 0; i < N; i++) \
1508 s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex)); \
1510 for (int i = 0; i < len4; i++) { \
1511 const int i0 = len4 + i, i1 = len4 - i - 1; \
1512 const int s0 = out_map[i0], s1 = out_map[i1]; \
1513 TXComplex src1 = { s->tmp[s1].im, s->tmp[s1].re }; \
1514 TXComplex src0 = { s->tmp[s0].im, s->tmp[s0].re }; \
1516 CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re); \
1517 CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re); \
1521 static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_inv_def) = { \
1522 .name = TX_NAME_STR("mdct_pfa_" #N "xM_inv"), \
1523 .function = TX_NAME(ff_tx_mdct_pfa_##N##xM_inv), \
1524 .type = TX_TYPE(MDCT), \
1525 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, \
1526 .factors = { N, TX_FACTOR_ANY }, \
1529 .max_len = TX_LEN_UNLIMITED, \
1530 .init = TX_NAME(ff_tx_mdct_pfa_init), \
1531 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1532 .prio = FF_TX_PRIO_BASE, \
1541 #define DECL_COMP_MDCT(N) \
1542 static void TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd)(AVTXContext *s, void *_dst, \
1543 void *_src, ptrdiff_t stride) \
1545 TXComplex fft##N##in[N]; \
1546 TXSample *src = _src, *dst = _dst; \
1547 TXComplex *exp = s->exp, tmp; \
1548 const int m = s->sub->len; \
1549 const int len4 = N*m; \
1550 const int len3 = len4 * 3; \
1551 const int len8 = s->len >> 2; \
1552 const int *in_map = s->map, *out_map = in_map + N*m; \
1553 const int *sub_map = s->sub->map; \
1555 stride /= sizeof(*dst); \
1557 for (int i = 0; i < m; i++) { \
1558 for (int j = 0; j < N; j++) { \
1559 const int k = in_map[i*N + j]; \
1561 tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]); \
1562 tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
1564 tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
1565 tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
1567 CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
1568 exp[k >> 1].re, exp[k >> 1].im); \
1570 fft##N(s->tmp + sub_map[i], fft##N##in, m); \
1573 for (int i = 0; i < N; i++) \
1574 s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex)); \
1576 for (int i = 0; i < len8; i++) { \
1577 const int i0 = len8 + i, i1 = len8 - i - 1; \
1578 const int s0 = out_map[i0], s1 = out_map[i1]; \
1579 TXComplex src1 = { s->tmp[s1].re, s->tmp[s1].im }; \
1580 TXComplex src0 = { s->tmp[s0].re, s->tmp[s0].im }; \
1582 CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im, \
1583 exp[i0].im, exp[i0].re); \
1584 CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im, \
1585 exp[i1].im, exp[i1].re); \
1589 static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd_def) = { \
1590 .name = TX_NAME_STR("mdct_pfa_" #N "xM_fwd"), \
1591 .function = TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd), \
1592 .type = TX_TYPE(MDCT), \
1593 .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \
1594 .factors = { N, TX_FACTOR_ANY }, \
1597 .max_len = TX_LEN_UNLIMITED, \
1598 .init = TX_NAME(ff_tx_mdct_pfa_init), \
1599 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1600 .prio = FF_TX_PRIO_BASE, \
1622 s->scale_d = *((SCALE_TYPE *)
scale);
1623 s->scale_f =
s->scale_d;
1630 if (!(
s->exp =
av_mallocz((8 + 2*len4)*
sizeof(*
s->exp))))
1633 tab = (TXSample *)
s->exp;
1637 m = (inv ? 2*
s->scale_d :
s->scale_d);
1639 *
tab++ = RESCALE((inv ? 0.5 : 1.0) * m);
1640 *
tab++ = RESCALE(inv ? 0.5*m : 1.0*m);
1641 *
tab++ = RESCALE( m);
1642 *
tab++ = RESCALE(-m);
1644 *
tab++ = RESCALE( (0.5 - 0.0) * m);
1646 *
tab++ = 1 /
s->scale_f;
1648 *
tab++ = RESCALE( (0.0 - 0.5) * m);
1649 *
tab++ = RESCALE( (0.5 - inv) * m);
1650 *
tab++ = RESCALE(-(0.5 - inv) * m);
1652 for (
int i = 0;
i < len4;
i++)
1653 *
tab++ = RESCALE(cos(
i*
f));
1655 tab = ((TXSample *)
s->exp) + len4 + 8;
1657 for (
int i = 0;
i < len4;
i++)
1658 *
tab++ = RESCALE(cos(((
len -
i*4)/4.0)*
f)) * (inv ? 1 : -1);
1663 #define DECL_RDFT(n, inv) \
1664 static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
1665 void *_src, ptrdiff_t stride) \
1667 const int len2 = s->len >> 1; \
1668 const int len4 = s->len >> 2; \
1669 const TXSample *fact = (void *)s->exp; \
1670 const TXSample *tcos = fact + 8; \
1671 const TXSample *tsin = tcos + len4; \
1672 TXComplex *data = inv ? _src : _dst; \
1676 s->fn[0](&s->sub[0], data, _src, sizeof(TXComplex)); \
1678 data[0].im = data[len2].re; \
1683 t[0].re = data[0].re; \
1684 data[0].re = t[0].re + data[0].im; \
1685 data[0].im = t[0].re - data[0].im; \
1686 data[ 0].re = MULT(fact[0], data[ 0].re); \
1687 data[ 0].im = MULT(fact[1], data[ 0].im); \
1688 data[len4].re = MULT(fact[2], data[len4].re); \
1689 data[len4].im = MULT(fact[3], data[len4].im); \
1691 for (int i = 1; i < len4; i++) { \
1693 t[0].re = MULT(fact[4], (data[i].re + data[len2 - i].re)); \
1694 t[0].im = MULT(fact[5], (data[i].im - data[len2 - i].im)); \
1695 t[1].re = MULT(fact[6], (data[i].im + data[len2 - i].im)); \
1696 t[1].im = MULT(fact[7], (data[i].re - data[len2 - i].re)); \
1699 CMUL(t[2].re, t[2].im, t[1].re, t[1].im, tcos[i], tsin[i]); \
1701 data[ i].re = t[0].re + t[2].re; \
1702 data[ i].im = t[2].im - t[0].im; \
1703 data[len2 - i].re = t[0].re - t[2].re; \
1704 data[len2 - i].im = t[2].im + t[0].im; \
1708 s->fn[0](&s->sub[0], _dst, data, sizeof(TXComplex)); \
1711 data[len2].re = data[0].im; \
1712 data[ 0].im = data[len2].im = 0; \
1716 static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = { \
1717 .name = TX_NAME_STR("rdft_" #n), \
1718 .function = TX_NAME(ff_tx_rdft_ ##n), \
1719 .type = TX_TYPE(RDFT), \
1720 .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
1721 (inv ? FF_TX_INVERSE_ONLY : FF_TX_FORWARD_ONLY), \
1722 .factors = { 4, TX_FACTOR_ANY }, \
1725 .max_len = TX_LEN_UNLIMITED, \
1726 .init = TX_NAME(ff_tx_rdft_init), \
1727 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1728 .prio = FF_TX_PRIO_BASE, \
1734 #define DECL_RDFT_HALF(n, mode, mod2) \
1735 static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
1736 void *_src, ptrdiff_t stride) \
1738 const int len = s->len; \
1739 const int len2 = len >> 1; \
1740 const int len4 = len >> 2; \
1741 const int aligned_len4 = FFALIGN(len, 4)/4; \
1742 const TXSample *fact = (void *)s->exp; \
1743 const TXSample *tcos = fact + 8; \
1744 const TXSample *tsin = tcos + aligned_len4; \
1745 TXComplex *data = _dst; \
1746 TXSample *out = _dst; \
1748 av_unused TXSample tmp_mid; \
1752 s->fn[0](&s->sub[0], _dst, _src, sizeof(TXComplex)); \
1754 tmp_dc = data[0].re; \
1755 data[ 0].re = tmp_dc + data[0].im; \
1756 tmp_dc = tmp_dc - data[0].im; \
1758 data[ 0].re = MULT(fact[0], data[ 0].re); \
1759 tmp_dc = MULT(fact[1], tmp_dc); \
1760 data[len4].re = MULT(fact[2], data[len4].re); \
1763 data[len4].im = MULT(fact[3], data[len4].im); \
1766 sl = data[len4 + 1]; \
1767 if (mode == AV_TX_REAL_TO_REAL) \
1768 tmp[0] = MULT(fact[4], (sf.re + sl.re)); \
1770 tmp[0] = MULT(fact[5], (sf.im - sl.im)); \
1771 tmp[1] = MULT(fact[6], (sf.im + sl.im)); \
1772 tmp[2] = MULT(fact[7], (sf.re - sl.re)); \
1774 if (mode == AV_TX_REAL_TO_REAL) { \
1775 tmp[3] = tmp[1]*tcos[len4] - tmp[2]*tsin[len4]; \
1776 tmp_mid = (tmp[0] - tmp[3]); \
1778 tmp[3] = tmp[1]*tsin[len4] + tmp[2]*tcos[len4]; \
1779 tmp_mid = (tmp[0] + tmp[3]); \
1784 for (int i = 1; i <= len4; i++) { \
1786 TXComplex sf = data[i]; \
1787 TXComplex sl = data[len2 - i]; \
1789 if (mode == AV_TX_REAL_TO_REAL) \
1790 tmp[0] = MULT(fact[4], (sf.re + sl.re)); \
1792 tmp[0] = MULT(fact[5], (sf.im - sl.im)); \
1794 tmp[1] = MULT(fact[6], (sf.im + sl.im)); \
1795 tmp[2] = MULT(fact[7], (sf.re - sl.re)); \
1797 if (mode == AV_TX_REAL_TO_REAL) { \
1798 tmp[3] = tmp[1]*tcos[i] - tmp[2]*tsin[i]; \
1799 out[i] = (tmp[0] + tmp[3]); \
1800 out[len - i] = (tmp[0] - tmp[3]); \
1802 tmp[3] = tmp[1]*tsin[i] + tmp[2]*tcos[i]; \
1803 out[i - 1] = (tmp[3] - tmp[0]); \
1804 out[len - i - 1] = (tmp[0] + tmp[3]); \
1808 for (int i = 1; i < (len4 + (mode == AV_TX_REAL_TO_IMAGINARY)); i++) \
1809 out[len2 - i] = out[len - i]; \
1811 if (mode == AV_TX_REAL_TO_REAL) { \
1812 out[len2] = tmp_dc; \
1814 out[len4 + 1] = tmp_mid * fact[5]; \
1815 } else if (mod2) { \
1816 out[len4] = tmp_mid; \
1820 static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = { \
1821 .name = TX_NAME_STR("rdft_" #n), \
1822 .function = TX_NAME(ff_tx_rdft_ ##n), \
1823 .type = TX_TYPE(RDFT), \
1824 .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | mode | \
1825 FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \
1826 .factors = { 2 + 2*(!mod2), TX_FACTOR_ANY }, \
1828 .min_len = 2 + 2*(!mod2), \
1829 .max_len = TX_LEN_UNLIMITED, \
1830 .init = TX_NAME(ff_tx_rdft_init), \
1831 .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
1832 .prio = FF_TX_PRIO_BASE, \
1850 SCALE_TYPE rsc = *((SCALE_TYPE *)
scale);
1865 tab = (TXSample *)
s->exp;
1869 for (
int i = 0;
i <
len;
i++)
1870 tab[
i] = RESCALE(cos(
i*freq)*(!inv + 1));
1873 for (
int i = 0;
i <
len/2;
i++)
1874 tab[
len +
i] = RESCALE(0.5 / sin((2*
i + 1)*freq));
1876 for (
int i = 0;
i <
len/2;
i++)
1877 tab[
len +
i] = RESCALE(cos((
len - 2*
i - 1)*freq));
1888 const int len =
s->len;
1889 const int len2 =
len >> 1;
1890 const TXSample *
exp = (
void *)
s->exp;
1895 TXSample tmp1, tmp2;
1898 for (
int i = 0;
i < len2;
i++) {
1899 TXSample in1 =
src[
i];
1900 TXSample in2 =
src[
len -
i - 1];
1910 tmp2 = (tmp2 + 0x40000000) >> 31;
1912 tmp1 = (in1 + in2)*0.5;
1913 tmp2 = (in1 - in2)*
s;
1916 src[
i] = tmp1 + tmp2;
1917 src[
len -
i - 1] = tmp1 - tmp2;
1924 for (
int i =
len - 2;
i > 0;
i -= 2) {
1936 dst[0] = (tmp1 + 0x40000000) >> 31;
1948 const int len =
s->len;
1949 const int len2 =
len >> 1;
1950 const TXSample *
exp = (
void *)
s->exp;
1953 tmp2 = (2*tmp2 + 0x40000000) >> 31;
1955 TXSample tmp1, tmp2 = 2*
src[
len - 1];
1960 for (
int i =
len - 2;
i >= 2;
i -= 2) {
1961 TXSample val1 =
src[
i - 0];
1962 TXSample val2 =
src[
i - 1] -
src[
i + 1];
1969 for (
int i = 0;
i < len2;
i++) {
1970 TXSample in1 =
dst[
i];
1971 TXSample in2 =
dst[
len -
i - 1];
1978 tmp2 = (tmp2 + 0x40000000) >> 31;
1981 dst[
i] = tmp1 + tmp2;
1982 dst[
len -
i - 1] = tmp1 - tmp2;
1987 .
name = TX_NAME_STR(
"dctII"),
2001 .
name = TX_NAME_STR(
"dctIII"),
2022 SCALE_TYPE rsc = *((SCALE_TYPE *)
scale);
2051 const int len =
s->len - 1;
2052 TXSample *
tmp = (TXSample *)
s->tmp;
2054 stride /=
sizeof(TXSample);
2056 for (
int i = 0;
i <
len;
i++)
2061 s->fn[0](&
s->sub[0],
dst,
tmp,
sizeof(TXSample));
2069 const int len =
s->len + 1;
2070 TXSample *
tmp = (
void *)
s->tmp;
2072 stride /=
sizeof(TXSample);
2076 for (
int i = 1;
i <
len;
i++) {
2088 .
name = TX_NAME_STR(
"dctI"),
2102 .
name = TX_NAME_STR(
"dstI"),
2118 int len4 =
s->len >> 1;
2119 double scale =
s->scale_d;
2120 const double theta = (
scale < 0 ? len4 : 0) + 1.0/8.0;
2121 size_t alloc = pre_tab ? 2*len4 : len4;
2131 for (
int i = 0;
i < len4;
i++) {
2138 for (
int i = 0;
i < len4;
i++)
2139 s->exp[
i] =
s->exp[len4 + pre_tab[
i]];
2152 &
TX_NAME(ff_tx_fft128_ns_def),
2153 &
TX_NAME(ff_tx_fft256_ns_def),
2154 &
TX_NAME(ff_tx_fft512_ns_def),
2155 &
TX_NAME(ff_tx_fft1024_ns_def),
2156 &
TX_NAME(ff_tx_fft2048_ns_def),
2157 &
TX_NAME(ff_tx_fft4096_ns_def),
2158 &
TX_NAME(ff_tx_fft8192_ns_def),
2159 &
TX_NAME(ff_tx_fft16384_ns_def),
2160 &
TX_NAME(ff_tx_fft32768_ns_def),
2161 &
TX_NAME(ff_tx_fft65536_ns_def),
2162 &
TX_NAME(ff_tx_fft131072_ns_def),
2163 &
TX_NAME(ff_tx_fft262144_ns_def),
2164 &
TX_NAME(ff_tx_fft524288_ns_def),
2165 &
TX_NAME(ff_tx_fft1048576_ns_def),
2166 &
TX_NAME(ff_tx_fft2097152_ns_def),
2183 &
TX_NAME(ff_tx_fft_inplace_def),
2184 &
TX_NAME(ff_tx_fft_inplace_small_def),
2186 &
TX_NAME(ff_tx_fft_pfa_ns_def),
2187 &
TX_NAME(ff_tx_fft_naive_def),
2188 &
TX_NAME(ff_tx_fft_naive_small_def),
2191 &
TX_NAME(ff_tx_mdct_pfa_3xM_fwd_def),
2192 &
TX_NAME(ff_tx_mdct_pfa_5xM_fwd_def),
2193 &
TX_NAME(ff_tx_mdct_pfa_7xM_fwd_def),
2194 &
TX_NAME(ff_tx_mdct_pfa_9xM_fwd_def),
2195 &
TX_NAME(ff_tx_mdct_pfa_15xM_fwd_def),
2196 &
TX_NAME(ff_tx_mdct_pfa_3xM_inv_def),
2197 &
TX_NAME(ff_tx_mdct_pfa_5xM_inv_def),
2198 &
TX_NAME(ff_tx_mdct_pfa_7xM_inv_def),
2199 &
TX_NAME(ff_tx_mdct_pfa_9xM_inv_def),
2200 &
TX_NAME(ff_tx_mdct_pfa_15xM_inv_def),
2201 &
TX_NAME(ff_tx_mdct_naive_fwd_def),
2202 &
TX_NAME(ff_tx_mdct_naive_inv_def),
2203 &
TX_NAME(ff_tx_mdct_inv_full_def),
2206 &
TX_NAME(ff_tx_rdft_r2r_mod2_def),
2208 &
TX_NAME(ff_tx_rdft_r2i_mod2_def),
int(* func)(AVBPrint *dst, const char *in, const char *arg)
static void TX_NAME() ff_tx_fft_sr_combine(TXComplex *z, const TXSample *cos, int len)
static av_cold int TX_NAME() ff_tx_dct_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
@ AV_TX_REAL_TO_REAL
Perform a real to half-complex RDFT.
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
static void TX_NAME() ff_tx_fft(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
#define TX_MAX_DECOMPOSITIONS
static void TX_NAME() ff_tx_fft_pfa(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
static void TX_NAME() ff_tx_fft16_ns(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
int ff_tx_gen_inplace_map(AVTXContext *s, int len)
static av_always_inline void fft15(TXComplex *out, TXComplex *in, ptrdiff_t stride)
#define FF_TX_CPU_FLAGS_ALL
int ff_tx_gen_compound_mapping(AVTXContext *s, FFTXCodeletOptions *opts, int inv, int n, int m)
static void TX_NAME() ff_tx_dctI(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
static void TX_NAME() ff_tx_fft_naive(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
uint8_t ptrdiff_t const uint8_t * _src
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)
static void TX_NAME() ff_tx_mdct_naive_fwd(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
static av_cold int TX_NAME() ff_tx_rdft_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
#define DECL_SR_CODELET_DEF(n)
static SR_POW2_TABLES void(*const sr_tabs_init_funcs[])(void)
static const struct twinvq_data tab
static const FFTXCodelet TX_NAME(ff_tx_fft_def)
static void sum_d(const int *input, int *output, int len)
static AVOnce sr_tabs_init_once[]
static double val(void *priv, double ch)
#define TABLE_DEF(name, size)
static int16_t mult(Float11 *f1, Float11 *f2)
static int ff_thread_once(char *control, void(*routine)(void))
#define FF_ARRAY_ELEMS(a)
static void c2r(float *buffer, int size)
static av_cold int TX_NAME() ff_tx_fft_factor_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
static void TX_NAME() ff_tx_mdct_fwd(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
static av_cold int TX_NAME() ff_tx_mdct_naive_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
#define FF_TX_FORWARD_ONLY
static void TX_NAME() ff_tx_dstI(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
@ AV_TX_FULL_IMDCT
Performs a full inverse MDCT rather than leaving out samples that can be derived through symmetry.
static __device__ float fabs(float a)
@ AV_TX_REAL_TO_IMAGINARY
static av_cold int TX_NAME() ff_tx_mdct_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
@ AV_TX_INPLACE
Allows for in-place transformations, where input == output.
int ff_tx_gen_ptwo_revtab(AVTXContext *s, FFTXCodeletOptions *opts)
static void r2c(float *buffer, int size)
#define FF_TX_OUT_OF_PLACE
@ AV_TX_UNALIGNED
Relaxes alignment requirement for the in and out arrays of av_tx_fn().
static void TX_NAME() ff_tx_dctIII(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
#define DECL_COMP_MDCT(N)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, and so on. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not care about overflow.
static av_cold int TX_NAME() ff_tx_fft_pfa_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
void ff_tx_clear_ctx(AVTXContext *s)
static void TX_NAME() ff_tx_fft2_ns(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
static av_cold int TX_NAME() ff_tx_fft_sr_codelet_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts)
static av_cold void TX_TAB() ff_tx_init_tab_53(void)
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
static void TX_NAME() ff_tx_fft8_ns(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
static av_always_inline void fft9(TXComplex *out, TXComplex *in, ptrdiff_t stride)
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
#define TX_EMBED_INPUT_PFA_MAP(map, tot_len, d1, d2)
static void TX_NAME() ff_tx_fft_inplace(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
#define DECL_RDFT_HALF(n, mode, mod2)
static av_cold int TX_NAME() ff_tx_fft_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
static void TX_NAME() ff_tx_mdct_inv(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
#define i(width, name, range_min, range_max)
#define av_malloc_array(a, b)
static AVOnce nptwo_tabs_init_once[]
static av_cold int TX_NAME() ff_tx_fft_init_naive_small(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
#define DECL_SR_CODELET(n, n2, n4)
#define DECL_COMP_IMDCT(N)
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if available on the CPU) and zero all the bytes of the block.
static av_always_inline void fft3(TXComplex *out, TXComplex *in, ptrdiff_t stride)
static const FFTabInitData nptwo_tabs_init_data[]
av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
#define FFSWAP(type, a, b)
static av_cold void TX_TAB() ff_tx_init_tab_7(void)
#define FF_TX_INVERSE_ONLY
static void TX_NAME() ff_tx_fft_naive_small(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
static av_cold void TX_TAB() ff_tx_init_tab_9(void)
av_cold void TX_TAB() ff_tx_init_tabs(int len)
static void TX_NAME() ff_tx_mdct_naive_inv(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
static void TX_NAME() ff_tx_dctII(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
#define BUTTERFLIES(a0, a1, a2, a3)
static void TX_NAME() ff_tx_fft_pfa_ns(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
static const int factor[16]
static av_cold int TX_NAME() ff_tx_dcstI_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
static av_cold int TX_NAME() ff_tx_fft_inplace_small_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
const VDPAUPixFmtMap * map
static void scale(int *out, const int *in, const int w, const int h, const int shift)
static const int16_t alpha[]
static av_always_inline void fft7(TXComplex *out, TXComplex *in, ptrdiff_t stride)
#define flags(name, subs,...)
int TX_TAB() ff_tx_mdct_gen_exp(AVTXContext *s, int *pre_tab)
int ff_tx_gen_pfa_input_map(AVTXContext *s, FFTXCodeletOptions *opts, int d1, int d2)
#define DECL_RDFT(n, inv)
static av_cold int TX_NAME() ff_tx_mdct_pfa_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
static void TX_NAME() ff_tx_fft4_ns(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
static av_cold int TX_NAME() ff_tx_mdct_inv_full_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type, int len, int inv)
static void TX_NAME() ff_tx_mdct_inv_full(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)