FFmpeg
fft_init.c
Go to the documentation of this file.
1 /*
2  * FFT/IFFT transforms
3  * AltiVec-enabled
4  * Copyright (c) 2009 Loren Merritt
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "config.h"
24 #include "libavutil/attributes.h"
25 #include "libavutil/cpu.h"
26 #include "libavutil/ppc/cpu.h"
28 #include "libavcodec/fft.h"
29 
30 /**
31  * Do a complex FFT with the parameters defined in ff_fft_init().
32  * The input data must be permuted before with s->revtab table.
33  * No 1.0 / sqrt(n) normalization is done.
34  * AltiVec-enabled:
35  * This code assumes that the 'z' pointer is 16 bytes-aligned.
36  * It also assumes all FFTComplex are 8 bytes-aligned pairs of floats.
37  */
38 
39 #if HAVE_VSX
40 #include "fft_vsx.h"
41 #else
44 #endif
45 
46 #if HAVE_GNU_AS && HAVE_ALTIVEC && (HAVE_BIGENDIAN || HAVE_VSX)
47 static void imdct_half_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
48 {
49  int j, k;
50  int n = 1 << s->mdct_bits;
51  int n4 = n >> 2;
52  int n8 = n >> 3;
53  int n32 = n >> 5;
54  const uint16_t *revtabj = s->revtab;
55  const uint16_t *revtabk = s->revtab+n4;
56  const vec_f *tcos = (const vec_f*)(s->tcos+n8);
57  const vec_f *tsin = (const vec_f*)(s->tsin+n8);
58  const vec_f *pin = (const vec_f*)(input+n4);
59  vec_f *pout = (vec_f*)(output+n4);
60 
61  /* pre rotation */
62  k = n32-1;
63  do {
64  vec_f cos,sin,cos0,sin0,cos1,sin1,re,im,r0,i0,r1,i1,a,b,c,d;
65 #define CMULA(p,o0,o1,o2,o3)\
66  a = pin[ k*2+p]; /* { z[k].re, z[k].im, z[k+1].re, z[k+1].im } */\
67  b = pin[-k*2-p-1]; /* { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im } */\
68  re = vec_perm(a, b, vcprm(0,2,s0,s2)); /* { z[k].re, z[k+1].re, z[-k-2].re, z[-k-1].re } */\
69  im = vec_perm(a, b, vcprm(s3,s1,3,1)); /* { z[-k-1].im, z[-k-2].im, z[k+1].im, z[k].im } */\
70  cos = vec_perm(cos0, cos1, vcprm(o0,o1,s##o2,s##o3)); /* { cos[k], cos[k+1], cos[-k-2], cos[-k-1] } */\
71  sin = vec_perm(sin0, sin1, vcprm(o0,o1,s##o2,s##o3));\
72  r##p = im*cos - re*sin;\
73  i##p = re*cos + im*sin;
74 #define STORE2(v,dst)\
75  j = dst;\
76  vec_ste(v, 0, output+j*2);\
77  vec_ste(v, 4, output+j*2);
78 #define STORE8(p)\
79  a = vec_perm(r##p, i##p, vcprm(0,s0,0,s0));\
80  b = vec_perm(r##p, i##p, vcprm(1,s1,1,s1));\
81  c = vec_perm(r##p, i##p, vcprm(2,s2,2,s2));\
82  d = vec_perm(r##p, i##p, vcprm(3,s3,3,s3));\
83  STORE2(a, revtabk[ p*2-4]);\
84  STORE2(b, revtabk[ p*2-3]);\
85  STORE2(c, revtabj[-p*2+2]);\
86  STORE2(d, revtabj[-p*2+3]);
87 
88  cos0 = tcos[k];
89  sin0 = tsin[k];
90  cos1 = tcos[-k-1];
91  sin1 = tsin[-k-1];
92  CMULA(0, 0,1,2,3);
93  CMULA(1, 2,3,0,1);
94  STORE8(0);
95  STORE8(1);
96  revtabj += 4;
97  revtabk -= 4;
98  k--;
99  } while(k >= 0);
100 
101 #if HAVE_VSX
102  ff_fft_calc_vsx(s, (FFTComplex*)output);
103 #else
105 #endif
106 
107  /* post rotation + reordering */
108  j = -n32;
109  k = n32-1;
110  do {
111  vec_f cos,sin,re,im,a,b,c,d;
112 #define CMULB(d0,d1,o)\
113  re = pout[o*2];\
114  im = pout[o*2+1];\
115  cos = tcos[o];\
116  sin = tsin[o];\
117  d0 = im*sin - re*cos;\
118  d1 = re*sin + im*cos;
119 
120  CMULB(a,b,j);
121  CMULB(c,d,k);
122  pout[2*j] = vec_perm(a, d, vcprm(0,s3,1,s2));
123  pout[2*j+1] = vec_perm(a, d, vcprm(2,s1,3,s0));
124  pout[2*k] = vec_perm(c, b, vcprm(0,s3,1,s2));
125  pout[2*k+1] = vec_perm(c, b, vcprm(2,s1,3,s0));
126  j++;
127  k--;
128  } while(k >= 0);
129 }
130 
131 static void imdct_calc_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
132 {
133  int k;
134  int n = 1 << s->mdct_bits;
135  int n4 = n >> 2;
136  int n16 = n >> 4;
137  vec_u32 sign = {1U<<31,1U<<31,1U<<31,1U<<31};
138  vec_u32 *p0 = (vec_u32*)(output+n4);
139  vec_u32 *p1 = (vec_u32*)(output+n4*3);
140 
141  imdct_half_altivec(s, output + n4, input);
142 
143  for (k = 0; k < n16; k++) {
144  vec_u32 a = p0[k] ^ sign;
145  vec_u32 b = p1[-k-1];
146  p0[-k-1] = vec_perm(a, a, vcprm(3,2,1,0));
147  p1[k] = vec_perm(b, b, vcprm(3,2,1,0));
148  }
149 }
150 #endif /* HAVE_GNU_AS && HAVE_ALTIVEC && (HAVE_BIGENDIAN || HAVE_VSX) */
151 
153 {
154 #if HAVE_GNU_AS && HAVE_ALTIVEC && (HAVE_BIGENDIAN || HAVE_VSX)
156  return;
157 
158 #if HAVE_VSX
159  s->fft_calc = ff_fft_calc_interleave_vsx;
160 #else
161  s->fft_calc = ff_fft_calc_interleave_altivec;
162 #endif
163  if (s->mdct_bits >= 5) {
164  s->imdct_calc = imdct_calc_altivec;
165  s->imdct_half = imdct_half_altivec;
166  }
167 #endif /* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
168 }
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
im
float im
Definition: fft.c:79
b
#define b
Definition: input.c:34
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:101
s3
#define s3
Definition: regdef.h:40
U
#define U(x)
Definition: vp56_arith.h:37
fft_vsx.h
av_cold
#define av_cold
Definition: attributes.h:90
s
#define s(width, name)
Definition: cbs_vp9.c:256
ff_fft_calc_interleave_altivec
void ff_fft_calc_interleave_altivec(FFTContext *s, FFTComplex *z)
s1
#define s1
Definition: regdef.h:38
ff_fft_init_ppc
av_cold void ff_fft_init_ppc(FFTContext *s)
Definition: fft_init.c:152
FFTSample
float FFTSample
Definition: avfft.h:35
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
s2
#define s2
Definition: regdef.h:39
PPC_ALTIVEC
#define PPC_ALTIVEC(flags)
Definition: cpu.h:25
vec_u32
#define vec_u32
Definition: util_altivec.h:38
cpu.h
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
attributes.h
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
FFTContext
Definition: fft.h:75
ff_fft_calc_altivec
void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z)
Do a complex FFT with the parameters defined in ff_fft_init().
fft.h
s0
#define s0
Definition: regdef.h:37
util_altivec.h
d
d
Definition: ffmpeg_filter.c:153
cpu.h
vec_f
#define vec_f
Definition: util_altivec.h:40
FFTComplex
Definition: avfft.h:37
re
float re
Definition: fft.c:79