FFmpeg
float_dsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include <float.h>
20 #include <stdint.h>
21 
22 #include "libavutil/float_dsp.h"
23 #include "libavutil/internal.h"
24 #include "libavutil/mem_internal.h"
25 
26 #include "checkasm.h"
27 
/* Number of elements in every test vector. Must stay even, because
 * randomize_buffer() writes two elements per loop iteration. */
#define LEN 256

/*
 * Fill the first LEN elements of buf with pseudo-random values.
 *
 * Each av_bmg_get() call on checkasm_lfg yields a pair of doubles; both are
 * scaled by a standard deviation of 10.0 and offset by a mean of 0.0 before
 * being stored, so buf may be an array of float or of double.
 *
 * The expansion is a single statement WITHOUT a trailing semicolon, so
 * "randomize_buffer(x);" behaves like an ordinary call, including when it is
 * the unbraced body of an if/else. Locals carry an rb_ prefix to avoid
 * shadowing names used in the buf expression.
 */
#define randomize_buffer(buf)                                         \
    do {                                                              \
        int rb_i;                                                     \
        double rb_pair[2], rb_stddev = 10.0, rb_mean = 0.0;           \
                                                                      \
        for (rb_i = 0; rb_i < LEN; rb_i += 2) {                       \
            av_bmg_get(&checkasm_lfg, rb_pair);                       \
            (buf)[rb_i]     = rb_pair[0] * rb_stddev + rb_mean;       \
            (buf)[rb_i + 1] = rb_pair[1] * rb_stddev + rb_mean;       \
        }                                                             \
    } while (0)
41 
42 static void test_vector_fmul(const float *src0, const float *src1)
43 {
44  LOCAL_ALIGNED_32(float, cdst, [LEN]);
45  LOCAL_ALIGNED_32(float, odst, [LEN]);
46  int i;
47 
48  declare_func(void, float *dst, const float *src0, const float *src1,
49  int len);
50 
51  call_ref(cdst, src0, src1, LEN);
52  call_new(odst, src0, src1, LEN);
53  for (i = 0; i < LEN; i++) {
54  double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
55  if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
56  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
57  i, cdst[i], odst[i], cdst[i] - odst[i]);
58  fail();
59  break;
60  }
61  }
62  bench_new(odst, src0, src1, LEN);
63 }
64 
65 static void test_vector_dmul(const double *src0, const double *src1)
66 {
67  LOCAL_ALIGNED_32(double, cdst, [LEN]);
68  LOCAL_ALIGNED_32(double, odst, [LEN]);
69  int i;
70 
71  declare_func(void, double *dst, const double *src0, const double *src1,
72  int len);
73 
74  call_ref(cdst, src0, src1, LEN);
75  call_new(odst, src0, src1, LEN);
76  for (i = 0; i < LEN; i++) {
77  double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
78  if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
79  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
80  i, cdst[i], odst[i], cdst[i] - odst[i]);
81  fail();
82  break;
83  }
84  }
85  bench_new(odst, src0, src1, LEN);
86 }
87 
88 #define ARBITRARY_FMUL_ADD_CONST 0.005
89 static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
90 {
91  LOCAL_ALIGNED_32(float, cdst, [LEN]);
92  LOCAL_ALIGNED_32(float, odst, [LEN]);
93  int i;
94 
95  declare_func(void, float *dst, const float *src0, const float *src1,
96  const float *src2, int len);
97 
98  call_ref(cdst, src0, src1, src2, LEN);
99  call_new(odst, src0, src1, src2, LEN);
100  for (i = 0; i < LEN; i++) {
101  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_ADD_CONST)) {
102  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
103  i, cdst[i], odst[i], cdst[i] - odst[i]);
104  fail();
105  break;
106  }
107  }
108  bench_new(odst, src0, src1, src2, LEN);
109 }
110 
111 static void test_vector_fmul_scalar(const float *src0, const float *src1)
112 {
113  LOCAL_ALIGNED_16(float, cdst, [LEN]);
114  LOCAL_ALIGNED_16(float, odst, [LEN]);
115  int i;
116 
117  declare_func(void, float *dst, const float *src, float mul, int len);
118 
119  call_ref(cdst, src0, src1[0], LEN);
120  call_new(odst, src0, src1[0], LEN);
121  for (i = 0; i < LEN; i++) {
122  double t = fabs(src0[i]) + fabs(src1[0]) + fabs(src0[i] * src1[0]) + 1.0;
123  if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
124  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
125  i, cdst[i], odst[i], cdst[i] - odst[i]);
126  fail();
127  break;
128  }
129  }
130  bench_new(odst, src0, src1[0], LEN);
131 }
132 
133 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
134 static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
135 {
136  LOCAL_ALIGNED_16(float, cdst, [LEN]);
137  LOCAL_ALIGNED_16(float, odst, [LEN]);
138  int i;
139 
140  declare_func(void, float *dst, const float *src0, const float *src1,
141  const float *win, int len);
142 
143  call_ref(cdst, src0, src1, win, LEN / 2);
144  call_new(odst, src0, src1, win, LEN / 2);
145  for (i = 0; i < LEN; i++) {
146  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_WINDOW_CONST)) {
147  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
148  i, cdst[i], odst[i], cdst[i] - odst[i]);
149  fail();
150  break;
151  }
152  }
153  bench_new(odst, src0, src1, win, LEN / 2);
154 }
155 
156 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
157 static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
158 {
159  LOCAL_ALIGNED_32(float, cdst, [LEN]);
160  LOCAL_ALIGNED_32(float, odst, [LEN]);
161  int i;
162 
163  declare_func(void, float *dst, const float *src, float mul, int len);
164 
165  memcpy(cdst, src2, LEN * sizeof(*src2));
166  memcpy(odst, src2, LEN * sizeof(*src2));
167 
168  call_ref(cdst, src0, src1[0], LEN);
169  call_new(odst, src0, src1[0], LEN);
170  for (i = 0; i < LEN; i++) {
171  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMAC_SCALAR_CONST)) {
172  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
173  i, cdst[i], odst[i], cdst[i] - odst[i]);
174  fail();
175  break;
176  }
177  }
178  memcpy(odst, src2, LEN * sizeof(*src2));
179  bench_new(odst, src0, src1[0], LEN);
180 }
181 
182 static void test_vector_dmul_scalar(const double *src0, const double *src1)
183 {
184  LOCAL_ALIGNED_32(double, cdst, [LEN]);
185  LOCAL_ALIGNED_32(double, odst, [LEN]);
186  int i;
187 
188  declare_func(void, double *dst, const double *src, double mul, int len);
189 
190  call_ref(cdst, src0, src1[0], LEN);
191  call_new(odst, src0, src1[0], LEN);
192  for (i = 0; i < LEN; i++) {
193  double t = fabs(src1[0]) + fabs(src0[i]) + fabs(src1[0] * src0[i]) + 1.0;
194  if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
195  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n", i,
196  cdst[i], odst[i], cdst[i] - odst[i]);
197  fail();
198  break;
199  }
200  }
201  bench_new(odst, src0, src1[0], LEN);
202 }
203 
204 #define ARBITRARY_DMAC_SCALAR_CONST 0.005
205 static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
206 {
207  LOCAL_ALIGNED_32(double, cdst, [LEN]);
208  LOCAL_ALIGNED_32(double, odst, [LEN]);
209  int i;
210 
211  declare_func(void, double *dst, const double *src, double mul, int len);
212 
213  memcpy(cdst, src2, LEN * sizeof(*src2));
214  memcpy(odst, src2, LEN * sizeof(*src2));
215  call_ref(cdst, src0, src1[0], LEN);
216  call_new(odst, src0, src1[0], LEN);
217  for (i = 0; i < LEN; i++) {
218  if (!double_near_abs_eps(cdst[i], odst[i], ARBITRARY_DMAC_SCALAR_CONST)) {
219  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
220  i, cdst[i], odst[i], cdst[i] - odst[i]);
221  fail();
222  break;
223  }
224  }
225  memcpy(odst, src2, LEN * sizeof(*src2));
226  bench_new(odst, src0, src1[0], LEN);
227 }
228 
229 static void test_butterflies_float(const float *src0, const float *src1)
230 {
231  LOCAL_ALIGNED_16(float, cdst, [LEN]);
232  LOCAL_ALIGNED_16(float, odst, [LEN]);
233  LOCAL_ALIGNED_16(float, cdst1, [LEN]);
234  LOCAL_ALIGNED_16(float, odst1, [LEN]);
235  int i;
236 
237  declare_func(void, float *restrict src0, float *restrict src1,
238  int len);
239 
240  memcpy(cdst, src0, LEN * sizeof(*src0));
241  memcpy(cdst1, src1, LEN * sizeof(*src1));
242  memcpy(odst, src0, LEN * sizeof(*src0));
243  memcpy(odst1, src1, LEN * sizeof(*src1));
244 
245  call_ref(cdst, cdst1, LEN);
246  call_new(odst, odst1, LEN);
247  for (i = 0; i < LEN; i++) {
248  if (!float_near_abs_eps(cdst[i], odst[i], FLT_EPSILON) ||
249  !float_near_abs_eps(cdst1[i], odst1[i], FLT_EPSILON)) {
250  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
251  i, cdst[i], odst[i], cdst[i] - odst[i]);
252  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
253  i, cdst1[i], odst1[i], cdst1[i] - odst1[i]);
254  fail();
255  break;
256  }
257  }
258  memcpy(odst, src0, LEN * sizeof(*src0));
259  memcpy(odst1, src1, LEN * sizeof(*src1));
260  bench_new(odst, odst1, LEN);
261 }
262 
263 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
264 static void test_scalarproduct_float(const float *src0, const float *src1)
265 {
266  float cprod, oprod;
267 
268  declare_func_float(float, const float *src0, const float *src1, int len);
269 
270  cprod = call_ref(src0, src1, LEN);
271  oprod = call_new(src0, src1, LEN);
273  fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
274  cprod, oprod, cprod - oprod);
275  fail();
276  }
277  bench_new(src0, src1, LEN);
278 }
279 
281 {
282  LOCAL_ALIGNED_32(float, src0, [LEN]);
283  LOCAL_ALIGNED_32(float, src1, [LEN]);
284  LOCAL_ALIGNED_32(float, src2, [LEN]);
285  LOCAL_ALIGNED_16(float, src3, [LEN]);
286  LOCAL_ALIGNED_16(float, src4, [LEN]);
287  LOCAL_ALIGNED_16(float, src5, [LEN]);
288  LOCAL_ALIGNED_32(double, dbl_src0, [LEN]);
289  LOCAL_ALIGNED_32(double, dbl_src1, [LEN]);
290  LOCAL_ALIGNED_32(double, dbl_src2, [LEN]);
292 
293  if (!fdsp) {
294  fprintf(stderr, "floatdsp: Out of memory error\n");
295  return;
296  }
297 
301  randomize_buffer(src3);
302  randomize_buffer(src4);
303  randomize_buffer(src5);
304  randomize_buffer(dbl_src0);
305  randomize_buffer(dbl_src1);
306  randomize_buffer(dbl_src2);
307 
308  if (check_func(fdsp->vector_fmul, "vector_fmul"))
310  if (check_func(fdsp->vector_fmul_add, "vector_fmul_add"))
312  if (check_func(fdsp->vector_fmul_scalar, "vector_fmul_scalar"))
313  test_vector_fmul_scalar(src3, src4);
314  if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse"))
316  if (check_func(fdsp->vector_fmul_window, "vector_fmul_window"))
317  test_vector_fmul_window(src3, src4, src5);
318  report("vector_fmul");
319  if (check_func(fdsp->vector_fmac_scalar, "vector_fmac_scalar"))
321  report("vector_fmac");
322  if (check_func(fdsp->vector_dmul, "vector_dmul"))
323  test_vector_dmul(dbl_src0, dbl_src1);
324  if (check_func(fdsp->vector_dmul_scalar, "vector_dmul_scalar"))
325  test_vector_dmul_scalar(dbl_src0, dbl_src1);
326  report("vector_dmul");
327  if (check_func(fdsp->vector_dmac_scalar, "vector_dmac_scalar"))
328  test_vector_dmac_scalar(dbl_src0, dbl_src1, dbl_src2);
329  report("vector_dmac");
330  if (check_func(fdsp->butterflies_float, "butterflies_float"))
331  test_butterflies_float(src3, src4);
332  report("butterflies_float");
333  if (check_func(fdsp->scalarproduct_float, "scalarproduct_float"))
334  test_scalarproduct_float(src3, src4);
335  report("scalarproduct_float");
336 
337  av_freep(&fdsp);
338 }
AVFloatDSPContext::butterflies_float
void(* butterflies_float)(float *restrict v1, float *restrict v2, int len)
Calculate the sum and difference of two vectors of floats.
Definition: float_dsp.h:162
mem_internal.h
src1
const pixel * src1
Definition: h264pred_template.c:421
AVFloatDSPContext::vector_fmul_reverse
void(* vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len)
Calculate the entry wise product of two vectors of floats, and store the result in a vector of floats...
Definition: float_dsp.h:152
randomize_buffer
#define randomize_buffer(buf)
Definition: float_dsp.c:30
float_near_abs_eps
int float_near_abs_eps(float a, float b, float eps)
Definition: checkasm.c:388
check_func
#define check_func(func,...)
Definition: checkasm.h:170
float.h
declare_func_float
#define declare_func_float(ret,...)
Definition: checkasm.h:175
test_vector_dmac_scalar
static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
Definition: float_dsp.c:205
AVFloatDSPContext::vector_dmul
void(* vector_dmul)(double *dst, const double *src0, const double *src1, int len)
Calculate the entry wise product of two vectors of doubles and store the result in a vector of double...
Definition: float_dsp.h:188
call_ref
#define call_ref(...)
Definition: checkasm.h:185
win
static float win(SuperEqualizerContext *s, float n, int N)
Definition: af_superequalizer.c:119
ARBITRARY_FMUL_ADD_CONST
#define ARBITRARY_FMUL_ADD_CONST
Definition: float_dsp.c:88
double_near_abs_eps
int double_near_abs_eps(double a, double b, double eps)
Definition: checkasm.c:428
fail
#define fail()
Definition: checkasm.h:179
checkasm.h
checkasm_check_float_dsp
void checkasm_check_float_dsp(void)
Definition: float_dsp.c:280
AVFloatDSPContext::scalarproduct_float
float(* scalarproduct_float)(const float *v1, const float *v2, int len)
Calculate the scalar product of two vectors of floats.
Definition: float_dsp.h:173
test_butterflies_float
static void test_butterflies_float(const float *src0, const float *src1)
Definition: float_dsp.c:229
test_vector_fmul_scalar
static void test_vector_fmul_scalar(const float *src0, const float *src1)
Definition: float_dsp.c:111
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:150
call_new
#define call_new(...)
Definition: checkasm.h:288
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:156
test_vector_fmul
static void test_vector_fmul(const float *src0, const float *src1)
Definition: float_dsp.c:42
AVFloatDSPContext::vector_fmul_scalar
void(* vector_fmul_scalar)(float *dst, const float *src, float mul, int len)
Multiply a vector of floats by a scalar float.
Definition: float_dsp.h:83
ARBITRARY_FMAC_SCALAR_CONST
#define ARBITRARY_FMAC_SCALAR_CONST
Definition: float_dsp.c:156
float_dsp.h
AVFloatDSPContext::vector_fmul
void(* vector_fmul)(float *dst, const float *src0, const float *src1, int len)
Calculate the entry wise product of two vectors of floats and store the result in a vector of floats.
Definition: float_dsp.h:36
ARBITRARY_FMUL_WINDOW_CONST
#define ARBITRARY_FMUL_WINDOW_CONST
Definition: float_dsp.c:133
test_vector_dmul_scalar
static void test_vector_dmul_scalar(const double *src0, const double *src1)
Definition: float_dsp.c:182
AVFloatDSPContext
Definition: float_dsp.h:22
test_vector_fmul_add
static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
Definition: float_dsp.c:89
LEN
#define LEN
Definition: float_dsp.c:28
ARBITRARY_SCALARPRODUCT_CONST
#define ARBITRARY_SCALARPRODUCT_CONST
Definition: float_dsp.c:263
report
#define report
Definition: checkasm.h:182
bench_new
#define bench_new(...)
Definition: checkasm.h:358
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
AVFloatDSPContext::vector_fmac_scalar
void(* vector_fmac_scalar)(float *dst, const float *src, float mul, int len)
Multiply a vector of floats by a scalar float and add to destination vector.
Definition: float_dsp.h:52
internal.h
src2
const pixel * src2
Definition: h264pred_template.c:422
AVFloatDSPContext::vector_fmul_add
void(* vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len)
Calculate the entry wise product of two vectors of floats, add a third vector of floats and store the...
Definition: float_dsp.h:135
len
int len
Definition: vorbis_enc_data.h:426
ARBITRARY_DMAC_SCALAR_CONST
#define ARBITRARY_DMAC_SCALAR_CONST
Definition: float_dsp.c:204
test_vector_fmul_window
static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
Definition: float_dsp.c:134
AVFloatDSPContext::vector_dmul_scalar
void(* vector_dmul_scalar)(double *dst, const double *src, double mul, int len)
Multiply a vector of double by a scalar double.
Definition: float_dsp.h:98
test_scalarproduct_float
static void test_scalarproduct_float(const float *src0, const float *src1)
Definition: float_dsp.c:264
src0
const pixel *const src0
Definition: h264pred_template.c:420
AVFloatDSPContext::vector_fmul_window
void(* vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len)
Overlap/add with window function.
Definition: float_dsp.h:117
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:174
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
avpriv_float_dsp_alloc
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:135
AVFloatDSPContext::vector_dmac_scalar
void(* vector_dmac_scalar)(double *dst, const double *src, double mul, int len)
Multiply a vector of doubles by a scalar double and add to destination vector.
Definition: float_dsp.h:68
test_vector_dmul
static void test_vector_dmul(const double *src0, const double *src1)
Definition: float_dsp.c:65
test_vector_fmac_scalar
static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
Definition: float_dsp.c:157