FFmpeg
float_dsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include "config.h"
20 
21 #include <float.h>
22 #include <stdint.h>
23 
24 #include "libavutil/float_dsp.h"
25 #include "libavutil/internal.h"
26 #include "libavutil/mem_internal.h"
27 
28 #include "checkasm.h"
29 
/* Number of elements in every test vector. */
#define LEN 256

/*
 * Fill buf[0 .. LEN-1] with values derived from av_bmg_get(): each call
 * yields a pair of doubles, which are scaled by a fixed factor (10.0),
 * offset by a fixed mean (0.0) and stored in two consecutive elements.
 * LEN must be even because elements are written two at a time.
 *
 * The macro deliberately has no trailing semicolon so that
 * "randomize_buffer(x);" is exactly one statement and stays valid inside an
 * unbraced if/else. The locals carry a suffix so they cannot shadow or
 * capture identifiers that appear in the buf argument.
 */
#define randomize_buffer(buf)                                       \
do {                                                                \
    int rb_i_;                                                      \
    double rb_pair_[2], rb_stddev_ = 10.0, rb_mean_ = 0.0;          \
                                                                    \
    for (rb_i_ = 0; rb_i_ < LEN; rb_i_ += 2) {                      \
        av_bmg_get(&checkasm_lfg, rb_pair_);                        \
        (buf)[rb_i_]     = rb_pair_[0] * rb_stddev_ + rb_mean_;     \
        (buf)[rb_i_ + 1] = rb_pair_[1] * rb_stddev_ + rb_mean_;     \
    }                                                               \
} while (0)
43 
44 static void test_vector_fmul(const float *src0, const float *src1)
45 {
46  LOCAL_ALIGNED_32(float, cdst, [LEN]);
47  LOCAL_ALIGNED_32(float, odst, [LEN]);
48  int i;
49 
50  declare_func(void, float *dst, const float *src0, const float *src1,
51  int len);
52 
53  call_ref(cdst, src0, src1, LEN);
54  call_new(odst, src0, src1, LEN);
55  for (i = 0; i < LEN; i++) {
56  double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
57  if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
58  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
59  i, cdst[i], odst[i], cdst[i] - odst[i]);
60  fail();
61  break;
62  }
63  }
64  bench_new(odst, src0, src1, LEN);
65 }
66 
67 static void test_vector_dmul(const double *src0, const double *src1)
68 {
69  LOCAL_ALIGNED_32(double, cdst, [LEN]);
70  LOCAL_ALIGNED_32(double, odst, [LEN]);
71  int i;
72 
73  declare_func(void, double *dst, const double *src0, const double *src1,
74  int len);
75 
76  call_ref(cdst, src0, src1, LEN);
77  call_new(odst, src0, src1, LEN);
78  for (i = 0; i < LEN; i++) {
79  double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
80  if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
81  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
82  i, cdst[i], odst[i], cdst[i] - odst[i]);
83  fail();
84  break;
85  }
86  }
87  bench_new(odst, src0, src1, LEN);
88 }
89 
90 #define ARBITRARY_FMUL_ADD_CONST 0.005
91 static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
92 {
93  LOCAL_ALIGNED_32(float, cdst, [LEN]);
94  LOCAL_ALIGNED_32(float, odst, [LEN]);
95  int i;
96 
97  declare_func(void, float *dst, const float *src0, const float *src1,
98  const float *src2, int len);
99 
100  call_ref(cdst, src0, src1, src2, LEN);
101  call_new(odst, src0, src1, src2, LEN);
102  for (i = 0; i < LEN; i++) {
103  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_ADD_CONST)) {
104  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
105  i, cdst[i], odst[i], cdst[i] - odst[i]);
106  fail();
107  break;
108  }
109  }
110  bench_new(odst, src0, src1, src2, LEN);
111 }
112 
113 static void test_vector_fmul_scalar(const float *src0, const float *src1)
114 {
115  LOCAL_ALIGNED_16(float, cdst, [LEN]);
116  LOCAL_ALIGNED_16(float, odst, [LEN]);
117  int i;
118 
119  declare_func(void, float *dst, const float *src, float mul, int len);
120 
121  call_ref(cdst, src0, src1[0], LEN);
122  call_new(odst, src0, src1[0], LEN);
123  for (i = 0; i < LEN; i++) {
124  double t = fabs(src0[i]) + fabs(src1[0]) + fabs(src0[i] * src1[0]) + 1.0;
125  if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
126  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
127  i, cdst[i], odst[i], cdst[i] - odst[i]);
128  fail();
129  break;
130  }
131  }
132  bench_new(odst, src0, src1[0], LEN);
133 }
134 
135 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
136 static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
137 {
138  LOCAL_ALIGNED_16(float, cdst, [LEN]);
139  LOCAL_ALIGNED_16(float, odst, [LEN]);
140  int i;
141 
142  declare_func(void, float *dst, const float *src0, const float *src1,
143  const float *win, int len);
144 
145  call_ref(cdst, src0, src1, win, LEN / 2);
146  call_new(odst, src0, src1, win, LEN / 2);
147  for (i = 0; i < LEN; i++) {
148  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_WINDOW_CONST)) {
149  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
150  i, cdst[i], odst[i], cdst[i] - odst[i]);
151  fail();
152  break;
153  }
154  }
155  bench_new(odst, src0, src1, win, LEN / 2);
156 }
157 
158 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
159 static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
160 {
161  LOCAL_ALIGNED_32(float, cdst, [LEN]);
162  LOCAL_ALIGNED_32(float, odst, [LEN]);
163  int i;
164 
165  declare_func(void, float *dst, const float *src, float mul, int len);
166 
167  memcpy(cdst, src2, LEN * sizeof(*src2));
168  memcpy(odst, src2, LEN * sizeof(*src2));
169 
170  call_ref(cdst, src0, src1[0], LEN);
171  call_new(odst, src0, src1[0], LEN);
172  for (i = 0; i < LEN; i++) {
173  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMAC_SCALAR_CONST)) {
174  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
175  i, cdst[i], odst[i], cdst[i] - odst[i]);
176  fail();
177  break;
178  }
179  }
180  memcpy(odst, src2, LEN * sizeof(*src2));
181  bench_new(odst, src0, src1[0], LEN);
182 }
183 
184 static void test_vector_dmul_scalar(const double *src0, const double *src1)
185 {
186  LOCAL_ALIGNED_32(double, cdst, [LEN]);
187  LOCAL_ALIGNED_32(double, odst, [LEN]);
188  int i;
189 
190  declare_func(void, double *dst, const double *src, double mul, int len);
191 
192  call_ref(cdst, src0, src1[0], LEN);
193  call_new(odst, src0, src1[0], LEN);
194  for (i = 0; i < LEN; i++) {
195  double t = fabs(src1[0]) + fabs(src0[i]) + fabs(src1[0] * src0[i]) + 1.0;
196  if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
197  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n", i,
198  cdst[i], odst[i], cdst[i] - odst[i]);
199  fail();
200  break;
201  }
202  }
203  bench_new(odst, src0, src1[0], LEN);
204 }
205 
206 #define ARBITRARY_DMAC_SCALAR_CONST 0.005
207 static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
208 {
209  LOCAL_ALIGNED_32(double, cdst, [LEN]);
210  LOCAL_ALIGNED_32(double, odst, [LEN]);
211  int i;
212 
213  declare_func(void, double *dst, const double *src, double mul, int len);
214 
215  memcpy(cdst, src2, LEN * sizeof(*src2));
216  memcpy(odst, src2, LEN * sizeof(*src2));
217  call_ref(cdst, src0, src1[0], LEN);
218  call_new(odst, src0, src1[0], LEN);
219  for (i = 0; i < LEN; i++) {
220  if (!double_near_abs_eps(cdst[i], odst[i], ARBITRARY_DMAC_SCALAR_CONST)) {
221  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
222  i, cdst[i], odst[i], cdst[i] - odst[i]);
223  fail();
224  break;
225  }
226  }
227  memcpy(odst, src2, LEN * sizeof(*src2));
228  bench_new(odst, src0, src1[0], LEN);
229 }
230 
231 static void test_butterflies_float(const float *src0, const float *src1)
232 {
233  LOCAL_ALIGNED_16(float, cdst, [LEN]);
234  LOCAL_ALIGNED_16(float, odst, [LEN]);
235  LOCAL_ALIGNED_16(float, cdst1, [LEN]);
236  LOCAL_ALIGNED_16(float, odst1, [LEN]);
237  int i;
238 
239  declare_func(void, float *av_restrict src0, float *av_restrict src1,
240  int len);
241 
242  memcpy(cdst, src0, LEN * sizeof(*src0));
243  memcpy(cdst1, src1, LEN * sizeof(*src1));
244  memcpy(odst, src0, LEN * sizeof(*src0));
245  memcpy(odst1, src1, LEN * sizeof(*src1));
246 
247  call_ref(cdst, cdst1, LEN);
248  call_new(odst, odst1, LEN);
249  for (i = 0; i < LEN; i++) {
250  if (!float_near_abs_eps(cdst[i], odst[i], FLT_EPSILON) ||
251  !float_near_abs_eps(cdst1[i], odst1[i], FLT_EPSILON)) {
252  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
253  i, cdst[i], odst[i], cdst[i] - odst[i]);
254  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
255  i, cdst1[i], odst1[i], cdst1[i] - odst1[i]);
256  fail();
257  break;
258  }
259  }
260  memcpy(odst, src0, LEN * sizeof(*src0));
261  memcpy(odst1, src1, LEN * sizeof(*src1));
262  bench_new(odst, odst1, LEN);
263 }
264 
265 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
266 static void test_scalarproduct_float(const float *src0, const float *src1)
267 {
268  float cprod, oprod;
269 
270  declare_func_float(float, const float *src0, const float *src1, int len);
271 
272  cprod = call_ref(src0, src1, LEN);
273  oprod = call_new(src0, src1, LEN);
275  fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
276  cprod, oprod, cprod - oprod);
277  fail();
278  }
279  bench_new(src0, src1, LEN);
280 }
281 
283 {
284  LOCAL_ALIGNED_32(float, src0, [LEN]);
285  LOCAL_ALIGNED_32(float, src1, [LEN]);
286  LOCAL_ALIGNED_32(float, src2, [LEN]);
287  LOCAL_ALIGNED_16(float, src3, [LEN]);
288  LOCAL_ALIGNED_16(float, src4, [LEN]);
289  LOCAL_ALIGNED_16(float, src5, [LEN]);
290  LOCAL_ALIGNED_32(double, dbl_src0, [LEN]);
291  LOCAL_ALIGNED_32(double, dbl_src1, [LEN]);
292  LOCAL_ALIGNED_32(double, dbl_src2, [LEN]);
294 
295  if (!fdsp) {
296  fprintf(stderr, "floatdsp: Out of memory error\n");
297  return;
298  }
299 
303  randomize_buffer(src3);
304  randomize_buffer(src4);
305  randomize_buffer(src5);
306  randomize_buffer(dbl_src0);
307  randomize_buffer(dbl_src1);
308  randomize_buffer(dbl_src2);
309 
310  if (check_func(fdsp->vector_fmul, "vector_fmul"))
312  if (check_func(fdsp->vector_fmul_add, "vector_fmul_add"))
314  if (check_func(fdsp->vector_fmul_scalar, "vector_fmul_scalar"))
315  test_vector_fmul_scalar(src3, src4);
316  if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse"))
318  if (check_func(fdsp->vector_fmul_window, "vector_fmul_window"))
319  test_vector_fmul_window(src3, src4, src5);
320  report("vector_fmul");
321  if (check_func(fdsp->vector_fmac_scalar, "vector_fmac_scalar"))
323  report("vector_fmac");
324  if (check_func(fdsp->vector_dmul, "vector_dmul"))
325  test_vector_dmul(dbl_src0, dbl_src1);
326  if (check_func(fdsp->vector_dmul_scalar, "vector_dmul_scalar"))
327  test_vector_dmul_scalar(dbl_src0, dbl_src1);
328  report("vector_dmul");
329  if (check_func(fdsp->vector_dmac_scalar, "vector_dmac_scalar"))
330  test_vector_dmac_scalar(dbl_src0, dbl_src1, dbl_src2);
331  report("vector_dmac");
332  if (check_func(fdsp->butterflies_float, "butterflies_float"))
333  test_butterflies_float(src3, src4);
334  report("butterflies_float");
335  if (check_func(fdsp->scalarproduct_float, "scalarproduct_float"))
336  test_scalarproduct_float(src3, src4);
337  report("scalarproduct_float");
338 
339  av_freep(&fdsp);
340 }
mem_internal.h
src1
const pixel * src1
Definition: h264pred_template.c:421
AVFloatDSPContext::vector_fmul_reverse
void(* vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len)
Calculate the entry wise product of two vectors of floats, and store the result in a vector of floats...
Definition: float_dsp.h:154
randomize_buffer
#define randomize_buffer(buf)
Definition: float_dsp.c:32
float_near_abs_eps
int float_near_abs_eps(float a, float b, float eps)
Definition: checkasm.c:352
check_func
#define check_func(func,...)
Definition: checkasm.h:125
float.h
declare_func_float
#define declare_func_float(ret,...)
Definition: checkasm.h:130
test_vector_dmac_scalar
static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
Definition: float_dsp.c:207
AVFloatDSPContext::vector_dmul
void(* vector_dmul)(double *dst, const double *src0, const double *src1, int len)
Calculate the entry wise product of two vectors of doubles and store the result in a vector of double...
Definition: float_dsp.h:190
call_ref
#define call_ref(...)
Definition: checkasm.h:140
AVFloatDSPContext::butterflies_float
void(* butterflies_float)(float *av_restrict v1, float *av_restrict v2, int len)
Calculate the sum and difference of two vectors of floats.
Definition: float_dsp.h:164
win
static float win(SuperEqualizerContext *s, float n, int N)
Definition: af_superequalizer.c:119
ARBITRARY_FMUL_ADD_CONST
#define ARBITRARY_FMUL_ADD_CONST
Definition: float_dsp.c:90
double_near_abs_eps
int double_near_abs_eps(double a, double b, double eps)
Definition: checkasm.c:392
fail
#define fail()
Definition: checkasm.h:134
checkasm.h
checkasm_check_float_dsp
void checkasm_check_float_dsp(void)
Definition: float_dsp.c:282
AVFloatDSPContext::scalarproduct_float
float(* scalarproduct_float)(const float *v1, const float *v2, int len)
Calculate the scalar product of two vectors of floats.
Definition: float_dsp.h:175
test_butterflies_float
static void test_butterflies_float(const float *src0, const float *src1)
Definition: float_dsp.c:231
test_vector_fmul_scalar
static void test_vector_fmul_scalar(const float *src0, const float *src1)
Definition: float_dsp.c:113
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:129
mul
static float mul(float src0, float src1)
Definition: dnn_backend_native_layer_mathbinary.c:39
call_new
#define call_new(...)
Definition: checkasm.h:222
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:135
test_vector_fmul
static void test_vector_fmul(const float *src0, const float *src1)
Definition: float_dsp.c:44
AVFloatDSPContext::vector_fmul_scalar
void(* vector_fmul_scalar)(float *dst, const float *src, float mul, int len)
Multiply a vector of floats by a scalar float.
Definition: float_dsp.h:85
ARBITRARY_FMAC_SCALAR_CONST
#define ARBITRARY_FMAC_SCALAR_CONST
Definition: float_dsp.c:158
float_dsp.h
AVFloatDSPContext::vector_fmul
void(* vector_fmul)(float *dst, const float *src0, const float *src1, int len)
Calculate the entry wise product of two vectors of floats and store the result in a vector of floats.
Definition: float_dsp.h:38
ARBITRARY_FMUL_WINDOW_CONST
#define ARBITRARY_FMUL_WINDOW_CONST
Definition: float_dsp.c:135
test_vector_dmul_scalar
static void test_vector_dmul_scalar(const double *src0, const double *src1)
Definition: float_dsp.c:184
AVFloatDSPContext
Definition: float_dsp.h:24
test_vector_fmul_add
static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
Definition: float_dsp.c:91
LEN
#define LEN
Definition: float_dsp.c:30
ARBITRARY_SCALARPRODUCT_CONST
#define ARBITRARY_SCALARPRODUCT_CONST
Definition: float_dsp.c:265
report
#define report
Definition: checkasm.h:137
bench_new
#define bench_new(...)
Definition: checkasm.h:287
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
AVFloatDSPContext::vector_fmac_scalar
void(* vector_fmac_scalar)(float *dst, const float *src, float mul, int len)
Multiply a vector of floats by a scalar float and add to destination vector.
Definition: float_dsp.h:54
internal.h
src2
const pixel * src2
Definition: h264pred_template.c:422
AVFloatDSPContext::vector_fmul_add
void(* vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len)
Calculate the entry wise product of two vectors of floats, add a third vector of floats and store the...
Definition: float_dsp.h:137
len
int len
Definition: vorbis_enc_data.h:426
ARBITRARY_DMAC_SCALAR_CONST
#define ARBITRARY_DMAC_SCALAR_CONST
Definition: float_dsp.c:206
test_vector_fmul_window
static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
Definition: float_dsp.c:136
AVFloatDSPContext::vector_dmul_scalar
void(* vector_dmul_scalar)(double *dst, const double *src, double mul, int len)
Multiply a vector of double by a scalar double.
Definition: float_dsp.h:100
test_scalarproduct_float
static void test_scalarproduct_float(const float *src0, const float *src1)
Definition: float_dsp.c:266
src0
const pixel *const src0
Definition: h264pred_template.c:420
AVFloatDSPContext::vector_fmul_window
void(* vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len)
Overlap/add with window function.
Definition: float_dsp.h:119
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:129
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
avpriv_float_dsp_alloc
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:135
AVFloatDSPContext::vector_dmac_scalar
void(* vector_dmac_scalar)(double *dst, const double *src, double mul, int len)
Multiply a vector of doubles by a scalar double and add to destination vector.
Definition: float_dsp.h:70
test_vector_dmul
static void test_vector_dmul(const double *src0, const double *src1)
Definition: float_dsp.c:67
test_vector_fmac_scalar
static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
Definition: float_dsp.c:159