FFmpeg
sbcdsp.c
Go to the documentation of this file.
1 /*
2  * Bluetooth low-complexity, subband codec (SBC)
3  *
4  * Copyright (C) 2017 Aurelien Jacobs <aurel@gnuage.org>
5  * Copyright (C) 2012-2013 Intel Corporation
6  * Copyright (C) 2008-2010 Nokia Corporation
7  * Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org>
8  * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
9  * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
10  *
11  * This file is part of FFmpeg.
12  *
13  * FFmpeg is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * FFmpeg is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with FFmpeg; if not, write to the Free Software
25  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26  */
27 
28 /**
29  * @file
30  * SBC basic "building bricks"
31  */
32 
33 #include <stdint.h>
34 #include <limits.h>
35 #include <string.h>
36 #include "libavutil/common.h"
37 #include "libavutil/intmath.h"
38 #include "libavutil/intreadwrite.h"
39 #include "sbc.h"
40 #include "sbcdsp.h"
41 #include "sbcdsp_data.h"
42 
43 /*
44  * A reference C code of analysis filter with SIMD-friendly tables
45  * reordering and code layout. This code can be used to develop platform
46  * specific SIMD optimizations. Also it may be used as some kind of test
47  * for compiler autovectorization capabilities (who knows, if the compiler
48  * is very good at this stuff, hand optimized assembly may be not strictly
49  * needed for some platform).
50  *
51  * Note: It is also possible to make a simple variant of analysis filter,
52  * which needs only a single constants table without taking care about
53  * even/odd cases. This simple variant of filter can be implemented without
54  * input data permutation. The only thing that would be lost is the
55  * possibility to use pairwise SIMD multiplications. But for some simple
56  * CPU cores without SIMD extensions it can be useful. If anybody is
57  * interested in implementing such variant of a filter, sourcecode from
58  * bluez versions 4.26/4.27 can be used as a reference and the history of
59  * the changes in git repository done around that time may be worth checking.
60  */
61 
62 static av_always_inline void sbc_analyze_simd(const int16_t *in, int32_t *out,
63  const int16_t *consts,
64  unsigned subbands)
65 {
66  int32_t t1[8];
67  int16_t t2[8];
68  int i, j, hop = 0;
69 
70  /* rounding coefficient */
71  for (i = 0; i < subbands; i++)
72  t1[i] = 1 << (SBC_PROTO_FIXED_SCALE - 1);
73 
74  /* low pass polyphase filter */
75  for (hop = 0; hop < 10*subbands; hop += 2*subbands)
76  for (i = 0; i < 2*subbands; i++)
77  t1[i >> 1] += in[hop + i] * consts[hop + i];
78 
79  /* scaling */
80  for (i = 0; i < subbands; i++)
81  t2[i] = t1[i] >> SBC_PROTO_FIXED_SCALE;
82 
83  memset(t1, 0, sizeof(t1));
84 
85  /* do the cos transform */
86  for (i = 0; i < subbands/2; i++)
87  for (j = 0; j < 2*subbands; j++)
88  t1[j>>1] += t2[i * 2 + (j&1)] * consts[10*subbands + i*2*subbands + j];
89 
90  for (i = 0; i < subbands; i++)
92 }
93 
/**
 * Analysis filter entry point with the subband count fixed to 4.
 * Forwards all arguments unchanged to sbc_analyze_simd().
 */
static void sbc_analyze_4_simd(const int16_t *in, int32_t *out,
                               const int16_t *consts)
{
    enum { NB_SUBBANDS = 4 };

    sbc_analyze_simd(in, out, consts, NB_SUBBANDS);
}
99 
/**
 * Analysis filter entry point with the subband count fixed to 8.
 * Forwards all arguments unchanged to sbc_analyze_simd().
 */
static void sbc_analyze_8_simd(const int16_t *in, int32_t *out,
                               const int16_t *consts)
{
    enum { NB_SUBBANDS = 8 };

    sbc_analyze_simd(in, out, consts, NB_SUBBANDS);
}
105 
106 static inline void sbc_analyze_4b_4s_simd(SBCDSPContext *s,
107  int16_t *x, int32_t *out, int out_stride)
108 {
109  /* Analyze blocks */
110  s->sbc_analyze_4(x + 12, out, ff_sbcdsp_analysis_consts_fixed4_simd_odd);
111  out += out_stride;
112  s->sbc_analyze_4(x + 8, out, ff_sbcdsp_analysis_consts_fixed4_simd_even);
113  out += out_stride;
114  s->sbc_analyze_4(x + 4, out, ff_sbcdsp_analysis_consts_fixed4_simd_odd);
115  out += out_stride;
116  s->sbc_analyze_4(x + 0, out, ff_sbcdsp_analysis_consts_fixed4_simd_even);
117 }
118 
119 static inline void sbc_analyze_4b_8s_simd(SBCDSPContext *s,
120  int16_t *x, int32_t *out, int out_stride)
121 {
122  /* Analyze blocks */
123  s->sbc_analyze_8(x + 24, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
124  out += out_stride;
125  s->sbc_analyze_8(x + 16, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
126  out += out_stride;
127  s->sbc_analyze_8(x + 8, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
128  out += out_stride;
129  s->sbc_analyze_8(x + 0, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
130 }
131 
132 static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
133  int16_t *x, int32_t *out,
134  int out_stride);
135 
136 static inline void sbc_analyze_1b_8s_simd_odd(SBCDSPContext *s,
137  int16_t *x, int32_t *out,
138  int out_stride)
139 {
140  s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
141  s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_even;
142 }
143 
144 static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
145  int16_t *x, int32_t *out,
146  int out_stride)
147 {
149  s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd;
150 }
151 
/*
 * Input data processing functions. The data is endian converted if needed,
 * channels are deinterleaved and audio samples are reordered for use in
 * SIMD-friendly analysis filter function. The results are put into "X"
 * array, getting appended to the previous data (or it is better to say
 * prepended, as the buffer is filled from top to bottom). Old data is
 * discarded when needed, but availability of (10 * nrof_subbands)
 * contiguous samples is always guaranteed for the input to the analysis
 * filter. This is achieved by copying a sufficient part of old data
 * to the top of the buffer on buffer wraparound.
 */
163 
164 static int sbc_enc_process_input_4s(int position, const uint8_t *pcm,
165  int16_t X[2][SBC_X_BUFFER_SIZE],
166  int nsamples, int nchannels)
167 {
168  int c;
169 
170  /* handle X buffer wraparound */
171  if (position < nsamples) {
172  for (c = 0; c < nchannels; c++)
173  memcpy(&X[c][SBC_X_BUFFER_SIZE - 40], &X[c][position],
174  36 * sizeof(int16_t));
175  position = SBC_X_BUFFER_SIZE - 40;
176  }
177 
178  /* copy/permutate audio samples */
179  for (; nsamples >= 8; nsamples -= 8, pcm += 16 * nchannels) {
180  position -= 8;
181  for (c = 0; c < nchannels; c++) {
182  int16_t *x = &X[c][position];
183  x[0] = AV_RN16(pcm + 14*nchannels + 2*c);
184  x[1] = AV_RN16(pcm + 6*nchannels + 2*c);
185  x[2] = AV_RN16(pcm + 12*nchannels + 2*c);
186  x[3] = AV_RN16(pcm + 8*nchannels + 2*c);
187  x[4] = AV_RN16(pcm + 0*nchannels + 2*c);
188  x[5] = AV_RN16(pcm + 4*nchannels + 2*c);
189  x[6] = AV_RN16(pcm + 2*nchannels + 2*c);
190  x[7] = AV_RN16(pcm + 10*nchannels + 2*c);
191  }
192  }
193 
194  return position;
195 }
196 
197 static int sbc_enc_process_input_8s(int position, const uint8_t *pcm,
198  int16_t X[2][SBC_X_BUFFER_SIZE],
199  int nsamples, int nchannels)
200 {
201  int c;
202 
203  /* handle X buffer wraparound */
204  if (position < nsamples) {
205  for (c = 0; c < nchannels; c++)
206  memcpy(&X[c][SBC_X_BUFFER_SIZE - 72], &X[c][position],
207  72 * sizeof(int16_t));
208  position = SBC_X_BUFFER_SIZE - 72;
209  }
210 
211  if (position % 16 == 8) {
212  position -= 8;
213  nsamples -= 8;
214  for (c = 0; c < nchannels; c++) {
215  int16_t *x = &X[c][position];
216  x[0] = AV_RN16(pcm + 14*nchannels + 2*c);
217  x[2] = AV_RN16(pcm + 12*nchannels + 2*c);
218  x[3] = AV_RN16(pcm + 0*nchannels + 2*c);
219  x[4] = AV_RN16(pcm + 10*nchannels + 2*c);
220  x[5] = AV_RN16(pcm + 2*nchannels + 2*c);
221  x[6] = AV_RN16(pcm + 8*nchannels + 2*c);
222  x[7] = AV_RN16(pcm + 4*nchannels + 2*c);
223  x[8] = AV_RN16(pcm + 6*nchannels + 2*c);
224  }
225  pcm += 16 * nchannels;
226  }
227 
228  /* copy/permutate audio samples */
229  for (; nsamples >= 16; nsamples -= 16, pcm += 32 * nchannels) {
230  position -= 16;
231  for (c = 0; c < nchannels; c++) {
232  int16_t *x = &X[c][position];
233  x[0] = AV_RN16(pcm + 30*nchannels + 2*c);
234  x[1] = AV_RN16(pcm + 14*nchannels + 2*c);
235  x[2] = AV_RN16(pcm + 28*nchannels + 2*c);
236  x[3] = AV_RN16(pcm + 16*nchannels + 2*c);
237  x[4] = AV_RN16(pcm + 26*nchannels + 2*c);
238  x[5] = AV_RN16(pcm + 18*nchannels + 2*c);
239  x[6] = AV_RN16(pcm + 24*nchannels + 2*c);
240  x[7] = AV_RN16(pcm + 20*nchannels + 2*c);
241  x[8] = AV_RN16(pcm + 22*nchannels + 2*c);
242  x[9] = AV_RN16(pcm + 6*nchannels + 2*c);
243  x[10] = AV_RN16(pcm + 12*nchannels + 2*c);
244  x[11] = AV_RN16(pcm + 0*nchannels + 2*c);
245  x[12] = AV_RN16(pcm + 10*nchannels + 2*c);
246  x[13] = AV_RN16(pcm + 2*nchannels + 2*c);
247  x[14] = AV_RN16(pcm + 8*nchannels + 2*c);
248  x[15] = AV_RN16(pcm + 4*nchannels + 2*c);
249  }
250  }
251 
252  if (nsamples == 8) {
253  position -= 8;
254  for (c = 0; c < nchannels; c++) {
255  int16_t *x = &X[c][position];
256  x[-7] = AV_RN16(pcm + 14*nchannels + 2*c);
257  x[1] = AV_RN16(pcm + 6*nchannels + 2*c);
258  x[2] = AV_RN16(pcm + 12*nchannels + 2*c);
259  x[3] = AV_RN16(pcm + 0*nchannels + 2*c);
260  x[4] = AV_RN16(pcm + 10*nchannels + 2*c);
261  x[5] = AV_RN16(pcm + 2*nchannels + 2*c);
262  x[6] = AV_RN16(pcm + 8*nchannels + 2*c);
263  x[7] = AV_RN16(pcm + 4*nchannels + 2*c);
264  }
265  }
266 
267  return position;
268 }
269 
270 static void sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
271  uint32_t scale_factor[2][8],
272  int blocks, int channels, int subbands)
273 {
274  int ch, sb, blk;
275  for (ch = 0; ch < channels; ch++) {
276  for (sb = 0; sb < subbands; sb++) {
277  uint32_t x = 1 << SCALE_OUT_BITS;
278  for (blk = 0; blk < blocks; blk++) {
279  int32_t tmp = FFABS(sb_sample_f[blk][ch][sb]);
280  if (tmp != 0)
281  x |= tmp - 1;
282  }
283  scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - ff_clz(x);
284  }
285  }
286 }
287 
288 static int sbc_calc_scalefactors_j(int32_t sb_sample_f[16][2][8],
289  uint32_t scale_factor[2][8],
290  int blocks, int subbands)
291 {
292  int blk, joint = 0;
293  int32_t tmp0, tmp1;
294  uint32_t x, y;
295 
296  /* last subband does not use joint stereo */
297  int sb = subbands - 1;
298  x = 1 << SCALE_OUT_BITS;
299  y = 1 << SCALE_OUT_BITS;
300  for (blk = 0; blk < blocks; blk++) {
301  tmp0 = FFABS(sb_sample_f[blk][0][sb]);
302  tmp1 = FFABS(sb_sample_f[blk][1][sb]);
303  if (tmp0 != 0)
304  x |= tmp0 - 1;
305  if (tmp1 != 0)
306  y |= tmp1 - 1;
307  }
308  scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - ff_clz(x);
309  scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - ff_clz(y);
310 
311  /* the rest of subbands can use joint stereo */
312  while (--sb >= 0) {
313  int32_t sb_sample_j[16][2];
314  x = 1 << SCALE_OUT_BITS;
315  y = 1 << SCALE_OUT_BITS;
316  for (blk = 0; blk < blocks; blk++) {
317  tmp0 = sb_sample_f[blk][0][sb];
318  tmp1 = sb_sample_f[blk][1][sb];
319  sb_sample_j[blk][0] = (tmp0 >> 1) + (tmp1 >> 1);
320  sb_sample_j[blk][1] = (tmp0 >> 1) - (tmp1 >> 1);
321  tmp0 = FFABS(tmp0);
322  tmp1 = FFABS(tmp1);
323  if (tmp0 != 0)
324  x |= tmp0 - 1;
325  if (tmp1 != 0)
326  y |= tmp1 - 1;
327  }
328  scale_factor[0][sb] = (31 - SCALE_OUT_BITS) -
329  ff_clz(x);
330  scale_factor[1][sb] = (31 - SCALE_OUT_BITS) -
331  ff_clz(y);
332  x = 1 << SCALE_OUT_BITS;
333  y = 1 << SCALE_OUT_BITS;
334  for (blk = 0; blk < blocks; blk++) {
335  tmp0 = FFABS(sb_sample_j[blk][0]);
336  tmp1 = FFABS(sb_sample_j[blk][1]);
337  if (tmp0 != 0)
338  x |= tmp0 - 1;
339  if (tmp1 != 0)
340  y |= tmp1 - 1;
341  }
342  x = (31 - SCALE_OUT_BITS) - ff_clz(x);
343  y = (31 - SCALE_OUT_BITS) - ff_clz(y);
344 
345  /* decide whether to use joint stereo for this subband */
346  if ((scale_factor[0][sb] + scale_factor[1][sb]) > x + y) {
347  joint |= 1 << (subbands - 1 - sb);
348  scale_factor[0][sb] = x;
349  scale_factor[1][sb] = y;
350  for (blk = 0; blk < blocks; blk++) {
351  sb_sample_f[blk][0][sb] = sb_sample_j[blk][0];
352  sb_sample_f[blk][1][sb] = sb_sample_j[blk][1];
353  }
354  }
355  }
356 
357  /* bitmask with the information about subbands using joint stereo */
358  return joint;
359 }
360 
361 /*
362  * Detect CPU features and setup function pointers
363  */
364 av_cold void ff_sbcdsp_init(SBCDSPContext *s)
365 {
366  /* Default implementation for analyze functions */
367  s->sbc_analyze_4 = sbc_analyze_4_simd;
368  s->sbc_analyze_8 = sbc_analyze_8_simd;
369  s->sbc_analyze_4s = sbc_analyze_4b_4s_simd;
370  if (s->increment == 1)
371  s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd;
372  else
373  s->sbc_analyze_8s = sbc_analyze_4b_8s_simd;
374 
375  /* Default implementation for input reordering / deinterleaving */
376  s->sbc_enc_process_input_4s = sbc_enc_process_input_4s;
377  s->sbc_enc_process_input_8s = sbc_enc_process_input_8s;
378 
379  /* Default implementation for scale factors calculation */
380  s->sbc_calc_scalefactors = sbc_calc_scalefactors;
381  s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
382 
383 #if ARCH_ARM
385 #elif ARCH_X86
387 #endif
388 }
out
FILE * out
Definition: movenc.c:54
sbc_enc_process_input_8s
static int sbc_enc_process_input_8s(int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], int nsamples, int nchannels)
Definition: sbcdsp.c:197
ff_sbcdsp_analysis_consts_fixed8_simd_even
const int16_t ff_sbcdsp_analysis_consts_fixed8_simd_even[80+64]
Definition: sbcdsp_data.c:139
ff_clz
#define ff_clz
Definition: intmath.h:143
AV_RN16
#define AV_RN16(p)
Definition: intreadwrite.h:358
ff_sbcdsp_analysis_consts_fixed4_simd_odd
const int16_t ff_sbcdsp_analysis_consts_fixed4_simd_odd[40+16]
Definition: sbcdsp_data.c:94
sbc_analyze_8_simd
static void sbc_analyze_8_simd(const int16_t *in, int32_t *out, const int16_t *consts)
Definition: sbcdsp.c:100
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
sbc_analyze_4b_4s_simd
static void sbc_analyze_4b_4s_simd(SBCDSPContext *s, int16_t *x, int32_t *out, int out_stride)
Definition: sbcdsp.c:106
t1
#define t1
Definition: regdef.h:29
subbands
subbands
Definition: aptx.h:37
sbc_analyze_1b_8s_simd_odd
static void sbc_analyze_1b_8s_simd_odd(SBCDSPContext *s, int16_t *x, int32_t *out, int out_stride)
Definition: sbcdsp.c:136
ff_sbcdsp_init_x86
void ff_sbcdsp_init_x86(SBCDSPContext *s)
Definition: sbcdsp_init.c:43
SBC_PROTO_FIXED_SCALE
#define SBC_PROTO_FIXED_SCALE
Definition: sbcdsp_data.h:37
sbcdsp.h
sbc_calc_scalefactors_j
static int sbc_calc_scalefactors_j(int32_t sb_sample_f[16][2][8], uint32_t scale_factor[2][8], int blocks, int subbands)
Definition: sbcdsp.c:288
sbc_analyze_4b_8s_simd
static void sbc_analyze_4b_8s_simd(SBCDSPContext *s, int16_t *x, int32_t *out, int out_stride)
Definition: sbcdsp.c:119
ff_sbcdsp_analysis_consts_fixed4_simd_even
const int16_t ff_sbcdsp_analysis_consts_fixed4_simd_even[40+16]
Definition: sbcdsp_data.c:49
av_cold
#define av_cold
Definition: attributes.h:90
intreadwrite.h
s
#define s(width, name)
Definition: cbs_vp9.c:198
ff_sbcdsp_init
av_cold void ff_sbcdsp_init(SBCDSPContext *s)
Definition: sbcdsp.c:364
channels
channels
Definition: aptx.h:31
limits.h
blk
#define blk(i)
Definition: sha.c:186
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
sbcdsp_data.h
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
sbc_enc_process_input_4s
static int sbc_enc_process_input_4s(int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], int nsamples, int nchannels)
Definition: sbcdsp.c:164
X
@ X
Definition: vf_addroi.c:27
sbc_calc_scalefactors
static void sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8], uint32_t scale_factor[2][8], int blocks, int channels, int subbands)
Definition: sbcdsp.c:270
sbc.h
SBC_X_BUFFER_SIZE
#define SBC_X_BUFFER_SIZE
Definition: sbcdsp.h:41
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
sbc_analyze_4_simd
static void sbc_analyze_4_simd(const int16_t *in, int32_t *out, const int16_t *consts)
Definition: sbcdsp.c:94
ff_sbcdsp_init_arm
av_cold void ff_sbcdsp_init_arm(SBCDSPContext *s)
Definition: sbcdsp_init_arm.c:88
common.h
av_always_inline
#define av_always_inline
Definition: attributes.h:49
sbc_analyze_simd
static av_always_inline void sbc_analyze_simd(const int16_t *in, int32_t *out, const int16_t *consts, unsigned subbands)
Definition: sbcdsp.c:62
t2
#define t2
Definition: regdef.h:30
ff_sbcdsp_analysis_consts_fixed8_simd_odd
const int16_t ff_sbcdsp_analysis_consts_fixed8_simd_odd[80+64]
Definition: sbcdsp_data.c:236
SCALE_OUT_BITS
#define SCALE_OUT_BITS
Definition: sbcdsp.h:40
SBC_COS_TABLE_FIXED_SCALE
#define SBC_COS_TABLE_FIXED_SCALE
Definition: sbcdsp_data.h:38
sbc_analyze_1b_8s_simd_even
static void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s, int16_t *x, int32_t *out, int out_stride)
Definition: sbcdsp.c:144
int32_t
int32_t
Definition: audioconvert.c:56
intmath.h