FFmpeg
aacsbr_mips.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012
3  * MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  * contributors may be used to endorse or promote products derived from
15  * this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Authors: Djordje Pesut (djordje@mips.com)
30  * Mirjana Vulin (mvulin@mips.com)
31  *
32  * This file is part of FFmpeg.
33  *
34  * FFmpeg is free software; you can redistribute it and/or
35  * modify it under the terms of the GNU Lesser General Public
36  * License as published by the Free Software Foundation; either
37  * version 2.1 of the License, or (at your option) any later version.
38  *
39  * FFmpeg is distributed in the hope that it will be useful,
40  * but WITHOUT ANY WARRANTY; without even the implied warranty of
41  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
42  * Lesser General Public License for more details.
43  *
44  * You should have received a copy of the GNU Lesser General Public
45  * License along with FFmpeg; if not, write to the Free Software
46  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
47  */
48 
49 /**
50  * @file
51  * Reference: libavcodec/aacsbr.c
52  */
53 
54 #include "libavcodec/aacdec.h"
55 #include "libavcodec/aacsbr.h"
56 #include "libavutil/mem_internal.h"
57 #include "libavutil/mips/asmdefs.h"
58 
59 #define ENVELOPE_ADJUSTMENT_OFFSET 2
60 
61 #if HAVE_INLINE_ASM
62 #if HAVE_MIPSFPU
/**
 * MIPS inline-assembly routine that clears X_low and then fills it from W.
 *
 * Steps performed, in order:
 *  - all 32*40*2 = 2560 floats of X_low are set to zero;
 *  - for every k < sbr->kx[1], W[buf_idx][i][k] (i = 0..31) is copied to
 *    X_low[k][i + 8];
 *  - for every k < sbr->kx[0], W[1 - buf_idx][24 + i][k] (i = 0..7) is
 *    copied to X_low[k][i].
 *
 * Values are moved as raw 32-bit words through integer registers (lw/sw).
 *
 * @param sbr     only sbr->kx[0] and sbr->kx[1] are read
 * @param X_low   destination array, fully overwritten
 * @param W       source array
 * @param buf_idx selects W[buf_idx] for the main copy and W[1 - buf_idx]
 *                for the leading 8 entries
 * @return always 0
 */
63 static int sbr_lf_gen_mips(SpectralBandReplication *sbr,
64  float X_low[32][40][2], const float W[2][32][32][2],
65  int buf_idx)
66 {
67  int i, k;
68  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
69  float *p_x_low = &X_low[0][8][0];
70  float *p_w = (float*)&W[buf_idx][0][0][0];
71  float *p_x1_low = &X_low[0][0][0];
72  float *p_w1 = (float*)&W[1-buf_idx][24][0][0];
73 
 /* One past the last float of X_low: 32 * 40 * 2 = 2560 floats. */
74  float *loop_end=p_x1_low + 2560;
75 
 /*
  * Zero X_low, 8 words (32 bytes) per iteration. The PTR_ADDIU after the
  * bne rewinds p_x1_low by 10240 bytes (2560 floats) back to the start.
  * NOTE(review): this presumes the assembler does not place that rewind
  * in the branch delay slot (i.e. reorder mode is active) -- confirm.
  */
76  /* loop unrolled 8 times */
77  __asm__ volatile (
78  "1: \n\t"
79  "sw $0, 0(%[p_x1_low]) \n\t"
80  "sw $0, 4(%[p_x1_low]) \n\t"
81  "sw $0, 8(%[p_x1_low]) \n\t"
82  "sw $0, 12(%[p_x1_low]) \n\t"
83  "sw $0, 16(%[p_x1_low]) \n\t"
84  "sw $0, 20(%[p_x1_low]) \n\t"
85  "sw $0, 24(%[p_x1_low]) \n\t"
86  "sw $0, 28(%[p_x1_low]) \n\t"
87  PTR_ADDIU "%[p_x1_low],%[p_x1_low], 32 \n\t"
88  "bne %[p_x1_low], %[loop_end], 1b \n\t"
89  PTR_ADDIU "%[p_x1_low],%[p_x1_low], -10240 \n\t"
90 
91  : [p_x1_low]"+r"(p_x1_low)
92  : [loop_end]"r"(loop_end)
93  : "memory"
94  );
95 
 /*
  * Main copy: W[buf_idx][i][k] -> X_low[k][i + 8]. Source rows are 256
  * bytes apart (32 * 2 floats); destination pairs are contiguous.
  */
96  for (k = 0; k < sbr->kx[1]; k++) {
97  for (i = 0; i < 32; i+=4) {
98  /* loop unrolled 4 times */
99  __asm__ volatile (
100  "lw %[temp0], 0(%[p_w]) \n\t"
101  "lw %[temp1], 4(%[p_w]) \n\t"
102  "lw %[temp2], 256(%[p_w]) \n\t"
103  "lw %[temp3], 260(%[p_w]) \n\t"
104  "lw %[temp4], 512(%[p_w]) \n\t"
105  "lw %[temp5], 516(%[p_w]) \n\t"
106  "lw %[temp6], 768(%[p_w]) \n\t"
107  "lw %[temp7], 772(%[p_w]) \n\t"
108  "sw %[temp0], 0(%[p_x_low]) \n\t"
109  "sw %[temp1], 4(%[p_x_low]) \n\t"
110  "sw %[temp2], 8(%[p_x_low]) \n\t"
111  "sw %[temp3], 12(%[p_x_low]) \n\t"
112  "sw %[temp4], 16(%[p_x_low]) \n\t"
113  "sw %[temp5], 20(%[p_x_low]) \n\t"
114  "sw %[temp6], 24(%[p_x_low]) \n\t"
115  "sw %[temp7], 28(%[p_x_low]) \n\t"
116  PTR_ADDIU "%[p_x_low], %[p_x_low], 32 \n\t"
117  PTR_ADDIU "%[p_w], %[p_w], 1024 \n\t"
118 
119  : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
120  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
121  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
122  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
123  [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low)
124  :
125  : "memory"
126  );
127  }
 /*
  * The inner loop advanced p_x_low by 64 floats; 16 more reach the same
  * column of the next X_low[k] row (80 floats per row). p_w advanced by
  * 2048 floats; stepping back 2046 leaves it at W[buf_idx][0][k + 1].
  */
128  p_x_low += 16;
129  p_w -= 2046;
130  }
131 
 /* Leading part: W[1 - buf_idx][24 + i][k] -> X_low[k][i], i = 0..7. */
132  for (k = 0; k < sbr->kx[0]; k++) {
133  for (i = 0; i < 2; i++) {
134 
135  /* loop unrolled 4 times */
136  __asm__ volatile (
137  "lw %[temp0], 0(%[p_w1]) \n\t"
138  "lw %[temp1], 4(%[p_w1]) \n\t"
139  "lw %[temp2], 256(%[p_w1]) \n\t"
140  "lw %[temp3], 260(%[p_w1]) \n\t"
141  "lw %[temp4], 512(%[p_w1]) \n\t"
142  "lw %[temp5], 516(%[p_w1]) \n\t"
143  "lw %[temp6], 768(%[p_w1]) \n\t"
144  "lw %[temp7], 772(%[p_w1]) \n\t"
145  "sw %[temp0], 0(%[p_x1_low]) \n\t"
146  "sw %[temp1], 4(%[p_x1_low]) \n\t"
147  "sw %[temp2], 8(%[p_x1_low]) \n\t"
148  "sw %[temp3], 12(%[p_x1_low]) \n\t"
149  "sw %[temp4], 16(%[p_x1_low]) \n\t"
150  "sw %[temp5], 20(%[p_x1_low]) \n\t"
151  "sw %[temp6], 24(%[p_x1_low]) \n\t"
152  "sw %[temp7], 28(%[p_x1_low]) \n\t"
153  PTR_ADDIU "%[p_x1_low], %[p_x1_low], 32 \n\t"
154  PTR_ADDIU "%[p_w1], %[p_w1], 1024 \n\t"
155 
156  : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
157  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
158  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
159  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
160  [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low)
161  :
162  : "memory"
163  );
164  }
 /*
  * 16 floats were written; 64 more reach the start of the next X_low[k]
  * row. p_w1 advanced by 512 floats; stepping back 510 moves to k + 1.
  */
165  p_x1_low += 64;
166  p_w1 -= 510;
167  }
168  return 0;
169 }
170 
/**
 * MIPS inline-assembly routine that clears X and fills it from X_low, Y0
 * and Y1.
 *
 * X[0] receives the first component of each source pair and X[1] the
 * second one (the two planes are 38 * 64 floats = 9728 bytes apart).
 * With i_Temp = FFMAX(2 * sbr->data[ch].t_env_num_env_old - 32, 0):
 *  - rows i < i_Temp (only when i_Temp != 0):
 *      columns k < kx[0]                  come from X_low[k][i + 2],
 *      columns kx[0] <= k < kx[0] + m[0]  come from Y0[i + 32][k];
 *  - rows i >= i_Temp:
 *      columns k < kx[1], rows up to 37,  come from X_low[k][i + 2],
 *      columns kx[1] <= k < kx[1] + m[1], rows up to 31, come from Y1[i][k].
 *
 * Every asm loop is bottom-tested with an equality compare (bne), so the
 * body always runs at least once and the start index must be strictly
 * below the end value.
 * NOTE(review): this relies on i_Temp < 32 for the last two loops; the
 * range of t_env_num_env_old is not visible here -- confirm with callers.
 *
 * @param sbr   kx[], m[] and data[ch].t_env_num_env_old are read
 * @param X     destination, fully overwritten
 * @param Y0    source for rows below i_Temp
 * @param Y1    source for rows from i_Temp on
 * @param X_low low-band source
 * @param ch    channel index into sbr->data
 * @return always 0
 */
171 static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
172  const float Y0[38][64][2], const float Y1[38][64][2],
173  const float X_low[32][40][2], int ch)
174 {
175  int k, i;
176  const int i_f = 32;
177  int temp0, temp1, temp2, temp3;
178  const float *X_low1, *Y01, *Y11;
179  float *x1=&X[0][0][0];
 /* One past the end of X: 2 * 38 * 64 = 4864 floats. */
180  float *j=x1+4864;
181  const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);
182 
 /*
  * Zero X, 8 words per iteration, then rewind x1 by 19456 bytes
  * (4864 floats) back to X[0][0][0].
  */
183  /* loop unrolled 8 times */
184  __asm__ volatile (
185  "1: \n\t"
186  "sw $0, 0(%[x1]) \n\t"
187  "sw $0, 4(%[x1]) \n\t"
188  "sw $0, 8(%[x1]) \n\t"
189  "sw $0, 12(%[x1]) \n\t"
190  "sw $0, 16(%[x1]) \n\t"
191  "sw $0, 20(%[x1]) \n\t"
192  "sw $0, 24(%[x1]) \n\t"
193  "sw $0, 28(%[x1]) \n\t"
194  PTR_ADDIU "%[x1],%[x1], 32 \n\t"
195  "bne %[x1], %[j], 1b \n\t"
196  PTR_ADDIU "%[x1],%[x1], -19456 \n\t"
197 
198  : [x1]"+r"(x1)
199  : [j]"r"(j)
200  : "memory"
201  );
202 
 /* The two loops below would misbehave for a zero trip count, hence the guard. */
203  if (i_Temp != 0) {
204 
205  X_low1=&X_low[0][2][0];
206 
207  for (k = 0; k < sbr->kx[0]; k++) {
208 
 /* Column k of rows 0..i_Temp-1: X[.][i][k] = X_low[k][i + 2][.] */
209  __asm__ volatile (
210  "move %[i], $zero \n\t"
211  "2: \n\t"
212  "lw %[temp0], 0(%[X_low1]) \n\t"
213  "lw %[temp1], 4(%[X_low1]) \n\t"
214  "sw %[temp0], 0(%[x1]) \n\t"
215  "sw %[temp1], 9728(%[x1]) \n\t"
216  PTR_ADDIU "%[x1], %[x1], 256 \n\t"
217  PTR_ADDIU "%[X_low1], %[X_low1], 8 \n\t"
218  "addiu %[i], %[i], 1 \n\t"
219  "bne %[i], %[i_Temp], 2b \n\t"
220 
221  : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
222  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
223  : [i_Temp]"r"(i_Temp)
224  : "memory"
225  );
 /* Back to row 0, next column; back to sample 2 of the next X_low row (80 floats). */
226  x1-=(i_Temp<<6)-1;
227  X_low1-=(i_Temp<<1)-80;
228  }
229 
230  x1=&X[0][0][k];
231  Y01=(float*)&Y0[32][k][0];
232 
233  for (; k < sbr->kx[0] + sbr->m[0]; k++) {
 /* Column k of rows 0..i_Temp-1: X[.][i][k] = Y0[i + 32][k][.] */
234  __asm__ volatile (
235  "move %[i], $zero \n\t"
236  "3: \n\t"
237  "lw %[temp0], 0(%[Y01]) \n\t"
238  "lw %[temp1], 4(%[Y01]) \n\t"
239  "sw %[temp0], 0(%[x1]) \n\t"
240  "sw %[temp1], 9728(%[x1]) \n\t"
241  PTR_ADDIU "%[x1], %[x1], 256 \n\t"
242  PTR_ADDIU "%[Y01], %[Y01], 512 \n\t"
243  "addiu %[i], %[i], 1 \n\t"
244  "bne %[i], %[i_Temp], 3b \n\t"
245 
246  : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i),
247  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
248  : [i_Temp]"r"(i_Temp)
249  : "memory"
250  );
 /* Rewind both pointers by i_Temp rows and step to the next column. */
251  x1 -=(i_Temp<<6)-1;
252  Y01 -=(i_Temp<<7)-2;
253  }
254  }
255 
256  x1=&X[0][i_Temp][0];
257  X_low1=&X_low[0][i_Temp+2][0];
 /* End row (exclusive) for the X_low copy below. */
258  temp3=38;
259 
260  for (k = 0; k < sbr->kx[1]; k++) {
261 
 /* Column k of rows i_Temp..37: X[.][i][k] = X_low[k][i + 2][.] */
262  __asm__ volatile (
263  "move %[i], %[i_Temp] \n\t"
264  "4: \n\t"
265  "lw %[temp0], 0(%[X_low1]) \n\t"
266  "lw %[temp1], 4(%[X_low1]) \n\t"
267  "sw %[temp0], 0(%[x1]) \n\t"
268  "sw %[temp1], 9728(%[x1]) \n\t"
269  PTR_ADDIU "%[x1], %[x1], 256 \n\t"
270  PTR_ADDIU "%[X_low1],%[X_low1], 8 \n\t"
271  "addiu %[i], %[i], 1 \n\t"
272  "bne %[i], %[temp3], 4b \n\t"
273 
274  : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
275  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
276  [temp2]"=&r"(temp2)
277  : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3)
278  : "memory"
279  );
280  x1 -= ((38-i_Temp)<<6)-1;
281  X_low1 -= ((38-i_Temp)<<1)- 80;
282  }
283 
284  x1=&X[0][i_Temp][k];
285  Y11=&Y1[i_Temp][k][0];
 /* End row (exclusive) for the Y1 copy below. */
286  temp2=32;
287 
288  for (; k < sbr->kx[1] + sbr->m[1]; k++) {
289 
 /* Column k of rows i_Temp..31: X[.][i][k] = Y1[i][k][.] */
290  __asm__ volatile (
291  "move %[i], %[i_Temp] \n\t"
292  "5: \n\t"
293  "lw %[temp0], 0(%[Y11]) \n\t"
294  "lw %[temp1], 4(%[Y11]) \n\t"
295  "sw %[temp0], 0(%[x1]) \n\t"
296  "sw %[temp1], 9728(%[x1]) \n\t"
297  PTR_ADDIU "%[x1], %[x1], 256 \n\t"
298  PTR_ADDIU "%[Y11], %[Y11], 512 \n\t"
299  "addiu %[i], %[i], 1 \n\t"
300  "bne %[i], %[temp2], 5b \n\t"
301 
302  : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i),
303  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
304  : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3),
305  [temp2]"r"(temp2)
306  : "memory"
307  );
308 
309  x1 -= ((32-i_Temp)<<6)-1;
310  Y11 -= ((32-i_Temp)<<7)-2;
311  }
312  return 0;
313 }
314 
315 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
/**
 * MIPS version of the SBR high-frequency assembly step.
 *
 * The routine
 *  1. seeds the first rows of ch_data->g_temp / q_temp (from sbr->gain[0] /
 *     sbr->q_m[0] on reset, otherwise from the rows at t_env_num_env_old);
 *  2. copies sbr->gain[e] / sbr->q_m[e] into g_temp / q_temp for every time
 *     slot of every envelope e;
 *  3. for every time slot, optionally smooths the gains with h_smooth,
 *     applies them through sbr->dsp.hf_g_filt, and then either adds noise
 *     (sbr->dsp.hf_apply_noise) or, for the envelopes listed in e_a, adds
 *     the values of sbr->s_m[e] with alternating signs.
 *
 * Changes with respect to the previous revision of this block:
 *  - the hf_g_filt call was truncated; its last argument
 *    (i + ENVELOPE_ADJUSTMENT_OFFSET) and closing parenthesis are restored;
 *  - the odd-m_max tail after the asm loop indexed out[2*m] / in[m]
 *    although both pointers had already been advanced by the asm loop,
 *    touching elements twice as far away as intended. It now uses
 *    out[0] / in[0].
 *
 * @param Y1      output, modified in place
 * @param X_high  high-band input passed on to hf_g_filt
 * @param sbr     SBR context (gain, q_m, s_m, kx[1], m[1], dsp, reset, ...)
 * @param ch_data per-channel state; g_temp, q_temp, f_indexnoise and
 *                f_indexsine are updated
 * @param e_a     two envelope indices that take the sine path instead of
 *                the noise path
 */
316 static void sbr_hf_assemble_mips(float Y1[38][64][2],
317  const float X_high[64][40][2],
318  SpectralBandReplication *sbr, SBRData *ch_data,
319  const int e_a[2])
320 {
321  int e, i, j, m;
 /* Smoothing filter length: 4 when bs_smoothing_mode is 0, else 0. */
322  const int h_SL = 4 * !sbr->bs_smoothing_mode;
323  const int kx = sbr->kx[1];
324  const int m_max = sbr->m[1];
325  static const float h_smooth[5] = {
326  0.33333333333333,
327  0.30150283239582,
328  0.21816949906249,
329  0.11516383427084,
330  0.03183050093751,
331  };
332 
333  float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
334  int indexnoise = ch_data->f_indexnoise;
335  int indexsine = ch_data->f_indexsine;
336  float *g_temp1, *q_temp1, *pok, *pok1;
337  uint32_t temp1, temp2, temp3, temp4;
338  int size = m_max;
339 
 /* Seed the h_SL history rows that precede the first time slot. */
340  if (sbr->reset) {
341  for (i = 0; i < h_SL; i++) {
342  memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
343  memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0], m_max * sizeof(sbr->q_m[0][0]));
344  }
345  } else if (h_SL) {
346  memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0]));
347  memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0]));
348  }
349 
 /*
  * Copy gain[e] -> g_temp[h_SL + i] and q_m[e] -> q_temp[h_SL + i] for
  * every time slot i of envelope e, m_max words each.
  */
350  for (e = 0; e < ch_data->bs_num_env; e++) {
351  for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
352  g_temp1 = g_temp[h_SL + i];
353  pok = sbr->gain[e];
354  q_temp1 = q_temp[h_SL + i];
355  pok1 = sbr->q_m[e];
356 
357  /* loop unrolled 4 times */
358  for (j=0; j<(size>>2); j++) {
359  __asm__ volatile (
360  "lw %[temp1], 0(%[pok]) \n\t"
361  "lw %[temp2], 4(%[pok]) \n\t"
362  "lw %[temp3], 8(%[pok]) \n\t"
363  "lw %[temp4], 12(%[pok]) \n\t"
364  "sw %[temp1], 0(%[g_temp1]) \n\t"
365  "sw %[temp2], 4(%[g_temp1]) \n\t"
366  "sw %[temp3], 8(%[g_temp1]) \n\t"
367  "sw %[temp4], 12(%[g_temp1]) \n\t"
368  "lw %[temp1], 0(%[pok1]) \n\t"
369  "lw %[temp2], 4(%[pok1]) \n\t"
370  "lw %[temp3], 8(%[pok1]) \n\t"
371  "lw %[temp4], 12(%[pok1]) \n\t"
372  "sw %[temp1], 0(%[q_temp1]) \n\t"
373  "sw %[temp2], 4(%[q_temp1]) \n\t"
374  "sw %[temp3], 8(%[q_temp1]) \n\t"
375  "sw %[temp4], 12(%[q_temp1]) \n\t"
376  PTR_ADDIU "%[pok], %[pok], 16 \n\t"
377  PTR_ADDIU "%[g_temp1], %[g_temp1], 16 \n\t"
378  PTR_ADDIU "%[pok1], %[pok1], 16 \n\t"
379  PTR_ADDIU "%[q_temp1], %[q_temp1], 16 \n\t"
380 
381  : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
382  [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
383  [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
384  [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
385  :
386  : "memory"
387  );
388  }
389 
 /* Remaining (size & 3) elements, one word at a time. */
390  for (j=0; j<(size&3); j++) {
391  __asm__ volatile (
392  "lw %[temp1], 0(%[pok]) \n\t"
393  "lw %[temp2], 0(%[pok1]) \n\t"
394  "sw %[temp1], 0(%[g_temp1]) \n\t"
395  "sw %[temp2], 0(%[q_temp1]) \n\t"
396  PTR_ADDIU "%[pok], %[pok], 4 \n\t"
397  PTR_ADDIU "%[g_temp1], %[g_temp1], 4 \n\t"
398  PTR_ADDIU "%[pok1], %[pok1], 4 \n\t"
399  PTR_ADDIU "%[q_temp1], %[q_temp1], 4 \n\t"
400 
401  : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
402  [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
403  [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
404  [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
405  :
406  : "memory"
407  );
408  }
409  }
410  }
411 
412  for (e = 0; e < ch_data->bs_num_env; e++) {
413  for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
414  LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
415  LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
416  float *g_filt, *q_filt;
417 
 /* Smooth over the last h_SL + 1 rows unless e is one of the e_a envelopes. */
418  if (h_SL && e != e_a[0] && e != e_a[1]) {
419  g_filt = g_filt_tab;
420  q_filt = q_filt_tab;
421 
422  for (m = 0; m < m_max; m++) {
423  const int idx1 = i + h_SL;
424  g_filt[m] = 0.0f;
425  q_filt[m] = 0.0f;
426 
427  for (j = 0; j <= h_SL; j++) {
428  g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
429  q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
430  }
431  }
432  } else {
433  g_filt = g_temp[i + h_SL];
434  q_filt = q_temp[i];
435  }
436 
437  sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
438  i + ENVELOPE_ADJUSTMENT_OFFSET);
439 
440  if (e != e_a[0] && e != e_a[1]) {
441  sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
442  q_filt, indexnoise,
443  kx, m_max);
444  } else {
 /*
  * Sine path: idx selects which component of each Y1 pair is
  * touched; A and B are the signs (+1 / -1) applied to even and
  * odd m respectively.
  */
445  int idx = indexsine&1;
446  int A = (1-((indexsine+(kx & 1))&2));
447  int B = (A^(-idx)) + idx;
448  float *out = &Y1[i][kx][idx];
449  float *in = sbr->s_m[e];
450  float temp0, temp1, temp2, temp3, temp4, temp5;
451  float A_f = (float)A;
452  float B_f = (float)B;
453 
 /* Two bands per iteration; the asm advances in by 2 and out by 4 floats. */
454  for (m = 0; m+1 < m_max; m+=2) {
455 
456  temp2 = out[0];
457  temp3 = out[2];
458 
459  __asm__ volatile(
460  "lwc1 %[temp0], 0(%[in]) \n\t"
461  "lwc1 %[temp1], 4(%[in]) \n\t"
462  "madd.s %[temp4], %[temp2], %[temp0], %[A_f] \n\t"
463  "madd.s %[temp5], %[temp3], %[temp1], %[B_f] \n\t"
464  "swc1 %[temp4], 0(%[out]) \n\t"
465  "swc1 %[temp5], 8(%[out]) \n\t"
466  PTR_ADDIU "%[in], %[in], 8 \n\t"
467  PTR_ADDIU "%[out], %[out], 16 \n\t"
468 
469  : [temp0]"=&f" (temp0), [temp1]"=&f"(temp1),
470  [temp4]"=&f" (temp4), [temp5]"=&f"(temp5),
471  [in]"+r"(in), [out]"+r"(out)
472  : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2),
473  [temp3]"f"(temp3)
474  : "memory"
475  );
476  }
 /*
  * Odd m_max: one band is left. in and out already point at it
  * because the asm loop advanced them, so index from 0 rather
  * than from m.
  */
477  if(m_max&1)
478  out[0] += in[0] * A;
479  }
480  indexnoise = (indexnoise + m_max) & 0x1ff;
481  indexsine = (indexsine + 1) & 3;
482  }
483  }
484  ch_data->f_indexnoise = indexnoise;
485  ch_data->f_indexsine = indexsine;
486 }
487 
/**
 * MIPS FPU routine that derives alpha0[k] and alpha1[k] for k < k0 from the
 * autocorrelation of X_low[k].
 *
 * For each k, dsp->autocorrelate(X_low[k], phi) fills phi[3][2][2], which is
 * then addressed as a flat float array through phi1 (byte offset / 4 =
 * flat index = a * 4 + b * 2 + c for phi[a][b][c]).
 *
 *  - dk = phi[2][1][0] * phi[1][0][0]
 *         - (phi[1][1][0]^2 + phi[1][1][1]^2) / 1.000001f
 *  - alpha1[k] is zero when dk is zero, otherwise it is computed from the
 *    phi terms and divided by dk.
 *  - alpha0[k] is zero when phi[1][0][0] (phi1[4]) is zero, otherwise it is
 *    computed from phi and alpha1[k], divided by phi[1][0][0] and negated.
 *  - If the squared magnitude of either coefficient is >= 16, both are
 *    reset to zero.
 *
 * The FPU temporaries loaded in the first asm statement are passed to the
 * following asm statements as inputs, so the statement order matters.
 *
 * @param dsp    provides the autocorrelate callback
 * @param alpha0 output, one pair per k
 * @param alpha1 output, one pair per k
 * @param X_low  input rows, one per k
 * @param k0     number of rows processed
 */
488 static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
489  float (*alpha0)[2], float (*alpha1)[2],
490  const float X_low[32][40][2], int k0)
491 {
492  int k;
493  float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c;
494  float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;
495 
 /* Divisor applied to the squared term of dk. */
496  c = 1.000001f;
497 
498  for (k = 0; k < k0; k++) {
499  LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
500  float dk;
501  phi1 = &phi[0][0][0];
502  alpha_1 = &alpha1[k][0];
503  alpha_0 = &alpha0[k][0];
504  dsp->autocorrelate(X_low[k], phi);
505 
 /*
  * Load the phi terms and compute dk. On exit:
  *   temp1 = phi[1][0][0], temp2 = phi[1][1][0], temp3 = phi[1][1][1],
  *   temp4 = phi[0][0][0], temp5 = phi[0][0][1], temp6 = phi[0][1][0],
  *   temp0 = phi[0][1][1] (reloaded after its first use as phi[2][1][0]).
  */
506  __asm__ volatile (
507  "lwc1 %[temp0], 40(%[phi1]) \n\t"
508  "lwc1 %[temp1], 16(%[phi1]) \n\t"
509  "lwc1 %[temp2], 24(%[phi1]) \n\t"
510  "lwc1 %[temp3], 28(%[phi1]) \n\t"
511  "mul.s %[dk], %[temp0], %[temp1] \n\t"
512  "lwc1 %[temp4], 0(%[phi1]) \n\t"
513  "mul.s %[res2], %[temp2], %[temp2] \n\t"
514  "lwc1 %[temp5], 4(%[phi1]) \n\t"
515  "madd.s %[res2], %[res2], %[temp3], %[temp3] \n\t"
516  "lwc1 %[temp6], 8(%[phi1]) \n\t"
517  "div.s %[res2], %[res2], %[c] \n\t"
518  "lwc1 %[temp0], 12(%[phi1]) \n\t"
519  "sub.s %[dk], %[dk], %[res2] \n\t"
520 
521  : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
522  [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
523  [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk)
524  : [phi1]"r"(phi1), [c]"f"(c)
525  : "memory"
526  );
527 
 /* alpha1[k]: zero for a zero dk, otherwise computed and stored by the asm. */
528  if (!dk) {
529  alpha_1[0] = 0;
530  alpha_1[1] = 0;
531  } else {
532  __asm__ volatile (
533  "mul.s %[temp_real], %[temp4], %[temp2] \n\t"
534  "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3] \n\t"
535  "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1] \n\t"
536  "mul.s %[temp_im], %[temp4], %[temp3] \n\t"
537  "madd.s %[temp_im], %[temp_im], %[temp5], %[temp2] \n\t"
538  "nmsub.s %[temp_im], %[temp_im], %[temp0], %[temp1] \n\t"
539  "div.s %[temp_real], %[temp_real], %[dk] \n\t"
540  "div.s %[temp_im], %[temp_im], %[dk] \n\t"
541  "swc1 %[temp_real], 0(%[alpha_1]) \n\t"
542  "swc1 %[temp_im], 4(%[alpha_1]) \n\t"
543 
544  : [temp_real]"=&f" (temp_real), [temp_im]"=&f"(temp_im)
545  : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1),
546  [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4),
547  [temp5]"f"(temp5), [temp6]"f"(temp6),
548  [alpha_1]"r"(alpha_1), [dk]"f"(dk)
549  : "memory"
550  );
551  }
552 
 /* alpha0[k]: phi1[4] is phi[1][0][0], the divisor used below (temp1). */
553  if (!phi1[4]) {
554  alpha_0[0] = 0;
555  alpha_0[1] = 0;
556  } else {
 /* Reads alpha1[k] back from memory into temp6 / temp7. */
557  __asm__ volatile (
558  "lwc1 %[temp6], 0(%[alpha_1]) \n\t"
559  "lwc1 %[temp7], 4(%[alpha_1]) \n\t"
560  "mul.s %[temp_real], %[temp6], %[temp2] \n\t"
561  "add.s %[temp_real], %[temp_real], %[temp4] \n\t"
562  "madd.s %[temp_real], %[temp_real], %[temp7], %[temp3] \n\t"
563  "mul.s %[temp_im], %[temp7], %[temp2] \n\t"
564  "add.s %[temp_im], %[temp_im], %[temp5] \n\t"
565  "nmsub.s %[temp_im], %[temp_im], %[temp6], %[temp3] \n\t"
566  "div.s %[temp_real], %[temp_real], %[temp1] \n\t"
567  "div.s %[temp_im], %[temp_im], %[temp1] \n\t"
568  "neg.s %[temp_real], %[temp_real] \n\t"
569  "neg.s %[temp_im], %[temp_im] \n\t"
570  "swc1 %[temp_real], 0(%[alpha_0]) \n\t"
571  "swc1 %[temp_im], 4(%[alpha_0]) \n\t"
572 
573  : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
574  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
575  [res1]"=&f"(res1), [res2]"=&f"(res2)
576  : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0),
577  [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2),
578  [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5)
579  : "memory"
580  );
581  }
582 
 /* res1 = |alpha1[k]|^2, res2 = |alpha0[k]|^2, both reloaded from memory. */
583  __asm__ volatile (
584  "lwc1 %[temp1], 0(%[alpha_1]) \n\t"
585  "lwc1 %[temp2], 4(%[alpha_1]) \n\t"
586  "lwc1 %[temp_real], 0(%[alpha_0]) \n\t"
587  "lwc1 %[temp_im], 4(%[alpha_0]) \n\t"
588  "mul.s %[res1], %[temp1], %[temp1] \n\t"
589  "madd.s %[res1], %[res1], %[temp2], %[temp2] \n\t"
590  "mul.s %[res2], %[temp_real], %[temp_real] \n\t"
591  "madd.s %[res2], %[res2], %[temp_im], %[temp_im] \n\t"
592 
593  : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
594  [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
595  [res1]"=&f"(res1), [res2]"=&f"(res2)
596  : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0)
597  : "memory"
598  );
599 
 /* Discard both coefficients when either squared magnitude reaches 16. */
600  if (res1 >= 16.0f || res2 >= 16.0f) {
601  alpha_1[0] = 0;
602  alpha_1[1] = 0;
603  alpha_0[0] = 0;
604  alpha_0[1] = 0;
605  }
606  }
607 }
608 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
609 #endif /* HAVE_MIPSFPU */
610 #endif /* HAVE_INLINE_ASM */
611 
/**
 * Install the MIPS implementations into the AAC SBR function-pointer table.
 *
 * Pointers are only overridden when inline assembly and the MIPS FPU are
 * available; sbr_hf_inverse_filter and sbr_hf_assemble are additionally
 * skipped on MIPS32R6 / MIPS64R6 builds, where their implementations are
 * compiled out. With none of these enabled the function does nothing.
 *
 * The function header was missing from this listing and is restored here.
 *
 * @param c function-pointer table to update
 */
612 void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
613 {
614 #if HAVE_INLINE_ASM
615 #if HAVE_MIPSFPU
616  c->sbr_lf_gen = sbr_lf_gen_mips;
617  c->sbr_x_gen = sbr_x_gen_mips;
618 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
619  c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
620  c->sbr_hf_assemble = sbr_hf_assemble_mips;
621 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
622 #endif /* HAVE_MIPSFPU */
623 #endif /* HAVE_INLINE_ASM */
624 }
A
#define A(x)
Definition: vpx_arith.h:28
SpectralBandReplication::data
SBRData data[2]
Definition: sbr.h:167
mem_internal.h
SBRDSPContext
Definition: sbrdsp.h:27
out
FILE * out
Definition: movenc.c:54
aacsbr.h
SpectralBandReplication::m
AAC_SIGNE m[2]
M' and M respectively, M is the number of QMF subbands that use SBR.
Definition: sbr.h:163
SpectralBandReplication::q_m
AAC_FLOAT q_m[7][48]
Amplitude adjusted noise scalefactors.
Definition: sbr.h:207
SBRData::t_env_num_env_old
uint8_t t_env_num_env_old
Envelope time border of the last envelope of the previous frame.
Definition: sbr.h:108
asmdefs.h
SBRData::t_env
uint8_t t_env[8]
Envelope time borders.
Definition: sbr.h:106
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SpectralBandReplication::reset
int reset
Definition: sbr.h:145
float
float
Definition: af_crystalizer.c:121
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:150
B
#define B
Definition: huffyuv.h:42
SBRData::g_temp
AAC_FLOAT g_temp[42][48]
Definition: sbr.h:96
SBRDSPContext::autocorrelate
void(* autocorrelate)(const INTFLOAT x[40][2], AAC_FLOAT phi[3][2][2])
Definition: sbrdsp.h:35
SpectralBandReplication::s_m
AAC_FLOAT s_m[7][48]
Sinusoidal levels.
Definition: sbr.h:209
SBRData::f_indexnoise
unsigned f_indexnoise
Definition: sbr.h:111
AACSBRContext
aacsbr functions pointers
Definition: sbr.h:121
SBRDSPContext::hf_g_filt
void(* hf_g_filt)(INTFLOAT(*Y)[2], const INTFLOAT(*X_high)[40][2], const AAC_FLOAT *g_filt, int m_max, intptr_t ixh)
Definition: sbrdsp.h:39
SBRData::bs_num_env
AAC_SIGNE bs_num_env
Definition: sbr.h:70
SpectralBandReplication::bs_smoothing_mode
unsigned bs_smoothing_mode
Definition: sbr.h:155
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
X
@ X
Definition: vf_addroi.c:27
ff_aacsbr_func_ptr_init_mips
void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
Definition: aacsbr_mips.c:612
f
f
Definition: af_crystalizer.c:121
size
int size
Definition: twinvq_data.h:10344
SpectralBandReplication::gain
AAC_FLOAT gain[7][48]
Definition: sbr.h:210
SpectralBandReplication
Spectral Band Replication.
Definition: sbr.h:140
SBRDSPContext::hf_apply_noise
void(* hf_apply_noise[4])(INTFLOAT(*Y)[2], const AAC_FLOAT *s_m, const AAC_FLOAT *q_filt, int noise, int kx, int m_max)
Definition: sbrdsp.h:41
W
@ W
Definition: vf_addroi.c:27
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
SBRData
Spectral Band Replication per channel data.
Definition: sbr.h:63
ENVELOPE_ADJUSTMENT_OFFSET
#define ENVELOPE_ADJUSTMENT_OFFSET
Definition: aacsbr_mips.c:59
SBRData::f_indexsine
unsigned f_indexsine
Definition: sbr.h:112
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
SBRData::q_temp
AAC_FLOAT q_temp[42][48]
Definition: sbr.h:97
aacdec.h
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:50
SpectralBandReplication::kx
AAC_SIGNE kx[2]
kx', and kx respectively, kx is the first QMF subband where SBR is used.
Definition: sbr.h:161
SpectralBandReplication::dsp
SBRDSPContext dsp
Definition: sbr.h:216