FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
aacsbr_mips.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012
3  * MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  * contributors may be used to endorse or promote products derived from
15  * this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Authors: Djordje Pesut (djordje@mips.com)
30  * Mirjana Vulin (mvulin@mips.com)
31  *
32  * This file is part of FFmpeg.
33  *
34  * FFmpeg is free software; you can redistribute it and/or
35  * modify it under the terms of the GNU Lesser General Public
36  * License as published by the Free Software Foundation; either
37  * version 2.1 of the License, or (at your option) any later version.
38  *
39  * FFmpeg is distributed in the hope that it will be useful,
40  * but WITHOUT ANY WARRANTY; without even the implied warranty of
41  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
42  * Lesser General Public License for more details.
43  *
44  * You should have received a copy of the GNU Lesser General Public
45  * License along with FFmpeg; if not, write to the Free Software
46  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
47  */
48 
49 /**
50  * @file
51  * Reference: libavcodec/aacsbr.c
52  */
53 
54 #include "libavcodec/aac.h"
55 #include "libavcodec/aacsbr.h"
56 #include "libavutil/mips/asmdefs.h"
57 
58 #define ENVELOPE_ADJUSTMENT_OFFSET 2
59 
60 #if HAVE_INLINE_ASM
61 static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
62  float X_low[32][40][2], const float W[2][32][32][2],
63  int buf_idx)
64 {
65  int i, k;
66  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
67  float *p_x_low = &X_low[0][8][0];
68  float *p_w = (float*)&W[buf_idx][0][0][0];
69  float *p_x1_low = &X_low[0][0][0];
70  float *p_w1 = (float*)&W[1-buf_idx][24][0][0];
71 
72  float *loop_end=p_x1_low + 2560;
73 
74  /* loop unrolled 8 times */
75  __asm__ volatile (
76  "1: \n\t"
77  "sw $0, 0(%[p_x1_low]) \n\t"
78  "sw $0, 4(%[p_x1_low]) \n\t"
79  "sw $0, 8(%[p_x1_low]) \n\t"
80  "sw $0, 12(%[p_x1_low]) \n\t"
81  "sw $0, 16(%[p_x1_low]) \n\t"
82  "sw $0, 20(%[p_x1_low]) \n\t"
83  "sw $0, 24(%[p_x1_low]) \n\t"
84  "sw $0, 28(%[p_x1_low]) \n\t"
85  PTR_ADDIU "%[p_x1_low],%[p_x1_low], 32 \n\t"
86  "bne %[p_x1_low], %[loop_end], 1b \n\t"
87  PTR_ADDIU "%[p_x1_low],%[p_x1_low], -10240 \n\t"
88 
89  : [p_x1_low]"+r"(p_x1_low)
90  : [loop_end]"r"(loop_end)
91  : "memory"
92  );
93 
94  for (k = 0; k < sbr->kx[1]; k++) {
95  for (i = 0; i < 32; i+=4) {
96  /* loop unrolled 4 times */
97  __asm__ volatile (
98  "lw %[temp0], 0(%[p_w]) \n\t"
99  "lw %[temp1], 4(%[p_w]) \n\t"
100  "lw %[temp2], 256(%[p_w]) \n\t"
101  "lw %[temp3], 260(%[p_w]) \n\t"
102  "lw %[temp4], 512(%[p_w]) \n\t"
103  "lw %[temp5], 516(%[p_w]) \n\t"
104  "lw %[temp6], 768(%[p_w]) \n\t"
105  "lw %[temp7], 772(%[p_w]) \n\t"
106  "sw %[temp0], 0(%[p_x_low]) \n\t"
107  "sw %[temp1], 4(%[p_x_low]) \n\t"
108  "sw %[temp2], 8(%[p_x_low]) \n\t"
109  "sw %[temp3], 12(%[p_x_low]) \n\t"
110  "sw %[temp4], 16(%[p_x_low]) \n\t"
111  "sw %[temp5], 20(%[p_x_low]) \n\t"
112  "sw %[temp6], 24(%[p_x_low]) \n\t"
113  "sw %[temp7], 28(%[p_x_low]) \n\t"
114  PTR_ADDIU "%[p_x_low], %[p_x_low], 32 \n\t"
115  PTR_ADDIU "%[p_w], %[p_w], 1024 \n\t"
116 
117  : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
118  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
119  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
120  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
121  [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low)
122  :
123  : "memory"
124  );
125  }
126  p_x_low += 16;
127  p_w -= 2046;
128  }
129 
130  for (k = 0; k < sbr->kx[0]; k++) {
131  for (i = 0; i < 2; i++) {
132 
133  /* loop unrolled 4 times */
134  __asm__ volatile (
135  "lw %[temp0], 0(%[p_w1]) \n\t"
136  "lw %[temp1], 4(%[p_w1]) \n\t"
137  "lw %[temp2], 256(%[p_w1]) \n\t"
138  "lw %[temp3], 260(%[p_w1]) \n\t"
139  "lw %[temp4], 512(%[p_w1]) \n\t"
140  "lw %[temp5], 516(%[p_w1]) \n\t"
141  "lw %[temp6], 768(%[p_w1]) \n\t"
142  "lw %[temp7], 772(%[p_w1]) \n\t"
143  "sw %[temp0], 0(%[p_x1_low]) \n\t"
144  "sw %[temp1], 4(%[p_x1_low]) \n\t"
145  "sw %[temp2], 8(%[p_x1_low]) \n\t"
146  "sw %[temp3], 12(%[p_x1_low]) \n\t"
147  "sw %[temp4], 16(%[p_x1_low]) \n\t"
148  "sw %[temp5], 20(%[p_x1_low]) \n\t"
149  "sw %[temp6], 24(%[p_x1_low]) \n\t"
150  "sw %[temp7], 28(%[p_x1_low]) \n\t"
151  PTR_ADDIU "%[p_x1_low], %[p_x1_low], 32 \n\t"
152  PTR_ADDIU "%[p_w1], %[p_w1], 1024 \n\t"
153 
154  : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
155  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
156  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
157  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
158  [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low)
159  :
160  : "memory"
161  );
162  }
163  p_x1_low += 64;
164  p_w1 -= 510;
165  }
166  return 0;
167 }
168 
169 static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
170  const float Y0[38][64][2], const float Y1[38][64][2],
171  const float X_low[32][40][2], int ch)
172 {
173  int k, i;
174  const int i_f = 32;
175  int temp0, temp1, temp2, temp3;
176  const float *X_low1, *Y01, *Y11;
177  float *x1=&X[0][0][0];
178  float *j=x1+4864;
179  const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);
180 
181  /* loop unrolled 8 times */
182  __asm__ volatile (
183  "1: \n\t"
184  "sw $0, 0(%[x1]) \n\t"
185  "sw $0, 4(%[x1]) \n\t"
186  "sw $0, 8(%[x1]) \n\t"
187  "sw $0, 12(%[x1]) \n\t"
188  "sw $0, 16(%[x1]) \n\t"
189  "sw $0, 20(%[x1]) \n\t"
190  "sw $0, 24(%[x1]) \n\t"
191  "sw $0, 28(%[x1]) \n\t"
192  PTR_ADDIU "%[x1],%[x1], 32 \n\t"
193  "bne %[x1], %[j], 1b \n\t"
194  PTR_ADDIU "%[x1],%[x1], -19456 \n\t"
195 
196  : [x1]"+r"(x1)
197  : [j]"r"(j)
198  : "memory"
199  );
200 
201  if (i_Temp != 0) {
202 
203  X_low1=&X_low[0][2][0];
204 
205  for (k = 0; k < sbr->kx[0]; k++) {
206 
207  __asm__ volatile (
208  "move %[i], $zero \n\t"
209  "2: \n\t"
210  "lw %[temp0], 0(%[X_low1]) \n\t"
211  "lw %[temp1], 4(%[X_low1]) \n\t"
212  "sw %[temp0], 0(%[x1]) \n\t"
213  "sw %[temp1], 9728(%[x1]) \n\t"
214  PTR_ADDIU "%[x1], %[x1], 256 \n\t"
215  PTR_ADDIU "%[X_low1], %[X_low1], 8 \n\t"
216  "addiu %[i], %[i], 1 \n\t"
217  "bne %[i], %[i_Temp], 2b \n\t"
218 
219  : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
220  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
221  : [i_Temp]"r"(i_Temp)
222  : "memory"
223  );
224  x1-=(i_Temp<<6)-1;
225  X_low1-=(i_Temp<<1)-80;
226  }
227 
228  x1=&X[0][0][k];
229  Y01=(float*)&Y0[32][k][0];
230 
231  for (; k < sbr->kx[0] + sbr->m[0]; k++) {
232  __asm__ volatile (
233  "move %[i], $zero \n\t"
234  "3: \n\t"
235  "lw %[temp0], 0(%[Y01]) \n\t"
236  "lw %[temp1], 4(%[Y01]) \n\t"
237  "sw %[temp0], 0(%[x1]) \n\t"
238  "sw %[temp1], 9728(%[x1]) \n\t"
239  PTR_ADDIU "%[x1], %[x1], 256 \n\t"
240  PTR_ADDIU "%[Y01], %[Y01], 512 \n\t"
241  "addiu %[i], %[i], 1 \n\t"
242  "bne %[i], %[i_Temp], 3b \n\t"
243 
244  : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i),
245  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
246  : [i_Temp]"r"(i_Temp)
247  : "memory"
248  );
249  x1 -=(i_Temp<<6)-1;
250  Y01 -=(i_Temp<<7)-2;
251  }
252  }
253 
254  x1=&X[0][i_Temp][0];
255  X_low1=&X_low[0][i_Temp+2][0];
256  temp3=38;
257 
258  for (k = 0; k < sbr->kx[1]; k++) {
259 
260  __asm__ volatile (
261  "move %[i], %[i_Temp] \n\t"
262  "4: \n\t"
263  "lw %[temp0], 0(%[X_low1]) \n\t"
264  "lw %[temp1], 4(%[X_low1]) \n\t"
265  "sw %[temp0], 0(%[x1]) \n\t"
266  "sw %[temp1], 9728(%[x1]) \n\t"
267  PTR_ADDIU "%[x1], %[x1], 256 \n\t"
268  PTR_ADDIU "%[X_low1],%[X_low1], 8 \n\t"
269  "addiu %[i], %[i], 1 \n\t"
270  "bne %[i], %[temp3], 4b \n\t"
271 
272  : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
273  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
274  [temp2]"=&r"(temp2)
275  : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3)
276  : "memory"
277  );
278  x1 -= ((38-i_Temp)<<6)-1;
279  X_low1 -= ((38-i_Temp)<<1)- 80;
280  }
281 
282  x1=&X[0][i_Temp][k];
283  Y11=&Y1[i_Temp][k][0];
284  temp2=32;
285 
286  for (; k < sbr->kx[1] + sbr->m[1]; k++) {
287 
288  __asm__ volatile (
289  "move %[i], %[i_Temp] \n\t"
290  "5: \n\t"
291  "lw %[temp0], 0(%[Y11]) \n\t"
292  "lw %[temp1], 4(%[Y11]) \n\t"
293  "sw %[temp0], 0(%[x1]) \n\t"
294  "sw %[temp1], 9728(%[x1]) \n\t"
295  PTR_ADDIU "%[x1], %[x1], 256 \n\t"
296  PTR_ADDIU "%[Y11], %[Y11], 512 \n\t"
297  "addiu %[i], %[i], 1 \n\t"
298  "bne %[i], %[temp2], 5b \n\t"
299 
300  : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i),
301  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
302  : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3),
303  [temp2]"r"(temp2)
304  : "memory"
305  );
306 
307  x1 -= ((32-i_Temp)<<6)-1;
308  Y11 -= ((32-i_Temp)<<7)-2;
309  }
310  return 0;
311 }
312 
313 #if HAVE_MIPSFPU
314 static void sbr_hf_assemble_mips(float Y1[38][64][2],
315  const float X_high[64][40][2],
316  SpectralBandReplication *sbr, SBRData *ch_data,
317  const int e_a[2])
318 {
319  int e, i, j, m;
320  const int h_SL = 4 * !sbr->bs_smoothing_mode;
321  const int kx = sbr->kx[1];
322  const int m_max = sbr->m[1];
323  static const float h_smooth[5] = {
324  0.33333333333333,
325  0.30150283239582,
326  0.21816949906249,
327  0.11516383427084,
328  0.03183050093751,
329  };
330 
331  float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
332  int indexnoise = ch_data->f_indexnoise;
333  int indexsine = ch_data->f_indexsine;
334  float *g_temp1, *q_temp1, *pok, *pok1;
335  float temp1, temp2, temp3, temp4;
336  int size = m_max;
337 
338  if (sbr->reset) {
339  for (i = 0; i < h_SL; i++) {
340  memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
341  memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0], m_max * sizeof(sbr->q_m[0][0]));
342  }
343  } else if (h_SL) {
344  memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0]));
345  memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0]));
346  }
347 
348  for (e = 0; e < ch_data->bs_num_env; e++) {
349  for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
350  g_temp1 = g_temp[h_SL + i];
351  pok = sbr->gain[e];
352  q_temp1 = q_temp[h_SL + i];
353  pok1 = sbr->q_m[e];
354 
355  /* loop unrolled 4 times */
356  for (j=0; j<(size>>2); j++) {
357  __asm__ volatile (
358  "lw %[temp1], 0(%[pok]) \n\t"
359  "lw %[temp2], 4(%[pok]) \n\t"
360  "lw %[temp3], 8(%[pok]) \n\t"
361  "lw %[temp4], 12(%[pok]) \n\t"
362  "sw %[temp1], 0(%[g_temp1]) \n\t"
363  "sw %[temp2], 4(%[g_temp1]) \n\t"
364  "sw %[temp3], 8(%[g_temp1]) \n\t"
365  "sw %[temp4], 12(%[g_temp1]) \n\t"
366  "lw %[temp1], 0(%[pok1]) \n\t"
367  "lw %[temp2], 4(%[pok1]) \n\t"
368  "lw %[temp3], 8(%[pok1]) \n\t"
369  "lw %[temp4], 12(%[pok1]) \n\t"
370  "sw %[temp1], 0(%[q_temp1]) \n\t"
371  "sw %[temp2], 4(%[q_temp1]) \n\t"
372  "sw %[temp3], 8(%[q_temp1]) \n\t"
373  "sw %[temp4], 12(%[q_temp1]) \n\t"
374  PTR_ADDIU "%[pok], %[pok], 16 \n\t"
375  PTR_ADDIU "%[g_temp1], %[g_temp1], 16 \n\t"
376  PTR_ADDIU "%[pok1], %[pok1], 16 \n\t"
377  PTR_ADDIU "%[q_temp1], %[q_temp1], 16 \n\t"
378 
379  : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
380  [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
381  [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
382  [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
383  :
384  : "memory"
385  );
386  }
387 
388  for (j=0; j<(size&3); j++) {
389  __asm__ volatile (
390  "lw %[temp1], 0(%[pok]) \n\t"
391  "lw %[temp2], 0(%[pok1]) \n\t"
392  "sw %[temp1], 0(%[g_temp1]) \n\t"
393  "sw %[temp2], 0(%[q_temp1]) \n\t"
394  PTR_ADDIU "%[pok], %[pok], 4 \n\t"
395  PTR_ADDIU "%[g_temp1], %[g_temp1], 4 \n\t"
396  PTR_ADDIU "%[pok1], %[pok1], 4 \n\t"
397  PTR_ADDIU "%[q_temp1], %[q_temp1], 4 \n\t"
398 
399  : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
400  [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
401  [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
402  [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
403  :
404  : "memory"
405  );
406  }
407  }
408  }
409 
410  for (e = 0; e < ch_data->bs_num_env; e++) {
411  for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
412  LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
413  LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
414  float *g_filt, *q_filt;
415 
416  if (h_SL && e != e_a[0] && e != e_a[1]) {
417  g_filt = g_filt_tab;
418  q_filt = q_filt_tab;
419 
420  for (m = 0; m < m_max; m++) {
421  const int idx1 = i + h_SL;
422  g_filt[m] = 0.0f;
423  q_filt[m] = 0.0f;
424 
425  for (j = 0; j <= h_SL; j++) {
426  g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
427  q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
428  }
429  }
430  } else {
431  g_filt = g_temp[i + h_SL];
432  q_filt = q_temp[i];
433  }
434 
435  sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
437 
438  if (e != e_a[0] && e != e_a[1]) {
439  sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
440  q_filt, indexnoise,
441  kx, m_max);
442  } else {
443  int idx = indexsine&1;
444  int A = (1-((indexsine+(kx & 1))&2));
445  int B = (A^(-idx)) + idx;
446  float *out = &Y1[i][kx][idx];
447  float *in = sbr->s_m[e];
448  float temp0, temp1, temp2, temp3, temp4, temp5;
449  float A_f = (float)A;
450  float B_f = (float)B;
451 
452  for (m = 0; m+1 < m_max; m+=2) {
453 
454  temp2 = out[0];
455  temp3 = out[2];
456 
457  __asm__ volatile(
458  "lwc1 %[temp0], 0(%[in]) \n\t"
459  "lwc1 %[temp1], 4(%[in]) \n\t"
460  "madd.s %[temp4], %[temp2], %[temp0], %[A_f] \n\t"
461  "madd.s %[temp5], %[temp3], %[temp1], %[B_f] \n\t"
462  "swc1 %[temp4], 0(%[out]) \n\t"
463  "swc1 %[temp5], 8(%[out]) \n\t"
464  PTR_ADDIU "%[in], %[in], 8 \n\t"
465  PTR_ADDIU "%[out], %[out], 16 \n\t"
466 
467  : [temp0]"=&f" (temp0), [temp1]"=&f"(temp1),
468  [temp4]"=&f" (temp4), [temp5]"=&f"(temp5),
469  [in]"+r"(in), [out]"+r"(out)
470  : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2),
471  [temp3]"f"(temp3)
472  : "memory"
473  );
474  }
475  if(m_max&1)
476  out[2*m ] += in[m ] * A;
477  }
478  indexnoise = (indexnoise + m_max) & 0x1ff;
479  indexsine = (indexsine + 1) & 3;
480  }
481  }
482  ch_data->f_indexnoise = indexnoise;
483  ch_data->f_indexsine = indexsine;
484 }
485 
486 static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
487  float (*alpha0)[2], float (*alpha1)[2],
488  const float X_low[32][40][2], int k0)
489 {
490  int k;
491  float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c;
492  float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;
493 
494  c = 1.000001f;
495 
496  for (k = 0; k < k0; k++) {
497  LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
498  float dk;
499  phi1 = &phi[0][0][0];
500  alpha_1 = &alpha1[k][0];
501  alpha_0 = &alpha0[k][0];
502  dsp->autocorrelate(X_low[k], phi);
503 
504  __asm__ volatile (
505  "lwc1 %[temp0], 40(%[phi1]) \n\t"
506  "lwc1 %[temp1], 16(%[phi1]) \n\t"
507  "lwc1 %[temp2], 24(%[phi1]) \n\t"
508  "lwc1 %[temp3], 28(%[phi1]) \n\t"
509  "mul.s %[dk], %[temp0], %[temp1] \n\t"
510  "lwc1 %[temp4], 0(%[phi1]) \n\t"
511  "mul.s %[res2], %[temp2], %[temp2] \n\t"
512  "lwc1 %[temp5], 4(%[phi1]) \n\t"
513  "madd.s %[res2], %[res2], %[temp3], %[temp3] \n\t"
514  "lwc1 %[temp6], 8(%[phi1]) \n\t"
515  "div.s %[res2], %[res2], %[c] \n\t"
516  "lwc1 %[temp0], 12(%[phi1]) \n\t"
517  "sub.s %[dk], %[dk], %[res2] \n\t"
518 
519  : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
520  [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
521  [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk)
522  : [phi1]"r"(phi1), [c]"f"(c)
523  : "memory"
524  );
525 
526  if (!dk) {
527  alpha_1[0] = 0;
528  alpha_1[1] = 0;
529  } else {
530  __asm__ volatile (
531  "mul.s %[temp_real], %[temp4], %[temp2] \n\t"
532  "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3] \n\t"
533  "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1] \n\t"
534  "mul.s %[temp_im], %[temp4], %[temp3] \n\t"
535  "madd.s %[temp_im], %[temp_im], %[temp5], %[temp2] \n\t"
536  "nmsub.s %[temp_im], %[temp_im], %[temp0], %[temp1] \n\t"
537  "div.s %[temp_real], %[temp_real], %[dk] \n\t"
538  "div.s %[temp_im], %[temp_im], %[dk] \n\t"
539  "swc1 %[temp_real], 0(%[alpha_1]) \n\t"
540  "swc1 %[temp_im], 4(%[alpha_1]) \n\t"
541 
542  : [temp_real]"=&f" (temp_real), [temp_im]"=&f"(temp_im)
543  : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1),
544  [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4),
545  [temp5]"f"(temp5), [temp6]"f"(temp6),
546  [alpha_1]"r"(alpha_1), [dk]"f"(dk)
547  : "memory"
548  );
549  }
550 
551  if (!phi1[4]) {
552  alpha_0[0] = 0;
553  alpha_0[1] = 0;
554  } else {
555  __asm__ volatile (
556  "lwc1 %[temp6], 0(%[alpha_1]) \n\t"
557  "lwc1 %[temp7], 4(%[alpha_1]) \n\t"
558  "mul.s %[temp_real], %[temp6], %[temp2] \n\t"
559  "add.s %[temp_real], %[temp_real], %[temp4] \n\t"
560  "madd.s %[temp_real], %[temp_real], %[temp7], %[temp3] \n\t"
561  "mul.s %[temp_im], %[temp7], %[temp2] \n\t"
562  "add.s %[temp_im], %[temp_im], %[temp5] \n\t"
563  "nmsub.s %[temp_im], %[temp_im], %[temp6], %[temp3] \n\t"
564  "div.s %[temp_real], %[temp_real], %[temp1] \n\t"
565  "div.s %[temp_im], %[temp_im], %[temp1] \n\t"
566  "neg.s %[temp_real], %[temp_real] \n\t"
567  "neg.s %[temp_im], %[temp_im] \n\t"
568  "swc1 %[temp_real], 0(%[alpha_0]) \n\t"
569  "swc1 %[temp_im], 4(%[alpha_0]) \n\t"
570 
571  : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
572  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
573  [res1]"=&f"(res1), [res2]"=&f"(res2)
574  : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0),
575  [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2),
576  [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5)
577  : "memory"
578  );
579  }
580 
581  __asm__ volatile (
582  "lwc1 %[temp1], 0(%[alpha_1]) \n\t"
583  "lwc1 %[temp2], 4(%[alpha_1]) \n\t"
584  "lwc1 %[temp_real], 0(%[alpha_0]) \n\t"
585  "lwc1 %[temp_im], 4(%[alpha_0]) \n\t"
586  "mul.s %[res1], %[temp1], %[temp1] \n\t"
587  "madd.s %[res1], %[res1], %[temp2], %[temp2] \n\t"
588  "mul.s %[res2], %[temp_real], %[temp_real] \n\t"
589  "madd.s %[res2], %[res2], %[temp_im], %[temp_im] \n\t"
590 
591  : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
592  [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
593  [res1]"=&f"(res1), [res2]"=&f"(res2)
594  : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0)
595  : "memory"
596  );
597 
598  if (res1 >= 16.0f || res2 >= 16.0f) {
599  alpha_1[0] = 0;
600  alpha_1[1] = 0;
601  alpha_0[0] = 0;
602  alpha_0[1] = 0;
603  }
604  }
605 }
606 #endif /* HAVE_MIPSFPU */
607 #endif /* HAVE_INLINE_ASM */
608 
610 {
611 #if HAVE_INLINE_ASM
612  c->sbr_lf_gen = sbr_lf_gen_mips;
613  c->sbr_x_gen = sbr_x_gen_mips;
614 #if HAVE_MIPSFPU
615  c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
616  c->sbr_hf_assemble = sbr_hf_assemble_mips;
617 #endif /* HAVE_MIPSFPU */
618 #endif /* HAVE_INLINE_ASM */
619 }