/*
 * NOTE(review): no use of this macro is visible in this excerpt. The code
 * further down indexes X_low with a literal 2 (and i_Temp+2); presumably
 * that is the same offset - confirm against the full file.
 */
57 #define ENVELOPE_ADJUSTMENT_OFFSET 2
/*
 * Fragment of a routine that clears X_low and then fills parts of it from
 * the two halves of W using MIPS inline assembly.
 * The function header, the asm statement openers, the local labels, any
 * clobber lists and the closing braces are not visible in this excerpt.
 */
61 float X_low[32][40][2],
const float W[2][32][32][2],
/* Integer scratch variables; the asm blocks bind them to general-purpose
 * registers as early-clobber outputs ("=&r"). */
65 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
/* Destination cursor of the first copy loop: starts at X_low[0][8]. */
66 float *p_x_low = &X_low[0][8][0];
/* Source cursor of the first copy loop: start of W[buf_idx].
 * The cast removes the const qualifier of W. */
67 float *p_w = (
float*)&W[buf_idx][0][0][0];
/* Cursor used first by the zeroing loop and then as destination of the
 * second copy loop: starts at X_low[0][0]. */
68 float *p_x1_low = &X_low[0][0][0];
/* Source cursor of the second copy loop: W[1-buf_idx][24], i.e. the other
 * half of W than the one p_w reads. */
69 float *p_w1 = (
float*)&W[1-buf_idx][24][0][0];
/* End marker for the zeroing loop: 2560 floats = 32*40*2, the full size of
 * X_low as declared above. */
71 float *loop_end=p_x1_low + 2560;
/* Zeroing loop: store register $0 (the MIPS hard-wired zero) to eight
 * consecutive words, i.e. 32 bytes per pass. */
76 "sw $0, 0(%[p_x1_low]) \n\t"
77 "sw $0, 4(%[p_x1_low]) \n\t"
78 "sw $0, 8(%[p_x1_low]) \n\t"
79 "sw $0, 12(%[p_x1_low]) \n\t"
80 "sw $0, 16(%[p_x1_low]) \n\t"
81 "sw $0, 20(%[p_x1_low]) \n\t"
82 "sw $0, 24(%[p_x1_low]) \n\t"
83 "sw $0, 28(%[p_x1_low]) \n\t"
84 "addiu %[p_x1_low], %[p_x1_low], 32 \n\t"
/* Repeat until the cursor reaches loop_end. The target label 1 is not
 * visible in this excerpt. */
85 "bne %[p_x1_low], %[loop_end], 1b \n\t"
/* Subtract 10240 bytes (2560 words of 4 bytes), the distance covered by the
 * zeroing loop.
 * NOTE(review): this restores the start pointer only if it executes once
 * after the loop and not in the branch delay slot on every pass - confirm
 * that the assembler is in its default reorder mode here. */
86 "addiu %[p_x1_low], %[p_x1_low], -10240 \n\t"
/* p_x1_low is read and written by the asm; loop_end is input only. */
88 : [p_x1_low]
"+r"(p_x1_low)
89 : [loop_end]
"r"(loop_end)
/*
 * First copy loop: for every k below sbr->kx[1], eight passes (i = 0, 4,
 * ..., 28). Each pass loads four two-word pairs from p_w at byte offsets
 * 0, 256, 512 and 768 and stores them to eight consecutive words at
 * p_x_low. 256 bytes is one W[..][i] row (32*2 floats), so the four pairs
 * come from four consecutive rows at the same column.
 * Any per-k adjustment of the cursors is not visible in this excerpt.
 */
93 for (k = 0; k < sbr->
kx[1]; k++) {
94 for (i = 0; i < 32; i+=4) {
97 "lw %[temp0], 0(%[p_w]) \n\t"
98 "lw %[temp1], 4(%[p_w]) \n\t"
99 "lw %[temp2], 256(%[p_w]) \n\t"
100 "lw %[temp3], 260(%[p_w]) \n\t"
101 "lw %[temp4], 512(%[p_w]) \n\t"
102 "lw %[temp5], 516(%[p_w]) \n\t"
103 "lw %[temp6], 768(%[p_w]) \n\t"
104 "lw %[temp7], 772(%[p_w]) \n\t"
105 "sw %[temp0], 0(%[p_x_low]) \n\t"
106 "sw %[temp1], 4(%[p_x_low]) \n\t"
107 "sw %[temp2], 8(%[p_x_low]) \n\t"
108 "sw %[temp3], 12(%[p_x_low]) \n\t"
109 "sw %[temp4], 16(%[p_x_low]) \n\t"
110 "sw %[temp5], 20(%[p_x_low]) \n\t"
111 "sw %[temp6], 24(%[p_x_low]) \n\t"
112 "sw %[temp7], 28(%[p_x_low]) \n\t"
/* Advance the destination by the 32 bytes just written and the source by
 * 1024 bytes (four rows of 256 bytes). */
113 "addiu %[p_x_low], %[p_x_low], 32 \n\t"
114 "addiu %[p_w], %[p_w], 1024 \n\t"
/* Outputs: eight early-clobber scratch registers plus the two cursors,
 * which are read-write ("+r"). */
116 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
117 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
118 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
119 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
120 [p_w]
"+r"(p_w), [p_x_low]
"+r"(p_x_low)
/*
 * Second copy loop: same load/store pattern as above, but reading from p_w1
 * (which started at W[1-buf_idx][24]) and writing to p_x1_low (which
 * started at X_low[0][0]); two passes per k, for every k below sbr->kx[0].
 * Any per-k adjustment of the cursors is not visible in this excerpt.
 */
129 for (k = 0; k < sbr->
kx[0]; k++) {
130 for (i = 0; i < 2; i++) {
134 "lw %[temp0], 0(%[p_w1]) \n\t"
135 "lw %[temp1], 4(%[p_w1]) \n\t"
136 "lw %[temp2], 256(%[p_w1]) \n\t"
137 "lw %[temp3], 260(%[p_w1]) \n\t"
138 "lw %[temp4], 512(%[p_w1]) \n\t"
139 "lw %[temp5], 516(%[p_w1]) \n\t"
140 "lw %[temp6], 768(%[p_w1]) \n\t"
141 "lw %[temp7], 772(%[p_w1]) \n\t"
142 "sw %[temp0], 0(%[p_x1_low]) \n\t"
143 "sw %[temp1], 4(%[p_x1_low]) \n\t"
144 "sw %[temp2], 8(%[p_x1_low]) \n\t"
145 "sw %[temp3], 12(%[p_x1_low]) \n\t"
146 "sw %[temp4], 16(%[p_x1_low]) \n\t"
147 "sw %[temp5], 20(%[p_x1_low]) \n\t"
148 "sw %[temp6], 24(%[p_x1_low]) \n\t"
149 "sw %[temp7], 28(%[p_x1_low]) \n\t"
/* Advance the destination by 32 bytes and the source by 1024 bytes. */
150 "addiu %[p_x1_low], %[p_x1_low], 32 \n\t"
151 "addiu %[p_w1], %[p_w1], 1024 \n\t"
/* Same operand layout as the first copy loop, with p_w1 / p_x1_low as the
 * read-write cursors. */
153 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
154 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
155 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
156 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
157 [p_w1]
"+r"(p_w1), [p_x1_low]
"+r"(p_x1_low)
/*
 * Fragment of a routine that clears the buffer X and then fills it from
 * X_low, Y0 and Y1 using MIPS inline assembly.
 * The function header, the declaration of X, the asm statement openers, the
 * local labels, part of the operand lists and the closing braces are not
 * visible in this excerpt.
 */
169 const float Y0[38][64][2],
const float Y1[38][64][2],
170 const float X_low[32][40][2],
int ch)
174 int temp0, temp1, temp2, temp3;
/* Read cursors into X_low, Y0 and Y1. */
175 const float *X_low1, *Y01, *Y11;
/* Write cursor, starting at the first element of X. */
176 float *x1=&X[0][0][0];
/* Zeroing loop: store register $0 (the MIPS hard-wired zero) to eight
 * consecutive words, i.e. 32 bytes per pass. */
183 "sw $0, 0(%[x1]) \n\t"
184 "sw $0, 4(%[x1]) \n\t"
185 "sw $0, 8(%[x1]) \n\t"
186 "sw $0, 12(%[x1]) \n\t"
187 "sw $0, 16(%[x1]) \n\t"
188 "sw $0, 20(%[x1]) \n\t"
189 "sw $0, 24(%[x1]) \n\t"
190 "sw $0, 28(%[x1]) \n\t"
191 "addiu %[x1], %[x1], 32 \n\t"
/* Repeat until x1 equals operand j. The operand list, the value bound to j
 * and the target label 1 are not visible in this excerpt. */
192 "bne %[x1], %[j], 1b \n\t"
/* Subtract 19456 bytes from x1. 19456 is twice the 9728-byte offset used by
 * the stores below.
 * NOTE(review): presumably this rewinds x1 to &X[0][0][0] after the loop;
 * that holds only if j is 19456 bytes past the start and the instruction
 * runs once after the loop (assembler in reorder mode) - confirm. */
193 "addiu %[x1], %[x1], -19456 \n\t"
/* Read position starts at X_low[0][2]. */
202 X_low1=&X_low[0][2][0];
/*
 * For every k below sbr->kx[0]: counted asm loop with i running from 0 up
 * to i_Temp (target label 2 is not visible). Each pass reads one two-word
 * pair from X_low1, writes the first word at x1 and the second word 9728
 * bytes further on (9728 = 38*64*4), then advances x1 by 256 bytes (64
 * floats) and X_low1 by 8 bytes (one pair).
 * NOTE(review): presumably the two destinations are the real and imaginary
 * planes of X - X's declaration is not visible, confirm.
 * NOTE(review): the test sits at the bottom of the loop, so the body runs
 * at least once and the loop does not terminate normally if i_Temp is 0.
 */
204 for (k = 0; k < sbr->
kx[0]; k++) {
207 "move %[i], $zero \n\t"
209 "lw %[temp0], 0(%[X_low1]) \n\t"
210 "lw %[temp1], 4(%[X_low1]) \n\t"
211 "sw %[temp0], 0(%[x1]) \n\t"
212 "sw %[temp1], 9728(%[x1]) \n\t"
213 "addiu %[x1], %[x1], 256 \n\t"
214 "addiu %[X_low1], %[X_low1], 8 \n\t"
215 "addiu %[i], %[i], 1 \n\t"
216 "bne %[i], %[i_Temp], 2b \n\t"
/* Cursors are read-write; i and the two temporaries are early-clobber
 * outputs; i_Temp is input only. */
218 : [x1]
"+r"(x1), [X_low1]
"+r"(X_low1), [i]
"=&r"(i),
219 [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1)
220 : [i_Temp]
"r"(i_Temp)
/* Take back the 2*i_Temp floats the loop advanced and step 80 floats
 * forward; 80 = 40*2 is one X_low[k] row as declared above. */
224 X_low1-=(i_Temp<<1)-80;
/* Read position moves to Y0[32][k]; k keeps the value it had when the loop
 * above ended. */
228 Y01=(
float*)&Y0[32][k][0];
/*
 * Continue with k up to sbr->kx[0] + sbr->m[0]: same pair-splitting loop as
 * above, now reading from Y01. The source advances by 512 bytes per pass,
 * which is one Y0[i] row (64*2 floats). Target label 3 is not visible.
 */
230 for (; k < sbr->
kx[0] + sbr->
m[0]; k++) {
232 "move %[i], $zero \n\t"
234 "lw %[temp0], 0(%[Y01]) \n\t"
235 "lw %[temp1], 4(%[Y01]) \n\t"
236 "sw %[temp0], 0(%[x1]) \n\t"
237 "sw %[temp1], 9728(%[x1]) \n\t"
238 "addiu %[x1], %[x1], 256 \n\t"
239 "addiu %[Y01], %[Y01], 512 \n\t"
240 "addiu %[i], %[i], 1 \n\t"
241 "bne %[i], %[i_Temp], 3b \n\t"
243 : [x1]
"+r"(x1), [Y01]
"+r"(Y01), [i]
"=&r"(i),
244 [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1)
245 : [i_Temp]
"r"(i_Temp)
/* Read position moves to X_low[0][i_Temp+2]. */
254 X_low1=&X_low[0][i_Temp+2][0];
/*
 * For every k below sbr->kx[1]: same pair-splitting loop, but the counter
 * starts at i_Temp and runs until it equals temp3 (the value of temp3 is
 * not visible in this excerpt). Target label 4 is not visible.
 * NOTE(review): the loop does not terminate normally if i_Temp already
 * equals temp3 on entry, because the test is at the bottom.
 */
257 for (k = 0; k < sbr->
kx[1]; k++) {
260 "move %[i], %[i_Temp] \n\t"
262 "lw %[temp0], 0(%[X_low1]) \n\t"
263 "lw %[temp1], 4(%[X_low1]) \n\t"
264 "sw %[temp0], 0(%[x1]) \n\t"
265 "sw %[temp1], 9728(%[x1]) \n\t"
266 "addiu %[x1], %[x1], 256 \n\t"
267 "addiu %[X_low1], %[X_low1], 8 \n\t"
268 "addiu %[i], %[i], 1 \n\t"
269 "bne %[i], %[temp3], 4b \n\t"
/* The output list continues on a line that is not visible here (note the
 * trailing comma). */
271 : [x1]
"+r"(x1), [X_low1]
"+r"(X_low1), [i]
"=&r"(i),
272 [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
274 : [i_Temp]
"r"(i_Temp), [temp3]
"r"(temp3)
/* Move x1 back by (38-i_Temp)*64 floats and forward by one float; move
 * X_low1 back by (38-i_Temp)*2 floats and forward by 80 floats (one
 * X_low[k] row).
 * NOTE(review): this matches the distance advanced by the loop only if
 * temp3 is 38 - confirm. */
277 x1 -= ((38-i_Temp)<<6)-1;
278 X_low1 -= ((38-i_Temp)<<1)- 80;
/* Read position moves to Y1[i_Temp][k]; k keeps the value it had when the
 * loop above ended. */
282 Y11=&Y1[i_Temp][k][0];
/*
 * Continue with k up to sbr->kx[1] + sbr->m[1]: same pair-splitting loop,
 * reading from Y11 in steps of 512 bytes (one Y1[i] row), with the counter
 * running from i_Temp until it equals temp2 (value not visible). Target
 * label 5 is not visible.
 */
285 for (; k < sbr->
kx[1] + sbr->
m[1]; k++) {
288 "move %[i], %[i_Temp] \n\t"
290 "lw %[temp0], 0(%[Y11]) \n\t"
291 "lw %[temp1], 4(%[Y11]) \n\t"
292 "sw %[temp0], 0(%[x1]) \n\t"
293 "sw %[temp1], 9728(%[x1]) \n\t"
294 "addiu %[x1], %[x1], 256 \n\t"
295 "addiu %[Y11], %[Y11], 512 \n\t"
296 "addiu %[i], %[i], 1 \n\t"
297 "bne %[i], %[temp2], 5b \n\t"
299 : [x1]
"+r"(x1), [Y11]
"+r"(Y11), [i]
"=&r"(i),
300 [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1)
/* The input list continues on a line that is not visible here (note the
 * trailing comma); the template reads temp2, so presumably it is bound
 * there - confirm. */
301 : [i_Temp]
"r"(i_Temp), [temp3]
"r"(temp3),
/* Move x1 back by (32-i_Temp)*64 floats and forward by one float; move Y11
 * back by (32-i_Temp)*128 floats (128 floats = one Y1[i] row) and forward
 * by 2 floats (one pair).
 * NOTE(review): this matches the distance advanced by the loop only if
 * temp2 is 32 - confirm. */
306 x1 -= ((32-i_Temp)<<6)-1;
307 Y11 -= ((32-i_Temp)<<7)-2;
/*
 * Fragment of sbr_hf_assemble_mips. It fills the per-time-slot gain and
 * noise history buffers (g_temp / q_temp), optionally smooths them with
 * h_smooth, applies the gains through sbr->dsp.hf_g_filt and adds a sine
 * contribution to Y1 using MIPS inline assembly.
 * Part of the parameter list, several declarations (h_SL, e, e_a, size,
 * indexsine, indexnoise, ...), the asm statement openers and the closing
 * braces are not visible in this excerpt.
 */
313 static void sbr_hf_assemble_mips(
float Y1[38][64][2],
314 const float X_high[64][40][2],
/* First band index and band count, both taken from slot 1 of sbr. */
320 const int kx = sbr->
kx[1];
321 const int m_max = sbr->
m[1];
/* Five smoothing coefficients; the initializer values are not visible in
 * this excerpt. */
322 static const float h_smooth[5] = {
/* History buffers held in ch_data, viewed as rows of 48 floats. */
330 float (*g_temp)[48] = ch_data->
g_temp, (*q_temp)[48] = ch_data->
q_temp;
333 float *g_temp1, *q_temp1, *pok, *pok1;
/* Declared as float, but the copy loops below bind these to general-purpose
 * registers ("=&r") and only move raw 32-bit words through them. */
334 float temp1, temp2, temp3, temp4;
/* Copy the first m_max entries of sbr->gain[0] and sbr->q_m[0] into the
 * h_SL rows starting at row 2*ch_data->t_env[0]. */
338 for (i = 0; i < h_SL; i++) {
339 memcpy(g_temp[i + 2*ch_data->
t_env[0]], sbr->
gain[0], m_max *
sizeof(sbr->
gain[0][0]));
340 memcpy(q_temp[i + 2*ch_data->
t_env[0]], sbr->
q_m[0], m_max *
sizeof(sbr->
q_m[0][0]));
/*
 * For every time slot i of envelope e, fill rows h_SL + i of g_temp and
 * q_temp by copying words from pok and pok1. The initialisation of pok,
 * pok1 and size is not visible in this excerpt.
 */
348 for (i = 2 * ch_data->
t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
349 g_temp1 = g_temp[h_SL + i];
351 q_temp1 = q_temp[h_SL + i];
/* Main copy: size/4 passes, four words (16 bytes) per pass from pok to
 * g_temp1 and from pok1 to q_temp1; all four cursors advance by 16. */
355 for (j=0; j<(size>>2); j++) {
357 "lw %[temp1], 0(%[pok]) \n\t"
358 "lw %[temp2], 4(%[pok]) \n\t"
359 "lw %[temp3], 8(%[pok]) \n\t"
360 "lw %[temp4], 12(%[pok]) \n\t"
361 "sw %[temp1], 0(%[g_temp1]) \n\t"
362 "sw %[temp2], 4(%[g_temp1]) \n\t"
363 "sw %[temp3], 8(%[g_temp1]) \n\t"
364 "sw %[temp4], 12(%[g_temp1]) \n\t"
365 "lw %[temp1], 0(%[pok1]) \n\t"
366 "lw %[temp2], 4(%[pok1]) \n\t"
367 "lw %[temp3], 8(%[pok1]) \n\t"
368 "lw %[temp4], 12(%[pok1]) \n\t"
369 "sw %[temp1], 0(%[q_temp1]) \n\t"
370 "sw %[temp2], 4(%[q_temp1]) \n\t"
371 "sw %[temp3], 8(%[q_temp1]) \n\t"
372 "sw %[temp4], 12(%[q_temp1]) \n\t"
373 "addiu %[pok], %[pok], 16 \n\t"
374 "addiu %[g_temp1], %[g_temp1], 16 \n\t"
375 "addiu %[pok1], %[pok1], 16 \n\t"
376 "addiu %[q_temp1], %[q_temp1], 16 \n\t"
378 : [temp1]
"=&r"(temp1), [temp2]
"=&r"(temp2),
379 [temp3]
"=&r"(temp3), [temp4]
"=&r"(temp4),
380 [pok]
"+r"(pok), [g_temp1]
"+r"(g_temp1),
381 [pok1]
"+r"(pok1), [q_temp1]
"+r"(q_temp1)
/* Tail copy: the remaining size%4 words, one word per pass for each of the
 * two buffers. */
387 for (j=0; j<(size&3); j++) {
389 "lw %[temp1], 0(%[pok]) \n\t"
390 "lw %[temp2], 0(%[pok1]) \n\t"
391 "sw %[temp1], 0(%[g_temp1]) \n\t"
392 "sw %[temp2], 0(%[q_temp1]) \n\t"
393 "addiu %[pok], %[pok], 4 \n\t"
394 "addiu %[g_temp1], %[g_temp1], 4 \n\t"
395 "addiu %[pok1], %[pok1], 4 \n\t"
396 "addiu %[q_temp1], %[q_temp1], 4 \n\t"
/* NOTE(review): temp3 and temp4 are declared as outputs below although the
 * template above never writes them; they could be dropped from this list. */
398 : [temp1]
"=&r"(temp1), [temp2]
"=&r"(temp2),
399 [temp3]
"=&r"(temp3), [temp4]
"=&r"(temp4),
400 [pok]
"+r"(pok), [g_temp1]
"+r"(g_temp1),
401 [pok1]
"+r"(pok1), [q_temp1]
"+r"(q_temp1)
/* Second pass over the time slots of envelope e: choose or compute the
 * gain row, apply it, then add noise or sine. */
410 for (i = 2 * ch_data->
t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
413 float *g_filt, *q_filt;
/* Smoothing is applied only when h_SL is non-zero and e is neither of the
 * two envelopes listed in e_a. */
415 if (h_SL && e != e_a[0] && e != e_a[1]) {
419 for (m = 0; m < m_max; m++) {
420 const int idx1 = i + h_SL;
/* Weighted sum over the current row and the h_SL rows before it, using
 * h_smooth[0..h_SL] as weights. Where g_filt and q_filt point, and how
 * their entries are initialised, is not visible in this excerpt. */
424 for (j = 0; j <= h_SL; j++) {
425 g_filt[
m] += g_temp[idx1 - j][
m] * h_smooth[j];
426 q_filt[
m] += q_temp[idx1 - j][
m] * h_smooth[j];
/* Without smoothing the stored row is used directly. */
430 g_filt = g_temp[i + h_SL];
/* Apply the gains through the dsp function pointer, starting at band kx.
 * The remaining arguments of the call are not visible in this excerpt. */
434 sbr->
dsp.
hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
/* The body of this branch is not visible; the sine code below is presumably
 * the alternative branch - confirm. */
437 if (e != e_a[0] && e != e_a[1]) {
/* idx selects the component of the complex sample that receives the sine:
 * 0 when indexsine is even, 1 when it is odd. */
442 int idx = indexsine&1;
/* A is +1 or -1, depending on bit 1 of indexsine + (kx & 1). */
443 int A = (1-((indexsine+(kx & 1))&2));
/* B equals A when idx is 0 and -A when idx is 1 (two's complement negation
 * written as xor with -1 plus 1). */
444 int B = (A^(-idx)) + idx;
445 float *
out = &Y1[i][kx][idx];
446 float *
in = sbr->
s_m[e];
/* These temporaries shadow the outer temp1..temp4 and are bound to
 * floating-point registers in the asm below. */
447 float temp0, temp1, temp2, temp3, temp4, temp5;
448 float A_f = (float)A;
449 float B_f = (float)B;
/* Two bands per pass. */
451 for (m = 0; m+1 < m_max; m+=2) {
/* Load in[0] and in[1]. */
457 "lwc1 %[temp0], 0(%[in]) \n\t"
458 "lwc1 %[temp1], 4(%[in]) \n\t"
/* madd.s fd, fr, fs, ft computes fs*ft + fr, so
 * temp4 = temp0*A_f + temp2 and temp5 = temp1*B_f + temp3.
 * NOTE(review): temp2 and temp3 are inputs whose assignment is not visible
 * here; presumably they hold the current out[0] and out[2] - confirm. */
459 "madd.s %[temp4], %[temp2], %[temp0], %[A_f] \n\t"
460 "madd.s %[temp5], %[temp3], %[temp1], %[B_f] \n\t"
/* Store to out[0] and out[2] (byte offsets 0 and 8), i.e. the same
 * component of two neighbouring complex samples. */
461 "swc1 %[temp4], 0(%[out]) \n\t"
462 "swc1 %[temp5], 8(%[out]) \n\t"
/* Advance by two input floats and by two complex output samples. */
463 "addiu %[in], %[in], 8 \n\t"
464 "addiu %[out], %[out], 16 \n\t"
466 : [temp0]
"=&f" (temp0), [temp1]
"=&f"(temp1),
467 [temp4]
"=&f" (temp4), [temp5]
"=&f"(temp5),
468 [
in]
"+r"(
in), [out]
"+r"(out)
/* The input list continues on a line that is not visible here (note the
 * trailing comma). */
469 : [A_f]
"f"(A_f), [B_f]
"f"(B_f), [temp2]
"f"(temp2),
/* Scalar form of the same update for a single band; the condition guarding
 * it is not visible in this excerpt. */
475 out[2*
m ] += in[
m ] *
A;
/* Advance the running indices: indexnoise wraps at 512, indexsine at 4. */
477 indexnoise = (indexnoise + m_max) & 0x1ff;
478 indexsine = (indexsine + 1) & 3;
486 float (*alpha0)[2],
float (*alpha1)[2],
487 const float X_low[32][40][2],
int k0)
490 float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7,
c;
491 float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;
495 for (k = 0; k < k0; k++) {
498 phi1 = &phi[0][0][0];
499 alpha_1 = &alpha1[k][0];
500 alpha_0 = &alpha0[k][0];
504 "lwc1 %[temp0], 40(%[phi1]) \n\t"
505 "lwc1 %[temp1], 16(%[phi1]) \n\t"
506 "lwc1 %[temp2], 24(%[phi1]) \n\t"
507 "lwc1 %[temp3], 28(%[phi1]) \n\t"
508 "mul.s %[dk], %[temp0], %[temp1] \n\t"
509 "lwc1 %[temp4], 0(%[phi1]) \n\t"
510 "mul.s %[res2], %[temp2], %[temp2] \n\t"
511 "lwc1 %[temp5], 4(%[phi1]) \n\t"
512 "madd.s %[res2], %[res2], %[temp3], %[temp3] \n\t"
513 "lwc1 %[temp6], 8(%[phi1]) \n\t"
514 "div.s %[res2], %[res2], %[c] \n\t"
515 "lwc1 %[temp0], 12(%[phi1]) \n\t"
516 "sub.s %[dk], %[dk], %[res2] \n\t"
518 : [temp0]
"=&f"(temp0), [temp1]
"=&f"(temp1), [temp2]
"=&f"(temp2),
519 [temp3]
"=&f"(temp3), [temp4]
"=&f"(temp4), [temp5]
"=&f"(temp5),
520 [temp6]
"=&f"(temp6), [res2]
"=&f"(res2), [dk]
"=&f"(dk)
521 : [phi1]
"r"(phi1), [
c]
"f"(
c)
530 "mul.s %[temp_real], %[temp4], %[temp2] \n\t"
531 "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3] \n\t"
532 "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1] \n\t"
533 "mul.s %[temp_im], %[temp4], %[temp3] \n\t"
534 "madd.s %[temp_im], %[temp_im], %[temp5], %[temp2] \n\t"
535 "nmsub.s %[temp_im], %[temp_im], %[temp0], %[temp1] \n\t"
536 "div.s %[temp_real], %[temp_real], %[dk] \n\t"
537 "div.s %[temp_im], %[temp_im], %[dk] \n\t"
538 "swc1 %[temp_real], 0(%[alpha_1]) \n\t"
539 "swc1 %[temp_im], 4(%[alpha_1]) \n\t"
541 : [temp_real]
"=&f" (temp_real), [temp_im]
"=&f"(temp_im)
542 : [phi1]
"r"(phi1), [temp0]
"f"(temp0), [temp1]
"f"(temp1),
543 [temp2]
"f"(temp2), [temp3]
"f"(temp3), [temp4]
"f"(temp4),
544 [temp5]
"f"(temp5), [temp6]
"f"(temp6),
545 [alpha_1]
"r"(alpha_1), [dk]
"f"(dk)
555 "lwc1 %[temp6], 0(%[alpha_1]) \n\t"
556 "lwc1 %[temp7], 4(%[alpha_1]) \n\t"
557 "mul.s %[temp_real], %[temp6], %[temp2] \n\t"
558 "add.s %[temp_real], %[temp_real], %[temp4] \n\t"
559 "madd.s %[temp_real], %[temp_real], %[temp7], %[temp3] \n\t"
560 "mul.s %[temp_im], %[temp7], %[temp2] \n\t"
561 "add.s %[temp_im], %[temp_im], %[temp5] \n\t"
562 "nmsub.s %[temp_im], %[temp_im], %[temp6], %[temp3] \n\t"
563 "div.s %[temp_real], %[temp_real], %[temp1] \n\t"
564 "div.s %[temp_im], %[temp_im], %[temp1] \n\t"
565 "neg.s %[temp_real], %[temp_real] \n\t"
566 "neg.s %[temp_im], %[temp_im] \n\t"
567 "swc1 %[temp_real], 0(%[alpha_0]) \n\t"
568 "swc1 %[temp_im], 4(%[alpha_0]) \n\t"
570 : [temp_real]
"=&f"(temp_real), [temp_im]
"=&f"(temp_im),
571 [temp6]
"=&f"(temp6), [temp7]
"=&f"(temp7),
572 [res1]
"=&f"(res1), [res2]
"=&f"(res2)
573 : [alpha_1]
"r"(alpha_1), [alpha_0]
"r"(alpha_0),
574 [temp0]
"f"(temp0), [temp1]
"f"(temp1), [temp2]
"f"(temp2),
575 [temp3]
"f"(temp3), [temp4]
"f"(temp4), [temp5]
"f"(temp5)
581 "lwc1 %[temp1], 0(%[alpha_1]) \n\t"
582 "lwc1 %[temp2], 4(%[alpha_1]) \n\t"
583 "lwc1 %[temp_real], 0(%[alpha_0]) \n\t"
584 "lwc1 %[temp_im], 4(%[alpha_0]) \n\t"
585 "mul.s %[res1], %[temp1], %[temp1] \n\t"
586 "madd.s %[res1], %[res1], %[temp2], %[temp2] \n\t"
587 "mul.s %[res2], %[temp_real], %[temp_real] \n\t"
588 "madd.s %[res2], %[res2], %[temp_im], %[temp_im] \n\t"
590 : [temp_real]
"=&f"(temp_real), [temp_im]
"=&f"(temp_im),
591 [temp1]
"=&f"(temp1), [temp2]
"=&f"(temp2),
592 [res1]
"=&f"(res1), [res2]
"=&f"(res2)
593 : [alpha_1]
"r"(alpha_1), [alpha_0]
"r"(alpha_0)
597 if (res1 >= 16.0f || res2 >= 16.0f) {