65 const float *loop_end = src +
count;
76 "lw %[temp0], 0(%[src]) \n\t"
77 "lw %[temp1], 4(%[src]) \n\t"
78 "lw %[temp2], 8(%[src]) \n\t"
79 "lw %[temp3], 12(%[src]) \n\t"
80 "lw %[temp4], 16(%[src]) \n\t"
81 "lw %[temp5], 20(%[src]) \n\t"
82 "lw %[temp6], 24(%[src]) \n\t"
83 "lw %[temp7], 28(%[src]) \n\t"
85 "sw %[temp0], 0(%[dst]) \n\t"
86 "sw %[temp1], 4(%[dst]) \n\t"
87 "sw %[temp2], 8(%[dst]) \n\t"
88 "sw %[temp3], 12(%[dst]) \n\t"
89 "sw %[temp4], 16(%[dst]) \n\t"
90 "sw %[temp5], 20(%[dst]) \n\t"
91 "sw %[temp6], 24(%[dst]) \n\t"
92 "sw %[temp7], 28(%[dst]) \n\t"
93 "bne %[src], %[loop_end], 1b \n\t"
97 : [temp0]
"=&r"(temp[0]), [temp1]
"=&r"(temp[1]),
98 [temp2]
"=&r"(temp[2]), [temp3]
"=&r"(temp[3]),
99 [temp4]
"=&r"(temp[4]), [temp5]
"=&r"(temp[5]),
100 [temp6]
"=&r"(temp[6]), [temp7]
"=&r"(temp[7]),
101 [
src]
"+r"(
src), [dst]
"+r"(dst)
102 : [loop_end]
"r"(loop_end)
109 union {
unsigned u;
int s; }
v = { previous_val * 1664525
u + 1013904223 };
118 float *saved = sce->
saved;
126 for (i = 0; i < 1024; i += 128)
141 float_copy(out, saved, 448);
148 float temp0, temp1, temp2, temp3;
149 float *dst0 = out + 448 + 0*128;
150 float *dst1 = dst0 + 64 + 63;
151 float *dst2 = saved + 63;
152 float *win0 = (
float*)swindow;
153 float *win1 = win0 + 64 + 63;
154 float *win0_prev = (
float*)swindow_prev;
155 float *win1_prev = win0_prev + 64 + 63;
156 float *src0_prev = saved + 448;
157 float *src1_prev = buf + 0*128 + 63;
158 float *src0 = buf + 0*128 + 64;
159 float *src1 = buf + 1*128 + 63;
161 for(i = 0; i < 64; i++)
163 temp0 = src0_prev[0];
164 temp1 = src1_prev[0];
169 dst0[0] = temp0 * wj - temp1 * wi;
170 dst1[0] = temp0 * wi + temp1 * wj;
177 dst0[128] = temp2 * wj - temp3 * wi;
178 dst1[128] = temp2 * wi + temp3 * wj;
182 dst0[256] = temp0 * wj - temp1 * wi;
183 dst1[256] = temp0 * wi + temp1 * wj;
184 dst0[384] = temp2 * wj - temp3 * wi;
185 dst1[384] = temp2 * wi + temp3 * wj;
189 dst0[512] = temp0 * wj - temp1 * wi;
190 dst2[0] = temp0 * wi + temp1 * wj;
207 float_copy(out + 576, buf + 64, 448);
216 float_copy(saved + 448, buf + 7*128 + 64, 64);
218 float_copy(saved, buf + 512, 448);
219 float_copy(saved + 448, buf + 7*128 + 64, 64);
221 float_copy(saved, buf + 512, 512);
233 float *predTime = sce->
ret;
236 int16_t num_samples = 2048;
239 num_samples = ltp->
lag + 1024;
240 j = (2048 - num_samples) >> 2;
241 k = (2048 - num_samples) & 3;
242 p_predTime = &predTime[num_samples];
244 for (i = 0; i < num_samples; i++)
246 for (i = 0; i < j; i++) {
250 "sw $0, 0(%[p_predTime]) \n\t"
251 "sw $0, 4(%[p_predTime]) \n\t"
252 "sw $0, 8(%[p_predTime]) \n\t"
253 "sw $0, 12(%[p_predTime]) \n\t"
254 PTR_ADDIU "%[p_predTime], %[p_predTime], 16 \n\t"
256 : [p_predTime]
"+r"(p_predTime)
261 for (i = 0; i < k; i++) {
264 "sw $0, 0(%[p_predTime]) \n\t"
265 PTR_ADDIU "%[p_predTime], %[p_predTime], 4 \n\t"
267 : [p_predTime]
"+r"(p_predTime)
280 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
281 sce->
coeffs[i] += predFreq[i];
286 static av_always_inline void fmul_and_reverse(
float *dst,
const float *src0,
const float *src1,
int count)
298 for (; count > 0; count -= 4){
303 "lwc1 %[temp0], 0(%[ptr2]) \n\t"
304 "lwc1 %[temp1], -4(%[ptr2]) \n\t"
305 "lwc1 %[temp2], -8(%[ptr2]) \n\t"
306 "lwc1 %[temp3], -12(%[ptr2]) \n\t"
307 "lwc1 %[temp4], 0(%[ptr3]) \n\t"
308 "lwc1 %[temp5], -4(%[ptr3]) \n\t"
309 "lwc1 %[temp6], -8(%[ptr3]) \n\t"
310 "lwc1 %[temp7], -12(%[ptr3]) \n\t"
311 "mul.s %[temp8], %[temp0], %[temp4] \n\t"
312 "mul.s %[temp9], %[temp1], %[temp5] \n\t"
313 "mul.s %[temp10], %[temp2], %[temp6] \n\t"
314 "mul.s %[temp11], %[temp3], %[temp7] \n\t"
315 "swc1 %[temp8], 0(%[ptr1]) \n\t"
316 "swc1 %[temp9], 4(%[ptr1]) \n\t"
317 "swc1 %[temp10], 8(%[ptr1]) \n\t"
318 "swc1 %[temp11], 12(%[ptr1]) \n\t"
323 : [temp0]
"=&f"(temp[0]), [temp1]
"=&f"(temp[1]),
324 [temp2]
"=&f"(temp[2]), [temp3]
"=&f"(temp[3]),
325 [temp4]
"=&f"(temp[4]), [temp5]
"=&f"(temp[5]),
326 [temp6]
"=&f"(temp[6]), [temp7]
"=&f"(temp[7]),
327 [temp8]
"=&f"(temp[8]), [temp9]
"=&f"(temp[9]),
328 [temp10]
"=&f"(temp[10]), [temp11]
"=&f"(temp[11]),
329 [ptr1]
"+r"(dst), [ptr2]
"+r"(src0), [ptr3]
"+r"(src1)
339 float *saved = sce->
saved;
340 float *saved_ltp = sce->
coeffs;
343 float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
346 float *p_saved_ltp = saved_ltp + 576;
347 float *loop_end1 = p_saved_ltp + 448;
349 float_copy(saved_ltp, saved, 512);
354 "sw $0, 0(%[p_saved_ltp]) \n\t"
355 "sw $0, 4(%[p_saved_ltp]) \n\t"
356 "sw $0, 8(%[p_saved_ltp]) \n\t"
357 "sw $0, 12(%[p_saved_ltp]) \n\t"
358 "sw $0, 16(%[p_saved_ltp]) \n\t"
359 "sw $0, 20(%[p_saved_ltp]) \n\t"
360 "sw $0, 24(%[p_saved_ltp]) \n\t"
361 "sw $0, 28(%[p_saved_ltp]) \n\t"
362 PTR_ADDIU "%[p_saved_ltp],%[p_saved_ltp], 32 \n\t"
363 "bne %[p_saved_ltp], %[loop_end1], 1b \n\t"
365 : [p_saved_ltp]
"+r"(p_saved_ltp)
366 : [loop_end1]
"r"(loop_end1)
371 fmul_and_reverse(saved_ltp + 512, ac->
buf_mdct + 960, swindow, 64);
373 float *buff0 = saved;
374 float *buff1 = saved_ltp;
375 float *loop_end = saved + 448;
380 ".set noreorder \n\t"
382 "lw %[temp0], 0(%[src]) \n\t"
383 "lw %[temp1], 4(%[src]) \n\t"
384 "lw %[temp2], 8(%[src]) \n\t"
385 "lw %[temp3], 12(%[src]) \n\t"
386 "lw %[temp4], 16(%[src]) \n\t"
387 "lw %[temp5], 20(%[src]) \n\t"
388 "lw %[temp6], 24(%[src]) \n\t"
389 "lw %[temp7], 28(%[src]) \n\t"
391 "sw %[temp0], 0(%[dst]) \n\t"
392 "sw %[temp1], 4(%[dst]) \n\t"
393 "sw %[temp2], 8(%[dst]) \n\t"
394 "sw %[temp3], 12(%[dst]) \n\t"
395 "sw %[temp4], 16(%[dst]) \n\t"
396 "sw %[temp5], 20(%[dst]) \n\t"
397 "sw %[temp6], 24(%[dst]) \n\t"
398 "sw %[temp7], 28(%[dst]) \n\t"
399 "sw $0, 2304(%[dst]) \n\t"
400 "sw $0, 2308(%[dst]) \n\t"
401 "sw $0, 2312(%[dst]) \n\t"
402 "sw $0, 2316(%[dst]) \n\t"
403 "sw $0, 2320(%[dst]) \n\t"
404 "sw $0, 2324(%[dst]) \n\t"
405 "sw $0, 2328(%[dst]) \n\t"
406 "sw $0, 2332(%[dst]) \n\t"
407 "bne %[src], %[loop_end], 1b \n\t"
411 : [temp0]
"=&r"(temp0), [temp1]
"=&r"(temp1),
412 [temp2]
"=&r"(temp2), [temp3]
"=&r"(temp3),
413 [temp4]
"=&r"(temp4), [temp5]
"=&r"(temp5),
414 [temp6]
"=&r"(temp6), [temp7]
"=&r"(temp7),
415 [
src]
"+r"(buff0), [dst]
"+r"(buff1)
416 : [loop_end]
"r"(loop_end)
420 fmul_and_reverse(saved_ltp + 512, ac->
buf_mdct + 960, swindow, 64);
423 fmul_and_reverse(saved_ltp + 512, ac->
buf_mdct + 512, lwindow, 512);
428 float_copy(sce->
ltp_state + 2048, saved_ltp, 1024);