74 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
75 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
76 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
77 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
81 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
/*
 * Lookup table added to the running bit count, next to p_bits[curidx], by
 * get_band_numbits_UQUAD_mips() and get_band_cost_UQUAD_mips().
 * Entry n is the number of non-zero digits of n written as four base-3
 * digits (81 = 3^4).
 * NOTE(review): presumably one sign bit per non-zero coefficient of the
 * quad - confirm against the code that builds curidx.
 */
88 static const uint8_t uquad_sign_bits[81] = {
89 0, 1, 1, 1, 2, 2, 1, 2, 2,
90 1, 2, 2, 2, 3, 3, 2, 3, 3,
91 1, 2, 2, 2, 3, 3, 2, 3, 3,
92 1, 2, 2, 2, 3, 3, 2, 3, 3,
93 2, 3, 3, 3, 4, 4, 3, 4, 4,
94 2, 3, 3, 3, 4, 4, 3, 4, 4,
95 1, 2, 2, 2, 3, 3, 2, 3, 3,
96 2, 3, 3, 3, 4, 4, 3, 4, 4,
97 2, 3, 3, 3, 4, 4, 3, 4, 4
/*
 * Lookup table added to the running bit count by
 * get_band_numbits_UPAIR7_mips() for both curidx and curidx2.
 * The rows are 8 entries wide (64 = 8 * 8); entry n is the number of
 * non-zero values among n / 8 and n % 8.
 * NOTE(review): presumably one sign bit per non-zero coefficient of the
 * pair - confirm against the code that builds curidx.
 */
100 static const uint8_t upair7_sign_bits[64] = {
101 0, 1, 1, 1, 1, 1, 1, 1,
102 1, 2, 2, 2, 2, 2, 2, 2,
103 1, 2, 2, 2, 2, 2, 2, 2,
104 1, 2, 2, 2, 2, 2, 2, 2,
105 1, 2, 2, 2, 2, 2, 2, 2,
106 1, 2, 2, 2, 2, 2, 2, 2,
107 1, 2, 2, 2, 2, 2, 2, 2,
108 1, 2, 2, 2, 2, 2, 2, 2,
/*
 * Lookup table added to the running bit count by
 * get_band_numbits_UPAIR12_mips() for both curidx and curidx2.
 * The rows are 13 entries wide (169 = 13 * 13); entry n is the number of
 * non-zero values among n / 13 and n % 13.
 * NOTE(review): presumably one sign bit per non-zero coefficient of the
 * pair - confirm against the code that builds curidx.
 */
111 static const uint8_t upair12_sign_bits[169] = {
112 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
115 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
116 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
123 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
124 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Lookup table added to the running bit count by get_band_numbits_ESC_mips()
 * for both curidx and curidx2.
 * The rows are 17 entries wide (289 = 17 * 17); entry n is the number of
 * non-zero values among n / 17 and n % 17.
 * NOTE(review): presumably one sign bit per non-zero coefficient of the
 * pair - confirm against the code that builds curidx.
 */
127 static const uint8_t esc_sign_bits[289] = {
128 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
132 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
133 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
134 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
135 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
136 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
137 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
139 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
140 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
144 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
148 #ifndef USE_REALLY_FULL_SEARCH
151 float ax, bx, cx, dx;
153 for (i = 0; i <
size; i += 4) {
/*
 * Scans group_len windows of swb_size coefficients each and folds every
 * scanned element into maxval with FFMAX.
 * Window w2 starts at scaled[w2 * 128], i.e. consecutive windows are 128
 * floats apart regardless of swb_size.
 */
177 static float find_max_val(
int group_len,
int swb_size,
const float *scaled) {
180 for (w2 = 0; w2 < group_len; w2++) {
181 for (i = 0; i < swb_size; i++) {
182 maxval =
FFMAX(maxval, scaled[w2*128+i]);
190 float Q34 = sqrtf(Q * sqrtf(Q));
192 qmaxval = maxval * Q34 + 0.4054f;
193 if (qmaxval == 0) cb = 0;
194 else if (qmaxval == 1) cb = 1;
195 else if (qmaxval == 2) cb = 3;
196 else if (qmaxval <= 4) cb = 5;
197 else if (qmaxval <= 7) cb = 7;
198 else if (qmaxval <= 12) cb = 9;
/*
 * Signed-quad encoder; the band is walked four coefficients per iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store;
 *  - the asm turns each qc into 0 or 1 (slt against $zero) and negates it
 *    when bit 31 of the matching in[] word is set, giving -1, 0 or 1;
 *  - put_bits() is called with p_bits[curidx] and p_codes[curidx].
 * in[] is read through an int pointer so that bit 31 of each word can be
 * tested with integer instructions.
 */
206 static void quantize_and_encode_band_cost_SQUAD_mips(
struct AACEncContext *
s,
208 const float *scaled,
int size,
int scale_idx,
209 int cb,
const float lambda,
const float uplim,
214 int qc1, qc2, qc3, qc4;
221 for (i = 0; i <
size; i += 4) {
223 int *in_int = (
int *)&in[i];
226 qc1 = scaled[i ] * Q34 + 0.4054f;
227 qc2 = scaled[i+1] * Q34 + 0.4054f;
228 qc3 = scaled[i+2] * Q34 + 0.4054f;
229 qc4 = scaled[i+3] * Q34 + 0.4054f;
233 ".set noreorder \n\t"
/* qc_k = (0 < qc_k): every positive quantized value becomes 1 */
235 "slt %[qc1], $zero, %[qc1] \n\t"
236 "slt %[qc2], $zero, %[qc2] \n\t"
237 "slt %[qc3], $zero, %[qc3] \n\t"
238 "slt %[qc4], $zero, %[qc4] \n\t"
/* load the four in[] words and keep only bit 31 of each */
239 "lw %[t0], 0(%[in_int]) \n\t"
240 "lw %[t1], 4(%[in_int]) \n\t"
241 "lw %[t2], 8(%[in_int]) \n\t"
242 "lw %[t3], 12(%[in_int]) \n\t"
243 "srl %[t0], %[t0], 31 \n\t"
244 "srl %[t1], %[t1], 31 \n\t"
245 "srl %[t2], %[t2], 31 \n\t"
246 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qc_k; movn installs the negated value where bit 31 was set */
247 "subu %[t4], $zero, %[qc1] \n\t"
248 "subu %[t5], $zero, %[qc2] \n\t"
249 "subu %[t6], $zero, %[qc3] \n\t"
250 "subu %[t7], $zero, %[qc4] \n\t"
251 "movn %[qc1], %[t4], %[t0] \n\t"
252 "movn %[qc2], %[t5], %[t1] \n\t"
253 "movn %[qc3], %[t6], %[t2] \n\t"
254 "movn %[qc4], %[t7], %[t3] \n\t"
258 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
259 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
260 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
261 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
262 : [in_int]
"r"(in_int)
275 put_bits(pb, p_bits[curidx], p_codes[curidx]);
/*
 * Unsigned-quad encoder; the band is walked four coefficients per iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store,
 *    then capped at 2 by the asm;
 *  - sign collects, in coefficient order, one bit (in[] word < 0) for every
 *    qc that is non-zero;
 *  - count is the number of qc values greater than zero;
 *  - v_codes is p_codes[curidx] shifted left by count with the low count
 *    bits of sign appended, and v_bits is p_bits[curidx] + count.
 */
279 static void quantize_and_encode_band_cost_UQUAD_mips(
struct AACEncContext *s,
281 const float *scaled,
int size,
int scale_idx,
282 int cb,
const float lambda,
const float uplim,
287 int qc1, qc2, qc3, qc4;
294 for (i = 0; i <
size; i += 4) {
295 int curidx, sign,
count;
296 int *in_int = (
int *)&in[i];
298 unsigned int v_codes;
301 qc1 = scaled[i ] * Q34 + 0.4054f;
302 qc2 = scaled[i+1] * Q34 + 0.4054f;
303 qc3 = scaled[i+2] * Q34 + 0.4054f;
304 qc4 = scaled[i+3] * Q34 + 0.4054f;
308 ".set noreorder \n\t"
/* t4 = 2; qc_k = 2 wherever 2 < qc_k */
310 "ori %[t4], $zero, 2 \n\t"
311 "ori %[sign], $zero, 0 \n\t"
312 "slt %[t0], %[t4], %[qc1] \n\t"
313 "slt %[t1], %[t4], %[qc2] \n\t"
314 "slt %[t2], %[t4], %[qc3] \n\t"
315 "slt %[t3], %[t4], %[qc4] \n\t"
316 "movn %[qc1], %[t4], %[t0] \n\t"
317 "movn %[qc2], %[t4], %[t1] \n\t"
318 "movn %[qc3], %[t4], %[t2] \n\t"
319 "movn %[qc4], %[t4], %[t3] \n\t"
/* t_k = (in[] word < 0) as a signed integer compare */
320 "lw %[t0], 0(%[in_int]) \n\t"
321 "lw %[t1], 4(%[in_int]) \n\t"
322 "lw %[t2], 8(%[in_int]) \n\t"
323 "lw %[t3], 12(%[in_int]) \n\t"
324 "slt %[t0], %[t0], $zero \n\t"
325 "movn %[sign], %[t0], %[qc1] \n\t"
326 "slt %[t1], %[t1], $zero \n\t"
327 "slt %[t2], %[t2], $zero \n\t"
328 "slt %[t3], %[t3], $zero \n\t"
/* sign = (sign << 1) | t_k, committed only when qc_k is non-zero;
   the count accumulation is interleaved with these steps */
329 "sll %[t0], %[sign], 1 \n\t"
330 "or %[t0], %[t0], %[t1] \n\t"
331 "movn %[sign], %[t0], %[qc2] \n\t"
332 "slt %[t4], $zero, %[qc1] \n\t"
333 "slt %[t1], $zero, %[qc2] \n\t"
334 "slt %[count], $zero, %[qc3] \n\t"
335 "sll %[t0], %[sign], 1 \n\t"
336 "or %[t0], %[t0], %[t2] \n\t"
337 "movn %[sign], %[t0], %[qc3] \n\t"
338 "slt %[t2], $zero, %[qc4] \n\t"
339 "addu %[count], %[count], %[t4] \n\t"
340 "addu %[count], %[count], %[t1] \n\t"
341 "sll %[t0], %[sign], 1 \n\t"
342 "or %[t0], %[t0], %[t3] \n\t"
343 "movn %[sign], %[t0], %[qc4] \n\t"
344 "addu %[count], %[count], %[t2] \n\t"
348 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
349 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
350 [sign]
"=&r"(sign), [count]
"=&r"(count),
351 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
353 : [in_int]
"r"(in_int)
/* codeword followed by the low count bits of sign */
365 v_codes = (p_codes[curidx] <<
count) | (sign & ((1 << count) - 1));
366 v_bits = p_bits[curidx] +
count;
/*
 * Signed-pair encoder; the band is walked four coefficients (two pairs) per
 * iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store,
 *    then capped at 4 by the asm;
 *  - each qc is negated when bit 31 of the matching in[] word is set;
 *  - the two codewords are packed into one word: p_codes[curidx] is shifted
 *    left by p_bits[curidx2] and OR-ed with p_codes[curidx2]; v_bits is the
 *    sum of both lengths.
 */
371 static void quantize_and_encode_band_cost_SPAIR_mips(
struct AACEncContext *s,
373 const float *scaled,
int size,
int scale_idx,
374 int cb,
const float lambda,
const float uplim,
379 int qc1, qc2, qc3, qc4;
386 for (i = 0; i <
size; i += 4) {
388 int *in_int = (
int *)&in[i];
390 unsigned int v_codes;
393 qc1 = scaled[i ] * Q34 + 0.4054f;
394 qc2 = scaled[i+1] * Q34 + 0.4054f;
395 qc3 = scaled[i+2] * Q34 + 0.4054f;
396 qc4 = scaled[i+3] * Q34 + 0.4054f;
400 ".set noreorder \n\t"
/* t4 = 4; qc_k = 4 wherever 4 < qc_k */
402 "ori %[t4], $zero, 4 \n\t"
403 "slt %[t0], %[t4], %[qc1] \n\t"
404 "slt %[t1], %[t4], %[qc2] \n\t"
405 "slt %[t2], %[t4], %[qc3] \n\t"
406 "slt %[t3], %[t4], %[qc4] \n\t"
407 "movn %[qc1], %[t4], %[t0] \n\t"
408 "movn %[qc2], %[t4], %[t1] \n\t"
409 "movn %[qc3], %[t4], %[t2] \n\t"
410 "movn %[qc4], %[t4], %[t3] \n\t"
/* load the four in[] words and keep only bit 31 of each */
411 "lw %[t0], 0(%[in_int]) \n\t"
412 "lw %[t1], 4(%[in_int]) \n\t"
413 "lw %[t2], 8(%[in_int]) \n\t"
414 "lw %[t3], 12(%[in_int]) \n\t"
415 "srl %[t0], %[t0], 31 \n\t"
416 "srl %[t1], %[t1], 31 \n\t"
417 "srl %[t2], %[t2], 31 \n\t"
418 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qc_k; movn installs the negated value where bit 31 was set */
419 "subu %[t4], $zero, %[qc1] \n\t"
420 "subu %[t5], $zero, %[qc2] \n\t"
421 "subu %[t6], $zero, %[qc3] \n\t"
422 "subu %[t7], $zero, %[qc4] \n\t"
423 "movn %[qc1], %[t4], %[t0] \n\t"
424 "movn %[qc2], %[t5], %[t1] \n\t"
425 "movn %[qc3], %[t6], %[t2] \n\t"
426 "movn %[qc4], %[t7], %[t3] \n\t"
430 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
431 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
432 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
433 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
434 : [in_int]
"r"(in_int)
/* first pair's codeword in the high bits, second pair's in the low bits */
444 v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
445 v_bits = p_bits[curidx] + p_bits[curidx2];
/*
 * Unsigned-pair encoder with magnitudes capped at 7; the band is walked four
 * coefficients (two pairs) per iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store,
 *    then capped at 7 by the asm;
 *  - sign1/count1 describe the pair (qc1, qc2) and sign2/count2 the pair
 *    (qc3, qc4): signN collects one bit (in[] word < 0) per non-zero qc,
 *    countN is the number of qc values greater than zero;
 *  - each pair yields v_codes = (p_codes[curidx] << countN) | signN and
 *    v_bits = p_bits[curidx] + countN.
 */
450 static void quantize_and_encode_band_cost_UPAIR7_mips(
struct AACEncContext *s,
452 const float *scaled,
int size,
int scale_idx,
453 int cb,
const float lambda,
const float uplim,
458 int qc1, qc2, qc3, qc4;
465 for (i = 0; i <
size; i += 4) {
466 int curidx, sign1, count1, sign2, count2;
467 int *in_int = (
int *)&in[i];
469 unsigned int v_codes;
472 qc1 = scaled[i ] * Q34 + 0.4054f;
473 qc2 = scaled[i+1] * Q34 + 0.4054f;
474 qc3 = scaled[i+2] * Q34 + 0.4054f;
475 qc4 = scaled[i+3] * Q34 + 0.4054f;
479 ".set noreorder \n\t"
/* t4 = 7; qc_k = 7 wherever 7 < qc_k */
481 "ori %[t4], $zero, 7 \n\t"
482 "ori %[sign1], $zero, 0 \n\t"
483 "ori %[sign2], $zero, 0 \n\t"
484 "slt %[t0], %[t4], %[qc1] \n\t"
485 "slt %[t1], %[t4], %[qc2] \n\t"
486 "slt %[t2], %[t4], %[qc3] \n\t"
487 "slt %[t3], %[t4], %[qc4] \n\t"
488 "movn %[qc1], %[t4], %[t0] \n\t"
489 "movn %[qc2], %[t4], %[t1] \n\t"
490 "movn %[qc3], %[t4], %[t2] \n\t"
491 "movn %[qc4], %[t4], %[t3] \n\t"
/* t_k = (in[] word < 0); signN = (signN << 1) | t_k only for non-zero qc_k */
492 "lw %[t0], 0(%[in_int]) \n\t"
493 "lw %[t1], 4(%[in_int]) \n\t"
494 "lw %[t2], 8(%[in_int]) \n\t"
495 "lw %[t3], 12(%[in_int]) \n\t"
496 "slt %[t0], %[t0], $zero \n\t"
497 "movn %[sign1], %[t0], %[qc1] \n\t"
498 "slt %[t2], %[t2], $zero \n\t"
499 "movn %[sign2], %[t2], %[qc3] \n\t"
500 "slt %[t1], %[t1], $zero \n\t"
501 "sll %[t0], %[sign1], 1 \n\t"
502 "or %[t0], %[t0], %[t1] \n\t"
503 "movn %[sign1], %[t0], %[qc2] \n\t"
504 "slt %[t3], %[t3], $zero \n\t"
505 "sll %[t0], %[sign2], 1 \n\t"
506 "or %[t0], %[t0], %[t3] \n\t"
507 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of values greater than zero in the pair */
508 "slt %[count1], $zero, %[qc1] \n\t"
509 "slt %[t1], $zero, %[qc2] \n\t"
510 "slt %[count2], $zero, %[qc3] \n\t"
511 "slt %[t2], $zero, %[qc4] \n\t"
512 "addu %[count1], %[count1], %[t1] \n\t"
513 "addu %[count2], %[count2], %[t2] \n\t"
517 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
518 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
519 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
520 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
521 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
523 : [in_int]
"r"(in_int)
/* NOTE(review): this clobber list names hard registers t0..t4 although the
   asm body only uses named operands (%[t0]..%[t4]) - looks like a leftover;
   check whether the hard-register clobbers are still needed. */
524 :
"t0",
"t1",
"t2",
"t3",
"t4",
/* first pair: codeword followed by its sign bits */
531 v_codes = (p_codes[curidx] << count1) | sign1;
532 v_bits = p_bits[curidx] + count1;
/* second pair: codeword followed by its sign bits */
538 v_codes = (p_codes[curidx] << count2) | sign2;
539 v_bits = p_bits[curidx] + count2;
/*
 * Unsigned-pair encoder with magnitudes capped at 12; the band is walked
 * four coefficients (two pairs) per iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store,
 *    then capped at 12 by the asm;
 *  - sign1/count1 describe the pair (qc1, qc2) and sign2/count2 the pair
 *    (qc3, qc4): signN collects one bit (in[] word < 0) per non-zero qc,
 *    countN is the number of qc values greater than zero;
 *  - each pair yields v_codes = (p_codes[curidx] << countN) | signN and
 *    v_bits = p_bits[curidx] + countN.
 */
544 static void quantize_and_encode_band_cost_UPAIR12_mips(
struct AACEncContext *s,
546 const float *scaled,
int size,
int scale_idx,
547 int cb,
const float lambda,
const float uplim,
552 int qc1, qc2, qc3, qc4;
559 for (i = 0; i <
size; i += 4) {
560 int curidx, sign1, count1, sign2, count2;
561 int *in_int = (
int *)&in[i];
563 unsigned int v_codes;
566 qc1 = scaled[i ] * Q34 + 0.4054f;
567 qc2 = scaled[i+1] * Q34 + 0.4054f;
568 qc3 = scaled[i+2] * Q34 + 0.4054f;
569 qc4 = scaled[i+3] * Q34 + 0.4054f;
573 ".set noreorder \n\t"
/* t4 = 12; qc_k = 12 wherever 12 < qc_k */
575 "ori %[t4], $zero, 12 \n\t"
576 "ori %[sign1], $zero, 0 \n\t"
577 "ori %[sign2], $zero, 0 \n\t"
578 "slt %[t0], %[t4], %[qc1] \n\t"
579 "slt %[t1], %[t4], %[qc2] \n\t"
580 "slt %[t2], %[t4], %[qc3] \n\t"
581 "slt %[t3], %[t4], %[qc4] \n\t"
582 "movn %[qc1], %[t4], %[t0] \n\t"
583 "movn %[qc2], %[t4], %[t1] \n\t"
584 "movn %[qc3], %[t4], %[t2] \n\t"
585 "movn %[qc4], %[t4], %[t3] \n\t"
/* t_k = (in[] word < 0); signN = (signN << 1) | t_k only for non-zero qc_k */
586 "lw %[t0], 0(%[in_int]) \n\t"
587 "lw %[t1], 4(%[in_int]) \n\t"
588 "lw %[t2], 8(%[in_int]) \n\t"
589 "lw %[t3], 12(%[in_int]) \n\t"
590 "slt %[t0], %[t0], $zero \n\t"
591 "movn %[sign1], %[t0], %[qc1] \n\t"
592 "slt %[t2], %[t2], $zero \n\t"
593 "movn %[sign2], %[t2], %[qc3] \n\t"
594 "slt %[t1], %[t1], $zero \n\t"
595 "sll %[t0], %[sign1], 1 \n\t"
596 "or %[t0], %[t0], %[t1] \n\t"
597 "movn %[sign1], %[t0], %[qc2] \n\t"
598 "slt %[t3], %[t3], $zero \n\t"
599 "sll %[t0], %[sign2], 1 \n\t"
600 "or %[t0], %[t0], %[t3] \n\t"
601 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of values greater than zero in the pair */
602 "slt %[count1], $zero, %[qc1] \n\t"
603 "slt %[t1], $zero, %[qc2] \n\t"
604 "slt %[count2], $zero, %[qc3] \n\t"
605 "slt %[t2], $zero, %[qc4] \n\t"
606 "addu %[count1], %[count1], %[t1] \n\t"
607 "addu %[count2], %[count2], %[t2] \n\t"
611 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
612 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
613 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
614 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
615 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
617 : [in_int]
"r"(in_int)
/* first pair: codeword followed by its sign bits */
624 v_codes = (p_codes[curidx] << count1) | sign1;
625 v_bits = p_bits[curidx] + count1;
/* second pair: codeword followed by its sign bits */
631 v_codes = (p_codes[curidx] << count2) | sign2;
632 v_bits = p_bits[curidx] + count2;
/*
 * Escape-codebook encoder.  Two loops over the band are present, both
 * handling four coefficients (two pairs) per iteration:
 *  - the first caps each quantized value at 16, builds sign1/count1 for
 *    (qc1, qc2) and sign2/count2 for (qc3, qc4), and forms the codewords
 *    (p_codes[curidx] << count1) | sign1 and
 *    (p_codes[curidx2] << count2) | sign2;
 *  - the second does the same and additionally keeps c1..c4, copies of the
 *    quantized values taken before the cap at 16 and limited by a saturating
 *    shift left by 18 followed by a logical shift right by 18.  When the
 *    matching p_vectors[] entry equals 64.0f, an extra word is built from
 *    the low len bits of c_k below the prefix (1 << (len - 3)) - 2.
 * NOTE(review): which condition selects between the two loops, and how len
 * is derived, should be confirmed against the surrounding statements.
 */
637 static void quantize_and_encode_band_cost_ESC_mips(
struct AACEncContext *s,
639 const float *scaled,
int size,
int scale_idx,
640 int cb,
const float lambda,
const float uplim,
645 int qc1, qc2, qc3, qc4;
655 for (i = 0; i <
size; i += 4) {
656 int curidx, curidx2, sign1, count1, sign2, count2;
657 int *in_int = (
int *)&in[i];
659 unsigned int v_codes;
662 qc1 = scaled[i ] * Q34 + 0.4054f;
663 qc2 = scaled[i+1] * Q34 + 0.4054f;
664 qc3 = scaled[i+2] * Q34 + 0.4054f;
665 qc4 = scaled[i+3] * Q34 + 0.4054f;
669 ".set noreorder \n\t"
/* t4 = 16; qc_k = 16 wherever 16 < qc_k */
671 "ori %[t4], $zero, 16 \n\t"
672 "ori %[sign1], $zero, 0 \n\t"
673 "ori %[sign2], $zero, 0 \n\t"
674 "slt %[t0], %[t4], %[qc1] \n\t"
675 "slt %[t1], %[t4], %[qc2] \n\t"
676 "slt %[t2], %[t4], %[qc3] \n\t"
677 "slt %[t3], %[t4], %[qc4] \n\t"
678 "movn %[qc1], %[t4], %[t0] \n\t"
679 "movn %[qc2], %[t4], %[t1] \n\t"
680 "movn %[qc3], %[t4], %[t2] \n\t"
681 "movn %[qc4], %[t4], %[t3] \n\t"
/* t_k = (in[] word < 0); signN = (signN << 1) | t_k only for non-zero qc_k */
682 "lw %[t0], 0(%[in_int]) \n\t"
683 "lw %[t1], 4(%[in_int]) \n\t"
684 "lw %[t2], 8(%[in_int]) \n\t"
685 "lw %[t3], 12(%[in_int]) \n\t"
686 "slt %[t0], %[t0], $zero \n\t"
687 "movn %[sign1], %[t0], %[qc1] \n\t"
688 "slt %[t2], %[t2], $zero \n\t"
689 "movn %[sign2], %[t2], %[qc3] \n\t"
690 "slt %[t1], %[t1], $zero \n\t"
691 "sll %[t0], %[sign1], 1 \n\t"
692 "or %[t0], %[t0], %[t1] \n\t"
693 "movn %[sign1], %[t0], %[qc2] \n\t"
694 "slt %[t3], %[t3], $zero \n\t"
695 "sll %[t0], %[sign2], 1 \n\t"
696 "or %[t0], %[t0], %[t3] \n\t"
697 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of values greater than zero in the pair */
698 "slt %[count1], $zero, %[qc1] \n\t"
699 "slt %[t1], $zero, %[qc2] \n\t"
700 "slt %[count2], $zero, %[qc3] \n\t"
701 "slt %[t2], $zero, %[qc4] \n\t"
702 "addu %[count1], %[count1], %[t1] \n\t"
703 "addu %[count2], %[count2], %[t2] \n\t"
707 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
708 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
709 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
710 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
711 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
713 : [in_int]
"r"(in_int)
722 v_codes = (p_codes[curidx] << count1) | sign1;
723 v_bits = p_bits[curidx] + count1;
726 v_codes = (p_codes[curidx2] << count2) | sign2;
727 v_bits = p_bits[curidx2] + count2;
/* second loop: same quantization, plus c1..c4 for the escape words */
731 for (i = 0; i <
size; i += 4) {
732 int curidx, curidx2, sign1, count1, sign2, count2;
733 int *in_int = (
int *)&in[i];
735 unsigned int v_codes;
739 qc1 = scaled[i ] * Q34 + 0.4054f;
740 qc2 = scaled[i+1] * Q34 + 0.4054f;
741 qc3 = scaled[i+2] * Q34 + 0.4054f;
742 qc4 = scaled[i+3] * Q34 + 0.4054f;
746 ".set noreorder \n\t"
748 "ori %[t4], $zero, 16 \n\t"
749 "ori %[sign1], $zero, 0 \n\t"
750 "ori %[sign2], $zero, 0 \n\t"
/* c_k = qc_k after a saturating <<18 and a logical >>18, taken before qc_k
   is capped at 16 below */
751 "shll_s.w %[c1], %[qc1], 18 \n\t"
752 "shll_s.w %[c2], %[qc2], 18 \n\t"
753 "shll_s.w %[c3], %[qc3], 18 \n\t"
754 "shll_s.w %[c4], %[qc4], 18 \n\t"
755 "srl %[c1], %[c1], 18 \n\t"
756 "srl %[c2], %[c2], 18 \n\t"
757 "srl %[c3], %[c3], 18 \n\t"
758 "srl %[c4], %[c4], 18 \n\t"
759 "slt %[t0], %[t4], %[qc1] \n\t"
760 "slt %[t1], %[t4], %[qc2] \n\t"
761 "slt %[t2], %[t4], %[qc3] \n\t"
762 "slt %[t3], %[t4], %[qc4] \n\t"
763 "movn %[qc1], %[t4], %[t0] \n\t"
764 "movn %[qc2], %[t4], %[t1] \n\t"
765 "movn %[qc3], %[t4], %[t2] \n\t"
766 "movn %[qc4], %[t4], %[t3] \n\t"
767 "lw %[t0], 0(%[in_int]) \n\t"
768 "lw %[t1], 4(%[in_int]) \n\t"
769 "lw %[t2], 8(%[in_int]) \n\t"
770 "lw %[t3], 12(%[in_int]) \n\t"
771 "slt %[t0], %[t0], $zero \n\t"
772 "movn %[sign1], %[t0], %[qc1] \n\t"
773 "slt %[t2], %[t2], $zero \n\t"
774 "movn %[sign2], %[t2], %[qc3] \n\t"
775 "slt %[t1], %[t1], $zero \n\t"
776 "sll %[t0], %[sign1], 1 \n\t"
777 "or %[t0], %[t0], %[t1] \n\t"
778 "movn %[sign1], %[t0], %[qc2] \n\t"
779 "slt %[t3], %[t3], $zero \n\t"
780 "sll %[t0], %[sign2], 1 \n\t"
781 "or %[t0], %[t0], %[t3] \n\t"
782 "movn %[sign2], %[t0], %[qc4] \n\t"
783 "slt %[count1], $zero, %[qc1] \n\t"
784 "slt %[t1], $zero, %[qc2] \n\t"
785 "slt %[count2], $zero, %[qc3] \n\t"
786 "slt %[t2], $zero, %[qc4] \n\t"
787 "addu %[count1], %[count1], %[t1] \n\t"
788 "addu %[count2], %[count2], %[t2] \n\t"
792 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
793 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
794 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
795 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
796 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
797 [c3]
"=&r"(c3), [c4]
"=&r"(c4),
798 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
800 : [in_int]
"r"(in_int)
810 v_codes = (p_codes[curidx] << count1) | sign1;
811 v_bits = p_bits[curidx] + count1;
/* escape words for the first pair: prefix, then the low len bits of c_k */
814 if (p_vectors[curidx*2 ] == 64.0f) {
816 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 <<
len) - 1));
819 if (p_vectors[curidx*2+1] == 64.0f) {
821 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 <<
len) - 1));
825 v_codes = (p_codes[curidx2] << count2) | sign2;
826 v_bits = p_bits[curidx2] + count2;
/* escape words for the second pair */
829 if (p_vectors[curidx2*2 ] == 64.0f) {
831 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 <<
len) - 1));
834 if (p_vectors[curidx2*2+1] == 64.0f) {
836 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 <<
len) - 1));
845 const float *scaled,
int size,
int scale_idx,
846 int cb,
const float lambda,
const float uplim,
/*
 * Function-pointer entries: two per codebook family in the order SQUAD,
 * UQUAD, SPAIR, UPAIR7, UPAIR12, followed by a single ESC entry.
 * NOTE(review): presumably these initialise quantize_and_encode_band_cost_arr,
 * which the quantize_and_encode_band_cost() macro indexes with cb - confirm
 * against the declarator and any entry preceding the first SQUAD slot.
 */
849 quantize_and_encode_band_cost_SQUAD_mips,
850 quantize_and_encode_band_cost_SQUAD_mips,
851 quantize_and_encode_band_cost_UQUAD_mips,
852 quantize_and_encode_band_cost_UQUAD_mips,
853 quantize_and_encode_band_cost_SPAIR_mips,
854 quantize_and_encode_band_cost_SPAIR_mips,
855 quantize_and_encode_band_cost_UPAIR7_mips,
856 quantize_and_encode_band_cost_UPAIR7_mips,
857 quantize_and_encode_band_cost_UPAIR12_mips,
858 quantize_and_encode_band_cost_UPAIR12_mips,
859 quantize_and_encode_band_cost_ESC_mips,
/*
 * Dispatch helper: selects the encoder from
 * quantize_and_encode_band_cost_arr by cb and forwards the macro arguments
 * to it.  cb is used both as the table index and as a forwarded argument.
 */
862 #define quantize_and_encode_band_cost( \
863 s, pb, in, scaled, size, scale_idx, cb, \
864 lambda, uplim, bits) \
865 quantize_and_encode_band_cost_arr[cb]( \
866 s, pb, in, scaled, size, scale_idx, cb, \
870 const float *in,
int size,
int scale_idx,
871 int cb,
const float lambda)
/*
 * Bit-count estimator listed first in get_band_numbits_arr; it shares the
 * parameter list of the other get_band_numbits_*_mips() functions.
 * NOTE(review): presumably the handler for codebook 0 - confirm against the
 * function body.
 */
880 static float get_band_numbits_ZERO_mips(
struct AACEncContext *s,
882 const float *scaled,
int size,
int scale_idx,
883 int cb,
const float lambda,
const float uplim,
/*
 * Bit-count estimator for the signed-quad case; four coefficients per
 * iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store;
 *  - the asm turns each qc into 0 or 1 and negates it when bit 31 of the
 *    matching in[] word is set, giving -1, 0 or 1;
 *  - p_bits[curidx] is added to curbits.
 */
889 static float get_band_numbits_SQUAD_mips(
struct AACEncContext *s,
891 const float *scaled,
int size,
int scale_idx,
892 int cb,
const float lambda,
const float uplim,
897 int qc1, qc2, qc3, qc4;
902 for (i = 0; i <
size; i += 4) {
904 int *in_int = (
int *)&in[i];
907 qc1 = scaled[i ] * Q34 + 0.4054f;
908 qc2 = scaled[i+1] * Q34 + 0.4054f;
909 qc3 = scaled[i+2] * Q34 + 0.4054f;
910 qc4 = scaled[i+3] * Q34 + 0.4054f;
914 ".set noreorder \n\t"
/* qc_k = (0 < qc_k): every positive quantized value becomes 1 */
916 "slt %[qc1], $zero, %[qc1] \n\t"
917 "slt %[qc2], $zero, %[qc2] \n\t"
918 "slt %[qc3], $zero, %[qc3] \n\t"
919 "slt %[qc4], $zero, %[qc4] \n\t"
/* load the four in[] words and keep only bit 31 of each */
920 "lw %[t0], 0(%[in_int]) \n\t"
921 "lw %[t1], 4(%[in_int]) \n\t"
922 "lw %[t2], 8(%[in_int]) \n\t"
923 "lw %[t3], 12(%[in_int]) \n\t"
924 "srl %[t0], %[t0], 31 \n\t"
925 "srl %[t1], %[t1], 31 \n\t"
926 "srl %[t2], %[t2], 31 \n\t"
927 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qc_k; movn installs the negated value where bit 31 was set */
928 "subu %[t4], $zero, %[qc1] \n\t"
929 "subu %[t5], $zero, %[qc2] \n\t"
930 "subu %[t6], $zero, %[qc3] \n\t"
931 "subu %[t7], $zero, %[qc4] \n\t"
932 "movn %[qc1], %[t4], %[t0] \n\t"
933 "movn %[qc2], %[t5], %[t1] \n\t"
934 "movn %[qc3], %[t6], %[t2] \n\t"
935 "movn %[qc4], %[t7], %[t3] \n\t"
939 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
940 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
941 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
942 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
943 : [in_int]
"r"(in_int)
956 curbits += p_bits[curidx];
/*
 * Bit-count estimator for the unsigned-quad case; four coefficients per
 * iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store,
 *    then capped at 2 by the asm;
 *  - p_bits[curidx] and uquad_sign_bits[curidx] are added to curbits.
 */
961 static float get_band_numbits_UQUAD_mips(
struct AACEncContext *s,
963 const float *scaled,
int size,
int scale_idx,
964 int cb,
const float lambda,
const float uplim,
970 int qc1, qc2, qc3, qc4;
974 for (i = 0; i <
size; i += 4) {
978 qc1 = scaled[i ] * Q34 + 0.4054f;
979 qc2 = scaled[i+1] * Q34 + 0.4054f;
980 qc3 = scaled[i+2] * Q34 + 0.4054f;
981 qc4 = scaled[i+3] * Q34 + 0.4054f;
985 ".set noreorder \n\t"
/* t4 = 2; qc_k = 2 wherever 2 < qc_k */
987 "ori %[t4], $zero, 2 \n\t"
988 "slt %[t0], %[t4], %[qc1] \n\t"
989 "slt %[t1], %[t4], %[qc2] \n\t"
990 "slt %[t2], %[t4], %[qc3] \n\t"
991 "slt %[t3], %[t4], %[qc4] \n\t"
992 "movn %[qc1], %[t4], %[t0] \n\t"
993 "movn %[qc2], %[t4], %[t1] \n\t"
994 "movn %[qc3], %[t4], %[t2] \n\t"
995 "movn %[qc4], %[t4], %[t3] \n\t"
999 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1000 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1001 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
/* codeword length plus the table-driven sign-bit count */
1013 curbits += p_bits[curidx];
1014 curbits += uquad_sign_bits[curidx];
/*
 * Bit-count estimator for the signed-pair case; four coefficients (two
 * pairs) per iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store,
 *    then capped at 4 by the asm;
 *  - each qc is negated when bit 31 of the matching in[] word is set;
 *  - p_bits[curidx] + p_bits[curidx2] is added to curbits.
 */
1019 static float get_band_numbits_SPAIR_mips(
struct AACEncContext *s,
1021 const float *scaled,
int size,
int scale_idx,
1022 int cb,
const float lambda,
const float uplim,
1027 int qc1, qc2, qc3, qc4;
1032 for (i = 0; i <
size; i += 4) {
1033 int curidx, curidx2;
1034 int *in_int = (
int *)&in[i];
1037 qc1 = scaled[i ] * Q34 + 0.4054f;
1038 qc2 = scaled[i+1] * Q34 + 0.4054f;
1039 qc3 = scaled[i+2] * Q34 + 0.4054f;
1040 qc4 = scaled[i+3] * Q34 + 0.4054f;
1044 ".set noreorder \n\t"
/* t4 = 4; qc_k = 4 wherever 4 < qc_k */
1046 "ori %[t4], $zero, 4 \n\t"
1047 "slt %[t0], %[t4], %[qc1] \n\t"
1048 "slt %[t1], %[t4], %[qc2] \n\t"
1049 "slt %[t2], %[t4], %[qc3] \n\t"
1050 "slt %[t3], %[t4], %[qc4] \n\t"
1051 "movn %[qc1], %[t4], %[t0] \n\t"
1052 "movn %[qc2], %[t4], %[t1] \n\t"
1053 "movn %[qc3], %[t4], %[t2] \n\t"
1054 "movn %[qc4], %[t4], %[t3] \n\t"
/* load the four in[] words and keep only bit 31 of each */
1055 "lw %[t0], 0(%[in_int]) \n\t"
1056 "lw %[t1], 4(%[in_int]) \n\t"
1057 "lw %[t2], 8(%[in_int]) \n\t"
1058 "lw %[t3], 12(%[in_int]) \n\t"
1059 "srl %[t0], %[t0], 31 \n\t"
1060 "srl %[t1], %[t1], 31 \n\t"
1061 "srl %[t2], %[t2], 31 \n\t"
1062 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qc_k; movn installs the negated value where bit 31 was set */
1063 "subu %[t4], $zero, %[qc1] \n\t"
1064 "subu %[t5], $zero, %[qc2] \n\t"
1065 "subu %[t6], $zero, %[qc3] \n\t"
1066 "subu %[t7], $zero, %[qc4] \n\t"
1067 "movn %[qc1], %[t4], %[t0] \n\t"
1068 "movn %[qc2], %[t5], %[t1] \n\t"
1069 "movn %[qc3], %[t6], %[t2] \n\t"
1070 "movn %[qc4], %[t7], %[t3] \n\t"
1074 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1075 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1076 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1077 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1078 : [in_int]
"r"(in_int)
/* NOTE(review): the + 40 presumably re-centres a signed pair index built
   from values in -4..4 (9 * 4 + 4 = 40) - confirm against the statement
   that initialises curidx2. */
1086 curidx2 += qc4 + 40;
1088 curbits += p_bits[curidx] + p_bits[curidx2];
/*
 * Bit-count estimator for the unsigned-pair case with magnitudes capped at
 * 7; four coefficients (two pairs) per iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store,
 *    then capped at 7 by the asm;
 *  - curbits grows by a sum that includes p_bits[curidx],
 *    upair7_sign_bits[curidx] and upair7_sign_bits[curidx2].
 */
1093 static float get_band_numbits_UPAIR7_mips(
struct AACEncContext *s,
1095 const float *scaled,
int size,
int scale_idx,
1096 int cb,
const float lambda,
const float uplim,
1101 int qc1, qc2, qc3, qc4;
1106 for (i = 0; i <
size; i += 4) {
1107 int curidx, curidx2;
1110 qc1 = scaled[i ] * Q34 + 0.4054f;
1111 qc2 = scaled[i+1] * Q34 + 0.4054f;
1112 qc3 = scaled[i+2] * Q34 + 0.4054f;
1113 qc4 = scaled[i+3] * Q34 + 0.4054f;
1117 ".set noreorder \n\t"
/* t4 = 7; qc_k = 7 wherever 7 < qc_k */
1119 "ori %[t4], $zero, 7 \n\t"
1120 "slt %[t0], %[t4], %[qc1] \n\t"
1121 "slt %[t1], %[t4], %[qc2] \n\t"
1122 "slt %[t2], %[t4], %[qc3] \n\t"
1123 "slt %[t3], %[t4], %[qc4] \n\t"
1124 "movn %[qc1], %[t4], %[t0] \n\t"
1125 "movn %[qc2], %[t4], %[t1] \n\t"
1126 "movn %[qc3], %[t4], %[t2] \n\t"
1127 "movn %[qc4], %[t4], %[t3] \n\t"
1131 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1132 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1133 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
/* codeword length and sign-bit counts for both pairs */
1143 curbits += p_bits[curidx] +
1144 upair7_sign_bits[curidx] +
1146 upair7_sign_bits[curidx2];
/*
 * Bit-count estimator for the unsigned-pair case with magnitudes capped at
 * 12; four coefficients (two pairs) per iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store,
 *    then capped at 12 by the asm;
 *  - curbits grows by a sum that includes p_bits[curidx],
 *    upair12_sign_bits[curidx] and upair12_sign_bits[curidx2].
 */
1151 static float get_band_numbits_UPAIR12_mips(
struct AACEncContext *s,
1153 const float *scaled,
int size,
int scale_idx,
1154 int cb,
const float lambda,
const float uplim,
1159 int qc1, qc2, qc3, qc4;
1164 for (i = 0; i <
size; i += 4) {
1165 int curidx, curidx2;
1168 qc1 = scaled[i ] * Q34 + 0.4054f;
1169 qc2 = scaled[i+1] * Q34 + 0.4054f;
1170 qc3 = scaled[i+2] * Q34 + 0.4054f;
1171 qc4 = scaled[i+3] * Q34 + 0.4054f;
1175 ".set noreorder \n\t"
/* t4 = 12; qc_k = 12 wherever 12 < qc_k */
1177 "ori %[t4], $zero, 12 \n\t"
1178 "slt %[t0], %[t4], %[qc1] \n\t"
1179 "slt %[t1], %[t4], %[qc2] \n\t"
1180 "slt %[t2], %[t4], %[qc3] \n\t"
1181 "slt %[t3], %[t4], %[qc4] \n\t"
1182 "movn %[qc1], %[t4], %[t0] \n\t"
1183 "movn %[qc2], %[t4], %[t1] \n\t"
1184 "movn %[qc3], %[t4], %[t2] \n\t"
1185 "movn %[qc4], %[t4], %[t3] \n\t"
1189 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1190 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1191 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
/* codeword length and sign-bit counts for both pairs */
1201 curbits += p_bits[curidx] +
1203 upair12_sign_bits[curidx] +
1204 upair12_sign_bits[curidx2];
/*
 * Bit-count estimator for the escape case; four coefficients (two pairs)
 * per iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store;
 *  - c_k starts as qc_k limited by a saturating shift left by 18 followed by
 *    a logical shift right by 18;
 *  - cond_k = (15 < qc_k) and qc_k is replaced by 16 where cond_k is set;
 *  - c_k ends as 2 * (31 - clz(c_k)) - 3 where cond_k was set and 0
 *    elsewhere;
 *  - p_bits[] and esc_sign_bits[] for curidx and curidx2 are added to
 *    curbits.
 * NOTE(review): c1..c4 presumably hold the extra escape-sequence lengths
 * that are also added to curbits - confirm against the statements that
 * follow the table lookups.
 */
1209 static float get_band_numbits_ESC_mips(
struct AACEncContext *s,
1211 const float *scaled,
int size,
int scale_idx,
1212 int cb,
const float lambda,
const float uplim,
1217 int qc1, qc2, qc3, qc4;
1222 for (i = 0; i <
size; i += 4) {
1223 int curidx, curidx2;
1224 int cond0, cond1, cond2, cond3;
1228 qc1 = scaled[i ] * Q34 + 0.4054f;
1229 qc2 = scaled[i+1] * Q34 + 0.4054f;
1230 qc3 = scaled[i+2] * Q34 + 0.4054f;
1231 qc4 = scaled[i+3] * Q34 + 0.4054f;
1235 ".set noreorder \n\t"
/* t4 = 15 is the comparison limit, t5 = 16 the replacement value */
1237 "ori %[t4], $zero, 15 \n\t"
1238 "ori %[t5], $zero, 16 \n\t"
/* c_k = qc_k after a saturating <<18 and a logical >>18 */
1239 "shll_s.w %[c1], %[qc1], 18 \n\t"
1240 "shll_s.w %[c2], %[qc2], 18 \n\t"
1241 "shll_s.w %[c3], %[qc3], 18 \n\t"
1242 "shll_s.w %[c4], %[qc4], 18 \n\t"
1243 "srl %[c1], %[c1], 18 \n\t"
1244 "srl %[c2], %[c2], 18 \n\t"
1245 "srl %[c3], %[c3], 18 \n\t"
1246 "srl %[c4], %[c4], 18 \n\t"
/* cond_k = (15 < qc_k); qc_k = 16 where cond_k is set */
1247 "slt %[cond0], %[t4], %[qc1] \n\t"
1248 "slt %[cond1], %[t4], %[qc2] \n\t"
1249 "slt %[cond2], %[t4], %[qc3] \n\t"
1250 "slt %[cond3], %[t4], %[qc4] \n\t"
1251 "movn %[qc1], %[t5], %[cond0] \n\t"
1252 "movn %[qc2], %[t5], %[cond1] \n\t"
1253 "movn %[qc3], %[t5], %[cond2] \n\t"
1254 "movn %[qc4], %[t5], %[cond3] \n\t"
/* c_k = 2 * (31 - clz(c_k)) - 3 */
1255 "ori %[t5], $zero, 31 \n\t"
1256 "clz %[c1], %[c1] \n\t"
1257 "clz %[c2], %[c2] \n\t"
1258 "clz %[c3], %[c3] \n\t"
1259 "clz %[c4], %[c4] \n\t"
1260 "subu %[c1], %[t5], %[c1] \n\t"
1261 "subu %[c2], %[t5], %[c2] \n\t"
1262 "subu %[c3], %[t5], %[c3] \n\t"
1263 "subu %[c4], %[t5], %[c4] \n\t"
1264 "sll %[c1], %[c1], 1 \n\t"
1265 "sll %[c2], %[c2], 1 \n\t"
1266 "sll %[c3], %[c3], 1 \n\t"
1267 "sll %[c4], %[c4], 1 \n\t"
1268 "addiu %[c1], %[c1], -3 \n\t"
1269 "addiu %[c2], %[c2], -3 \n\t"
1270 "addiu %[c3], %[c3], -3 \n\t"
1271 "addiu %[c4], %[c4], -3 \n\t"
/* cond_k becomes 0 or all-ones, so the AND keeps c_k only where qc_k
   exceeded 15 */
1272 "subu %[cond0], $zero, %[cond0] \n\t"
1273 "subu %[cond1], $zero, %[cond1] \n\t"
1274 "subu %[cond2], $zero, %[cond2] \n\t"
1275 "subu %[cond3], $zero, %[cond3] \n\t"
1276 "and %[c1], %[c1], %[cond0] \n\t"
1277 "and %[c2], %[c2], %[cond1] \n\t"
1278 "and %[c3], %[c3], %[cond2] \n\t"
1279 "and %[c4], %[c4], %[cond3] \n\t"
1283 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1284 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1285 [cond0]
"=&r"(cond0), [cond1]
"=&r"(cond1),
1286 [cond2]
"=&r"(cond2), [cond3]
"=&r"(cond3),
1287 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
1288 [c3]
"=&r"(c3), [c4]
"=&r"(c4),
1289 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5)
/* codeword lengths and sign-bit counts for both pairs */
1298 curbits += p_bits[curidx];
1299 curbits += esc_sign_bits[curidx];
1300 curbits += p_bits[curidx2];
1301 curbits += esc_sign_bits[curidx2];
/*
 * Table of bit-count estimators, indexed by cb through the
 * get_band_numbits() macro.  Entries are listed as: ZERO once, then two
 * slots each for SQUAD, UQUAD, SPAIR, UPAIR7 and UPAIR12, then ESC once.
 */
1311 static float (*
const get_band_numbits_arr[])(
struct AACEncContext *
s,
1313 const float *scaled,
int size,
int scale_idx,
1314 int cb,
const float lambda,
const float uplim,
1316 get_band_numbits_ZERO_mips,
1317 get_band_numbits_SQUAD_mips,
1318 get_band_numbits_SQUAD_mips,
1319 get_band_numbits_UQUAD_mips,
1320 get_band_numbits_UQUAD_mips,
1321 get_band_numbits_SPAIR_mips,
1322 get_band_numbits_SPAIR_mips,
1323 get_band_numbits_UPAIR7_mips,
1324 get_band_numbits_UPAIR7_mips,
1325 get_band_numbits_UPAIR12_mips,
1326 get_band_numbits_UPAIR12_mips,
1327 get_band_numbits_ESC_mips,
/*
 * Dispatch helper: selects the estimator from get_band_numbits_arr by cb and
 * forwards all ten macro arguments unchanged.  cb is used both as the table
 * index and as a forwarded argument.
 */
1330 #define get_band_numbits( \
1331 s, pb, in, scaled, size, scale_idx, cb, \
1332 lambda, uplim, bits) \
1333 get_band_numbits_arr[cb]( \
1334 s, pb, in, scaled, size, scale_idx, cb, \
1335 lambda, uplim, bits)
/*
 * Thin wrapper around get_band_numbits(): forwards its arguments with NULL
 * in the pb position and returns the estimator's result.
 */
1337 static float quantize_band_cost_bits(
struct AACEncContext *s,
const float *in,
1338 const float *scaled,
int size,
int scale_idx,
1339 int cb,
const float lambda,
const float uplim,
1342 return get_band_numbits(s,
NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
/*
 * Cost routine whose loop accumulates the squares of in[i..i+3] into cost,
 * four coefficients per iteration; the loop reads in[] in groups of four up
 * to size.
 */
1349 static float get_band_cost_ZERO_mips(
struct AACEncContext *s,
1351 const float *scaled,
int size,
int scale_idx,
1352 int cb,
const float lambda,
const float uplim,
1358 for (i = 0; i <
size; i += 4) {
1359 cost += in[i ] * in[i ];
1360 cost += in[i+1] * in[i+1];
1361 cost += in[i+2] * in[i+2];
1362 cost += in[i+3] * in[i+3];
/*
 * Rate-distortion cost for the signed-quad case; four coefficients per
 * iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store,
 *    then reduced to -1, 0 or 1 by the first asm block (sign taken from
 *    bit 31 of the matching in[] word);
 *  - p_bits[curidx] is added to curbits and vec points at the four floats
 *    starting at p_codes[curidx * 4];
 *  - the second asm block computes di_k = in[i + k] - vec[k] * IQ with
 *    nmsub.s, and the squares of di0..di3 are added to cost.
 * Returns cost * lambda + curbits.
 */
1369 static float get_band_cost_SQUAD_mips(
struct AACEncContext *s,
1371 const float *scaled,
int size,
int scale_idx,
1372 int cb,
const float lambda,
const float uplim,
1379 int qc1, qc2, qc3, qc4;
1385 for (i = 0; i <
size; i += 4) {
1388 int *in_int = (
int *)&in[i];
1389 float *in_pos = (
float *)&in[i];
1390 float di0, di1, di2, di3;
1393 qc1 = scaled[i ] * Q34 + 0.4054f;
1394 qc2 = scaled[i+1] * Q34 + 0.4054f;
1395 qc3 = scaled[i+2] * Q34 + 0.4054f;
1396 qc4 = scaled[i+3] * Q34 + 0.4054f;
1400 ".set noreorder \n\t"
/* qc_k = (0 < qc_k): every positive quantized value becomes 1 */
1402 "slt %[qc1], $zero, %[qc1] \n\t"
1403 "slt %[qc2], $zero, %[qc2] \n\t"
1404 "slt %[qc3], $zero, %[qc3] \n\t"
1405 "slt %[qc4], $zero, %[qc4] \n\t"
/* load the four in[] words and keep only bit 31 of each */
1406 "lw %[t0], 0(%[in_int]) \n\t"
1407 "lw %[t1], 4(%[in_int]) \n\t"
1408 "lw %[t2], 8(%[in_int]) \n\t"
1409 "lw %[t3], 12(%[in_int]) \n\t"
1410 "srl %[t0], %[t0], 31 \n\t"
1411 "srl %[t1], %[t1], 31 \n\t"
1412 "srl %[t2], %[t2], 31 \n\t"
1413 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qc_k; movn installs the negated value where bit 31 was set */
1414 "subu %[t4], $zero, %[qc1] \n\t"
1415 "subu %[t5], $zero, %[qc2] \n\t"
1416 "subu %[t6], $zero, %[qc3] \n\t"
1417 "subu %[t7], $zero, %[qc4] \n\t"
1418 "movn %[qc1], %[t4], %[t0] \n\t"
1419 "movn %[qc2], %[t5], %[t1] \n\t"
1420 "movn %[qc3], %[t6], %[t2] \n\t"
1421 "movn %[qc4], %[t7], %[t3] \n\t"
1425 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1426 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1427 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1428 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1429 : [in_int]
"r"(in_int)
1442 curbits += p_bits[curidx];
1443 vec = &p_codes[curidx*4];
1447 ".set noreorder \n\t"
/* even FP registers take in[i + k], odd ones take vec[k] */
1449 "lwc1 $f0, 0(%[in_pos]) \n\t"
1450 "lwc1 $f1, 0(%[vec]) \n\t"
1451 "lwc1 $f2, 4(%[in_pos]) \n\t"
1452 "lwc1 $f3, 4(%[vec]) \n\t"
1453 "lwc1 $f4, 8(%[in_pos]) \n\t"
1454 "lwc1 $f5, 8(%[vec]) \n\t"
1455 "lwc1 $f6, 12(%[in_pos]) \n\t"
1456 "lwc1 $f7, 12(%[vec]) \n\t"
/* di_k = in[i + k] - vec[k] * IQ */
1457 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1458 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1459 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1460 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1464 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1465 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1466 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1468 :
"$f0",
"$f1",
"$f2",
"$f3",
1469 "$f4",
"$f5",
"$f6",
"$f7",
/* squared error of the four coefficients */
1473 cost += di0 * di0 + di1 * di1
1474 + di2 * di2 + di3 * di3;
1479 return cost * lambda + curbits;
/*
 * Rate-distortion cost for the unsigned-quad case; four coefficients per
 * iteration.
 *
 * For each group of four:
 *  - qc1..qc4 = scaled[i + k] * Q34 + 0.4054f, truncated by the int store,
 *    then capped at 2 by the first asm block;
 *  - p_bits[curidx] and uquad_sign_bits[curidx] are added to curbits and vec
 *    points at the four floats starting at p_codes[curidx * 4];
 *  - the second asm block computes di_k = |in[i + k]| - vec[k] * IQ (abs.s
 *    followed by nmsub.s), and the squares of di0..di3 are added to cost.
 * Returns cost * lambda + curbits.
 */
1482 static float get_band_cost_UQUAD_mips(
struct AACEncContext *s,
1484 const float *scaled,
int size,
int scale_idx,
1485 int cb,
const float lambda,
const float uplim,
1493 int qc1, qc2, qc3, qc4;
1498 for (i = 0; i <
size; i += 4) {
1501 float *in_pos = (
float *)&in[i];
1502 float di0, di1, di2, di3;
1505 qc1 = scaled[i ] * Q34 + 0.4054f;
1506 qc2 = scaled[i+1] * Q34 + 0.4054f;
1507 qc3 = scaled[i+2] * Q34 + 0.4054f;
1508 qc4 = scaled[i+3] * Q34 + 0.4054f;
1512 ".set noreorder \n\t"
/* t4 = 2; qc_k = 2 wherever 2 < qc_k */
1514 "ori %[t4], $zero, 2 \n\t"
1515 "slt %[t0], %[t4], %[qc1] \n\t"
1516 "slt %[t1], %[t4], %[qc2] \n\t"
1517 "slt %[t2], %[t4], %[qc3] \n\t"
1518 "slt %[t3], %[t4], %[qc4] \n\t"
1519 "movn %[qc1], %[t4], %[t0] \n\t"
1520 "movn %[qc2], %[t4], %[t1] \n\t"
1521 "movn %[qc3], %[t4], %[t2] \n\t"
1522 "movn %[qc4], %[t4], %[t3] \n\t"
1526 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1527 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1528 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1540 curbits += p_bits[curidx];
1541 curbits += uquad_sign_bits[curidx];
1542 vec = &p_codes[curidx*4];
1546 ".set noreorder \n\t"
/* di_k = |in[i + k]| */
1548 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1549 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1550 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1551 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1552 "abs.s %[di0], %[di0] \n\t"
1553 "abs.s %[di1], %[di1] \n\t"
1554 "abs.s %[di2], %[di2] \n\t"
1555 "abs.s %[di3], %[di3] \n\t"
/* di_k = di_k - vec[k] * IQ */
1556 "lwc1 $f0, 0(%[vec]) \n\t"
1557 "lwc1 $f1, 4(%[vec]) \n\t"
1558 "lwc1 $f2, 8(%[vec]) \n\t"
1559 "lwc1 $f3, 12(%[vec]) \n\t"
1560 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1561 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1562 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1563 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1567 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1568 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1569 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1571 :
"$f0",
"$f1",
"$f2",
"$f3",
/* squared error of the four coefficients */
1575 cost += di0 * di0 + di1 * di1
1576 + di2 * di2 + di3 * di3;
1581 return cost * lambda + curbits;
/**
 * Band cost for the SPAIR codebooks (MIPS inline-asm version).
 *
 * Processes four coefficients per iteration as two pairs. The visible code
 * quantizes scaled[i..i+3], clamps each result to at most 4, negates the
 * quantized value when the matching input has its sign bit set, adds the
 * p_bits[] entries of both pair indices to curbits, and accumulates the
 * squared error between in[] and the scaled codebook vectors in cost.
 *
 * NOTE(review): this excerpt omits several lines of the function (part of
 * the parameter list, the setup of Q34, IQ, p_bits and p_codes, most of the
 * curidx and curidx2 computation and the asm statement wrappers); those
 * parts are intentionally not described here.
 *
 * @return cost * lambda + curbits
 */
1584 static float get_band_cost_SPAIR_mips(
struct AACEncContext *s,
1586 const float *scaled,
int size,
int scale_idx,
1587 int cb,
const float lambda,
const float uplim,
1594 int qc1, qc2, qc3, qc4;
1600 for (i = 0; i <
size; i += 4) {
1601 const float *vec, *vec2;
1602 int curidx, curidx2;
/* Two views of the same four inputs: raw integer bits and floats. */
1603 int *in_int = (
int *)&in[i];
1604 float *in_pos = (
float *)&in[i];
1605 float di0, di1, di2, di3;
/* Quantize four coefficients; the float to int conversion truncates. */
1608 qc1 = scaled[i ] * Q34 + 0.4054f;
1609 qc2 = scaled[i+1] * Q34 + 0.4054f;
1610 qc3 = scaled[i+2] * Q34 + 0.4054f;
1611 qc4 = scaled[i+3] * Q34 + 0.4054f;
/*
 * Step 1: clamp qc1..qc4 to a maximum of 4 (slt flags values above the
 * limit in t4, movn replaces them with the limit).
 * Step 2: load the raw bits of the four inputs and shift right by 31 so
 * that t0..t3 hold the sign bit of each input.
 * Step 3: t4..t7 = 0 - qcN, and movn selects the negated value when the
 * sign bit of the corresponding input is set.
 */
1615 ".set noreorder \n\t"
1617 "ori %[t4], $zero, 4 \n\t"
1618 "slt %[t0], %[t4], %[qc1] \n\t"
1619 "slt %[t1], %[t4], %[qc2] \n\t"
1620 "slt %[t2], %[t4], %[qc3] \n\t"
1621 "slt %[t3], %[t4], %[qc4] \n\t"
1622 "movn %[qc1], %[t4], %[t0] \n\t"
1623 "movn %[qc2], %[t4], %[t1] \n\t"
1624 "movn %[qc3], %[t4], %[t2] \n\t"
1625 "movn %[qc4], %[t4], %[t3] \n\t"
1626 "lw %[t0], 0(%[in_int]) \n\t"
1627 "lw %[t1], 4(%[in_int]) \n\t"
1628 "lw %[t2], 8(%[in_int]) \n\t"
1629 "lw %[t3], 12(%[in_int]) \n\t"
1630 "srl %[t0], %[t0], 31 \n\t"
1631 "srl %[t1], %[t1], 31 \n\t"
1632 "srl %[t2], %[t2], 31 \n\t"
1633 "srl %[t3], %[t3], 31 \n\t"
1634 "subu %[t4], $zero, %[qc1] \n\t"
1635 "subu %[t5], $zero, %[qc2] \n\t"
1636 "subu %[t6], $zero, %[qc3] \n\t"
1637 "subu %[t7], $zero, %[qc4] \n\t"
1638 "movn %[qc1], %[t4], %[t0] \n\t"
1639 "movn %[qc2], %[t5], %[t1] \n\t"
1640 "movn %[qc3], %[t6], %[t2] \n\t"
1641 "movn %[qc4], %[t7], %[t3] \n\t"
/* qc1..qc4 are read-write operands; t0..t7 are early-clobber scratch. */
1645 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1646 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1647 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1648 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1649 : [in_int]
"r"(in_int)
/*
 * NOTE(review): the + 40 presumably biases the signed pair into a
 * non-negative table index; the preceding index arithmetic is not visible
 * in this excerpt, so confirm against the full source.
 */
1657 curidx2 += qc4 + 40;
/* Bit cost of both pairs. */
1659 curbits += p_bits[curidx];
1660 curbits += p_bits[curidx2];
/* Each codebook entry holds two consecutive floats. */
1662 vec = &p_codes[curidx*2];
1663 vec2 = &p_codes[curidx2*2];
/*
 * diN = in[i+N] - code[N] * IQ, with the codes taken from vec for the
 * first pair and vec2 for the second. nmsub.s fd, fr, fs, ft yields
 * fr - fs * ft per the MIPS32 instruction set definition.
 */
1667 ".set noreorder \n\t"
1669 "lwc1 $f0, 0(%[in_pos]) \n\t"
1670 "lwc1 $f1, 0(%[vec]) \n\t"
1671 "lwc1 $f2, 4(%[in_pos]) \n\t"
1672 "lwc1 $f3, 4(%[vec]) \n\t"
1673 "lwc1 $f4, 8(%[in_pos]) \n\t"
1674 "lwc1 $f5, 0(%[vec2]) \n\t"
1675 "lwc1 $f6, 12(%[in_pos]) \n\t"
1676 "lwc1 $f7, 4(%[vec2]) \n\t"
1677 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1678 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1679 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1680 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1684 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1685 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1686 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1687 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
/* $f0..$f7 are used as hard-coded temporaries and listed as clobbers. */
1688 :
"$f0",
"$f1",
"$f2",
"$f3",
1689 "$f4",
"$f5",
"$f6",
"$f7",
/* Accumulate the squared error of the four coefficients. */
1693 cost += di0 * di0 + di1 * di1
1694 + di2 * di2 + di3 * di3;
/* Weighted distortion plus bit count. */
1699 return cost * lambda + curbits;
/**
 * Band cost for the UPAIR7 codebooks (MIPS inline-asm version).
 *
 * Processes four coefficients per iteration as two pairs. The visible code
 * quantizes scaled[i..i+3], clamps each result to at most 7, derives a
 * sign pattern and a non-zero count per pair, adds the p_bits[] and
 * upair7_sign_bits[] entries of both pair indices to curbits, and
 * accumulates the squared error between |in| and the scaled codebook
 * vectors in cost.
 *
 * NOTE(review): this excerpt omits several lines of the function (part of
 * the parameter list, the setup of Q34, IQ, p_bits and p_codes, the
 * computation of curidx and curidx2, any use of sign1/sign2/count1/count2
 * and the asm statement wrappers); those parts are intentionally not
 * described here.
 *
 * @return cost * lambda + curbits
 */
1702 static float get_band_cost_UPAIR7_mips(
struct AACEncContext *s,
1704 const float *scaled,
int size,
int scale_idx,
1705 int cb,
const float lambda,
const float uplim,
1712 int qc1, qc2, qc3, qc4;
1718 for (i = 0; i <
size; i += 4) {
1719 const float *vec, *vec2;
1720 int curidx, curidx2, sign1, count1, sign2, count2;
/* Two views of the same four inputs: raw integer bits and floats. */
1721 int *in_int = (
int *)&in[i];
1722 float *in_pos = (
float *)&in[i];
1723 float di0, di1, di2, di3;
/* Quantize four coefficients; the float to int conversion truncates. */
1726 qc1 = scaled[i ] * Q34 + 0.4054f;
1727 qc2 = scaled[i+1] * Q34 + 0.4054f;
1728 qc3 = scaled[i+2] * Q34 + 0.4054f;
1729 qc4 = scaled[i+3] * Q34 + 0.4054f;
/*
 * Step 1: clear sign1/sign2 and clamp qc1..qc4 to a maximum of 7.
 * Step 2: load the raw input bits; slt against zero turns each into a
 * 0/1 flag that is 1 when the sign bit of the input is set.
 * Step 3: build the per-pair sign pattern. The flag of the first value is
 * stored only if its quantized value is non-zero; the pattern is
 * then shifted left by one and the flag of the second value is
 * or-ed in, again only if that quantized value is non-zero.
 * Step 4: count1/count2 = number of quantized values above zero in each
 * pair.
 */
1733 ".set noreorder \n\t"
1735 "ori %[t4], $zero, 7 \n\t"
1736 "ori %[sign1], $zero, 0 \n\t"
1737 "ori %[sign2], $zero, 0 \n\t"
1738 "slt %[t0], %[t4], %[qc1] \n\t"
1739 "slt %[t1], %[t4], %[qc2] \n\t"
1740 "slt %[t2], %[t4], %[qc3] \n\t"
1741 "slt %[t3], %[t4], %[qc4] \n\t"
1742 "movn %[qc1], %[t4], %[t0] \n\t"
1743 "movn %[qc2], %[t4], %[t1] \n\t"
1744 "movn %[qc3], %[t4], %[t2] \n\t"
1745 "movn %[qc4], %[t4], %[t3] \n\t"
1746 "lw %[t0], 0(%[in_int]) \n\t"
1747 "lw %[t1], 4(%[in_int]) \n\t"
1748 "lw %[t2], 8(%[in_int]) \n\t"
1749 "lw %[t3], 12(%[in_int]) \n\t"
1750 "slt %[t0], %[t0], $zero \n\t"
1751 "movn %[sign1], %[t0], %[qc1] \n\t"
1752 "slt %[t2], %[t2], $zero \n\t"
1753 "movn %[sign2], %[t2], %[qc3] \n\t"
1754 "slt %[t1], %[t1], $zero \n\t"
1755 "sll %[t0], %[sign1], 1 \n\t"
1756 "or %[t0], %[t0], %[t1] \n\t"
1757 "movn %[sign1], %[t0], %[qc2] \n\t"
1758 "slt %[t3], %[t3], $zero \n\t"
1759 "sll %[t0], %[sign2], 1 \n\t"
1760 "or %[t0], %[t0], %[t3] \n\t"
1761 "movn %[sign2], %[t0], %[qc4] \n\t"
1762 "slt %[count1], $zero, %[qc1] \n\t"
1763 "slt %[t1], $zero, %[qc2] \n\t"
1764 "slt %[count2], $zero, %[qc3] \n\t"
1765 "slt %[t2], $zero, %[qc4] \n\t"
1766 "addu %[count1], %[count1], %[t1] \n\t"
1767 "addu %[count2], %[count2], %[t2] \n\t"
/*
 * qc1..qc4 are read-write operands; the sign/count results and t0..t3
 * are early-clobber outputs.
 */
1771 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1772 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1773 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
1774 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
1775 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1777 : [in_int]
"r"(in_int)
/* First pair: codeword length, sign bits, and its two-float code entry. */
1787 curbits += p_bits[curidx];
1788 curbits += upair7_sign_bits[curidx];
1789 vec = &p_codes[curidx*2];
/* Second pair: same accounting with curidx2. */
1791 curbits += p_bits[curidx2];
1792 curbits += upair7_sign_bits[curidx2];
1793 vec2 = &p_codes[curidx2*2];
/*
 * Load the four inputs, take their absolute values, then compute
 * diN = |in[i+N]| - code[N] * IQ, with the codes taken from vec for the
 * first pair and vec2 for the second. nmsub.s fd, fr, fs, ft yields
 * fr - fs * ft per the MIPS32 instruction set definition.
 */
1797 ".set noreorder \n\t"
1799 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1800 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1801 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1802 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1803 "abs.s %[di0], %[di0] \n\t"
1804 "abs.s %[di1], %[di1] \n\t"
1805 "abs.s %[di2], %[di2] \n\t"
1806 "abs.s %[di3], %[di3] \n\t"
1807 "lwc1 $f0, 0(%[vec]) \n\t"
1808 "lwc1 $f1, 4(%[vec]) \n\t"
1809 "lwc1 $f2, 0(%[vec2]) \n\t"
1810 "lwc1 $f3, 4(%[vec2]) \n\t"
1811 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1812 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1813 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1814 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1818 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1819 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1820 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1821 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
/* $f0..$f3 are used as hard-coded temporaries and listed as clobbers. */
1822 :
"$f0",
"$f1",
"$f2",
"$f3",
/* Accumulate the squared error of the four coefficients. */
1826 cost += di0 * di0 + di1 * di1
1827 + di2 * di2 + di3 * di3;
/* Weighted distortion plus bit count. */
1832 return cost * lambda + curbits;
/**
 * Band cost for the UPAIR12 codebooks (MIPS inline-asm version).
 *
 * Processes four coefficients per iteration as two pairs. The visible code
 * quantizes scaled[i..i+3], clamps each result to at most 12, derives a
 * sign pattern and a non-zero count per pair, adds the p_bits[] and
 * upair12_sign_bits[] entries of both pair indices to curbits, and
 * accumulates the squared error between |in| and the scaled codebook
 * vectors in cost.
 *
 * NOTE(review): this excerpt omits several lines of the function (part of
 * the parameter list, the setup of Q34, IQ, p_bits and p_codes, the
 * computation of curidx and curidx2, any use of sign1/sign2/count1/count2
 * and the asm statement wrappers); those parts are intentionally not
 * described here.
 *
 * @return cost * lambda + curbits
 */
1835 static float get_band_cost_UPAIR12_mips(
struct AACEncContext *s,
1837 const float *scaled,
int size,
int scale_idx,
1838 int cb,
const float lambda,
const float uplim,
1845 int qc1, qc2, qc3, qc4;
1851 for (i = 0; i <
size; i += 4) {
1852 const float *vec, *vec2;
1853 int curidx, curidx2;
1854 int sign1, count1, sign2, count2;
/* Two views of the same four inputs: raw integer bits and floats. */
1855 int *in_int = (
int *)&in[i];
1856 float *in_pos = (
float *)&in[i];
1857 float di0, di1, di2, di3;
/* Quantize four coefficients; the float to int conversion truncates. */
1860 qc1 = scaled[i ] * Q34 + 0.4054f;
1861 qc2 = scaled[i+1] * Q34 + 0.4054f;
1862 qc3 = scaled[i+2] * Q34 + 0.4054f;
1863 qc4 = scaled[i+3] * Q34 + 0.4054f;
/*
 * Step 1: clear sign1/sign2 and clamp qc1..qc4 to a maximum of 12.
 * Step 2: load the raw input bits; slt against zero turns each into a
 * 0/1 flag that is 1 when the sign bit of the input is set.
 * Step 3: build the per-pair sign pattern. The flag of the first value is
 * stored only if its quantized value is non-zero; the pattern is
 * then shifted left by one and the flag of the second value is
 * or-ed in, again only if that quantized value is non-zero.
 * Step 4: count1/count2 = number of quantized values above zero in each
 * pair.
 */
1867 ".set noreorder \n\t"
1869 "ori %[t4], $zero, 12 \n\t"
1870 "ori %[sign1], $zero, 0 \n\t"
1871 "ori %[sign2], $zero, 0 \n\t"
1872 "slt %[t0], %[t4], %[qc1] \n\t"
1873 "slt %[t1], %[t4], %[qc2] \n\t"
1874 "slt %[t2], %[t4], %[qc3] \n\t"
1875 "slt %[t3], %[t4], %[qc4] \n\t"
1876 "movn %[qc1], %[t4], %[t0] \n\t"
1877 "movn %[qc2], %[t4], %[t1] \n\t"
1878 "movn %[qc3], %[t4], %[t2] \n\t"
1879 "movn %[qc4], %[t4], %[t3] \n\t"
1880 "lw %[t0], 0(%[in_int]) \n\t"
1881 "lw %[t1], 4(%[in_int]) \n\t"
1882 "lw %[t2], 8(%[in_int]) \n\t"
1883 "lw %[t3], 12(%[in_int]) \n\t"
1884 "slt %[t0], %[t0], $zero \n\t"
1885 "movn %[sign1], %[t0], %[qc1] \n\t"
1886 "slt %[t2], %[t2], $zero \n\t"
1887 "movn %[sign2], %[t2], %[qc3] \n\t"
1888 "slt %[t1], %[t1], $zero \n\t"
1889 "sll %[t0], %[sign1], 1 \n\t"
1890 "or %[t0], %[t0], %[t1] \n\t"
1891 "movn %[sign1], %[t0], %[qc2] \n\t"
1892 "slt %[t3], %[t3], $zero \n\t"
1893 "sll %[t0], %[sign2], 1 \n\t"
1894 "or %[t0], %[t0], %[t3] \n\t"
1895 "movn %[sign2], %[t0], %[qc4] \n\t"
1896 "slt %[count1], $zero, %[qc1] \n\t"
1897 "slt %[t1], $zero, %[qc2] \n\t"
1898 "slt %[count2], $zero, %[qc3] \n\t"
1899 "slt %[t2], $zero, %[qc4] \n\t"
1900 "addu %[count1], %[count1], %[t1] \n\t"
1901 "addu %[count2], %[count2], %[t2] \n\t"
/*
 * qc1..qc4 are read-write operands; the sign/count results and t0..t3
 * are early-clobber outputs.
 */
1905 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1906 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1907 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
1908 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
1909 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1911 : [in_int]
"r"(in_int)
/* Bit cost of both pairs: codeword lengths, then sign bits. */
1921 curbits += p_bits[curidx];
1922 curbits += p_bits[curidx2];
1923 curbits += upair12_sign_bits[curidx];
1924 curbits += upair12_sign_bits[curidx2];
/* Each codebook entry holds two consecutive floats. */
1925 vec = &p_codes[curidx*2];
1926 vec2 = &p_codes[curidx2*2];
/*
 * Load the four inputs, take their absolute values, then compute
 * diN = |in[i+N]| - code[N] * IQ, with the codes taken from vec for the
 * first pair and vec2 for the second. nmsub.s fd, fr, fs, ft yields
 * fr - fs * ft per the MIPS32 instruction set definition.
 */
1930 ".set noreorder \n\t"
1932 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1933 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1934 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1935 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1936 "abs.s %[di0], %[di0] \n\t"
1937 "abs.s %[di1], %[di1] \n\t"
1938 "abs.s %[di2], %[di2] \n\t"
1939 "abs.s %[di3], %[di3] \n\t"
1940 "lwc1 $f0, 0(%[vec]) \n\t"
1941 "lwc1 $f1, 4(%[vec]) \n\t"
1942 "lwc1 $f2, 0(%[vec2]) \n\t"
1943 "lwc1 $f3, 4(%[vec2]) \n\t"
1944 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1945 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1946 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1947 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1951 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1952 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1953 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1954 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
/* $f0..$f3 are used as hard-coded temporaries and listed as clobbers. */
1955 :
"$f0",
"$f1",
"$f2",
"$f3",
/* Accumulate the squared error of the four coefficients. */
1959 cost += di0 * di0 + di1 * di1
1960 + di2 * di2 + di3 * di3;
/* Weighted distortion plus bit count. */
1965 return cost * lambda + curbits;
/**
 * Band cost for the ESC codebook (MIPS inline-asm version).
 *
 * Processes four coefficients per iteration as two pairs. The visible code
 * quantizes scaled[i..i+3], keeps a separately clamped copy c1..c4 of each
 * quantized value, replaces every quantized value above 15 by 16 and
 * records that in cond0..cond3, adds the p_bits[] and esc_sign_bits[]
 * entries of both pair indices plus the extra bits for flagged values to
 * curbits, and accumulates the squared error in cost.
 *
 * NOTE(review): this excerpt omits several lines of the function (part of
 * the parameter list, the setup of Q34, IQ, p_bits and p_codes, the
 * computation of curidx and curidx2, the assignment of t1, the conditions
 * that separate the three di alternatives and the asm statement wrappers);
 * those parts are intentionally not described here.
 *
 * @return cost * lambda + curbits
 */
1968 static float get_band_cost_ESC_mips(
struct AACEncContext *s,
1970 const float *scaled,
int size,
int scale_idx,
1971 int cb,
const float lambda,
const float uplim,
/* Threshold used below: inputs at or above it use a fixed reconstruction. */
1976 const float CLIPPED_ESCAPE = 165140.0f * IQ;
1979 int qc1, qc2, qc3, qc4;
1985 for (i = 0; i <
size; i += 4) {
1986 const float *vec, *vec2;
1987 int curidx, curidx2;
1989 float di1, di2, di3, di4;
1990 int cond0, cond1, cond2, cond3;
/* Quantize four coefficients; the float to int conversion truncates. */
1994 qc1 = scaled[i ] * Q34 + 0.4054f;
1995 qc2 = scaled[i+1] * Q34 + 0.4054f;
1996 qc3 = scaled[i+2] * Q34 + 0.4054f;
1997 qc4 = scaled[i+3] * Q34 + 0.4054f;
/*
 * t6 = 15, t7 = 16.
 * cN = qcN shifted left by 18 with shll_s.w, then shifted right by 18
 * with srl.
 * NOTE(review): shll_s.w is a DSP ASE saturating shift, so this pair
 * presumably caps cN at 0x1FFF (13 bits); confirm against the DSP ASE
 * manual.
 * condN = 1 when qcN is above 15, and in that case qcN is replaced by 16.
 */
2001 ".set noreorder \n\t"
2003 "ori %[t6], $zero, 15 \n\t"
2004 "ori %[t7], $zero, 16 \n\t"
2005 "shll_s.w %[c1], %[qc1], 18 \n\t"
2006 "shll_s.w %[c2], %[qc2], 18 \n\t"
2007 "shll_s.w %[c3], %[qc3], 18 \n\t"
2008 "shll_s.w %[c4], %[qc4], 18 \n\t"
2009 "srl %[c1], %[c1], 18 \n\t"
2010 "srl %[c2], %[c2], 18 \n\t"
2011 "srl %[c3], %[c3], 18 \n\t"
2012 "srl %[c4], %[c4], 18 \n\t"
2013 "slt %[cond0], %[t6], %[qc1] \n\t"
2014 "slt %[cond1], %[t6], %[qc2] \n\t"
2015 "slt %[cond2], %[t6], %[qc3] \n\t"
2016 "slt %[cond3], %[t6], %[qc4] \n\t"
2017 "movn %[qc1], %[t7], %[cond0] \n\t"
2018 "movn %[qc2], %[t7], %[cond1] \n\t"
2019 "movn %[qc3], %[t7], %[cond2] \n\t"
2020 "movn %[qc4], %[t7], %[cond3] \n\t"
/*
 * qc1..qc4 are read-write operands; cond0..cond3, c1..c4, t6 and t7 are
 * early-clobber outputs.
 */
2024 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
2025 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
2026 [cond0]
"=&r"(cond0), [cond1]
"=&r"(cond1),
2027 [cond2]
"=&r"(cond2), [cond3]
"=&r"(cond3),
2028 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
2029 [c3]
"=&r"(c3), [c4]
"=&r"(c4),
2030 [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
/* First pair: codeword length, sign bits, and its two-float code entry. */
2039 curbits += p_bits[curidx];
2040 curbits += esc_sign_bits[curidx];
2041 vec = &p_codes[curidx*2];
/* Second pair: same accounting with curidx2. */
2043 curbits += p_bits[curidx2];
2044 curbits += esc_sign_bits[curidx2];
2045 vec2 = &p_codes[curidx2*2];
/*
 * Extra bits for flagged values: -condN is an all-ones mask when condN is
 * 1 and zero when condN is 0, so the term av_log2(cN) * 2 - 3 is added
 * only for values that were above 15.
 */
2047 curbits += (
av_log2(c1) * 2 - 3) & (-cond0);
2048 curbits += (
av_log2(c2) * 2 - 3) & (-cond1);
2049 curbits += (
av_log2(c3) * 2 - 3) & (-cond2);
2050 curbits += (
av_log2(c4) * 2 - 3) & (-cond3);
/* Magnitudes of the inputs (the assignment of t1 is not visible here). */
2053 t2 = fabsf(in[i+1]);
2054 t3 = fabsf(in[i+2]);
2055 t4 = fabsf(in[i+3]);
/*
 * Per-coefficient error. Three alternatives are visible for each value:
 * the magnitude minus CLIPPED_ESCAPE when the magnitude reaches that
 * threshold, the magnitude minus cN * cbrtf(cN) * IQ, or the magnitude
 * minus the codebook value times IQ.
 * NOTE(review): the conditions selecting between the second and third
 * alternatives are on lines missing from this excerpt.
 */
2058 if (t1 >= CLIPPED_ESCAPE) {
2059 di1 = t1 - CLIPPED_ESCAPE;
2061 di1 = t1 - c1 *
cbrtf(c1) * IQ;
2064 di1 = t1 - vec[0] * IQ;
2067 if (t2 >= CLIPPED_ESCAPE) {
2068 di2 = t2 - CLIPPED_ESCAPE;
2070 di2 = t2 - c2 *
cbrtf(c2) * IQ;
2073 di2 = t2 - vec[1] * IQ;
2076 if (t3 >= CLIPPED_ESCAPE) {
2077 di3 = t3 - CLIPPED_ESCAPE;
2079 di3 = t3 - c3 *
cbrtf(c3) * IQ;
2082 di3 = t3 - vec2[0] * IQ;
2085 if (t4 >= CLIPPED_ESCAPE) {
2086 di4 = t4 - CLIPPED_ESCAPE;
2088 di4 = t4 - c4 *
cbrtf(c4) * IQ;
2091 di4 = t4 - vec2[1]*IQ;
/* Accumulate the squared error of the four coefficients. */
2093 cost += di1 * di1 + di2 * di2
2094 + di3 * di3 + di4 * di4;
/* Weighted distortion plus bit count. */
2099 return cost * lambda + curbits;
/**
 * Table of band cost functions, one entry per codebook number.
 *
 * The get_band_cost() macro indexes this table with cb. Neighbouring
 * codebooks share one implementation: entry 0 is the ZERO variant,
 * entries 1-2 SQUAD, 3-4 UQUAD, 5-6 SPAIR, 7-8 UPAIR7, 9-10 UPAIR12 and
 * entry 11 ESC.
 *
 * NOTE(review): the end of the parameter list and the closing of the
 * initializer are not visible in this excerpt, so further entries may
 * exist.
 */
2102 static float (*
const get_band_cost_arr[])(
struct AACEncContext *
s,
2104 const float *scaled,
int size,
int scale_idx,
2105 int cb,
const float lambda,
const float uplim,
2107 get_band_cost_ZERO_mips,
2108 get_band_cost_SQUAD_mips,
2109 get_band_cost_SQUAD_mips,
2110 get_band_cost_UQUAD_mips,
2111 get_band_cost_UQUAD_mips,
2112 get_band_cost_SPAIR_mips,
2113 get_band_cost_SPAIR_mips,
2114 get_band_cost_UPAIR7_mips,
2115 get_band_cost_UPAIR7_mips,
2116 get_band_cost_UPAIR12_mips,
2117 get_band_cost_UPAIR12_mips,
2118 get_band_cost_ESC_mips,
/*
 * Dispatch to the band cost function selected by cb in get_band_cost_arr,
 * forwarding every argument unchanged. cb is used both as the table index
 * and as an argument, and is not range-checked here.
 */
2121 #define get_band_cost( \
2122 s, pb, in, scaled, size, scale_idx, cb, \
2123 lambda, uplim, bits) \
2124 get_band_cost_arr[cb]( \
2125 s, pb, in, scaled, size, scale_idx, cb, \
2126 lambda, uplim, bits)
2129 const float *scaled,
int size,
int scale_idx,
2130 int cb,
const float lambda,
const float uplim,
2133 return get_band_cost(s,
NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
/**
 * Scalefactor search (MIPS version).
 *
 * NOTE(review): only scattered statements of this function are visible in
 * this excerpt (most of the parameter list, the loop headers and many
 * statements are missing). The comments below describe the visible lines
 * only and do not attempt to reconstruct the overall algorithm.
 */
2136 static void search_for_quantizers_twoloop_mips(
AVCodecContext *avctx,
2141 int start = 0, i, w, w2,
g;
/* Per-band values, indexed with w*16+g below; dists starts out zeroed. */
2143 float dists[128] = { 0 }, uplims[128];
2145 int fflag, minscaler;
/* Cap the bit target at 5800. */
2150 destbits =
FFMIN(destbits, 5800);
/* Mark a band as zeroed. */
2159 sce->
zeroes[(w+w2)*16+g] = 1;
/* Store the per-band limit scaled by 512 and track the smallest uplim. */
2164 uplims[w*16+
g] = uplim *512;
2167 minthr =
FFMIN(minthr, uplim);
2173 if (sce->
zeroes[w*16+g]) {
2196 minscaler = sce->
sf_idx[0];
/* Step size: 1 when its is non-zero, otherwise 32. */
2197 qstep = its ? 1 : 32;
/* This condition tests for zeroed bands and sf_idx values of 218 or more. */
2212 if (sce->
zeroes[w*16+g] || sce->
sf_idx[w*16+g] >= 218) {
/* Track the smallest scalefactor index. */
2216 minscaler =
FFMIN(minscaler, sce->
sf_idx[w*16+g]);
2220 bits += quantize_band_cost_bits(s, coefs + w2*128,
/* Same zeroed-band / sf_idx test and minimum tracking as above. */
2248 if (sce->
zeroes[w*16+g] || sce->
sf_idx[w*16+g] >= 218) {
2252 minscaler =
FFMIN(minscaler, sce->
sf_idx[w*16+g]);
/* Per-band value stored as dist minus bits. */
2266 dists[w*16+
g] = dist -
bits;
/*
 * Compare the bit count with the target. The two loops over all 128
 * sf_idx entries test them against 218 - qstep and 60 - qstep; the
 * statements they guard are not visible in this excerpt.
 */
2276 if (tbits > destbits) {
2277 for (i = 0; i < 128; i++)
2278 if (sce->
sf_idx[i] < 218 - qstep)
2281 for (i = 0; i < 128; i++)
2282 if (sce->
sf_idx[i] > 60 - qstep)
/* Tested when qstep is zero and tbits exceeds the target by over 2 percent. */
2286 if (!qstep && tbits > destbits*1.02 && sce->
sf_idx[0] < 217)
/* Remember the index so a change can be detected further down. */
2294 int prevsc = sce->
sf_idx[w*16+
g];
/* Taken when the stored value exceeds its limit and sf_idx is above 60. */
2295 if (dists[w*16+g] > uplims[w*16+g] && sce->
sf_idx[w*16+g] > 60) {
2303 if (sce->
sf_idx[w*16+g] != prevsc)
/* Repeat while fflag is set, for at most 10 iterations. */
2309 }
while (fflag && its < 10);
2315 int start = 0, i, w, w2,
g;
2316 float M[128],
S[128];
2325 float dist1 = 0.0f, dist2 = 0.0f;
2332 M[i ] = (sce0->
coeffs[start+w2*128+i ]
2333 + sce1->
coeffs[start+w2*128+i ]) * 0.5;
2334 M[i+1] = (sce0->
coeffs[start+w2*128+i+1]
2335 + sce1->
coeffs[start+w2*128+i+1]) * 0.5;
2336 M[i+2] = (sce0->
coeffs[start+w2*128+i+2]
2337 + sce1->
coeffs[start+w2*128+i+2]) * 0.5;
2338 M[i+3] = (sce0->
coeffs[start+w2*128+i+3]
2339 + sce1->
coeffs[start+w2*128+i+3]) * 0.5;
2342 - sce1->
coeffs[start+w2*128+i ];
2344 - sce1->
coeffs[start+w2*128+i+1];
2346 - sce1->
coeffs[start+w2*128+i+2];
2348 - sce1->
coeffs[start+w2*128+i+3];
2357 sce0->
sf_idx[(w+w2)*16+g],
2363 sce1->
sf_idx[(w+w2)*16+g],
2369 sce0->
sf_idx[(w+w2)*16+g],
2375 sce1->
sf_idx[(w+w2)*16+g],
2379 cpe->
ms_mask[w*16+
g] = dist2 < dist1;
2388 int win,
int group_len,
const float lambda)
2395 const int run_esc = (1 <<
run_bits) - 1;
2396 int idx, ppos,
count;
2397 int stackrun[120], stackcb[120], stack_len;
2403 for (cb = 0; cb < 12; cb++) {
2404 path[0][
cb].
cost = run_bits+4;
2406 path[0][
cb].
run = 0;
2408 for (swb = 0; swb < max_sfb; swb++) {
2410 if (sce->
zeroes[win*16 + swb]) {
2411 float cost_stay_here = path[swb][0].
cost;
2412 float cost_get_here = next_minbits + run_bits + 4;
2416 if (cost_get_here < cost_stay_here) {
2417 path[swb+1][0].
prev_idx = next_mincb;
2418 path[swb+1][0].
cost = cost_get_here;
2419 path[swb+1][0].
run = 1;
2422 path[swb+1][0].
cost = cost_stay_here;
2423 path[swb+1][0].
run = path[swb][0].
run + 1;
2425 next_minbits = path[swb+1][0].
cost;
2427 for (cb = 1; cb < 12; cb++) {
2428 path[swb+1][
cb].
cost = 61450;
2430 path[swb+1][
cb].
run = 0;
2433 float minbits = next_minbits;
2434 int mincb = next_mincb;
2435 int startcb = sce->
band_type[win*16+swb];
2438 for (cb = 0; cb < startcb; cb++) {
2439 path[swb+1][
cb].
cost = 61450;
2441 path[swb+1][
cb].
run = 0;
2443 for (cb = startcb; cb < 12; cb++) {
2444 float cost_stay_here, cost_get_here;
2446 for (w = 0; w < group_len; w++) {
2447 bits += quantize_band_cost_bits(s, sce->
coeffs + start + w*128,
2448 s->
scoefs + start + w*128, size,
2449 sce->
sf_idx[(win+w)*16+swb], cb,
2453 cost_get_here = minbits + bits + run_bits + 4;
2457 if (cost_get_here < cost_stay_here) {
2459 path[swb+1][
cb].
cost = cost_get_here;
2460 path[swb+1][
cb].
run = 1;
2463 path[swb+1][
cb].
cost = cost_stay_here;
2466 if (path[swb+1][cb].cost < next_minbits) {
2467 next_minbits = path[swb+1][
cb].
cost;
2477 for (cb = 1; cb < 12; cb++)
2478 if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
2484 stackrun[stack_len] = path[ppos][
cb].
run;
2485 stackcb [stack_len] =
cb;
2487 ppos -= path[ppos][
cb].
run;
2492 for (i = stack_len - 1; i >= 0; i--) {
2494 count = stackrun[i];
2495 memset(sce->
zeroes + win*16 + start, !stackcb[i], count);
2496 for (j = 0; j <
count; j++) {
2500 while (count >= run_esc) {