74 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
75 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
76 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
77 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
81 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
/*
 * Per-codeword sign-bit counts for the UQUAD paths: 81 (= 3^4) entries,
 * shown here as 9 rows of 9.  Each value equals the number of non-zero
 * digits when the index is written as four base-3 digits.
 * The UQUAD bit-count / cost routines add uquad_sign_bits[curidx] to their
 * running bit total.
 */
88 static const uint8_t uquad_sign_bits[81] = {
89 0, 1, 1, 1, 2, 2, 1, 2, 2,
90 1, 2, 2, 2, 3, 3, 2, 3, 3,
91 1, 2, 2, 2, 3, 3, 2, 3, 3,
92 1, 2, 2, 2, 3, 3, 2, 3, 3,
93 2, 3, 3, 3, 4, 4, 3, 4, 4,
94 2, 3, 3, 3, 4, 4, 3, 4, 4,
95 1, 2, 2, 2, 3, 3, 2, 3, 3,
96 2, 3, 3, 3, 4, 4, 3, 4, 4,
97 2, 3, 3, 3, 4, 4, 3, 4, 4
/*
 * Per-codeword sign-bit counts for the UPAIR7 path: 64 (= 8 * 8) entries,
 * shown as 8 rows of 8.  Row index and column index each contribute one
 * bit when non-zero, so values are 0, 1 or 2.
 * get_band_numbits_UPAIR7_mips adds upair7_sign_bits[curidx] and
 * upair7_sign_bits[curidx2] to its bit total.
 */
100 static const uint8_t upair7_sign_bits[64] = {
101 0, 1, 1, 1, 1, 1, 1, 1,
102 1, 2, 2, 2, 2, 2, 2, 2,
103 1, 2, 2, 2, 2, 2, 2, 2,
104 1, 2, 2, 2, 2, 2, 2, 2,
105 1, 2, 2, 2, 2, 2, 2, 2,
106 1, 2, 2, 2, 2, 2, 2, 2,
107 1, 2, 2, 2, 2, 2, 2, 2,
108 1, 2, 2, 2, 2, 2, 2, 2,
/*
 * Per-codeword sign-bit counts for the UPAIR12 path: 169 (= 13 * 13)
 * entries, shown as 13 rows of 13.  Row index and column index each
 * contribute one bit when non-zero, so values are 0, 1 or 2.
 * get_band_numbits_UPAIR12_mips adds upair12_sign_bits[curidx] and
 * upair12_sign_bits[curidx2] to its bit total.
 */
111 static const uint8_t upair12_sign_bits[169] = {
112 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
115 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
116 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
123 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
124 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Per-codeword sign-bit counts for the ESC path: 289 (= 17 * 17) entries,
 * shown as 17 rows of 17.  Row index and column index each contribute one
 * bit when non-zero, so values are 0, 1 or 2.
 * get_band_numbits_ESC_mips adds esc_sign_bits[curidx] and
 * esc_sign_bits[curidx2] to its bit total.
 */
127 static const uint8_t esc_sign_bits[289] = {
128 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
132 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
133 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
134 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
135 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
136 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
137 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
139 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
140 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
144 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
148 #ifndef USE_REALLY_FULL_SEARCH
151 float ax, bx, cx, dx;
153 for (i = 0; i <
size; i += 4) {
/**
 * Track the largest value found in a group of windows.
 *
 * Visits group_len windows; window w2 starts at scaled[w2*128], and the
 * first swb_size entries of each window are folded into the running
 * maximum with FFMAX.
 *
 * @param group_len number of windows to scan
 * @param swb_size  number of entries examined per window
 * @param scaled    input values, read with a stride of 128 floats per window
 *
 * NOTE(review): the initial value of maxval and the return statement are
 * not visible in this excerpt; presumably maxval is returned - confirm.
 */
177 static float find_max_val(
int group_len,
int swb_size,
const float *scaled) {
180 for (w2 = 0; w2 < group_len; w2++) {
181 for (i = 0; i < swb_size; i++) {
182 maxval =
FFMAX(maxval, scaled[w2*128+i]);
/*
 * NOTE(review): fragment - the enclosing function's signature and the
 * definitions of Q, maxval, qmaxval and cb are not visible here.
 *
 * Q34 is Q raised to the power 3/4 (sqrt(Q * sqrt(Q))).
 */
190 float Q34 = sqrtf(Q * sqrtf(Q));
/* Scale the maximum by Q34 and add the 0.4054f offset before comparing. */
192 qmaxval = maxval * Q34 + 0.4054f;
/*
 * Map the quantized maximum to a codebook number: 0 -> 0, 1 -> 1, 2 -> 3,
 * up to 4 -> 5, up to 7 -> 7, up to 12 -> 9.  Any branch for larger values
 * lies outside this excerpt.
 */
193 if (qmaxval == 0) cb = 0;
194 else if (qmaxval == 1) cb = 1;
195 else if (qmaxval == 2) cb = 3;
196 else if (qmaxval <= 4) cb = 5;
197 else if (qmaxval <= 7) cb = 7;
198 else if (qmaxval <= 12) cb = 9;
/**
 * Quantize and emit a band, SQUAD variant, using MIPS inline assembly.
 *
 * Works on four coefficients per loop iteration.  Each qcN is
 * scaled[i+N-1] * Q34 + 0.4054f converted to int; the asm then reduces it
 * to 0/1 and negates it when the matching word of in[] has its top bit
 * set.  One codeword per group of four is written with put_bits().
 *
 * NOTE(review): several lines of this function (part of the parameter
 * list, local declarations, the computation of curidx, the end of the
 * clobber list) are not visible in this excerpt.
 */
206 static void quantize_and_encode_band_cost_SQUAD_mips(
struct AACEncContext *
s,
208 const float *scaled,
int size,
int scale_idx,
209 int cb,
const float lambda,
const float uplim,
214 int qc1, qc2, qc3, qc4;
221 for (i = 0; i <
size; i += 4) {
/*
 * View the float inputs as raw 32-bit words so the asm can test the top
 * bit.  NOTE(review): this reads float storage through an int pointer and
 * drops the const qualifier of in - confirm this is acceptable for the
 * build flags in use.
 */
223 int *in_int = (
int *)&in[i];
/* Float -> int conversion by assignment; the 0.4054f offset is added first. */
225 qc1 = scaled[i ] * Q34 + 0.4054f;
226 qc2 = scaled[i+1] * Q34 + 0.4054f;
227 qc3 = scaled[i+2] * Q34 + 0.4054f;
228 qc4 = scaled[i+3] * Q34 + 0.4054f;
232 ".set noreorder \n\t"
/* qcN = (0 < qcN): any positive value becomes 1, everything else 0. */
234 "slt %[qc1], $zero, %[qc1] \n\t"
235 "slt %[qc2], $zero, %[qc2] \n\t"
236 "slt %[qc3], $zero, %[qc3] \n\t"
237 "slt %[qc4], $zero, %[qc4] \n\t"
/* Load the four input words and keep only bit 31 of each. */
238 "lw $t0, 0(%[in_int]) \n\t"
239 "lw $t1, 4(%[in_int]) \n\t"
240 "lw $t2, 8(%[in_int]) \n\t"
241 "lw $t3, 12(%[in_int]) \n\t"
242 "srl $t0, $t0, 31 \n\t"
243 "srl $t1, $t1, 31 \n\t"
244 "srl $t2, $t2, 31 \n\t"
245 "srl $t3, $t3, 31 \n\t"
/* Prepare -qcN and select it when the corresponding top bit was set. */
246 "subu $t4, $zero, %[qc1] \n\t"
247 "subu $t5, $zero, %[qc2] \n\t"
248 "subu $t6, $zero, %[qc3] \n\t"
249 "subu $t7, $zero, %[qc4] \n\t"
250 "movn %[qc1], $t4, $t0 \n\t"
251 "movn %[qc2], $t5, $t1 \n\t"
252 "movn %[qc3], $t6, $t2 \n\t"
253 "movn %[qc4], $t7, $t3 \n\t"
/* qc1..qc4 are read-write operands; in_int is the only pure input. */
257 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
258 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4)
259 : [in_int]
"r"(in_int)
/*
 * Scratch registers used above.  NOTE(review): the asm loads from memory
 * through in_int; the rest of the clobber list is not visible here -
 * confirm the compiler is told about that memory read.
 */
260 :
"t0",
"t1",
"t2",
"t3",
261 "t4",
"t5",
"t6",
"t7",
/* Emit the codeword: p_bits[curidx] bits taken from p_codes[curidx]. */
274 put_bits(pb, p_bits[curidx], p_codes[curidx]);
/**
 * Quantize and emit a band, UQUAD variant, using MIPS inline assembly.
 *
 * Works on four coefficients per loop iteration.  Each qcN is
 * scaled[i+N-1] * Q34 + 0.4054f converted to int and limited to at most 2.
 * The asm also builds "sign" (one bit per non-zero qcN, taken from whether
 * the matching word of in[] is negative) and "count" (how many qcN are
 * greater than zero).  The emitted code is the table codeword with the
 * sign bits appended.
 *
 * NOTE(review): parts of the parameter list, local declarations, the
 * computation of curidx and the final bit write are not visible in this
 * excerpt.
 */
278 static void quantize_and_encode_band_cost_UQUAD_mips(
struct AACEncContext *s,
280 const float *scaled,
int size,
int scale_idx,
281 int cb,
const float lambda,
const float uplim,
286 int qc1, qc2, qc3, qc4;
293 for (i = 0; i <
size; i += 4) {
294 int curidx, sign,
count;
/* Raw 32-bit view of the inputs, used by the asm for the sign test. */
295 int *in_int = (
int *)&in[i];
297 unsigned int v_codes;
299 qc1 = scaled[i ] * Q34 + 0.4054f;
300 qc2 = scaled[i+1] * Q34 + 0.4054f;
301 qc3 = scaled[i+2] * Q34 + 0.4054f;
302 qc4 = scaled[i+3] * Q34 + 0.4054f;
306 ".set noreorder \n\t"
/* $t4 = 2 (upper limit), sign = 0. */
308 "ori $t4, $zero, 2 \n\t"
309 "ori %[sign], $zero, 0 \n\t"
/* Replace any qcN greater than 2 by 2. */
310 "slt $t0, $t4, %[qc1] \n\t"
311 "slt $t1, $t4, %[qc2] \n\t"
312 "slt $t2, $t4, %[qc3] \n\t"
313 "slt $t3, $t4, %[qc4] \n\t"
314 "movn %[qc1], $t4, $t0 \n\t"
315 "movn %[qc2], $t4, $t1 \n\t"
316 "movn %[qc3], $t4, $t2 \n\t"
317 "movn %[qc4], $t4, $t3 \n\t"
318 "lw $t0, 0(%[in_int]) \n\t"
319 "lw $t1, 4(%[in_int]) \n\t"
320 "lw $t2, 8(%[in_int]) \n\t"
321 "lw $t3, 12(%[in_int]) \n\t"
/*
 * $tN = (input word < 0).  For each non-zero qcN, sign is shifted left by
 * one and the flag is OR-ed in; zero coefficients leave sign untouched.
 * The count accumulation is interleaved with these steps.
 */
322 "slt $t0, $t0, $zero \n\t"
323 "movn %[sign], $t0, %[qc1] \n\t"
324 "slt $t1, $t1, $zero \n\t"
325 "slt $t2, $t2, $zero \n\t"
326 "slt $t3, $t3, $zero \n\t"
327 "sll $t0, %[sign], 1 \n\t"
328 "or $t0, $t0, $t1 \n\t"
329 "movn %[sign], $t0, %[qc2] \n\t"
330 "slt $t4, $zero, %[qc1] \n\t"
331 "slt $t1, $zero, %[qc2] \n\t"
332 "slt %[count], $zero, %[qc3] \n\t"
333 "sll $t0, %[sign], 1 \n\t"
334 "or $t0, $t0, $t2 \n\t"
335 "movn %[sign], $t0, %[qc3] \n\t"
336 "slt $t2, $zero, %[qc4] \n\t"
337 "addu %[count], %[count], $t4 \n\t"
338 "addu %[count], %[count], $t1 \n\t"
339 "sll $t0, %[sign], 1 \n\t"
340 "or $t0, $t0, $t3 \n\t"
341 "movn %[sign], $t0, %[qc4] \n\t"
342 "addu %[count], %[count], $t2 \n\t"
/* qc1..qc4 are read-write; sign and count are early-clobber outputs. */
346 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
347 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
348 [sign]
"=&r"(sign), [count]
"=&r"(count)
349 : [in_int]
"r"(in_int)
/* NOTE(review): the remainder of the clobber list is not visible here. */
350 :
"t0",
"t1",
"t2",
"t3",
"t4",
/* Append the low "count" bits of sign to the table codeword. */
362 v_codes = (p_codes[curidx] <<
count) | (sign & ((1 << count) - 1));
363 v_bits = p_bits[curidx] +
count;
/**
 * Quantize and emit a band, SPAIR variant, using MIPS inline assembly.
 *
 * Works on four coefficients (two pairs) per loop iteration.  Each qcN is
 * scaled[i+N-1] * Q34 + 0.4054f converted to int, limited to at most 4, and
 * negated when the matching word of in[] has its top bit set.  The two
 * pair codewords (indices curidx and curidx2) are concatenated into one
 * value before being written.
 *
 * NOTE(review): parts of the parameter list, local declarations, the
 * computation of curidx/curidx2 and the final bit write are not visible in
 * this excerpt.
 */
368 static void quantize_and_encode_band_cost_SPAIR_mips(
struct AACEncContext *s,
370 const float *scaled,
int size,
int scale_idx,
371 int cb,
const float lambda,
const float uplim,
376 int qc1, qc2, qc3, qc4;
383 for (i = 0; i <
size; i += 4) {
/* Raw 32-bit view of the inputs, used by the asm for the sign test. */
385 int *in_int = (
int *)&in[i];
387 unsigned int v_codes;
389 qc1 = scaled[i ] * Q34 + 0.4054f;
390 qc2 = scaled[i+1] * Q34 + 0.4054f;
391 qc3 = scaled[i+2] * Q34 + 0.4054f;
392 qc4 = scaled[i+3] * Q34 + 0.4054f;
396 ".set noreorder \n\t"
/* Replace any qcN greater than 4 by 4. */
398 "ori $t4, $zero, 4 \n\t"
399 "slt $t0, $t4, %[qc1] \n\t"
400 "slt $t1, $t4, %[qc2] \n\t"
401 "slt $t2, $t4, %[qc3] \n\t"
402 "slt $t3, $t4, %[qc4] \n\t"
403 "movn %[qc1], $t4, $t0 \n\t"
404 "movn %[qc2], $t4, $t1 \n\t"
405 "movn %[qc3], $t4, $t2 \n\t"
406 "movn %[qc4], $t4, $t3 \n\t"
/* Load the input words, isolate bit 31, and negate qcN where it is set. */
407 "lw $t0, 0(%[in_int]) \n\t"
408 "lw $t1, 4(%[in_int]) \n\t"
409 "lw $t2, 8(%[in_int]) \n\t"
410 "lw $t3, 12(%[in_int]) \n\t"
411 "srl $t0, $t0, 31 \n\t"
412 "srl $t1, $t1, 31 \n\t"
413 "srl $t2, $t2, 31 \n\t"
414 "srl $t3, $t3, 31 \n\t"
415 "subu $t4, $zero, %[qc1] \n\t"
416 "subu $t5, $zero, %[qc2] \n\t"
417 "subu $t6, $zero, %[qc3] \n\t"
418 "subu $t7, $zero, %[qc4] \n\t"
419 "movn %[qc1], $t4, $t0 \n\t"
420 "movn %[qc2], $t5, $t1 \n\t"
421 "movn %[qc3], $t6, $t2 \n\t"
422 "movn %[qc4], $t7, $t3 \n\t"
426 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
427 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4)
428 : [in_int]
"r"(in_int)
/* NOTE(review): the remainder of the clobber list is not visible here. */
429 :
"t0",
"t1",
"t2",
"t3",
430 "t4",
"t5",
"t6",
"t7",
/* First pair's codeword in the high bits, second pair's in the low bits. */
440 v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
441 v_bits = p_bits[curidx] + p_bits[curidx2];
/**
 * Quantize and emit a band, UPAIR7 variant, using MIPS inline assembly.
 *
 * Works on four coefficients (two pairs) per loop iteration.  Each qcN is
 * scaled[i+N-1] * Q34 + 0.4054f converted to int and limited to at most 7.
 * For pair (qc1, qc2) the asm builds sign1/count1 and for pair (qc3, qc4)
 * sign2/count2: signN collects one bit per non-zero coefficient (set when
 * the matching word of in[] is negative) and countN is the number of
 * coefficients greater than zero.  Each pair's codeword is then extended
 * by its sign bits.
 *
 * NOTE(review): parts of the parameter list, local declarations, the
 * computation of curidx for each pair and the bit writes are not visible
 * in this excerpt.
 */
446 static void quantize_and_encode_band_cost_UPAIR7_mips(
struct AACEncContext *s,
448 const float *scaled,
int size,
int scale_idx,
449 int cb,
const float lambda,
const float uplim,
454 int qc1, qc2, qc3, qc4;
461 for (i = 0; i <
size; i += 4) {
462 int curidx, sign1, count1, sign2, count2;
/* Raw 32-bit view of the inputs, used by the asm for the sign test. */
463 int *in_int = (
int *)&in[i];
465 unsigned int v_codes;
467 qc1 = scaled[i ] * Q34 + 0.4054f;
468 qc2 = scaled[i+1] * Q34 + 0.4054f;
469 qc3 = scaled[i+2] * Q34 + 0.4054f;
470 qc4 = scaled[i+3] * Q34 + 0.4054f;
474 ".set noreorder \n\t"
/* $t4 = 7 (upper limit); both sign accumulators start at 0. */
476 "ori $t4, $zero, 7 \n\t"
477 "ori %[sign1], $zero, 0 \n\t"
478 "ori %[sign2], $zero, 0 \n\t"
/* Replace any qcN greater than 7 by 7. */
479 "slt $t0, $t4, %[qc1] \n\t"
480 "slt $t1, $t4, %[qc2] \n\t"
481 "slt $t2, $t4, %[qc3] \n\t"
482 "slt $t3, $t4, %[qc4] \n\t"
483 "movn %[qc1], $t4, $t0 \n\t"
484 "movn %[qc2], $t4, $t1 \n\t"
485 "movn %[qc3], $t4, $t2 \n\t"
486 "movn %[qc4], $t4, $t3 \n\t"
487 "lw $t0, 0(%[in_int]) \n\t"
488 "lw $t1, 4(%[in_int]) \n\t"
489 "lw $t2, 8(%[in_int]) \n\t"
490 "lw $t3, 12(%[in_int]) \n\t"
/*
 * $tN = (input word < 0).  For every non-zero qcN the pair's sign value is
 * shifted left by one and the flag OR-ed in.
 */
491 "slt $t0, $t0, $zero \n\t"
492 "movn %[sign1], $t0, %[qc1] \n\t"
493 "slt $t2, $t2, $zero \n\t"
494 "movn %[sign2], $t2, %[qc3] \n\t"
495 "slt $t1, $t1, $zero \n\t"
496 "sll $t0, %[sign1], 1 \n\t"
497 "or $t0, $t0, $t1 \n\t"
498 "movn %[sign1], $t0, %[qc2] \n\t"
499 "slt $t3, $t3, $zero \n\t"
500 "sll $t0, %[sign2], 1 \n\t"
501 "or $t0, $t0, $t3 \n\t"
502 "movn %[sign2], $t0, %[qc4] \n\t"
/* countN = number of coefficients in the pair that are greater than zero. */
503 "slt %[count1], $zero, %[qc1] \n\t"
504 "slt $t1, $zero, %[qc2] \n\t"
505 "slt %[count2], $zero, %[qc3] \n\t"
506 "slt $t2, $zero, %[qc4] \n\t"
507 "addu %[count1], %[count1], $t1 \n\t"
508 "addu %[count2], %[count2], $t2 \n\t"
512 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
513 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
514 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
515 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2)
516 : [in_int]
"r"(in_int)
/* NOTE(review): the remainder of the clobber list is not visible here. */
517 :
"t0",
"t1",
"t2",
"t3",
"t4",
/* First pair: table codeword followed by count1 sign bits. */
524 v_codes = (p_codes[curidx] << count1) | sign1;
525 v_bits = p_bits[curidx] + count1;
/* Second pair: same construction with count2 / sign2. */
531 v_codes = (p_codes[curidx] << count2) | sign2;
532 v_bits = p_bits[curidx] + count2;
/**
 * Quantize and emit a band, UPAIR12 variant, using MIPS inline assembly.
 *
 * Same structure as the UPAIR7 variant with an upper limit of 12: each qcN
 * is scaled[i+N-1] * Q34 + 0.4054f converted to int and limited to at most
 * 12; sign1/count1 describe pair (qc1, qc2) and sign2/count2 describe pair
 * (qc3, qc4).  Each pair's codeword is extended by its sign bits.
 *
 * NOTE(review): parts of the parameter list, local declarations, the
 * computation of curidx for each pair and the bit writes are not visible
 * in this excerpt.
 */
537 static void quantize_and_encode_band_cost_UPAIR12_mips(
struct AACEncContext *s,
539 const float *scaled,
int size,
int scale_idx,
540 int cb,
const float lambda,
const float uplim,
545 int qc1, qc2, qc3, qc4;
552 for (i = 0; i <
size; i += 4) {
553 int curidx, sign1, count1, sign2, count2;
/* Raw 32-bit view of the inputs, used by the asm for the sign test. */
554 int *in_int = (
int *)&in[i];
556 unsigned int v_codes;
558 qc1 = scaled[i ] * Q34 + 0.4054f;
559 qc2 = scaled[i+1] * Q34 + 0.4054f;
560 qc3 = scaled[i+2] * Q34 + 0.4054f;
561 qc4 = scaled[i+3] * Q34 + 0.4054f;
565 ".set noreorder \n\t"
/* $t4 = 12 (upper limit); both sign accumulators start at 0. */
567 "ori $t4, $zero, 12 \n\t"
568 "ori %[sign1], $zero, 0 \n\t"
569 "ori %[sign2], $zero, 0 \n\t"
/* Replace any qcN greater than 12 by 12. */
570 "slt $t0, $t4, %[qc1] \n\t"
571 "slt $t1, $t4, %[qc2] \n\t"
572 "slt $t2, $t4, %[qc3] \n\t"
573 "slt $t3, $t4, %[qc4] \n\t"
574 "movn %[qc1], $t4, $t0 \n\t"
575 "movn %[qc2], $t4, $t1 \n\t"
576 "movn %[qc3], $t4, $t2 \n\t"
577 "movn %[qc4], $t4, $t3 \n\t"
578 "lw $t0, 0(%[in_int]) \n\t"
579 "lw $t1, 4(%[in_int]) \n\t"
580 "lw $t2, 8(%[in_int]) \n\t"
581 "lw $t3, 12(%[in_int]) \n\t"
/*
 * $tN = (input word < 0).  For every non-zero qcN the pair's sign value is
 * shifted left by one and the flag OR-ed in.
 */
582 "slt $t0, $t0, $zero \n\t"
583 "movn %[sign1], $t0, %[qc1] \n\t"
584 "slt $t2, $t2, $zero \n\t"
585 "movn %[sign2], $t2, %[qc3] \n\t"
586 "slt $t1, $t1, $zero \n\t"
587 "sll $t0, %[sign1], 1 \n\t"
588 "or $t0, $t0, $t1 \n\t"
589 "movn %[sign1], $t0, %[qc2] \n\t"
590 "slt $t3, $t3, $zero \n\t"
591 "sll $t0, %[sign2], 1 \n\t"
592 "or $t0, $t0, $t3 \n\t"
593 "movn %[sign2], $t0, %[qc4] \n\t"
/* countN = number of coefficients in the pair that are greater than zero. */
594 "slt %[count1], $zero, %[qc1] \n\t"
595 "slt $t1, $zero, %[qc2] \n\t"
596 "slt %[count2], $zero, %[qc3] \n\t"
597 "slt $t2, $zero, %[qc4] \n\t"
598 "addu %[count1], %[count1], $t1 \n\t"
599 "addu %[count2], %[count2], $t2 \n\t"
603 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
604 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
605 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
606 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2)
607 : [in_int]
"r"(in_int)
/* NOTE(review): the remainder of the clobber list is not visible here. */
608 :
"t0",
"t1",
"t2",
"t3",
"t4",
/* First pair: table codeword followed by count1 sign bits. */
615 v_codes = (p_codes[curidx] << count1) | sign1;
616 v_bits = p_bits[curidx] + count1;
/* Second pair: same construction with count2 / sign2. */
622 v_codes = (p_codes[curidx] << count2) | sign2;
623 v_bits = p_bits[curidx] + count2;
/**
 * Quantize and emit a band, ESC variant, using MIPS inline assembly.
 *
 * Contains two loops over the band, four coefficients (two pairs) per
 * iteration.  Both limit each qcN to at most 16 and build per-pair
 * signN/countN values exactly like the UPAIR variants.  The second loop
 * additionally keeps a saturated copy of each original qcN in c1..c4 and,
 * for components whose table vector equals 64.0f, builds an extra code
 * from that value.
 *
 * NOTE(review): the condition that chooses between the two loops, the
 * computation of curidx/curidx2 and len, part of the parameter list and
 * the bit writes are not visible in this excerpt.
 */
628 static void quantize_and_encode_band_cost_ESC_mips(
struct AACEncContext *s,
630 const float *scaled,
int size,
int scale_idx,
631 int cb,
const float lambda,
const float uplim,
636 int qc1, qc2, qc3, qc4;
/* First loop: no escape handling visible. */
646 for (i = 0; i <
size; i += 4) {
647 int curidx, curidx2, sign1, count1, sign2, count2;
/* Raw 32-bit view of the inputs, used by the asm for the sign test. */
648 int *in_int = (
int *)&in[i];
650 unsigned int v_codes;
652 qc1 = scaled[i ] * Q34 + 0.4054f;
653 qc2 = scaled[i+1] * Q34 + 0.4054f;
654 qc3 = scaled[i+2] * Q34 + 0.4054f;
655 qc4 = scaled[i+3] * Q34 + 0.4054f;
659 ".set noreorder \n\t"
/* $t4 = 16 (upper limit); both sign accumulators start at 0. */
661 "ori $t4, $zero, 16 \n\t"
662 "ori %[sign1], $zero, 0 \n\t"
663 "ori %[sign2], $zero, 0 \n\t"
/* Replace any qcN greater than 16 by 16. */
664 "slt $t0, $t4, %[qc1] \n\t"
665 "slt $t1, $t4, %[qc2] \n\t"
666 "slt $t2, $t4, %[qc3] \n\t"
667 "slt $t3, $t4, %[qc4] \n\t"
668 "movn %[qc1], $t4, $t0 \n\t"
669 "movn %[qc2], $t4, $t1 \n\t"
670 "movn %[qc3], $t4, $t2 \n\t"
671 "movn %[qc4], $t4, $t3 \n\t"
672 "lw $t0, 0(%[in_int]) \n\t"
673 "lw $t1, 4(%[in_int]) \n\t"
674 "lw $t2, 8(%[in_int]) \n\t"
675 "lw $t3, 12(%[in_int]) \n\t"
/* Per-pair sign bits: one bit per non-zero qcN, set when the input word < 0. */
676 "slt $t0, $t0, $zero \n\t"
677 "movn %[sign1], $t0, %[qc1] \n\t"
678 "slt $t2, $t2, $zero \n\t"
679 "movn %[sign2], $t2, %[qc3] \n\t"
680 "slt $t1, $t1, $zero \n\t"
681 "sll $t0, %[sign1], 1 \n\t"
682 "or $t0, $t0, $t1 \n\t"
683 "movn %[sign1], $t0, %[qc2] \n\t"
684 "slt $t3, $t3, $zero \n\t"
685 "sll $t0, %[sign2], 1 \n\t"
686 "or $t0, $t0, $t3 \n\t"
687 "movn %[sign2], $t0, %[qc4] \n\t"
/* countN = number of coefficients in the pair that are greater than zero. */
688 "slt %[count1], $zero, %[qc1] \n\t"
689 "slt $t1, $zero, %[qc2] \n\t"
690 "slt %[count2], $zero, %[qc3] \n\t"
691 "slt $t2, $zero, %[qc4] \n\t"
692 "addu %[count1], %[count1], $t1 \n\t"
693 "addu %[count2], %[count2], $t2 \n\t"
697 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
698 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
699 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
700 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2)
701 : [in_int]
"r"(in_int)
/* NOTE(review): the remainder of the clobber list is not visible here. */
702 :
"t0",
"t1",
"t2",
"t3",
"t4",
/* Codeword plus sign bits for each of the two pairs. */
711 v_codes = (p_codes[curidx] << count1) | sign1;
712 v_bits = p_bits[curidx] + count1;
715 v_codes = (p_codes[curidx2] << count2) | sign2;
716 v_bits = p_bits[curidx2] + count2;
/* Second loop: same quantization plus escape-value handling. */
720 for (i = 0; i <
size; i += 4) {
721 int curidx, curidx2, sign1, count1, sign2, count2;
722 int *in_int = (
int *)&in[i];
724 unsigned int v_codes;
727 qc1 = scaled[i ] * Q34 + 0.4054f;
728 qc2 = scaled[i+1] * Q34 + 0.4054f;
729 qc3 = scaled[i+2] * Q34 + 0.4054f;
730 qc4 = scaled[i+3] * Q34 + 0.4054f;
734 ".set noreorder \n\t"
736 "ori $t4, $zero, 16 \n\t"
737 "ori %[sign1], $zero, 0 \n\t"
738 "ori %[sign2], $zero, 0 \n\t"
/*
 * cN = qcN shifted left by 18 with saturation (shll_s.w), then shifted
 * right logically by 18.  Presumably this limits cN to 13 bits before qcN
 * itself is clamped below - confirm against the MIPS DSP ASE manual.
 */
739 "shll_s.w %[c1], %[qc1], 18 \n\t"
740 "shll_s.w %[c2], %[qc2], 18 \n\t"
741 "shll_s.w %[c3], %[qc3], 18 \n\t"
742 "shll_s.w %[c4], %[qc4], 18 \n\t"
743 "srl %[c1], %[c1], 18 \n\t"
744 "srl %[c2], %[c2], 18 \n\t"
745 "srl %[c3], %[c3], 18 \n\t"
746 "srl %[c4], %[c4], 18 \n\t"
/* Replace any qcN greater than 16 by 16. */
747 "slt $t0, $t4, %[qc1] \n\t"
748 "slt $t1, $t4, %[qc2] \n\t"
749 "slt $t2, $t4, %[qc3] \n\t"
750 "slt $t3, $t4, %[qc4] \n\t"
751 "movn %[qc1], $t4, $t0 \n\t"
752 "movn %[qc2], $t4, $t1 \n\t"
753 "movn %[qc3], $t4, $t2 \n\t"
754 "movn %[qc4], $t4, $t3 \n\t"
755 "lw $t0, 0(%[in_int]) \n\t"
756 "lw $t1, 4(%[in_int]) \n\t"
757 "lw $t2, 8(%[in_int]) \n\t"
758 "lw $t3, 12(%[in_int]) \n\t"
/* Per-pair sign bits and counts, as in the first loop. */
759 "slt $t0, $t0, $zero \n\t"
760 "movn %[sign1], $t0, %[qc1] \n\t"
761 "slt $t2, $t2, $zero \n\t"
762 "movn %[sign2], $t2, %[qc3] \n\t"
763 "slt $t1, $t1, $zero \n\t"
764 "sll $t0, %[sign1], 1 \n\t"
765 "or $t0, $t0, $t1 \n\t"
766 "movn %[sign1], $t0, %[qc2] \n\t"
767 "slt $t3, $t3, $zero \n\t"
768 "sll $t0, %[sign2], 1 \n\t"
769 "or $t0, $t0, $t3 \n\t"
770 "movn %[sign2], $t0, %[qc4] \n\t"
771 "slt %[count1], $zero, %[qc1] \n\t"
772 "slt $t1, $zero, %[qc2] \n\t"
773 "slt %[count2], $zero, %[qc3] \n\t"
774 "slt $t2, $zero, %[qc4] \n\t"
775 "addu %[count1], %[count1], $t1 \n\t"
776 "addu %[count2], %[count2], $t2 \n\t"
780 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
781 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
782 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
783 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
784 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
785 [
c3]
"=&r"(
c3), [c4]
"=&r"(c4)
786 : [in_int]
"r"(in_int)
/* NOTE(review): the remainder of the clobber list is not visible here. */
787 :
"t0",
"t1",
"t2",
"t3",
"t4",
/* First pair: codeword plus sign bits. */
797 v_codes = (p_codes[curidx] << count1) | sign1;
798 v_bits = p_bits[curidx] + count1;
/*
 * A table vector component equal to 64.0f triggers an extra code: a prefix
 * of (len - 3) bits whose value is (1 << (len - 3)) - 2, followed by the
 * low len bits of the saturated coefficient.
 */
801 if (p_vectors[curidx*2 ] == 64.0f) {
803 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 <<
len) - 1));
806 if (p_vectors[curidx*2+1] == 64.0f) {
808 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 <<
len) - 1));
/* Second pair: same sequence using curidx2, count2/sign2 and c3/c4. */
812 v_codes = (p_codes[curidx2] << count2) | sign2;
813 v_bits = p_bits[curidx2] + count2;
816 if (p_vectors[curidx2*2 ] == 64.0f) {
818 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 <<
len) - 1));
821 if (p_vectors[curidx2*2+1] == 64.0f) {
823 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 <<
len) - 1));
/*
 * NOTE(review): fragment - the declaration line is not visible here.
 * Presumably this is the parameter list and initializer of
 * quantize_and_encode_band_cost_arr, the function-pointer table that the
 * quantize_and_encode_band_cost macro indexes by cb.
 * Each variant is listed twice in a row, except the ESC variant, which
 * appears once in the visible part.  Entries may exist before or after the
 * visible ones, so the index of each entry cannot be determined from here.
 */
832 const float *scaled,
int size,
int scale_idx,
833 int cb,
const float lambda,
const float uplim,
836 quantize_and_encode_band_cost_SQUAD_mips,
837 quantize_and_encode_band_cost_SQUAD_mips,
838 quantize_and_encode_band_cost_UQUAD_mips,
839 quantize_and_encode_band_cost_UQUAD_mips,
840 quantize_and_encode_band_cost_SPAIR_mips,
841 quantize_and_encode_band_cost_SPAIR_mips,
842 quantize_and_encode_band_cost_UPAIR7_mips,
843 quantize_and_encode_band_cost_UPAIR7_mips,
844 quantize_and_encode_band_cost_UPAIR12_mips,
845 quantize_and_encode_band_cost_UPAIR12_mips,
846 quantize_and_encode_band_cost_ESC_mips,
/*
 * Dispatch helper: selects entry cb of quantize_and_encode_band_cost_arr
 * and calls it, forwarding the macro arguments in the order given.
 * cb is evaluated twice (once as the index, once as an argument), so it
 * must not have side effects.
 * NOTE(review): the final continuation line of the macro is not visible in
 * this excerpt.
 */
849 #define quantize_and_encode_band_cost( \
850 s, pb, in, scaled, size, scale_idx, cb, \
851 lambda, uplim, bits) \
852 quantize_and_encode_band_cost_arr[cb]( \
853 s, pb, in, scaled, size, scale_idx, cb, \
857 const float *in,
int size,
int scale_idx,
858 int cb,
const float lambda)
/**
 * Bit-count routine listed first in get_band_numbits_arr.
 *
 * NOTE(review): only part of the signature is visible in this excerpt; the
 * remaining parameters and the whole body are on lines not shown, so the
 * returned value cannot be documented from here.
 */
867 static float get_band_numbits_ZERO_mips(
struct AACEncContext *s,
869 const float *scaled,
int size,
int scale_idx,
870 int cb,
const float lambda,
const float uplim,
/**
 * Count the bits needed to code a band, SQUAD variant (MIPS inline asm).
 *
 * Works on four coefficients per loop iteration.  Each qcN is
 * scaled[i+N-1] * Q34 + 0.4054f converted to int, reduced to 0/1 and
 * negated when the matching word of in[] has its top bit set.  The table
 * length p_bits[curidx] is added to curbits for every group of four.
 *
 * NOTE(review): parts of the parameter list, local declarations, the
 * computation of curidx and the return statement are not visible in this
 * excerpt.
 */
876 static float get_band_numbits_SQUAD_mips(
struct AACEncContext *s,
878 const float *scaled,
int size,
int scale_idx,
879 int cb,
const float lambda,
const float uplim,
884 int qc1, qc2, qc3, qc4;
889 for (i = 0; i <
size; i += 4) {
/* Raw 32-bit view of the inputs, used by the asm for the sign test. */
891 int *in_int = (
int *)&in[i];
893 qc1 = scaled[i ] * Q34 + 0.4054f;
894 qc2 = scaled[i+1] * Q34 + 0.4054f;
895 qc3 = scaled[i+2] * Q34 + 0.4054f;
896 qc4 = scaled[i+3] * Q34 + 0.4054f;
900 ".set noreorder \n\t"
/* qcN = (0 < qcN): any positive value becomes 1, everything else 0. */
902 "slt %[qc1], $zero, %[qc1] \n\t"
903 "slt %[qc2], $zero, %[qc2] \n\t"
904 "slt %[qc3], $zero, %[qc3] \n\t"
905 "slt %[qc4], $zero, %[qc4] \n\t"
/* Load the input words, isolate bit 31, and negate qcN where it is set. */
906 "lw $t0, 0(%[in_int]) \n\t"
907 "lw $t1, 4(%[in_int]) \n\t"
908 "lw $t2, 8(%[in_int]) \n\t"
909 "lw $t3, 12(%[in_int]) \n\t"
910 "srl $t0, $t0, 31 \n\t"
911 "srl $t1, $t1, 31 \n\t"
912 "srl $t2, $t2, 31 \n\t"
913 "srl $t3, $t3, 31 \n\t"
914 "subu $t4, $zero, %[qc1] \n\t"
915 "subu $t5, $zero, %[qc2] \n\t"
916 "subu $t6, $zero, %[qc3] \n\t"
917 "subu $t7, $zero, %[qc4] \n\t"
918 "movn %[qc1], $t4, $t0 \n\t"
919 "movn %[qc2], $t5, $t1 \n\t"
920 "movn %[qc3], $t6, $t2 \n\t"
921 "movn %[qc4], $t7, $t3 \n\t"
925 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
926 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4)
927 : [in_int]
"r"(in_int)
/* NOTE(review): the remainder of the clobber list is not visible here. */
928 :
"t0",
"t1",
"t2",
"t3",
929 "t4",
"t5",
"t6",
"t7",
/* Add the codeword length for this group of four. */
942 curbits += p_bits[curidx];
/**
 * Count the bits needed to code a band, UQUAD variant (MIPS inline asm).
 *
 * Works on four coefficients per loop iteration.  Each qcN is
 * scaled[i+N-1] * Q34 + 0.4054f converted to int and limited to at most 2.
 * For every group the codeword length p_bits[curidx] and the sign-bit
 * count uquad_sign_bits[curidx] are added to curbits.
 *
 * NOTE(review): parts of the parameter list, local declarations, the
 * computation of curidx and the return statement are not visible in this
 * excerpt.
 */
947 static float get_band_numbits_UQUAD_mips(
struct AACEncContext *s,
949 const float *scaled,
int size,
int scale_idx,
950 int cb,
const float lambda,
const float uplim,
956 int qc1, qc2, qc3, qc4;
960 for (i = 0; i <
size; i += 4) {
963 qc1 = scaled[i ] * Q34 + 0.4054f;
964 qc2 = scaled[i+1] * Q34 + 0.4054f;
965 qc3 = scaled[i+2] * Q34 + 0.4054f;
966 qc4 = scaled[i+3] * Q34 + 0.4054f;
970 ".set noreorder \n\t"
/* Replace any qcN greater than 2 by 2. */
972 "ori $t4, $zero, 2 \n\t"
973 "slt $t0, $t4, %[qc1] \n\t"
974 "slt $t1, $t4, %[qc2] \n\t"
975 "slt $t2, $t4, %[qc3] \n\t"
976 "slt $t3, $t4, %[qc4] \n\t"
977 "movn %[qc1], $t4, $t0 \n\t"
978 "movn %[qc2], $t4, $t1 \n\t"
979 "movn %[qc3], $t4, $t2 \n\t"
980 "movn %[qc4], $t4, $t3 \n\t"
/* Only registers are touched: qc1..qc4 read-write, $t0-$t4 scratch. */
984 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
985 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4)
987 :
"t0",
"t1",
"t2",
"t3",
"t4"
/* Codeword length plus one sign bit per non-zero component. */
998 curbits += p_bits[curidx];
999 curbits += uquad_sign_bits[curidx];
/**
 * Count the bits needed to code a band, SPAIR variant (MIPS inline asm).
 *
 * Works on four coefficients (two pairs) per loop iteration.  Each qcN is
 * scaled[i+N-1] * Q34 + 0.4054f converted to int, limited to at most 4 and
 * negated when the matching word of in[] has its top bit set.  The
 * codeword lengths of both pairs are added to curbits.
 *
 * NOTE(review): parts of the parameter list, local declarations, most of
 * the curidx/curidx2 computation and the return statement are not visible
 * in this excerpt.
 */
1004 static float get_band_numbits_SPAIR_mips(
struct AACEncContext *s,
1006 const float *scaled,
int size,
int scale_idx,
1007 int cb,
const float lambda,
const float uplim,
1012 int qc1, qc2, qc3, qc4;
1017 for (i = 0; i <
size; i += 4) {
1018 int curidx, curidx2;
/* Raw 32-bit view of the inputs, used by the asm for the sign test. */
1019 int *in_int = (
int *)&in[i];
1021 qc1 = scaled[i ] * Q34 + 0.4054f;
1022 qc2 = scaled[i+1] * Q34 + 0.4054f;
1023 qc3 = scaled[i+2] * Q34 + 0.4054f;
1024 qc4 = scaled[i+3] * Q34 + 0.4054f;
1028 ".set noreorder \n\t"
/* Replace any qcN greater than 4 by 4. */
1030 "ori $t4, $zero, 4 \n\t"
1031 "slt $t0, $t4, %[qc1] \n\t"
1032 "slt $t1, $t4, %[qc2] \n\t"
1033 "slt $t2, $t4, %[qc3] \n\t"
1034 "slt $t3, $t4, %[qc4] \n\t"
1035 "movn %[qc1], $t4, $t0 \n\t"
1036 "movn %[qc2], $t4, $t1 \n\t"
1037 "movn %[qc3], $t4, $t2 \n\t"
1038 "movn %[qc4], $t4, $t3 \n\t"
/* Load the input words, isolate bit 31, and negate qcN where it is set. */
1039 "lw $t0, 0(%[in_int]) \n\t"
1040 "lw $t1, 4(%[in_int]) \n\t"
1041 "lw $t2, 8(%[in_int]) \n\t"
1042 "lw $t3, 12(%[in_int]) \n\t"
1043 "srl $t0, $t0, 31 \n\t"
1044 "srl $t1, $t1, 31 \n\t"
1045 "srl $t2, $t2, 31 \n\t"
1046 "srl $t3, $t3, 31 \n\t"
1047 "subu $t4, $zero, %[qc1] \n\t"
1048 "subu $t5, $zero, %[qc2] \n\t"
1049 "subu $t6, $zero, %[qc3] \n\t"
1050 "subu $t7, $zero, %[qc4] \n\t"
1051 "movn %[qc1], $t4, $t0 \n\t"
1052 "movn %[qc2], $t5, $t1 \n\t"
1053 "movn %[qc3], $t6, $t2 \n\t"
1054 "movn %[qc4], $t7, $t3 \n\t"
1058 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1059 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4)
1060 : [in_int]
"r"(in_int)
/* NOTE(review): the remainder of the clobber list is not visible here. */
1061 :
"t0",
"t1",
"t2",
"t3",
1062 "t4",
"t5",
"t6",
"t7",
/*
 * Last visible step of the second pair's index: add qc4 plus a bias of 40.
 * NOTE(review): the earlier steps that initialise curidx and curidx2 are
 * not visible here.
 */
1070 curidx2 += qc4 + 40;
/* Add the codeword lengths of both pairs. */
1072 curbits += p_bits[curidx] + p_bits[curidx2];
/**
 * Count the bits needed to code a band, UPAIR7 variant (MIPS inline asm).
 *
 * Works on four coefficients (two pairs) per loop iteration.  Each qcN is
 * scaled[i+N-1] * Q34 + 0.4054f converted to int and limited to at most 7.
 * curbits receives the codeword length and sign-bit count of each pair.
 *
 * NOTE(review): parts of the parameter list, local declarations, the
 * computation of curidx/curidx2, one term of the curbits sum and the
 * return statement are not visible in this excerpt.
 */
1077 static float get_band_numbits_UPAIR7_mips(
struct AACEncContext *s,
1079 const float *scaled,
int size,
int scale_idx,
1080 int cb,
const float lambda,
const float uplim,
1085 int qc1, qc2, qc3, qc4;
1090 for (i = 0; i <
size; i += 4) {
1091 int curidx, curidx2;
1093 qc1 = scaled[i ] * Q34 + 0.4054f;
1094 qc2 = scaled[i+1] * Q34 + 0.4054f;
1095 qc3 = scaled[i+2] * Q34 + 0.4054f;
1096 qc4 = scaled[i+3] * Q34 + 0.4054f;
1100 ".set noreorder \n\t"
/* Replace any qcN greater than 7 by 7. */
1102 "ori $t4, $zero, 7 \n\t"
1103 "slt $t0, $t4, %[qc1] \n\t"
1104 "slt $t1, $t4, %[qc2] \n\t"
1105 "slt $t2, $t4, %[qc3] \n\t"
1106 "slt $t3, $t4, %[qc4] \n\t"
1107 "movn %[qc1], $t4, $t0 \n\t"
1108 "movn %[qc2], $t4, $t1 \n\t"
1109 "movn %[qc3], $t4, $t2 \n\t"
1110 "movn %[qc4], $t4, $t3 \n\t"
/* Only registers are touched: qc1..qc4 read-write, $t0-$t4 scratch. */
1114 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1115 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4)
1117 :
"t0",
"t1",
"t2",
"t3",
"t4"
/*
 * Sum of codeword lengths and sign-bit counts for both pairs.
 * NOTE(review): one term of this sum (between the first pair's sign bits
 * and the second pair's) is on a line not visible here.
 */
1126 curbits += p_bits[curidx] +
1127 upair7_sign_bits[curidx] +
1129 upair7_sign_bits[curidx2];
/**
 * Count the bits needed to code a band, UPAIR12 variant (MIPS inline asm).
 *
 * Works on four coefficients (two pairs) per loop iteration.  Each qcN is
 * scaled[i+N-1] * Q34 + 0.4054f converted to int and limited to at most
 * 12.  curbits receives the codeword length and sign-bit count of each
 * pair.
 *
 * NOTE(review): parts of the parameter list, local declarations, the
 * computation of curidx/curidx2, one term of the curbits sum and the
 * return statement are not visible in this excerpt.
 */
1134 static float get_band_numbits_UPAIR12_mips(
struct AACEncContext *s,
1136 const float *scaled,
int size,
int scale_idx,
1137 int cb,
const float lambda,
const float uplim,
1142 int qc1, qc2, qc3, qc4;
1147 for (i = 0; i <
size; i += 4) {
1148 int curidx, curidx2;
1150 qc1 = scaled[i ] * Q34 + 0.4054f;
1151 qc2 = scaled[i+1] * Q34 + 0.4054f;
1152 qc3 = scaled[i+2] * Q34 + 0.4054f;
1153 qc4 = scaled[i+3] * Q34 + 0.4054f;
1157 ".set noreorder \n\t"
/* Replace any qcN greater than 12 by 12. */
1159 "ori $t4, $zero, 12 \n\t"
1160 "slt $t0, $t4, %[qc1] \n\t"
1161 "slt $t1, $t4, %[qc2] \n\t"
1162 "slt $t2, $t4, %[qc3] \n\t"
1163 "slt $t3, $t4, %[qc4] \n\t"
1164 "movn %[qc1], $t4, $t0 \n\t"
1165 "movn %[qc2], $t4, $t1 \n\t"
1166 "movn %[qc3], $t4, $t2 \n\t"
1167 "movn %[qc4], $t4, $t3 \n\t"
/* Only registers are touched: qc1..qc4 read-write, $t0-$t4 scratch. */
1171 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1172 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4)
1174 :
"t0",
"t1",
"t2",
"t3",
"t4"
/*
 * Sum of codeword lengths and sign-bit counts for both pairs.
 * NOTE(review): one term of this sum (between the first pair's codeword
 * length and its sign bits) is on a line not visible here.
 */
1183 curbits += p_bits[curidx] +
1185 upair12_sign_bits[curidx] +
1186 upair12_sign_bits[curidx2];
/**
 * Count the bits needed to code a band, ESC variant (MIPS inline asm).
 *
 * Works on four coefficients (two pairs) per loop iteration.  Each qcN is
 * scaled[i+N-1] * Q34 + 0.4054f converted to int; values above 15 are
 * replaced by 16.  For those large values the asm also derives a
 * per-coefficient extra bit count cN from the position of the highest set
 * bit; cN is 0 for values of 15 or less.  curbits receives the codeword
 * length and sign-bit count of each pair.
 *
 * NOTE(review): parts of the parameter list, local declarations, the asm
 * input/clobber lists, the computation of curidx/curidx2, any use of
 * c1..c4 and the return statement are not visible in this excerpt.
 */
1191 static float get_band_numbits_ESC_mips(
struct AACEncContext *s,
1193 const float *scaled,
int size,
int scale_idx,
1194 int cb,
const float lambda,
const float uplim,
1199 int qc1, qc2, qc3, qc4;
1204 for (i = 0; i <
size; i += 4) {
1205 int curidx, curidx2;
1206 int cond0, cond1, cond2, cond3;
1209 qc1 = scaled[i ] * Q34 + 0.4054f;
1210 qc2 = scaled[i+1] * Q34 + 0.4054f;
1211 qc3 = scaled[i+2] * Q34 + 0.4054f;
1212 qc4 = scaled[i+3] * Q34 + 0.4054f;
1216 ".set noreorder \n\t"
/* $t4 = 15 (comparison threshold), $t5 = 16 (replacement value). */
1218 "ori $t4, $zero, 15 \n\t"
1219 "ori $t5, $zero, 16 \n\t"
/*
 * cN = qcN shifted left by 18 with saturation (shll_s.w), then shifted
 * right logically by 18.  Presumably this limits cN to 13 bits - confirm
 * against the MIPS DSP ASE manual.
 */
1220 "shll_s.w %[c1], %[qc1], 18 \n\t"
1221 "shll_s.w %[c2], %[qc2], 18 \n\t"
1222 "shll_s.w %[c3], %[qc3], 18 \n\t"
1223 "shll_s.w %[c4], %[qc4], 18 \n\t"
1224 "srl %[c1], %[c1], 18 \n\t"
1225 "srl %[c2], %[c2], 18 \n\t"
1226 "srl %[c3], %[c3], 18 \n\t"
1227 "srl %[c4], %[c4], 18 \n\t"
/* condN = (qcN > 15); where true, qcN is replaced by 16. */
1228 "slt %[cond0], $t4, %[qc1] \n\t"
1229 "slt %[cond1], $t4, %[qc2] \n\t"
1230 "slt %[cond2], $t4, %[qc3] \n\t"
1231 "slt %[cond3], $t4, %[qc4] \n\t"
1232 "movn %[qc1], $t5, %[cond0] \n\t"
1233 "movn %[qc2], $t5, %[cond1] \n\t"
1234 "movn %[qc3], $t5, %[cond2] \n\t"
1235 "movn %[qc4], $t5, %[cond3] \n\t"
/* cN = 2 * (31 - clz(cN)) - 3, i.e. twice the top-bit position minus 3. */
1236 "ori $t5, $zero, 31 \n\t"
1237 "clz %[c1], %[c1] \n\t"
1238 "clz %[c2], %[c2] \n\t"
1239 "clz %[c3], %[c3] \n\t"
1240 "clz %[c4], %[c4] \n\t"
1241 "subu %[c1], $t5, %[c1] \n\t"
1242 "subu %[c2], $t5, %[c2] \n\t"
1243 "subu %[c3], $t5, %[c3] \n\t"
1244 "subu %[c4], $t5, %[c4] \n\t"
1245 "sll %[c1], %[c1], 1 \n\t"
1246 "sll %[c2], %[c2], 1 \n\t"
1247 "sll %[c3], %[c3], 1 \n\t"
1248 "sll %[c4], %[c4], 1 \n\t"
1249 "addiu %[c1], %[c1], -3 \n\t"
1250 "addiu %[c2], %[c2], -3 \n\t"
1251 "addiu %[c3], %[c3], -3 \n\t"
1252 "addiu %[c4], %[c4], -3 \n\t"
/* Turn condN (0/1) into a 0 / all-ones mask and zero cN where qcN <= 15. */
1253 "subu %[cond0], $zero, %[cond0] \n\t"
1254 "subu %[cond1], $zero, %[cond1] \n\t"
1255 "subu %[cond2], $zero, %[cond2] \n\t"
1256 "subu %[cond3], $zero, %[cond3] \n\t"
1257 "and %[c1], %[c1], %[cond0] \n\t"
1258 "and %[c2], %[c2], %[cond1] \n\t"
1259 "and %[c3], %[c3], %[cond2] \n\t"
1260 "and %[c4], %[c4], %[cond3] \n\t"
/* qc1..qc4 are read-write; condN and cN are early-clobber outputs. */
1264 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1265 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1266 [cond0]
"=&r"(cond0), [cond1]
"=&r"(cond1),
1267 [cond2]
"=&r"(cond2), [cond3]
"=&r"(cond3),
1268 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
1269 [
c3]
"=&r"(
c3), [c4]
"=&r"(c4)
/* Codeword length and sign-bit count for each of the two pairs. */
1280 curbits += p_bits[curidx];
1281 curbits += esc_sign_bits[curidx];
1282 curbits += p_bits[curidx2];
1283 curbits += esc_sign_bits[curidx2];
/*
 * Table of bit-count routines, indexed by cb through the get_band_numbits
 * macro.  In the visible part the order is: ZERO, then SQUAD, UQUAD,
 * SPAIR, UPAIR7 and UPAIR12 twice each, then ESC.
 * NOTE(review): part of the function-pointer parameter list and the end of
 * the initializer are not visible in this excerpt; the index mapping
 * (0 -> ZERO, 1-2 -> SQUAD, ... 11 -> ESC) assumes no entry precedes ZERO -
 * confirm.
 */
1293 static float (*
const get_band_numbits_arr[])(
struct AACEncContext *
s,
1295 const float *scaled,
int size,
int scale_idx,
1296 int cb,
const float lambda,
const float uplim,
1298 get_band_numbits_ZERO_mips,
1299 get_band_numbits_SQUAD_mips,
1300 get_band_numbits_SQUAD_mips,
1301 get_band_numbits_UQUAD_mips,
1302 get_band_numbits_UQUAD_mips,
1303 get_band_numbits_SPAIR_mips,
1304 get_band_numbits_SPAIR_mips,
1305 get_band_numbits_UPAIR7_mips,
1306 get_band_numbits_UPAIR7_mips,
1307 get_band_numbits_UPAIR12_mips,
1308 get_band_numbits_UPAIR12_mips,
1309 get_band_numbits_ESC_mips,
/*
 * Dispatch helper: selects entry cb of get_band_numbits_arr and calls it
 * with all ten macro arguments, unchanged and in the same order.
 * cb is evaluated twice (once as the index, once as an argument), so it
 * must not have side effects.  No range check on cb is done here.
 */
1312 #define get_band_numbits( \
1313 s, pb, in, scaled, size, scale_idx, cb, \
1314 lambda, uplim, bits) \
1315 get_band_numbits_arr[cb]( \
1316 s, pb, in, scaled, size, scale_idx, cb, \
1317 lambda, uplim, bits)
/**
 * Return the bit count of a band for codebook cb.
 *
 * Thin wrapper around the get_band_numbits dispatch macro: every argument
 * is forwarded as-is, and NULL is passed in the macro's "pb" position.
 *
 * NOTE(review): the end of the parameter list (including the declaration
 * of "bits") is not visible in this excerpt.
 */
1319 static float quantize_band_cost_bits(
struct AACEncContext *s,
const float *in,
1320 const float *scaled,
int size,
int scale_idx,
1321 int cb,
const float lambda,
const float uplim,
1324 return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
/**
 * Cost routine, ZERO variant.
 *
 * Accumulates the sum of squares of in[0 .. size-1] into cost, four
 * elements per loop iteration (the loop steps by 4, so it reads in[i+3]
 * for every i it visits).
 *
 * NOTE(review): the rest of the parameter list, the initialisation of
 * cost and what is returned are not visible in this excerpt.
 */
1331 static float get_band_cost_ZERO_mips(
struct AACEncContext *s,
1333 const float *scaled,
int size,
int scale_idx,
1334 int cb,
const float lambda,
const float uplim,
1340 for (i = 0; i <
size; i += 4) {
1341 cost += in[i ] * in[i ];
1342 cost += in[i+1] * in[i+1];
1343 cost += in[i+2] * in[i+2];
1344 cost += in[i+3] * in[i+3];
/**
 * Rate-distortion cost of a band, SQUAD variant (MIPS inline asm).
 *
 * Works on four coefficients per loop iteration:
 *  - quantizes to -1/0/1 exactly like the SQUAD bit-count routine,
 *  - adds the codeword length p_bits[curidx] to curbits,
 *  - computes four differences di0..di3 between the inputs and the
 *    selected table vector scaled by IQ, and adds their squares to cost.
 *
 * @return cost * lambda + curbits
 *
 * NOTE(review): parts of the parameter list, local declarations, the
 * computation of curidx and the definition of IQ are not visible in this
 * excerpt.
 */
1351 static float get_band_cost_SQUAD_mips(
struct AACEncContext *s,
1353 const float *scaled,
int size,
int scale_idx,
1354 int cb,
const float lambda,
const float uplim,
1361 int qc1, qc2, qc3, qc4;
1367 for (i = 0; i <
size; i += 4) {
/* Two views of the same four inputs: raw words for the sign test, floats for the error. */
1370 int *in_int = (
int *)&in[i];
1371 float *in_pos = (
float *)&in[i];
1372 float di0, di1, di2, di3;
1374 qc1 = scaled[i ] * Q34 + 0.4054f;
1375 qc2 = scaled[i+1] * Q34 + 0.4054f;
1376 qc3 = scaled[i+2] * Q34 + 0.4054f;
1377 qc4 = scaled[i+3] * Q34 + 0.4054f;
1381 ".set noreorder \n\t"
/* qcN = (0 < qcN): any positive value becomes 1, everything else 0. */
1383 "slt %[qc1], $zero, %[qc1] \n\t"
1384 "slt %[qc2], $zero, %[qc2] \n\t"
1385 "slt %[qc3], $zero, %[qc3] \n\t"
1386 "slt %[qc4], $zero, %[qc4] \n\t"
/* Load the input words, isolate bit 31, and negate qcN where it is set. */
1387 "lw $t0, 0(%[in_int]) \n\t"
1388 "lw $t1, 4(%[in_int]) \n\t"
1389 "lw $t2, 8(%[in_int]) \n\t"
1390 "lw $t3, 12(%[in_int]) \n\t"
1391 "srl $t0, $t0, 31 \n\t"
1392 "srl $t1, $t1, 31 \n\t"
1393 "srl $t2, $t2, 31 \n\t"
1394 "srl $t3, $t3, 31 \n\t"
1395 "subu $t4, $zero, %[qc1] \n\t"
1396 "subu $t5, $zero, %[qc2] \n\t"
1397 "subu $t6, $zero, %[qc3] \n\t"
1398 "subu $t7, $zero, %[qc4] \n\t"
1399 "movn %[qc1], $t4, $t0 \n\t"
1400 "movn %[qc2], $t5, $t1 \n\t"
1401 "movn %[qc3], $t6, $t2 \n\t"
1402 "movn %[qc4], $t7, $t3 \n\t"
1406 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1407 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4)
1408 : [in_int]
"r"(in_int)
/* NOTE(review): the remainder of the clobber list is not visible here. */
1409 :
"t0",
"t1",
"t2",
"t3",
1410 "t4",
"t5",
"t6",
"t7",
/* Codeword length, and the 4-element table row used for the error below. */
1423 curbits += p_bits[curidx];
1424 vec = &p_codes[curidx*4];
1428 ".set noreorder \n\t"
/* Even FP registers hold in_pos[k], odd ones hold vec[k]. */
1430 "lwc1 $f0, 0(%[in_pos]) \n\t"
1431 "lwc1 $f1, 0(%[vec]) \n\t"
1432 "lwc1 $f2, 4(%[in_pos]) \n\t"
1433 "lwc1 $f3, 4(%[vec]) \n\t"
1434 "lwc1 $f4, 8(%[in_pos]) \n\t"
1435 "lwc1 $f5, 8(%[vec]) \n\t"
1436 "lwc1 $f6, 12(%[in_pos]) \n\t"
1437 "lwc1 $f7, 12(%[vec]) \n\t"
/*
 * nmsub.s fd, fr, fs, ft: presumably fd = fr - fs * ft, giving
 * diK = in_pos[K] - vec[K] * IQ - confirm against the MIPS FPU manual.
 */
1438 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1439 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1440 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1441 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1445 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1446 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1447 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
/* NOTE(review): the IQ input operand and the end of the clobber list are not visible here. */
1449 :
"$f0",
"$f1",
"$f2",
"$f3",
1450 "$f4",
"$f5",
"$f6",
"$f7",
/* Squared error of this group of four. */
1454 cost += di0 * di0 + di1 * di1
1455 + di2 * di2 + di3 * di3;
/* Distortion weighted by lambda plus the bit count. */
1460 return cost * lambda + curbits;
/**
 * Rate-distortion cost of a band, UQUAD variant (MIPS inline asm).
 *
 * Works on four coefficients per loop iteration:
 *  - each qcN is scaled[i+N-1] * Q34 + 0.4054f converted to int and
 *    limited to at most 2,
 *  - the codeword length p_bits[curidx] and the sign-bit count
 *    uquad_sign_bits[curidx] are added to curbits,
 *  - four differences di0..di3 are computed from the absolute values of
 *    the inputs and the selected table vector scaled by IQ, and their
 *    squares are added to cost.
 *
 * @return cost * lambda + curbits
 *
 * NOTE(review): parts of the parameter list, local declarations, the
 * computation of curidx and the definition of IQ are not visible in this
 * excerpt.
 */
1463 static float get_band_cost_UQUAD_mips(
struct AACEncContext *s,
1465 const float *scaled,
int size,
int scale_idx,
1466 int cb,
const float lambda,
const float uplim,
1474 int qc1, qc2, qc3, qc4;
1479 for (i = 0; i <
size; i += 4) {
/* Non-const float view of the four inputs, used as the asm load base. */
1482 float *in_pos = (
float *)&in[i];
1483 float di0, di1, di2, di3;
1485 qc1 = scaled[i ] * Q34 + 0.4054f;
1486 qc2 = scaled[i+1] * Q34 + 0.4054f;
1487 qc3 = scaled[i+2] * Q34 + 0.4054f;
1488 qc4 = scaled[i+3] * Q34 + 0.4054f;
1492 ".set noreorder \n\t"
/* Replace any qcN greater than 2 by 2. */
1494 "ori $t4, $zero, 2 \n\t"
1495 "slt $t0, $t4, %[qc1] \n\t"
1496 "slt $t1, $t4, %[qc2] \n\t"
1497 "slt $t2, $t4, %[qc3] \n\t"
1498 "slt $t3, $t4, %[qc4] \n\t"
1499 "movn %[qc1], $t4, $t0 \n\t"
1500 "movn %[qc2], $t4, $t1 \n\t"
1501 "movn %[qc3], $t4, $t2 \n\t"
1502 "movn %[qc4], $t4, $t3 \n\t"
/* Only registers are touched: qc1..qc4 read-write, $t0-$t4 scratch. */
1506 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1507 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4)
1509 :
"t0",
"t1",
"t2",
"t3",
"t4"
/* Codeword length, sign bits, and the 4-element table row for the error. */
1520 curbits += p_bits[curidx];
1521 curbits += uquad_sign_bits[curidx];
1522 vec = &p_codes[curidx*4];
1526 ".set noreorder \n\t"
/* diK = |in_pos[K]| */
1528 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1529 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1530 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1531 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1532 "abs.s %[di0], %[di0] \n\t"
1533 "abs.s %[di1], %[di1] \n\t"
1534 "abs.s %[di2], %[di2] \n\t"
1535 "abs.s %[di3], %[di3] \n\t"
1536 "lwc1 $f0, 0(%[vec]) \n\t"
1537 "lwc1 $f1, 4(%[vec]) \n\t"
1538 "lwc1 $f2, 8(%[vec]) \n\t"
1539 "lwc1 $f3, 12(%[vec]) \n\t"
/*
 * nmsub.s fd, fr, fs, ft: presumably fd = fr - fs * ft, giving
 * diK = |in_pos[K]| - vec[K] * IQ - confirm against the MIPS FPU manual.
 */
1540 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1541 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1542 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1543 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1547 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1548 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1549 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
/* NOTE(review): the IQ input operand and the end of the clobber list are not visible here. */
1551 :
"$f0",
"$f1",
"$f2",
"$f3",
/* Squared error of this group of four. */
1555 cost += di0 * di0 + di1 * di1
1556 + di2 * di2 + di3 * di3;
/* Distortion weighted by lambda plus the bit count. */
1561 return cost * lambda + curbits;
/*
 * Band-cost routine that handles four coefficients (two code pairs) per loop
 * iteration and returns cost * lambda + curbits.
 *
 * NOTE(review): this excerpt omits several lines of the function (remaining
 * parameters, local declarations, the curidx/curidx2 computation, asm
 * wrappers and closing braces). The comments below describe only what is
 * visible.
 */
1564 static float get_band_cost_SPAIR_mips(
struct AACEncContext *s,
1566 const float *scaled,
int size,
int scale_idx,
1567 int cb,
const float lambda,
const float uplim,
1574 int qc1, qc2, qc3, qc4;
1580 for (i = 0; i <
size; i += 4) {
1581 const float *vec, *vec2;
1582 int curidx, curidx2;
/* in_int and in_pos alias the same four input values: once as raw 32-bit
 * words (for the sign test) and once as floats (for the distortion). */
1583 int *in_int = (
int *)&in[i];
1584 float *in_pos = (
float *)&in[i];
1585 float di0, di1, di2, di3;
/* Quantize: scale by Q34, add the 0.4054 rounding offset, truncate to int. */
1587 qc1 = scaled[i ] * Q34 + 0.4054f;
1588 qc2 = scaled[i+1] * Q34 + 0.4054f;
1589 qc3 = scaled[i+2] * Q34 + 0.4054f;
1590 qc4 = scaled[i+3] * Q34 + 0.4054f;
1594 ".set noreorder \n\t"
/* t4 = 4; slt/movn replace any qc greater than 4 with 4. */
1596 "ori $t4, $zero, 4 \n\t"
1597 "slt $t0, $t4, %[qc1] \n\t"
1598 "slt $t1, $t4, %[qc2] \n\t"
1599 "slt $t2, $t4, %[qc3] \n\t"
1600 "slt $t3, $t4, %[qc4] \n\t"
1601 "movn %[qc1], $t4, $t0 \n\t"
1602 "movn %[qc2], $t4, $t1 \n\t"
1603 "movn %[qc3], $t4, $t2 \n\t"
1604 "movn %[qc4], $t4, $t3 \n\t"
/* Load the raw input words and keep only bit 31 of each (srl by 31). */
1605 "lw $t0, 0(%[in_int]) \n\t"
1606 "lw $t1, 4(%[in_int]) \n\t"
1607 "lw $t2, 8(%[in_int]) \n\t"
1608 "lw $t3, 12(%[in_int]) \n\t"
1609 "srl $t0, $t0, 31 \n\t"
1610 "srl $t1, $t1, 31 \n\t"
1611 "srl $t2, $t2, 31 \n\t"
1612 "srl $t3, $t3, 31 \n\t"
/* t4..t7 = -qc; movn then negates each qc whose input word had bit 31 set,
 * so the clamped magnitude takes the sign of the corresponding input. */
1613 "subu $t4, $zero, %[qc1] \n\t"
1614 "subu $t5, $zero, %[qc2] \n\t"
1615 "subu $t6, $zero, %[qc3] \n\t"
1616 "subu $t7, $zero, %[qc4] \n\t"
1617 "movn %[qc1], $t4, $t0 \n\t"
1618 "movn %[qc2], $t5, $t1 \n\t"
1619 "movn %[qc3], $t6, $t2 \n\t"
1620 "movn %[qc4], $t7, $t3 \n\t"
/* qc1..qc4 are read-write operands; t0..t7 are used as scratch. */
1624 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1625 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4)
1626 : [in_int]
"r"(in_int)
1627 :
"t0",
"t1",
"t2",
"t3",
1628 "t4",
"t5",
"t6",
"t7",
/* NOTE(review): the lines that first assign curidx and curidx2 are not
 * visible here; only the final "+ 40" offset on curidx2 is shown. */
1636 curidx2 += qc4 + 40;
/* Accumulate the table bit counts for both pairs. */
1638 curbits += p_bits[curidx];
1639 curbits += p_bits[curidx2];
/* Two table values per index: one pair for each of curidx and curidx2. */
1641 vec = &p_codes[curidx*2];
1642 vec2 = &p_codes[curidx2*2];
1646 ".set noreorder \n\t"
/* Interleaved loads of the four inputs and their four table values. */
1648 "lwc1 $f0, 0(%[in_pos]) \n\t"
1649 "lwc1 $f1, 0(%[vec]) \n\t"
1650 "lwc1 $f2, 4(%[in_pos]) \n\t"
1651 "lwc1 $f3, 4(%[vec]) \n\t"
1652 "lwc1 $f4, 8(%[in_pos]) \n\t"
1653 "lwc1 $f5, 0(%[vec2]) \n\t"
1654 "lwc1 $f6, 12(%[in_pos]) \n\t"
1655 "lwc1 $f7, 4(%[vec2]) \n\t"
/* nmsub.s fd, fr, fs, ft yields fr - fs * ft, i.e. di = in - value * IQ. */
1656 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1657 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1658 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1659 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1663 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1664 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1665 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1666 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
1667 :
"$f0",
"$f1",
"$f2",
"$f3",
1668 "$f4",
"$f5",
"$f6",
"$f7",
/* Squared error of the four coefficients. */
1672 cost += di0 * di0 + di1 * di1
1673 + di2 * di2 + di3 * di3;
/* Combined result: weighted distortion plus accumulated bits. */
1678 return cost * lambda + curbits;
/*
 * Band-cost routine for pair tables whose quantized magnitudes are clamped
 * to 7. Handles four coefficients (two pairs) per loop iteration, adds the
 * per-index sign-bit count from upair7_sign_bits and returns
 * cost * lambda + curbits.
 *
 * NOTE(review): this excerpt omits several lines (remaining parameters,
 * local setup, the curidx/curidx2 computation, any later use of
 * sign1/count1/sign2/count2). Comments describe only what is visible.
 */
1681 static float get_band_cost_UPAIR7_mips(
struct AACEncContext *s,
1683 const float *scaled,
int size,
int scale_idx,
1684 int cb,
const float lambda,
const float uplim,
1691 int qc1, qc2, qc3, qc4;
1697 for (i = 0; i <
size; i += 4) {
1698 const float *vec, *vec2;
1699 int curidx, curidx2, sign1, count1, sign2, count2;
/* Same four inputs viewed as raw words (sign test) and as floats. */
1700 int *in_int = (
int *)&in[i];
1701 float *in_pos = (
float *)&in[i];
1702 float di0, di1, di2, di3;
/* Quantize: scale by Q34, add the 0.4054 rounding offset, truncate to int. */
1704 qc1 = scaled[i ] * Q34 + 0.4054f;
1705 qc2 = scaled[i+1] * Q34 + 0.4054f;
1706 qc3 = scaled[i+2] * Q34 + 0.4054f;
1707 qc4 = scaled[i+3] * Q34 + 0.4054f;
1711 ".set noreorder \n\t"
/* t4 = 7 is the clamp limit; sign1 and sign2 start at zero. */
1713 "ori $t4, $zero, 7 \n\t"
1714 "ori %[sign1], $zero, 0 \n\t"
1715 "ori %[sign2], $zero, 0 \n\t"
/* Replace any qc greater than 7 with 7. */
1716 "slt $t0, $t4, %[qc1] \n\t"
1717 "slt $t1, $t4, %[qc2] \n\t"
1718 "slt $t2, $t4, %[qc3] \n\t"
1719 "slt $t3, $t4, %[qc4] \n\t"
1720 "movn %[qc1], $t4, $t0 \n\t"
1721 "movn %[qc2], $t4, $t1 \n\t"
1722 "movn %[qc3], $t4, $t2 \n\t"
1723 "movn %[qc4], $t4, $t3 \n\t"
/* Raw input words; "slt x, x, $zero" below turns each into a 0/1
 * negative flag. */
1724 "lw $t0, 0(%[in_int]) \n\t"
1725 "lw $t1, 4(%[in_int]) \n\t"
1726 "lw $t2, 8(%[in_int]) \n\t"
1727 "lw $t3, 12(%[in_int]) \n\t"
/* The first element's flag is stored in sign1/sign2 only when its qc is
 * non-zero. */
1728 "slt $t0, $t0, $zero \n\t"
1729 "movn %[sign1], $t0, %[qc1] \n\t"
1730 "slt $t2, $t2, $zero \n\t"
1731 "movn %[sign2], $t2, %[qc3] \n\t"
/* When the second element's qc is non-zero, shift the accumulated sign left
 * by one and append the second element's flag. */
1732 "slt $t1, $t1, $zero \n\t"
1733 "sll $t0, %[sign1], 1 \n\t"
1734 "or $t0, $t0, $t1 \n\t"
1735 "movn %[sign1], $t0, %[qc2] \n\t"
1736 "slt $t3, $t3, $zero \n\t"
1737 "sll $t0, %[sign2], 1 \n\t"
1738 "or $t0, $t0, $t3 \n\t"
1739 "movn %[sign2], $t0, %[qc4] \n\t"
/* count1/count2 = number of qc values in each pair that are greater than 0. */
1740 "slt %[count1], $zero, %[qc1] \n\t"
1741 "slt $t1, $zero, %[qc2] \n\t"
1742 "slt %[count2], $zero, %[qc3] \n\t"
1743 "slt $t2, $zero, %[qc4] \n\t"
1744 "addu %[count1], %[count1], $t1 \n\t"
1745 "addu %[count2], %[count2], $t2 \n\t"
/* qc1..qc4 are read-write; sign/count are early-clobber outputs. */
1749 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1750 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1751 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
1752 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2)
1753 : [in_int]
"r"(in_int)
1754 :
"t0",
"t1",
"t2",
"t3",
"t4",
/* First pair: table bits, sign-bit count for this index, table values. */
1764 curbits += p_bits[curidx];
1765 curbits += upair7_sign_bits[curidx];
1766 vec = &p_codes[curidx*2];
/* Second pair, same three lookups. */
1768 curbits += p_bits[curidx2];
1769 curbits += upair7_sign_bits[curidx2];
1770 vec2 = &p_codes[curidx2*2];
1774 ".set noreorder \n\t"
/* Load the four inputs and take their absolute values. */
1776 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1777 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1778 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1779 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1780 "abs.s %[di0], %[di0] \n\t"
1781 "abs.s %[di1], %[di1] \n\t"
1782 "abs.s %[di2], %[di2] \n\t"
1783 "abs.s %[di3], %[di3] \n\t"
1784 "lwc1 $f0, 0(%[vec]) \n\t"
1785 "lwc1 $f1, 4(%[vec]) \n\t"
1786 "lwc1 $f2, 0(%[vec2]) \n\t"
1787 "lwc1 $f3, 4(%[vec2]) \n\t"
/* nmsub.s fd, fr, fs, ft yields fr - fs * ft, i.e. di = |in| - value * IQ. */
1788 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1789 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1790 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1791 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1795 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1796 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1797 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1798 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
1799 :
"$f0",
"$f1",
"$f2",
"$f3",
/* Squared error of the four coefficients. */
1803 cost += di0 * di0 + di1 * di1
1804 + di2 * di2 + di3 * di3;
/* Combined result: weighted distortion plus accumulated bits. */
1809 return cost * lambda + curbits;
/*
 * Band-cost routine for pair tables whose quantized magnitudes are clamped
 * to 12. Handles four coefficients (two pairs) per loop iteration, adds the
 * per-index sign-bit count from upair12_sign_bits and returns
 * cost * lambda + curbits.
 *
 * NOTE(review): this excerpt omits several lines (remaining parameters,
 * local setup, the curidx/curidx2 computation, any later use of
 * sign1/count1/sign2/count2). Comments describe only what is visible.
 */
1812 static float get_band_cost_UPAIR12_mips(
struct AACEncContext *s,
1814 const float *scaled,
int size,
int scale_idx,
1815 int cb,
const float lambda,
const float uplim,
1822 int qc1, qc2, qc3, qc4;
1828 for (i = 0; i <
size; i += 4) {
1829 const float *vec, *vec2;
1830 int curidx, curidx2;
1831 int sign1, count1, sign2, count2;
/* Same four inputs viewed as raw words (sign test) and as floats. */
1832 int *in_int = (
int *)&in[i];
1833 float *in_pos = (
float *)&in[i];
1834 float di0, di1, di2, di3;
/* Quantize: scale by Q34, add the 0.4054 rounding offset, truncate to int. */
1836 qc1 = scaled[i ] * Q34 + 0.4054f;
1837 qc2 = scaled[i+1] * Q34 + 0.4054f;
1838 qc3 = scaled[i+2] * Q34 + 0.4054f;
1839 qc4 = scaled[i+3] * Q34 + 0.4054f;
1843 ".set noreorder \n\t"
/* t4 = 12 is the clamp limit; sign1 and sign2 start at zero. */
1845 "ori $t4, $zero, 12 \n\t"
1846 "ori %[sign1], $zero, 0 \n\t"
1847 "ori %[sign2], $zero, 0 \n\t"
/* Replace any qc greater than 12 with 12. */
1848 "slt $t0, $t4, %[qc1] \n\t"
1849 "slt $t1, $t4, %[qc2] \n\t"
1850 "slt $t2, $t4, %[qc3] \n\t"
1851 "slt $t3, $t4, %[qc4] \n\t"
1852 "movn %[qc1], $t4, $t0 \n\t"
1853 "movn %[qc2], $t4, $t1 \n\t"
1854 "movn %[qc3], $t4, $t2 \n\t"
1855 "movn %[qc4], $t4, $t3 \n\t"
/* Raw input words; "slt x, x, $zero" below turns each into a 0/1
 * negative flag. */
1856 "lw $t0, 0(%[in_int]) \n\t"
1857 "lw $t1, 4(%[in_int]) \n\t"
1858 "lw $t2, 8(%[in_int]) \n\t"
1859 "lw $t3, 12(%[in_int]) \n\t"
/* The first element's flag is stored in sign1/sign2 only when its qc is
 * non-zero. */
1860 "slt $t0, $t0, $zero \n\t"
1861 "movn %[sign1], $t0, %[qc1] \n\t"
1862 "slt $t2, $t2, $zero \n\t"
1863 "movn %[sign2], $t2, %[qc3] \n\t"
/* When the second element's qc is non-zero, shift the accumulated sign left
 * by one and append the second element's flag. */
1864 "slt $t1, $t1, $zero \n\t"
1865 "sll $t0, %[sign1], 1 \n\t"
1866 "or $t0, $t0, $t1 \n\t"
1867 "movn %[sign1], $t0, %[qc2] \n\t"
1868 "slt $t3, $t3, $zero \n\t"
1869 "sll $t0, %[sign2], 1 \n\t"
1870 "or $t0, $t0, $t3 \n\t"
1871 "movn %[sign2], $t0, %[qc4] \n\t"
/* count1/count2 = number of qc values in each pair that are greater than 0. */
1872 "slt %[count1], $zero, %[qc1] \n\t"
1873 "slt $t1, $zero, %[qc2] \n\t"
1874 "slt %[count2], $zero, %[qc3] \n\t"
1875 "slt $t2, $zero, %[qc4] \n\t"
1876 "addu %[count1], %[count1], $t1 \n\t"
1877 "addu %[count2], %[count2], $t2 \n\t"
/* qc1..qc4 are read-write; sign/count are early-clobber outputs. */
1881 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1882 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1883 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
1884 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2)
1885 : [in_int]
"r"(in_int)
1886 :
"t0",
"t1",
"t2",
"t3",
"t4",
/* Table bits and sign-bit counts for both pairs, then their table values. */
1896 curbits += p_bits[curidx];
1897 curbits += p_bits[curidx2];
1898 curbits += upair12_sign_bits[curidx];
1899 curbits += upair12_sign_bits[curidx2];
1900 vec = &p_codes[curidx*2];
1901 vec2 = &p_codes[curidx2*2];
1905 ".set noreorder \n\t"
/* Load the four inputs and take their absolute values. */
1907 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1908 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1909 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1910 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1911 "abs.s %[di0], %[di0] \n\t"
1912 "abs.s %[di1], %[di1] \n\t"
1913 "abs.s %[di2], %[di2] \n\t"
1914 "abs.s %[di3], %[di3] \n\t"
1915 "lwc1 $f0, 0(%[vec]) \n\t"
1916 "lwc1 $f1, 4(%[vec]) \n\t"
1917 "lwc1 $f2, 0(%[vec2]) \n\t"
1918 "lwc1 $f3, 4(%[vec2]) \n\t"
/* nmsub.s fd, fr, fs, ft yields fr - fs * ft, i.e. di = |in| - value * IQ. */
1919 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1920 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1921 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1922 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1926 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1927 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1928 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1929 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
1930 :
"$f0",
"$f1",
"$f2",
"$f3",
/* Squared error of the four coefficients. */
1934 cost += di0 * di0 + di1 * di1
1935 + di2 * di2 + di3 * di3;
/* Combined result: weighted distortion plus accumulated bits. */
1940 return cost * lambda + curbits;
/*
 * Band-cost routine for the escape table. Handles four coefficients (two
 * pairs) per loop iteration. Quantized values above 15 are replaced by 16
 * and charged extra bits derived from av_log2() of the saved value. Returns
 * cost * lambda + curbits.
 *
 * NOTE(review): this excerpt omits several lines (remaining parameters,
 * declarations of c1..c4 and t1..t4, the curidx/curidx2 computation, and the
 * conditions between the di alternatives). Comments describe only what is
 * visible.
 */
1943 static float get_band_cost_ESC_mips(
struct AACEncContext *s,
1945 const float *scaled,
int size,
int scale_idx,
1946 int cb,
const float lambda,
const float uplim,
/* Magnitude threshold used below to clip very large inputs. */
1951 const float CLIPPED_ESCAPE = 165140.0f * IQ;
1954 int qc1, qc2, qc3, qc4;
1960 for (i = 0; i <
size; i += 4) {
1961 const float *vec, *vec2;
1962 int curidx, curidx2;
1964 float di1, di2, di3, di4;
1965 int cond0, cond1, cond2, cond3;
/* Quantize: scale by Q34, add the 0.4054 rounding offset, truncate to int. */
1968 qc1 = scaled[i ] * Q34 + 0.4054f;
1969 qc2 = scaled[i+1] * Q34 + 0.4054f;
1970 qc3 = scaled[i+2] * Q34 + 0.4054f;
1971 qc4 = scaled[i+3] * Q34 + 0.4054f;
1975 ".set noreorder \n\t"
/* t4 = 15 is the comparison limit, t5 = 16 the replacement value. */
1977 "ori $t4, $zero, 15 \n\t"
1978 "ori $t5, $zero, 16 \n\t"
/* c = (qc saturating-shifted left by 18) logically shifted right by 18.
 * NOTE(review): presumably this caps c at 13 bits (8191) while leaving
 * smaller values unchanged; confirm against the DSP ASE shll_s.w definition. */
1979 "shll_s.w %[c1], %[qc1], 18 \n\t"
1980 "shll_s.w %[c2], %[qc2], 18 \n\t"
1981 "shll_s.w %[c3], %[qc3], 18 \n\t"
1982 "shll_s.w %[c4], %[qc4], 18 \n\t"
1983 "srl %[c1], %[c1], 18 \n\t"
1984 "srl %[c2], %[c2], 18 \n\t"
1985 "srl %[c3], %[c3], 18 \n\t"
1986 "srl %[c4], %[c4], 18 \n\t"
/* cond = 1 when qc > 15; in that case qc is replaced by 16. */
1987 "slt %[cond0], $t4, %[qc1] \n\t"
1988 "slt %[cond1], $t4, %[qc2] \n\t"
1989 "slt %[cond2], $t4, %[qc3] \n\t"
1990 "slt %[cond3], $t4, %[qc4] \n\t"
1991 "movn %[qc1], $t5, %[cond0] \n\t"
1992 "movn %[qc2], $t5, %[cond1] \n\t"
1993 "movn %[qc3], $t5, %[cond2] \n\t"
1994 "movn %[qc4], $t5, %[cond3] \n\t"
/* qc1..qc4 are read-write; cond0..cond3 and c1..c4 are early-clobber
 * outputs. */
1998 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1999 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
2000 [cond0]
"=&r"(cond0), [cond1]
"=&r"(cond1),
2001 [cond2]
"=&r"(cond2), [cond3]
"=&r"(cond3),
2002 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
2003 [
c3]
"=&r"(
c3), [c4]
"=&r"(c4)
/* First pair: table bits, sign-bit count for this index, table values. */
2014 curbits += p_bits[curidx];
2015 curbits += esc_sign_bits[curidx];
2016 vec = &p_codes[curidx*2];
/* Second pair, same three lookups. */
2018 curbits += p_bits[curidx2];
2019 curbits += esc_sign_bits[curidx2];
2020 vec2 = &p_codes[curidx2*2];
/* -cond is an all-ones mask when cond is 1 and zero when cond is 0, so each
 * term below contributes only for coefficients whose qc exceeded 15. */
2022 curbits += (
av_log2(c1) * 2 - 3) & (-cond0);
2023 curbits += (
av_log2(c2) * 2 - 3) & (-cond1);
2024 curbits += (
av_log2(c3) * 2 - 3) & (-cond2);
2025 curbits += (
av_log2(c4) * 2 - 3) & (-cond3);
/* Absolute input values (the matching t1 assignment is not visible here). */
2028 t2 = fabsf(in[i+1]);
2029 t3 = fabsf(in[i+2]);
2030 t4 = fabsf(in[i+3]);
/* Each di has three visible alternatives: the distance to CLIPPED_ESCAPE
 * when the input reaches it, the distance to c * cbrtf(c) * IQ, or the
 * distance to the table value times IQ.
 * NOTE(review): the conditions that select between the last two are not
 * visible in this excerpt. */
2033 if (t1 >= CLIPPED_ESCAPE) {
2034 di1 = t1 - CLIPPED_ESCAPE;
2036 di1 = t1 - c1 *
cbrtf(c1) * IQ;
2039 di1 = t1 - vec[0] * IQ;
2042 if (t2 >= CLIPPED_ESCAPE) {
2043 di2 = t2 - CLIPPED_ESCAPE;
2045 di2 = t2 - c2 *
cbrtf(c2) * IQ;
2048 di2 = t2 - vec[1] * IQ;
2051 if (t3 >= CLIPPED_ESCAPE) {
2052 di3 = t3 - CLIPPED_ESCAPE;
2054 di3 = t3 - c3 *
cbrtf(c3) * IQ;
2057 di3 = t3 - vec2[0] * IQ;
2060 if (t4 >= CLIPPED_ESCAPE) {
2061 di4 = t4 - CLIPPED_ESCAPE;
2063 di4 = t4 - c4 *
cbrtf(c4) * IQ;
2066 di4 = t4 - vec2[1]*IQ;
/* Squared error of the four coefficients. */
2068 cost += di1 * di1 + di2 * di2
2069 + di3 * di3 + di4 * di4;
/* Combined result: weighted distortion plus accumulated bits. */
2074 return cost * lambda + curbits;
/*
 * Table of band-cost function pointers; the get_band_cost() macro indexes it
 * with cb. The visible entries are one ZERO routine, then SQUAD, UQUAD,
 * SPAIR, UPAIR7 and UPAIR12 twice each, then ESC.
 * NOTE(review): the end of the parameter list and the array terminator are
 * not visible in this excerpt, so further entries may exist.
 */
2077 static float (*
const get_band_cost_arr[])(
struct AACEncContext *
s,
2079 const float *scaled,
int size,
int scale_idx,
2080 int cb,
const float lambda,
const float uplim,
2082 get_band_cost_ZERO_mips,
2083 get_band_cost_SQUAD_mips,
2084 get_band_cost_SQUAD_mips,
2085 get_band_cost_UQUAD_mips,
2086 get_band_cost_UQUAD_mips,
2087 get_band_cost_SPAIR_mips,
2088 get_band_cost_SPAIR_mips,
2089 get_band_cost_UPAIR7_mips,
2090 get_band_cost_UPAIR7_mips,
2091 get_band_cost_UPAIR12_mips,
2092 get_band_cost_UPAIR12_mips,
2093 get_band_cost_ESC_mips,
/*
 * Dispatch helper: calls the get_band_cost_arr entry selected by cb and
 * forwards all ten arguments unchanged. cb is used both as the table index
 * and as an argument, so it must be a valid index into get_band_cost_arr.
 */
2096 #define get_band_cost( \
2097 s, pb, in, scaled, size, scale_idx, cb, \
2098 lambda, uplim, bits) \
2099 get_band_cost_arr[cb]( \
2100 s, pb, in, scaled, size, scale_idx, cb, \
2101 lambda, uplim, bits)
2104 const float *scaled,
int size,
int scale_idx,
2105 int cb,
const float lambda,
const float uplim,
2108 return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
/*
 * Iterative search over the sf_idx[] entries of sce against a bit budget
 * (destbits) and per-band limits (uplims[]).
 *
 * NOTE(review): most of this function's lines are missing from this excerpt
 * (parameter list, loop headers, branch bodies). The comments below describe
 * only the statements that are visible; confirm the overall flow against the
 * full source.
 */
2111 static void search_for_quantizers_twoloop_mips(
AVCodecContext *avctx,
2116 int start = 0, i, w, w2,
g;
/* Per-band arrays with 128 entries, indexed as w*16+g below. */
2118 float dists[128] = { 0 }, uplims[128];
2120 int fflag, minscaler;
/* Cap the bit budget at 5800. */
2125 destbits =
FFMIN(destbits, 5800);
/* Mark the band at (w+w2, g) as zeroed. */
2134 sce->
zeroes[(w+w2)*16+g] = 1;
/* Store the band limit scaled by 512 and track the smallest uplim seen. */
2139 uplims[w*16+
g] = uplim *512;
2142 minthr =
FFMIN(minthr, uplim);
2148 if (sce->
zeroes[w*16+g]) {
2171 minscaler = sce->
sf_idx[0];
/* Step size: 1 when its is non-zero, 32 otherwise. */
2172 qstep = its ? 1 : 32;
/* Bands flagged in zeroes[] or with sf_idx >= 218 take this branch (its body
 * is not visible here). */
2187 if (sce->
zeroes[w*16+g] || sce->
sf_idx[w*16+g] >= 218) {
/* Track the smallest sf_idx among the bands that reach this statement. */
2191 minscaler =
FFMIN(minscaler, sce->
sf_idx[w*16+g]);
/* Accumulate bit counts; the rest of the call's arguments are not visible. */
2195 bits += quantize_band_cost_bits(s, coefs + w2*128,
2223 if (sce->
zeroes[w*16+g] || sce->
sf_idx[w*16+g] >= 218) {
2227 minscaler =
FFMIN(minscaler, sce->
sf_idx[w*16+g]);
/* Store dist minus bits as the band's entry in dists[]. */
2241 dists[w*16+
g] = dist -
bits;
/* Over budget: visit entries with sf_idx below 218 - qstep. The following
 * loop visits entries with sf_idx above 60 - qstep. Both loop bodies and the
 * separating else are not visible here. */
2251 if (tbits > destbits) {
2252 for (i = 0; i < 128; i++)
2253 if (sce->
sf_idx[i] < 218 - qstep)
2256 for (i = 0; i < 128; i++)
2257 if (sce->
sf_idx[i] > 60 - qstep)
/* Once qstep has reached 0, still react when tbits exceeds destbits by more
 * than 2% and sf_idx[0] is below 217 (the action is not visible here). */
2261 if (!qstep && tbits > destbits*1.02 && sce->
sf_idx[0] < 217)
/* Remember the current scalefactor index so a change can be detected. */
2269 int prevsc = sce->
sf_idx[w*16+
g];
/* Band exceeds its limit and sf_idx is still above 60. */
2270 if (dists[w*16+g] > uplims[w*16+g] && sce->
sf_idx[w*16+g] > 60) {
2278 if (sce->
sf_idx[w*16+g] != prevsc)
/* Repeat the enclosing do-loop while fflag is non-zero and its is below 10. */
2284 }
while (fflag && its < 10);
2290 int start = 0, i, w, w2,
g;
2291 float M[128],
S[128];
2300 float dist1 = 0.0f, dist2 = 0.0f;
2307 M[i ] = (sce0->
coeffs[start+w2*128+i ]
2308 + sce1->
coeffs[start+w2*128+i ]) * 0.5;
2309 M[i+1] = (sce0->
coeffs[start+w2*128+i+1]
2310 + sce1->
coeffs[start+w2*128+i+1]) * 0.5;
2311 M[i+2] = (sce0->
coeffs[start+w2*128+i+2]
2312 + sce1->
coeffs[start+w2*128+i+2]) * 0.5;
2313 M[i+3] = (sce0->
coeffs[start+w2*128+i+3]
2314 + sce1->
coeffs[start+w2*128+i+3]) * 0.5;
2317 - sce1->
coeffs[start+w2*128+i ];
2319 - sce1->
coeffs[start+w2*128+i+1];
2321 - sce1->
coeffs[start+w2*128+i+2];
2323 - sce1->
coeffs[start+w2*128+i+3];
2332 sce0->
sf_idx[(w+w2)*16+g],
2338 sce1->
sf_idx[(w+w2)*16+g],
2344 sce0->
sf_idx[(w+w2)*16+g],
2350 sce1->
sf_idx[(w+w2)*16+g],
2354 cpe->
ms_mask[w*16+
g] = dist2 < dist1;
2363 int win,
int group_len,
const float lambda)
2370 const int run_esc = (1 <<
run_bits) - 1;
2371 int idx, ppos,
count;
2372 int stackrun[120], stackcb[120], stack_len;
2378 for (cb = 0; cb < 12; cb++) {
2379 path[0][
cb].
cost = run_bits+4;
2381 path[0][
cb].
run = 0;
2383 for (swb = 0; swb < max_sfb; swb++) {
2385 if (sce->
zeroes[win*16 + swb]) {
2386 float cost_stay_here = path[swb][0].
cost;
2387 float cost_get_here = next_minbits + run_bits + 4;
2391 if (cost_get_here < cost_stay_here) {
2392 path[swb+1][0].
prev_idx = next_mincb;
2393 path[swb+1][0].
cost = cost_get_here;
2394 path[swb+1][0].
run = 1;
2397 path[swb+1][0].
cost = cost_stay_here;
2398 path[swb+1][0].
run = path[swb][0].
run + 1;
2400 next_minbits = path[swb+1][0].
cost;
2402 for (cb = 1; cb < 12; cb++) {
2403 path[swb+1][
cb].
cost = 61450;
2405 path[swb+1][
cb].
run = 0;
2408 float minbits = next_minbits;
2409 int mincb = next_mincb;
2410 int startcb = sce->
band_type[win*16+swb];
2413 for (cb = 0; cb < startcb; cb++) {
2414 path[swb+1][
cb].
cost = 61450;
2416 path[swb+1][
cb].
run = 0;
2418 for (cb = startcb; cb < 12; cb++) {
2419 float cost_stay_here, cost_get_here;
2421 for (w = 0; w < group_len; w++) {
2422 bits += quantize_band_cost_bits(s, sce->
coeffs + start + w*128,
2423 s->
scoefs + start + w*128, size,
2424 sce->
sf_idx[(win+w)*16+swb], cb,
2428 cost_get_here = minbits + bits + run_bits + 4;
2432 if (cost_get_here < cost_stay_here) {
2434 path[swb+1][
cb].
cost = cost_get_here;
2435 path[swb+1][
cb].
run = 1;
2438 path[swb+1][
cb].
cost = cost_stay_here;
2441 if (path[swb+1][cb].cost < next_minbits) {
2442 next_minbits = path[swb+1][
cb].
cost;
2452 for (cb = 1; cb < 12; cb++)
2453 if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
2459 stackrun[stack_len] = path[ppos][
cb].
run;
2460 stackcb [stack_len] =
cb;
2462 ppos -= path[ppos][
cb].
run;
2467 for (i = stack_len - 1; i >= 0; i--) {
2469 count = stackrun[i];
2470 memset(sce->
zeroes + win*16 + start, !stackcb[i], count);
2471 for (j = 0; j <
count; j++) {
2475 while (count >= run_esc) {