74 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
75 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
76 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
77 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
81 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
/*
 * Extra bit count per unsigned-quad codebook index (81 = 3^4 entries).
 * Each entry equals the number of non-zero digits of the index when it is
 * written as four base-3 digits. The bit-counting code adds this value on
 * top of p_bits[curidx].
 * NOTE(review): presumably one sign bit per non-zero coefficient of the
 * quad -- confirm against the code that emits the signs.
 */
88 static const uint8_t uquad_sign_bits[81] = {
89 0, 1, 1, 1, 2, 2, 1, 2, 2,
90 1, 2, 2, 2, 3, 3, 2, 3, 3,
91 1, 2, 2, 2, 3, 3, 2, 3, 3,
92 1, 2, 2, 2, 3, 3, 2, 3, 3,
93 2, 3, 3, 3, 4, 4, 3, 4, 4,
94 2, 3, 3, 3, 4, 4, 3, 4, 4,
95 1, 2, 2, 2, 3, 3, 2, 3, 3,
96 2, 3, 3, 3, 4, 4, 3, 4, 4,
97 2, 3, 3, 3, 4, 4, 3, 4, 4
/*
 * Extra bit count per unsigned-pair codebook index, laid out as an 8 x 8
 * grid (64 entries). The entry at index 8 * a + b is (a != 0) + (b != 0).
 * The bit-counting code adds this value on top of p_bits[curidx].
 * NOTE(review): presumably one sign bit per non-zero coefficient of the
 * pair -- confirm against the code that emits the signs.
 */
100 static const uint8_t upair7_sign_bits[64] = {
101 0, 1, 1, 1, 1, 1, 1, 1,
102 1, 2, 2, 2, 2, 2, 2, 2,
103 1, 2, 2, 2, 2, 2, 2, 2,
104 1, 2, 2, 2, 2, 2, 2, 2,
105 1, 2, 2, 2, 2, 2, 2, 2,
106 1, 2, 2, 2, 2, 2, 2, 2,
107 1, 2, 2, 2, 2, 2, 2, 2,
108 1, 2, 2, 2, 2, 2, 2, 2,
/*
 * Extra bit count per unsigned-pair codebook index, laid out as a 13 x 13
 * grid (169 entries). The entry at index 13 * a + b is (a != 0) + (b != 0).
 * The bit-counting code adds this value on top of p_bits[curidx].
 * NOTE(review): presumably one sign bit per non-zero coefficient of the
 * pair -- confirm against the code that emits the signs.
 */
111 static const uint8_t upair12_sign_bits[169] = {
112 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
115 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
116 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
123 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
124 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Extra bit count per escape-codebook index, laid out as a 17 x 17 grid
 * (289 entries). The entry at index 17 * a + b is (a != 0) + (b != 0).
 * The bit-counting code adds this value on top of p_bits[curidx].
 * NOTE(review): presumably one sign bit per non-zero coefficient of the
 * pair -- confirm against the code that emits the signs.
 */
127 static const uint8_t esc_sign_bits[289] = {
128 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
132 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
133 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
134 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
135 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
136 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
137 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
139 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
140 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
144 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Rounding offsets. The quantizers in this file add a ROUNDING offset to
 * "scaled * Q34" before the result is stored in an int, so the offset
 * decides where the truncation threshold between two integer levels lies.
 * NOTE(review): the names suggest a default bias and a smaller bias that
 * rounds down more often -- confirm against the callers that choose between
 * them. The literal 0.4054f is also used unnamed in the codebook-selection
 * code.
 */
147 #define ROUND_STANDARD 0.4054f
148 #define ROUND_TO_ZERO 0.1054f
151 #ifndef USE_REALLY_FULL_SEARCH
154 float ax, bx, cx, dx;
156 for (i = 0; i <
size; i += 4) {
/**
 * Track the largest value found in a group of windows of scaled coefficients.
 *
 * @param group_len number of windows scanned
 * @param swb_size  number of coefficients examined in each window
 * @param scaled    coefficient buffer; window w2 is read starting at
 *                  scaled[w2 * 128]
 *
 * NOTE(review): the 128 stride looks like a fixed per-window coefficient
 * count -- confirm against the buffer layout used by the callers.
 */
180 static float find_max_val(
int group_len,
int swb_size,
const float *scaled) {
183 for (w2 = 0; w2 < group_len; w2++) {
184 for (i = 0; i < swb_size; i++) {
/* running maximum over every examined coefficient */
185 maxval =
FFMAX(maxval, scaled[w2*128+i]);
193 float Q34 = sqrtf(Q * sqrtf(Q));
195 qmaxval = maxval * Q34 + 0.4054f;
196 if (qmaxval == 0) cb = 0;
197 else if (qmaxval == 1) cb = 1;
198 else if (qmaxval == 2) cb = 3;
199 else if (qmaxval <= 4) cb = 5;
200 else if (qmaxval <= 7) cb = 7;
201 else if (qmaxval <= 12) cb = 9;
209 static void quantize_and_encode_band_cost_SQUAD_mips(
struct AACEncContext *
s,
211 const float *scaled,
int size,
int scale_idx,
212 int cb,
const float lambda,
const float uplim,
213 int *
bits,
const float ROUNDING)
217 int qc1, qc2, qc3, qc4;
224 for (i = 0; i <
size; i += 4) {
226 int *in_int = (
int *)&in[i];
236 ".set noreorder \n\t"
238 "slt %[qc1], $zero, %[qc1] \n\t"
239 "slt %[qc2], $zero, %[qc2] \n\t"
240 "slt %[qc3], $zero, %[qc3] \n\t"
241 "slt %[qc4], $zero, %[qc4] \n\t"
242 "lw %[t0], 0(%[in_int]) \n\t"
243 "lw %[t1], 4(%[in_int]) \n\t"
244 "lw %[t2], 8(%[in_int]) \n\t"
245 "lw %[t3], 12(%[in_int]) \n\t"
246 "srl %[t0], %[t0], 31 \n\t"
247 "srl %[t1], %[t1], 31 \n\t"
248 "srl %[t2], %[t2], 31 \n\t"
249 "srl %[t3], %[t3], 31 \n\t"
250 "subu %[t4], $zero, %[qc1] \n\t"
251 "subu %[t5], $zero, %[qc2] \n\t"
252 "subu %[t6], $zero, %[qc3] \n\t"
253 "subu %[t7], $zero, %[qc4] \n\t"
254 "movn %[qc1], %[t4], %[t0] \n\t"
255 "movn %[qc2], %[t5], %[t1] \n\t"
256 "movn %[qc3], %[t6], %[t2] \n\t"
257 "movn %[qc4], %[t7], %[t3] \n\t"
261 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
262 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
263 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
264 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
265 : [in_int]
"r"(in_int)
278 put_bits(pb, p_bits[curidx], p_codes[curidx]);
282 static void quantize_and_encode_band_cost_UQUAD_mips(
struct AACEncContext *s,
284 const float *scaled,
int size,
int scale_idx,
285 int cb,
const float lambda,
const float uplim,
286 int *bits,
const float ROUNDING)
290 int qc1, qc2, qc3, qc4;
297 for (i = 0; i <
size; i += 4) {
298 int curidx, sign,
count;
299 int *in_int = (
int *)&in[i];
301 unsigned int v_codes;
311 ".set noreorder \n\t"
313 "ori %[t4], $zero, 2 \n\t"
314 "ori %[sign], $zero, 0 \n\t"
315 "slt %[t0], %[t4], %[qc1] \n\t"
316 "slt %[t1], %[t4], %[qc2] \n\t"
317 "slt %[t2], %[t4], %[qc3] \n\t"
318 "slt %[t3], %[t4], %[qc4] \n\t"
319 "movn %[qc1], %[t4], %[t0] \n\t"
320 "movn %[qc2], %[t4], %[t1] \n\t"
321 "movn %[qc3], %[t4], %[t2] \n\t"
322 "movn %[qc4], %[t4], %[t3] \n\t"
323 "lw %[t0], 0(%[in_int]) \n\t"
324 "lw %[t1], 4(%[in_int]) \n\t"
325 "lw %[t2], 8(%[in_int]) \n\t"
326 "lw %[t3], 12(%[in_int]) \n\t"
327 "slt %[t0], %[t0], $zero \n\t"
328 "movn %[sign], %[t0], %[qc1] \n\t"
329 "slt %[t1], %[t1], $zero \n\t"
330 "slt %[t2], %[t2], $zero \n\t"
331 "slt %[t3], %[t3], $zero \n\t"
332 "sll %[t0], %[sign], 1 \n\t"
333 "or %[t0], %[t0], %[t1] \n\t"
334 "movn %[sign], %[t0], %[qc2] \n\t"
335 "slt %[t4], $zero, %[qc1] \n\t"
336 "slt %[t1], $zero, %[qc2] \n\t"
337 "slt %[count], $zero, %[qc3] \n\t"
338 "sll %[t0], %[sign], 1 \n\t"
339 "or %[t0], %[t0], %[t2] \n\t"
340 "movn %[sign], %[t0], %[qc3] \n\t"
341 "slt %[t2], $zero, %[qc4] \n\t"
342 "addu %[count], %[count], %[t4] \n\t"
343 "addu %[count], %[count], %[t1] \n\t"
344 "sll %[t0], %[sign], 1 \n\t"
345 "or %[t0], %[t0], %[t3] \n\t"
346 "movn %[sign], %[t0], %[qc4] \n\t"
347 "addu %[count], %[count], %[t2] \n\t"
351 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
352 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
353 [sign]
"=&r"(sign), [count]
"=&r"(count),
354 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
356 : [in_int]
"r"(in_int)
368 v_codes = (p_codes[curidx] <<
count) | (sign & ((1 << count) - 1));
369 v_bits = p_bits[curidx] +
count;
374 static void quantize_and_encode_band_cost_SPAIR_mips(
struct AACEncContext *s,
376 const float *scaled,
int size,
int scale_idx,
377 int cb,
const float lambda,
const float uplim,
378 int *bits,
const float ROUNDING)
382 int qc1, qc2, qc3, qc4;
389 for (i = 0; i <
size; i += 4) {
391 int *in_int = (
int *)&in[i];
393 unsigned int v_codes;
403 ".set noreorder \n\t"
405 "ori %[t4], $zero, 4 \n\t"
406 "slt %[t0], %[t4], %[qc1] \n\t"
407 "slt %[t1], %[t4], %[qc2] \n\t"
408 "slt %[t2], %[t4], %[qc3] \n\t"
409 "slt %[t3], %[t4], %[qc4] \n\t"
410 "movn %[qc1], %[t4], %[t0] \n\t"
411 "movn %[qc2], %[t4], %[t1] \n\t"
412 "movn %[qc3], %[t4], %[t2] \n\t"
413 "movn %[qc4], %[t4], %[t3] \n\t"
414 "lw %[t0], 0(%[in_int]) \n\t"
415 "lw %[t1], 4(%[in_int]) \n\t"
416 "lw %[t2], 8(%[in_int]) \n\t"
417 "lw %[t3], 12(%[in_int]) \n\t"
418 "srl %[t0], %[t0], 31 \n\t"
419 "srl %[t1], %[t1], 31 \n\t"
420 "srl %[t2], %[t2], 31 \n\t"
421 "srl %[t3], %[t3], 31 \n\t"
422 "subu %[t4], $zero, %[qc1] \n\t"
423 "subu %[t5], $zero, %[qc2] \n\t"
424 "subu %[t6], $zero, %[qc3] \n\t"
425 "subu %[t7], $zero, %[qc4] \n\t"
426 "movn %[qc1], %[t4], %[t0] \n\t"
427 "movn %[qc2], %[t5], %[t1] \n\t"
428 "movn %[qc3], %[t6], %[t2] \n\t"
429 "movn %[qc4], %[t7], %[t3] \n\t"
433 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
434 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
435 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
436 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
437 : [in_int]
"r"(in_int)
447 v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
448 v_bits = p_bits[curidx] + p_bits[curidx2];
453 static void quantize_and_encode_band_cost_UPAIR7_mips(
struct AACEncContext *s,
455 const float *scaled,
int size,
int scale_idx,
456 int cb,
const float lambda,
const float uplim,
457 int *bits,
const float ROUNDING)
461 int qc1, qc2, qc3, qc4;
468 for (i = 0; i <
size; i += 4) {
469 int curidx, sign1, count1, sign2, count2;
470 int *in_int = (
int *)&in[i];
472 unsigned int v_codes;
482 ".set noreorder \n\t"
484 "ori %[t4], $zero, 7 \n\t"
485 "ori %[sign1], $zero, 0 \n\t"
486 "ori %[sign2], $zero, 0 \n\t"
487 "slt %[t0], %[t4], %[qc1] \n\t"
488 "slt %[t1], %[t4], %[qc2] \n\t"
489 "slt %[t2], %[t4], %[qc3] \n\t"
490 "slt %[t3], %[t4], %[qc4] \n\t"
491 "movn %[qc1], %[t4], %[t0] \n\t"
492 "movn %[qc2], %[t4], %[t1] \n\t"
493 "movn %[qc3], %[t4], %[t2] \n\t"
494 "movn %[qc4], %[t4], %[t3] \n\t"
495 "lw %[t0], 0(%[in_int]) \n\t"
496 "lw %[t1], 4(%[in_int]) \n\t"
497 "lw %[t2], 8(%[in_int]) \n\t"
498 "lw %[t3], 12(%[in_int]) \n\t"
499 "slt %[t0], %[t0], $zero \n\t"
500 "movn %[sign1], %[t0], %[qc1] \n\t"
501 "slt %[t2], %[t2], $zero \n\t"
502 "movn %[sign2], %[t2], %[qc3] \n\t"
503 "slt %[t1], %[t1], $zero \n\t"
504 "sll %[t0], %[sign1], 1 \n\t"
505 "or %[t0], %[t0], %[t1] \n\t"
506 "movn %[sign1], %[t0], %[qc2] \n\t"
507 "slt %[t3], %[t3], $zero \n\t"
508 "sll %[t0], %[sign2], 1 \n\t"
509 "or %[t0], %[t0], %[t3] \n\t"
510 "movn %[sign2], %[t0], %[qc4] \n\t"
511 "slt %[count1], $zero, %[qc1] \n\t"
512 "slt %[t1], $zero, %[qc2] \n\t"
513 "slt %[count2], $zero, %[qc3] \n\t"
514 "slt %[t2], $zero, %[qc4] \n\t"
515 "addu %[count1], %[count1], %[t1] \n\t"
516 "addu %[count2], %[count2], %[t2] \n\t"
520 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
521 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
522 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
523 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
524 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
526 : [in_int]
"r"(in_int)
527 :
"t0",
"t1",
"t2",
"t3",
"t4",
534 v_codes = (p_codes[curidx] << count1) | sign1;
535 v_bits = p_bits[curidx] + count1;
541 v_codes = (p_codes[curidx] << count2) | sign2;
542 v_bits = p_bits[curidx] + count2;
547 static void quantize_and_encode_band_cost_UPAIR12_mips(
struct AACEncContext *s,
549 const float *scaled,
int size,
int scale_idx,
550 int cb,
const float lambda,
const float uplim,
551 int *bits,
const float ROUNDING)
555 int qc1, qc2, qc3, qc4;
562 for (i = 0; i <
size; i += 4) {
563 int curidx, sign1, count1, sign2, count2;
564 int *in_int = (
int *)&in[i];
566 unsigned int v_codes;
576 ".set noreorder \n\t"
578 "ori %[t4], $zero, 12 \n\t"
579 "ori %[sign1], $zero, 0 \n\t"
580 "ori %[sign2], $zero, 0 \n\t"
581 "slt %[t0], %[t4], %[qc1] \n\t"
582 "slt %[t1], %[t4], %[qc2] \n\t"
583 "slt %[t2], %[t4], %[qc3] \n\t"
584 "slt %[t3], %[t4], %[qc4] \n\t"
585 "movn %[qc1], %[t4], %[t0] \n\t"
586 "movn %[qc2], %[t4], %[t1] \n\t"
587 "movn %[qc3], %[t4], %[t2] \n\t"
588 "movn %[qc4], %[t4], %[t3] \n\t"
589 "lw %[t0], 0(%[in_int]) \n\t"
590 "lw %[t1], 4(%[in_int]) \n\t"
591 "lw %[t2], 8(%[in_int]) \n\t"
592 "lw %[t3], 12(%[in_int]) \n\t"
593 "slt %[t0], %[t0], $zero \n\t"
594 "movn %[sign1], %[t0], %[qc1] \n\t"
595 "slt %[t2], %[t2], $zero \n\t"
596 "movn %[sign2], %[t2], %[qc3] \n\t"
597 "slt %[t1], %[t1], $zero \n\t"
598 "sll %[t0], %[sign1], 1 \n\t"
599 "or %[t0], %[t0], %[t1] \n\t"
600 "movn %[sign1], %[t0], %[qc2] \n\t"
601 "slt %[t3], %[t3], $zero \n\t"
602 "sll %[t0], %[sign2], 1 \n\t"
603 "or %[t0], %[t0], %[t3] \n\t"
604 "movn %[sign2], %[t0], %[qc4] \n\t"
605 "slt %[count1], $zero, %[qc1] \n\t"
606 "slt %[t1], $zero, %[qc2] \n\t"
607 "slt %[count2], $zero, %[qc3] \n\t"
608 "slt %[t2], $zero, %[qc4] \n\t"
609 "addu %[count1], %[count1], %[t1] \n\t"
610 "addu %[count2], %[count2], %[t2] \n\t"
614 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
615 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
616 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
617 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
618 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
620 : [in_int]
"r"(in_int)
627 v_codes = (p_codes[curidx] << count1) | sign1;
628 v_bits = p_bits[curidx] + count1;
634 v_codes = (p_codes[curidx] << count2) | sign2;
635 v_bits = p_bits[curidx] + count2;
640 static void quantize_and_encode_band_cost_ESC_mips(
struct AACEncContext *s,
642 const float *scaled,
int size,
int scale_idx,
643 int cb,
const float lambda,
const float uplim,
644 int *bits,
const float ROUNDING)
648 int qc1, qc2, qc3, qc4;
658 for (i = 0; i <
size; i += 4) {
659 int curidx, curidx2, sign1, count1, sign2, count2;
660 int *in_int = (
int *)&in[i];
662 unsigned int v_codes;
665 qc1 = scaled[i ] * Q34 + ROUNDING;
666 qc2 = scaled[i+1] * Q34 + ROUNDING;
667 qc3 = scaled[i+2] * Q34 + ROUNDING;
668 qc4 = scaled[i+3] * Q34 + ROUNDING;
672 ".set noreorder \n\t"
674 "ori %[t4], $zero, 16 \n\t"
675 "ori %[sign1], $zero, 0 \n\t"
676 "ori %[sign2], $zero, 0 \n\t"
677 "slt %[t0], %[t4], %[qc1] \n\t"
678 "slt %[t1], %[t4], %[qc2] \n\t"
679 "slt %[t2], %[t4], %[qc3] \n\t"
680 "slt %[t3], %[t4], %[qc4] \n\t"
681 "movn %[qc1], %[t4], %[t0] \n\t"
682 "movn %[qc2], %[t4], %[t1] \n\t"
683 "movn %[qc3], %[t4], %[t2] \n\t"
684 "movn %[qc4], %[t4], %[t3] \n\t"
685 "lw %[t0], 0(%[in_int]) \n\t"
686 "lw %[t1], 4(%[in_int]) \n\t"
687 "lw %[t2], 8(%[in_int]) \n\t"
688 "lw %[t3], 12(%[in_int]) \n\t"
689 "slt %[t0], %[t0], $zero \n\t"
690 "movn %[sign1], %[t0], %[qc1] \n\t"
691 "slt %[t2], %[t2], $zero \n\t"
692 "movn %[sign2], %[t2], %[qc3] \n\t"
693 "slt %[t1], %[t1], $zero \n\t"
694 "sll %[t0], %[sign1], 1 \n\t"
695 "or %[t0], %[t0], %[t1] \n\t"
696 "movn %[sign1], %[t0], %[qc2] \n\t"
697 "slt %[t3], %[t3], $zero \n\t"
698 "sll %[t0], %[sign2], 1 \n\t"
699 "or %[t0], %[t0], %[t3] \n\t"
700 "movn %[sign2], %[t0], %[qc4] \n\t"
701 "slt %[count1], $zero, %[qc1] \n\t"
702 "slt %[t1], $zero, %[qc2] \n\t"
703 "slt %[count2], $zero, %[qc3] \n\t"
704 "slt %[t2], $zero, %[qc4] \n\t"
705 "addu %[count1], %[count1], %[t1] \n\t"
706 "addu %[count2], %[count2], %[t2] \n\t"
710 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
711 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
712 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
713 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
714 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
716 : [in_int]
"r"(in_int)
725 v_codes = (p_codes[curidx] << count1) | sign1;
726 v_bits = p_bits[curidx] + count1;
729 v_codes = (p_codes[curidx2] << count2) | sign2;
730 v_bits = p_bits[curidx2] + count2;
734 for (i = 0; i <
size; i += 4) {
735 int curidx, curidx2, sign1, count1, sign2, count2;
736 int *in_int = (
int *)&in[i];
738 unsigned int v_codes;
742 qc1 = scaled[i ] * Q34 + ROUNDING;
743 qc2 = scaled[i+1] * Q34 + ROUNDING;
744 qc3 = scaled[i+2] * Q34 + ROUNDING;
745 qc4 = scaled[i+3] * Q34 + ROUNDING;
749 ".set noreorder \n\t"
751 "ori %[t4], $zero, 16 \n\t"
752 "ori %[sign1], $zero, 0 \n\t"
753 "ori %[sign2], $zero, 0 \n\t"
754 "shll_s.w %[c1], %[qc1], 18 \n\t"
755 "shll_s.w %[c2], %[qc2], 18 \n\t"
756 "shll_s.w %[c3], %[qc3], 18 \n\t"
757 "shll_s.w %[c4], %[qc4], 18 \n\t"
758 "srl %[c1], %[c1], 18 \n\t"
759 "srl %[c2], %[c2], 18 \n\t"
760 "srl %[c3], %[c3], 18 \n\t"
761 "srl %[c4], %[c4], 18 \n\t"
762 "slt %[t0], %[t4], %[qc1] \n\t"
763 "slt %[t1], %[t4], %[qc2] \n\t"
764 "slt %[t2], %[t4], %[qc3] \n\t"
765 "slt %[t3], %[t4], %[qc4] \n\t"
766 "movn %[qc1], %[t4], %[t0] \n\t"
767 "movn %[qc2], %[t4], %[t1] \n\t"
768 "movn %[qc3], %[t4], %[t2] \n\t"
769 "movn %[qc4], %[t4], %[t3] \n\t"
770 "lw %[t0], 0(%[in_int]) \n\t"
771 "lw %[t1], 4(%[in_int]) \n\t"
772 "lw %[t2], 8(%[in_int]) \n\t"
773 "lw %[t3], 12(%[in_int]) \n\t"
774 "slt %[t0], %[t0], $zero \n\t"
775 "movn %[sign1], %[t0], %[qc1] \n\t"
776 "slt %[t2], %[t2], $zero \n\t"
777 "movn %[sign2], %[t2], %[qc3] \n\t"
778 "slt %[t1], %[t1], $zero \n\t"
779 "sll %[t0], %[sign1], 1 \n\t"
780 "or %[t0], %[t0], %[t1] \n\t"
781 "movn %[sign1], %[t0], %[qc2] \n\t"
782 "slt %[t3], %[t3], $zero \n\t"
783 "sll %[t0], %[sign2], 1 \n\t"
784 "or %[t0], %[t0], %[t3] \n\t"
785 "movn %[sign2], %[t0], %[qc4] \n\t"
786 "slt %[count1], $zero, %[qc1] \n\t"
787 "slt %[t1], $zero, %[qc2] \n\t"
788 "slt %[count2], $zero, %[qc3] \n\t"
789 "slt %[t2], $zero, %[qc4] \n\t"
790 "addu %[count1], %[count1], %[t1] \n\t"
791 "addu %[count2], %[count2], %[t2] \n\t"
795 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
796 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
797 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
798 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
799 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
800 [c3]
"=&r"(c3), [c4]
"=&r"(c4),
801 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
803 : [in_int]
"r"(in_int)
813 v_codes = (p_codes[curidx] << count1) | sign1;
814 v_bits = p_bits[curidx] + count1;
817 if (p_vectors[curidx*2 ] == 64.0f) {
819 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 <<
len) - 1));
822 if (p_vectors[curidx*2+1] == 64.0f) {
824 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 <<
len) - 1));
828 v_codes = (p_codes[curidx2] << count2) | sign2;
829 v_bits = p_bits[curidx2] + count2;
832 if (p_vectors[curidx2*2 ] == 64.0f) {
834 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 <<
len) - 1));
837 if (p_vectors[curidx2*2+1] == 64.0f) {
839 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 <<
len) - 1));
846 static void quantize_and_encode_band_cost_NONE_mips(
struct AACEncContext *s,
848 const float *scaled,
int size,
int scale_idx,
849 int cb,
const float lambda,
const float uplim,
850 int *bits,
const float ROUNDING) {
854 static void quantize_and_encode_band_cost_ZERO_mips(
struct AACEncContext *s,
856 const float *scaled,
int size,
int scale_idx,
857 int cb,
const float lambda,
const float uplim,
858 int *bits,
const float ROUNDING) {
863 for (i = 0; i <
size; i += 4) {
874 const float *scaled,
int size,
int scale_idx,
875 int cb,
const float lambda,
const float uplim,
876 int *
bits,
const float ROUNDING) = {
877 quantize_and_encode_band_cost_ZERO_mips,
878 quantize_and_encode_band_cost_SQUAD_mips,
879 quantize_and_encode_band_cost_SQUAD_mips,
880 quantize_and_encode_band_cost_UQUAD_mips,
881 quantize_and_encode_band_cost_UQUAD_mips,
882 quantize_and_encode_band_cost_SPAIR_mips,
883 quantize_and_encode_band_cost_SPAIR_mips,
884 quantize_and_encode_band_cost_UPAIR7_mips,
885 quantize_and_encode_band_cost_UPAIR7_mips,
886 quantize_and_encode_band_cost_UPAIR12_mips,
887 quantize_and_encode_band_cost_UPAIR12_mips,
888 quantize_and_encode_band_cost_ESC_mips,
889 quantize_and_encode_band_cost_NONE_mips,
890 quantize_and_encode_band_cost_ZERO_mips,
891 quantize_and_encode_band_cost_ZERO_mips,
892 quantize_and_encode_band_cost_ZERO_mips,
/*
 * Dispatch to the codebook-specific quantize-and-encode routine.
 * cb selects the entry of quantize_and_encode_band_cost_arr and is also
 * forwarded as an argument, so the macro argument cb is expanded twice:
 * pass an expression without side effects. No range check is applied to
 * cb here.
 */
895 #define quantize_and_encode_band_cost( \
896 s, pb, in, out, scaled, size, scale_idx, cb, \
897 lambda, uplim, bits, ROUNDING) \
898 quantize_and_encode_band_cost_arr[cb]( \
899 s, pb, in, out, scaled, size, scale_idx, cb, \
900 lambda, uplim, bits, ROUNDING)
903 const float *in,
float *out,
int size,
int scale_idx,
904 int cb,
const float lambda,
int rtz)
913 static float get_band_numbits_ZERO_mips(
struct AACEncContext *s,
915 const float *scaled,
int size,
int scale_idx,
916 int cb,
const float lambda,
const float uplim,
922 static float get_band_numbits_NONE_mips(
struct AACEncContext *s,
924 const float *scaled,
int size,
int scale_idx,
925 int cb,
const float lambda,
const float uplim,
932 static float get_band_numbits_SQUAD_mips(
struct AACEncContext *s,
934 const float *scaled,
int size,
int scale_idx,
935 int cb,
const float lambda,
const float uplim,
940 int qc1, qc2, qc3, qc4;
945 for (i = 0; i <
size; i += 4) {
947 int *in_int = (
int *)&in[i];
957 ".set noreorder \n\t"
959 "slt %[qc1], $zero, %[qc1] \n\t"
960 "slt %[qc2], $zero, %[qc2] \n\t"
961 "slt %[qc3], $zero, %[qc3] \n\t"
962 "slt %[qc4], $zero, %[qc4] \n\t"
963 "lw %[t0], 0(%[in_int]) \n\t"
964 "lw %[t1], 4(%[in_int]) \n\t"
965 "lw %[t2], 8(%[in_int]) \n\t"
966 "lw %[t3], 12(%[in_int]) \n\t"
967 "srl %[t0], %[t0], 31 \n\t"
968 "srl %[t1], %[t1], 31 \n\t"
969 "srl %[t2], %[t2], 31 \n\t"
970 "srl %[t3], %[t3], 31 \n\t"
971 "subu %[t4], $zero, %[qc1] \n\t"
972 "subu %[t5], $zero, %[qc2] \n\t"
973 "subu %[t6], $zero, %[qc3] \n\t"
974 "subu %[t7], $zero, %[qc4] \n\t"
975 "movn %[qc1], %[t4], %[t0] \n\t"
976 "movn %[qc2], %[t5], %[t1] \n\t"
977 "movn %[qc3], %[t6], %[t2] \n\t"
978 "movn %[qc4], %[t7], %[t3] \n\t"
982 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
983 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
984 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
985 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
986 : [in_int]
"r"(in_int)
999 curbits += p_bits[curidx];
1004 static float get_band_numbits_UQUAD_mips(
struct AACEncContext *s,
1006 const float *scaled,
int size,
int scale_idx,
1007 int cb,
const float lambda,
const float uplim,
1013 int qc1, qc2, qc3, qc4;
1017 for (i = 0; i <
size; i += 4) {
1028 ".set noreorder \n\t"
1030 "ori %[t4], $zero, 2 \n\t"
1031 "slt %[t0], %[t4], %[qc1] \n\t"
1032 "slt %[t1], %[t4], %[qc2] \n\t"
1033 "slt %[t2], %[t4], %[qc3] \n\t"
1034 "slt %[t3], %[t4], %[qc4] \n\t"
1035 "movn %[qc1], %[t4], %[t0] \n\t"
1036 "movn %[qc2], %[t4], %[t1] \n\t"
1037 "movn %[qc3], %[t4], %[t2] \n\t"
1038 "movn %[qc4], %[t4], %[t3] \n\t"
1042 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1043 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1044 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1056 curbits += p_bits[curidx];
1057 curbits += uquad_sign_bits[curidx];
1062 static float get_band_numbits_SPAIR_mips(
struct AACEncContext *s,
1064 const float *scaled,
int size,
int scale_idx,
1065 int cb,
const float lambda,
const float uplim,
1070 int qc1, qc2, qc3, qc4;
1075 for (i = 0; i <
size; i += 4) {
1076 int curidx, curidx2;
1077 int *in_int = (
int *)&in[i];
1087 ".set noreorder \n\t"
1089 "ori %[t4], $zero, 4 \n\t"
1090 "slt %[t0], %[t4], %[qc1] \n\t"
1091 "slt %[t1], %[t4], %[qc2] \n\t"
1092 "slt %[t2], %[t4], %[qc3] \n\t"
1093 "slt %[t3], %[t4], %[qc4] \n\t"
1094 "movn %[qc1], %[t4], %[t0] \n\t"
1095 "movn %[qc2], %[t4], %[t1] \n\t"
1096 "movn %[qc3], %[t4], %[t2] \n\t"
1097 "movn %[qc4], %[t4], %[t3] \n\t"
1098 "lw %[t0], 0(%[in_int]) \n\t"
1099 "lw %[t1], 4(%[in_int]) \n\t"
1100 "lw %[t2], 8(%[in_int]) \n\t"
1101 "lw %[t3], 12(%[in_int]) \n\t"
1102 "srl %[t0], %[t0], 31 \n\t"
1103 "srl %[t1], %[t1], 31 \n\t"
1104 "srl %[t2], %[t2], 31 \n\t"
1105 "srl %[t3], %[t3], 31 \n\t"
1106 "subu %[t4], $zero, %[qc1] \n\t"
1107 "subu %[t5], $zero, %[qc2] \n\t"
1108 "subu %[t6], $zero, %[qc3] \n\t"
1109 "subu %[t7], $zero, %[qc4] \n\t"
1110 "movn %[qc1], %[t4], %[t0] \n\t"
1111 "movn %[qc2], %[t5], %[t1] \n\t"
1112 "movn %[qc3], %[t6], %[t2] \n\t"
1113 "movn %[qc4], %[t7], %[t3] \n\t"
1117 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1118 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1119 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1120 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1121 : [in_int]
"r"(in_int)
1129 curidx2 += qc4 + 40;
1131 curbits += p_bits[curidx] + p_bits[curidx2];
1136 static float get_band_numbits_UPAIR7_mips(
struct AACEncContext *s,
1138 const float *scaled,
int size,
int scale_idx,
1139 int cb,
const float lambda,
const float uplim,
1144 int qc1, qc2, qc3, qc4;
1149 for (i = 0; i <
size; i += 4) {
1150 int curidx, curidx2;
1160 ".set noreorder \n\t"
1162 "ori %[t4], $zero, 7 \n\t"
1163 "slt %[t0], %[t4], %[qc1] \n\t"
1164 "slt %[t1], %[t4], %[qc2] \n\t"
1165 "slt %[t2], %[t4], %[qc3] \n\t"
1166 "slt %[t3], %[t4], %[qc4] \n\t"
1167 "movn %[qc1], %[t4], %[t0] \n\t"
1168 "movn %[qc2], %[t4], %[t1] \n\t"
1169 "movn %[qc3], %[t4], %[t2] \n\t"
1170 "movn %[qc4], %[t4], %[t3] \n\t"
1174 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1175 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1176 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1186 curbits += p_bits[curidx] +
1187 upair7_sign_bits[curidx] +
1189 upair7_sign_bits[curidx2];
1194 static float get_band_numbits_UPAIR12_mips(
struct AACEncContext *s,
1196 const float *scaled,
int size,
int scale_idx,
1197 int cb,
const float lambda,
const float uplim,
1202 int qc1, qc2, qc3, qc4;
1207 for (i = 0; i <
size; i += 4) {
1208 int curidx, curidx2;
1218 ".set noreorder \n\t"
1220 "ori %[t4], $zero, 12 \n\t"
1221 "slt %[t0], %[t4], %[qc1] \n\t"
1222 "slt %[t1], %[t4], %[qc2] \n\t"
1223 "slt %[t2], %[t4], %[qc3] \n\t"
1224 "slt %[t3], %[t4], %[qc4] \n\t"
1225 "movn %[qc1], %[t4], %[t0] \n\t"
1226 "movn %[qc2], %[t4], %[t1] \n\t"
1227 "movn %[qc3], %[t4], %[t2] \n\t"
1228 "movn %[qc4], %[t4], %[t3] \n\t"
1232 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1233 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1234 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1244 curbits += p_bits[curidx] +
1246 upair12_sign_bits[curidx] +
1247 upair12_sign_bits[curidx2];
/*
 * Bit counting for the escape codebook, four coefficients per loop iteration.
 *
 * The inline assembly works on the integer quantized values qc1..qc4:
 *   - c    = srl(shll_s.w(qc, 18), 18): a saturating left shift followed by
 *            a logical right shift.
 *            NOTE(review): per the MIPS DSP definition of shll_s.w this
 *            should cap c at 0x1FFF for non-negative qc -- confirm.
 *   - cond = (15 < qc); when cond is set, qc is replaced by 16.
 *   - c    = 2 * (31 - clz(c)) - 3, then ANDed with -cond (all ones when
 *            cond is 1, zero otherwise), so c ends up zero unless the
 *            original qc exceeded 15.
 * Afterwards the codeword lengths and sign-bit counts of both pair indices
 * are accumulated into curbits from p_bits[] and esc_sign_bits[].
 */
1252 static float get_band_numbits_ESC_mips(
struct AACEncContext *s,
1254 const float *scaled,
int size,
int scale_idx,
1255 int cb,
const float lambda,
const float uplim,
1260 int qc1, qc2, qc3, qc4;
1265 for (i = 0; i <
size; i += 4) {
1266 int curidx, curidx2;
1267 int cond0, cond1, cond2, cond3;
1278 ".set noreorder \n\t"
1280 "ori %[t4], $zero, 15 \n\t"
1281 "ori %[t5], $zero, 16 \n\t"
1282 "shll_s.w %[c1], %[qc1], 18 \n\t"
1283 "shll_s.w %[c2], %[qc2], 18 \n\t"
1284 "shll_s.w %[c3], %[qc3], 18 \n\t"
1285 "shll_s.w %[c4], %[qc4], 18 \n\t"
1286 "srl %[c1], %[c1], 18 \n\t"
1287 "srl %[c2], %[c2], 18 \n\t"
1288 "srl %[c3], %[c3], 18 \n\t"
1289 "srl %[c4], %[c4], 18 \n\t"
1290 "slt %[cond0], %[t4], %[qc1] \n\t"
1291 "slt %[cond1], %[t4], %[qc2] \n\t"
1292 "slt %[cond2], %[t4], %[qc3] \n\t"
1293 "slt %[cond3], %[t4], %[qc4] \n\t"
1294 "movn %[qc1], %[t5], %[cond0] \n\t"
1295 "movn %[qc2], %[t5], %[cond1] \n\t"
1296 "movn %[qc3], %[t5], %[cond2] \n\t"
1297 "movn %[qc4], %[t5], %[cond3] \n\t"
1298 "ori %[t5], $zero, 31 \n\t"
1299 "clz %[c1], %[c1] \n\t"
1300 "clz %[c2], %[c2] \n\t"
1301 "clz %[c3], %[c3] \n\t"
1302 "clz %[c4], %[c4] \n\t"
1303 "subu %[c1], %[t5], %[c1] \n\t"
1304 "subu %[c2], %[t5], %[c2] \n\t"
1305 "subu %[c3], %[t5], %[c3] \n\t"
1306 "subu %[c4], %[t5], %[c4] \n\t"
1307 "sll %[c1], %[c1], 1 \n\t"
1308 "sll %[c2], %[c2], 1 \n\t"
1309 "sll %[c3], %[c3], 1 \n\t"
1310 "sll %[c4], %[c4], 1 \n\t"
1311 "addiu %[c1], %[c1], -3 \n\t"
1312 "addiu %[c2], %[c2], -3 \n\t"
1313 "addiu %[c3], %[c3], -3 \n\t"
1314 "addiu %[c4], %[c4], -3 \n\t"
1315 "subu %[cond0], $zero, %[cond0] \n\t"
1316 "subu %[cond1], $zero, %[cond1] \n\t"
1317 "subu %[cond2], $zero, %[cond2] \n\t"
1318 "subu %[cond3], $zero, %[cond3] \n\t"
1319 "and %[c1], %[c1], %[cond0] \n\t"
1320 "and %[c2], %[c2], %[cond1] \n\t"
1321 "and %[c3], %[c3], %[cond2] \n\t"
1322 "and %[c4], %[c4], %[cond3] \n\t"
1326 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1327 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1328 [cond0]
"=&r"(cond0), [cond1]
"=&r"(cond1),
1329 [cond2]
"=&r"(cond2), [cond3]
"=&r"(cond3),
1330 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
1331 [c3]
"=&r"(c3), [c4]
"=&r"(c4),
1332 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5)
1341 curbits += p_bits[curidx];
1342 curbits += esc_sign_bits[curidx];
1343 curbits += p_bits[curidx2];
1344 curbits += esc_sign_bits[curidx2];
/*
 * Bit-count handlers indexed by codebook number cb (16 entries):
 *    0       -> ZERO
 *    1,  2   -> SQUAD
 *    3,  4   -> UQUAD
 *    5,  6   -> SPAIR
 *    7,  8   -> UPAIR7
 *    9, 10   -> UPAIR12
 *   11       -> ESC
 *   12       -> NONE
 *   13 .. 15 -> ZERO
 */
1354 static float (*
const get_band_numbits_arr[])(
struct AACEncContext *
s,
1356 const float *scaled,
int size,
int scale_idx,
1357 int cb,
const float lambda,
const float uplim,
1359 get_band_numbits_ZERO_mips,
1360 get_band_numbits_SQUAD_mips,
1361 get_band_numbits_SQUAD_mips,
1362 get_band_numbits_UQUAD_mips,
1363 get_band_numbits_UQUAD_mips,
1364 get_band_numbits_SPAIR_mips,
1365 get_band_numbits_SPAIR_mips,
1366 get_band_numbits_UPAIR7_mips,
1367 get_band_numbits_UPAIR7_mips,
1368 get_band_numbits_UPAIR12_mips,
1369 get_band_numbits_UPAIR12_mips,
1370 get_band_numbits_ESC_mips,
1371 get_band_numbits_NONE_mips,
1372 get_band_numbits_ZERO_mips,
1373 get_band_numbits_ZERO_mips,
1374 get_band_numbits_ZERO_mips,
/*
 * Dispatch to the codebook-specific bit-count routine.
 * cb selects the entry of get_band_numbits_arr and is also forwarded as an
 * argument, so the macro argument cb is expanded twice: pass an expression
 * without side effects. No range check is applied to cb here.
 */
1377 #define get_band_numbits( \
1378 s, pb, in, scaled, size, scale_idx, cb, \
1379 lambda, uplim, bits) \
1380 get_band_numbits_arr[cb]( \
1381 s, pb, in, scaled, size, scale_idx, cb, \
1382 lambda, uplim, bits)
/*
 * Return the value produced by the codebook-specific bit counter for this
 * band. Thin wrapper around get_band_numbits(): every argument is forwarded
 * unchanged and NULL is passed in the pb position.
 */
1384 static float quantize_band_cost_bits(
struct AACEncContext *s,
const float *in,
1385 const float *scaled,
int size,
int scale_idx,
1386 int cb,
const float lambda,
const float uplim,
1389 return get_band_numbits(s,
NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
/*
 * Cost handler for the zero codebook: accumulates the sum of squares of the
 * input coefficients into cost.
 * The loop consumes four elements per iteration and reads in[i] .. in[i+3].
 * NOTE(review): size is presumably always a multiple of 4, otherwise the
 * last iteration reads past in[size - 1] -- confirm against the callers.
 */
1396 static float get_band_cost_ZERO_mips(
struct AACEncContext *s,
1398 const float *scaled,
int size,
int scale_idx,
1399 int cb,
const float lambda,
const float uplim,
1405 for (i = 0; i <
size; i += 4) {
1406 cost += in[i ] * in[i ];
1407 cost += in[i+1] * in[i+1];
1408 cost += in[i+2] * in[i+2];
1409 cost += in[i+3] * in[i+3];
1416 static float get_band_cost_NONE_mips(
struct AACEncContext *s,
1418 const float *scaled,
int size,
int scale_idx,
1419 int cb,
const float lambda,
const float uplim,
1426 static float get_band_cost_SQUAD_mips(
struct AACEncContext *s,
1428 const float *scaled,
int size,
int scale_idx,
1429 int cb,
const float lambda,
const float uplim,
1436 int qc1, qc2, qc3, qc4;
1442 for (i = 0; i <
size; i += 4) {
1445 int *in_int = (
int *)&in[i];
1446 float *in_pos = (
float *)&in[i];
1447 float di0, di1, di2, di3;
1457 ".set noreorder \n\t"
1459 "slt %[qc1], $zero, %[qc1] \n\t"
1460 "slt %[qc2], $zero, %[qc2] \n\t"
1461 "slt %[qc3], $zero, %[qc3] \n\t"
1462 "slt %[qc4], $zero, %[qc4] \n\t"
1463 "lw %[t0], 0(%[in_int]) \n\t"
1464 "lw %[t1], 4(%[in_int]) \n\t"
1465 "lw %[t2], 8(%[in_int]) \n\t"
1466 "lw %[t3], 12(%[in_int]) \n\t"
1467 "srl %[t0], %[t0], 31 \n\t"
1468 "srl %[t1], %[t1], 31 \n\t"
1469 "srl %[t2], %[t2], 31 \n\t"
1470 "srl %[t3], %[t3], 31 \n\t"
1471 "subu %[t4], $zero, %[qc1] \n\t"
1472 "subu %[t5], $zero, %[qc2] \n\t"
1473 "subu %[t6], $zero, %[qc3] \n\t"
1474 "subu %[t7], $zero, %[qc4] \n\t"
1475 "movn %[qc1], %[t4], %[t0] \n\t"
1476 "movn %[qc2], %[t5], %[t1] \n\t"
1477 "movn %[qc3], %[t6], %[t2] \n\t"
1478 "movn %[qc4], %[t7], %[t3] \n\t"
1482 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1483 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1484 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1485 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1486 : [in_int]
"r"(in_int)
1499 curbits += p_bits[curidx];
1500 vec = &p_codes[curidx*4];
1504 ".set noreorder \n\t"
1506 "lwc1 $f0, 0(%[in_pos]) \n\t"
1507 "lwc1 $f1, 0(%[vec]) \n\t"
1508 "lwc1 $f2, 4(%[in_pos]) \n\t"
1509 "lwc1 $f3, 4(%[vec]) \n\t"
1510 "lwc1 $f4, 8(%[in_pos]) \n\t"
1511 "lwc1 $f5, 8(%[vec]) \n\t"
1512 "lwc1 $f6, 12(%[in_pos]) \n\t"
1513 "lwc1 $f7, 12(%[vec]) \n\t"
1514 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1515 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1516 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1517 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1521 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1522 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1523 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1525 :
"$f0",
"$f1",
"$f2",
"$f3",
1526 "$f4",
"$f5",
"$f6",
"$f7",
1530 cost += di0 * di0 + di1 * di1
1531 + di2 * di2 + di3 * di3;
1536 return cost * lambda + curbits;
/**
 * Band cost routine installed in slots 3 and 4 of get_band_cost_arr.
 *
 * The band is processed four coefficients per loop iteration. For each
 * group the quantized values qc1..qc4 are capped at 2, the code length
 * p_bits[curidx] and the sign bit count uquad_sign_bits[curidx] are added
 * to curbits, and the squared differences between the input magnitudes and
 * the four codebook floats at p_codes[curidx*4] scaled by IQ are added to
 * cost.
 *
 * @param size   loop bound; the loop advances in steps of 4
 * @param lambda weight applied to the accumulated squared error
 * @return cost * lambda + curbits
 */
1539 static float get_band_cost_UQUAD_mips(
struct AACEncContext *s,
1541 const float *scaled,
int size,
int scale_idx,
1542 int cb,
const float lambda,
const float uplim,
1550 int qc1, qc2, qc3, qc4;
/* One iteration handles in[i] .. in[i+3]. */
1555 for (i = 0; i <
size; i += 4) {
1558 float *in_pos = (
float *)&in[i];
1559 float di0, di1, di2, di3;
/*
 * Cap qc1..qc4 at 2: t4 holds the limit, slt sets t0..t3 where the limit
 * is below qc, and movn overwrites exactly those qc values with t4.
 * qc1..qc4 are read-write operands, so they are set before this point.
 */
1569 ".set noreorder \n\t"
1571 "ori %[t4], $zero, 2 \n\t"
1572 "slt %[t0], %[t4], %[qc1] \n\t"
1573 "slt %[t1], %[t4], %[qc2] \n\t"
1574 "slt %[t2], %[t4], %[qc3] \n\t"
1575 "slt %[t3], %[t4], %[qc4] \n\t"
1576 "movn %[qc1], %[t4], %[t0] \n\t"
1577 "movn %[qc2], %[t4], %[t1] \n\t"
1578 "movn %[qc3], %[t4], %[t2] \n\t"
1579 "movn %[qc4], %[t4], %[t3] \n\t"
1583 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1584 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1585 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
/* Bit cost of this group: code length plus sign bits for index curidx. */
1597 curbits += p_bits[curidx];
1598 curbits += uquad_sign_bits[curidx];
/* Four consecutive floats of p_codes belong to one index. */
1599 vec = &p_codes[curidx*4];
/*
 * Load the four inputs, take absolute values, load the four codebook
 * floats into $f0..$f3. With NMSUB defined as fd = -(fs*ft - fr), each
 * nmsub.s leaves di_k = fabs(in_pos[k]) - vec[k] * IQ.
 */
1603 ".set noreorder \n\t"
1605 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1606 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1607 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1608 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1609 "abs.s %[di0], %[di0] \n\t"
1610 "abs.s %[di1], %[di1] \n\t"
1611 "abs.s %[di2], %[di2] \n\t"
1612 "abs.s %[di3], %[di3] \n\t"
1613 "lwc1 $f0, 0(%[vec]) \n\t"
1614 "lwc1 $f1, 4(%[vec]) \n\t"
1615 "lwc1 $f2, 8(%[vec]) \n\t"
1616 "lwc1 $f3, 12(%[vec]) \n\t"
1617 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1618 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1619 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1620 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1624 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1625 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1626 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1628 :
"$f0",
"$f1",
"$f2",
"$f3",
/* Accumulate the squared error of the four coefficients. */
1632 cost += di0 * di0 + di1 * di1
1633 + di2 * di2 + di3 * di3;
/* Weighted distortion plus bit count. */
1638 return cost * lambda + curbits;
/**
 * Band cost routine installed in slots 5 and 6 of get_band_cost_arr.
 *
 * The band is processed four coefficients per loop iteration, as two
 * pairs with separate codebook indices curidx and curidx2. The quantized
 * values qc1..qc4 are capped at 4 and then negated wherever the matching
 * input has its sign bit set. The code lengths p_bits[curidx] and
 * p_bits[curidx2] are added to curbits, and the squared differences
 * between the signed inputs and the codebook floats scaled by IQ are
 * added to cost.
 *
 * @param size   loop bound; the loop advances in steps of 4
 * @param lambda weight applied to the accumulated squared error
 * @return cost * lambda + curbits
 */
1641 static float get_band_cost_SPAIR_mips(
struct AACEncContext *s,
1643 const float *scaled,
int size,
int scale_idx,
1644 int cb,
const float lambda,
const float uplim,
1651 int qc1, qc2, qc3, qc4;
/* One iteration handles in[i] .. in[i+3]. */
1657 for (i = 0; i <
size; i += 4) {
1658 const float *vec, *vec2;
1659 int curidx, curidx2;
/* in_int and in_pos alias the same four inputs, as raw words and as floats. */
1660 int *in_int = (
int *)&in[i];
1661 float *in_pos = (
float *)&in[i];
1662 float di0, di1, di2, di3;
/* Cap qc1..qc4 at 4 (t4 holds the limit, movn replaces larger values). */
1672 ".set noreorder \n\t"
1674 "ori %[t4], $zero, 4 \n\t"
1675 "slt %[t0], %[t4], %[qc1] \n\t"
1676 "slt %[t1], %[t4], %[qc2] \n\t"
1677 "slt %[t2], %[t4], %[qc3] \n\t"
1678 "slt %[t3], %[t4], %[qc4] \n\t"
1679 "movn %[qc1], %[t4], %[t0] \n\t"
1680 "movn %[qc2], %[t4], %[t1] \n\t"
1681 "movn %[qc3], %[t4], %[t2] \n\t"
1682 "movn %[qc4], %[t4], %[t3] \n\t"
/* Extract bit 31 (the sign bit) of each raw input word into t0..t3. */
1683 "lw %[t0], 0(%[in_int]) \n\t"
1684 "lw %[t1], 4(%[in_int]) \n\t"
1685 "lw %[t2], 8(%[in_int]) \n\t"
1686 "lw %[t3], 12(%[in_int]) \n\t"
1687 "srl %[t0], %[t0], 31 \n\t"
1688 "srl %[t1], %[t1], 31 \n\t"
1689 "srl %[t2], %[t2], 31 \n\t"
1690 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qc; movn keeps the negated value only where the sign bit is set. */
1691 "subu %[t4], $zero, %[qc1] \n\t"
1692 "subu %[t5], $zero, %[qc2] \n\t"
1693 "subu %[t6], $zero, %[qc3] \n\t"
1694 "subu %[t7], $zero, %[qc4] \n\t"
1695 "movn %[qc1], %[t4], %[t0] \n\t"
1696 "movn %[qc2], %[t5], %[t1] \n\t"
1697 "movn %[qc3], %[t6], %[t2] \n\t"
1698 "movn %[qc4], %[t7], %[t3] \n\t"
1702 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1703 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1704 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1705 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1706 : [in_int]
"r"(in_int)
/* NOTE(review): the constant 40 presumably biases the signed pair into a non-negative table index; confirm against the index computation. */
1714 curidx2 += qc4 + 40;
/* Code length of both pairs. */
1716 curbits += p_bits[curidx];
1717 curbits += p_bits[curidx2];
/* Two consecutive floats of p_codes belong to one index. */
1719 vec = &p_codes[curidx*2];
1720 vec2 = &p_codes[curidx2*2];
/*
 * Inputs go to $f0/$f2/$f4/$f6, codebook floats to $f1/$f3/$f5/$f7.
 * With NMSUB defined as fd = -(fs*ft - fr), the results are
 * di0 = in_pos[0] - vec[0]*IQ, di1 = in_pos[1] - vec[1]*IQ,
 * di2 = in_pos[2] - vec2[0]*IQ, di3 = in_pos[3] - vec2[1]*IQ.
 */
1724 ".set noreorder \n\t"
1726 "lwc1 $f0, 0(%[in_pos]) \n\t"
1727 "lwc1 $f1, 0(%[vec]) \n\t"
1728 "lwc1 $f2, 4(%[in_pos]) \n\t"
1729 "lwc1 $f3, 4(%[vec]) \n\t"
1730 "lwc1 $f4, 8(%[in_pos]) \n\t"
1731 "lwc1 $f5, 0(%[vec2]) \n\t"
1732 "lwc1 $f6, 12(%[in_pos]) \n\t"
1733 "lwc1 $f7, 4(%[vec2]) \n\t"
1734 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1735 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1736 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1737 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1741 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1742 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1743 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1744 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
1745 :
"$f0",
"$f1",
"$f2",
"$f3",
1746 "$f4",
"$f5",
"$f6",
"$f7",
/* Accumulate the squared error of the four coefficients. */
1750 cost += di0 * di0 + di1 * di1
1751 + di2 * di2 + di3 * di3;
/* Weighted distortion plus bit count. */
1756 return cost * lambda + curbits;
/**
 * Band cost routine installed in slots 7 and 8 of get_band_cost_arr.
 *
 * The band is processed four coefficients per loop iteration, as two
 * pairs with separate codebook indices curidx and curidx2. The quantized
 * values qc1..qc4 are capped at 7; sign flags and positive-value counts
 * are gathered per pair. The code lengths from p_bits and the sign bit
 * counts from upair7_sign_bits are added to curbits, and the squared
 * differences between the input magnitudes and the codebook floats scaled
 * by IQ are added to cost.
 *
 * @param size   loop bound; the loop advances in steps of 4
 * @param lambda weight applied to the accumulated squared error
 * @return cost * lambda + curbits
 */
1759 static float get_band_cost_UPAIR7_mips(
struct AACEncContext *s,
1761 const float *scaled,
int size,
int scale_idx,
1762 int cb,
const float lambda,
const float uplim,
1769 int qc1, qc2, qc3, qc4;
/* One iteration handles in[i] .. in[i+3]. */
1775 for (i = 0; i <
size; i += 4) {
1776 const float *vec, *vec2;
1777 int curidx, curidx2, sign1, count1, sign2, count2;
/* in_int and in_pos alias the same four inputs, as raw words and as floats. */
1778 int *in_int = (
int *)&in[i];
1779 float *in_pos = (
float *)&in[i];
1780 float di0, di1, di2, di3;
/* Cap qc1..qc4 at 7 and start both sign accumulators at zero. */
1790 ".set noreorder \n\t"
1792 "ori %[t4], $zero, 7 \n\t"
1793 "ori %[sign1], $zero, 0 \n\t"
1794 "ori %[sign2], $zero, 0 \n\t"
1795 "slt %[t0], %[t4], %[qc1] \n\t"
1796 "slt %[t1], %[t4], %[qc2] \n\t"
1797 "slt %[t2], %[t4], %[qc3] \n\t"
1798 "slt %[t3], %[t4], %[qc4] \n\t"
1799 "movn %[qc1], %[t4], %[t0] \n\t"
1800 "movn %[qc2], %[t4], %[t1] \n\t"
1801 "movn %[qc3], %[t4], %[t2] \n\t"
1802 "movn %[qc4], %[t4], %[t3] \n\t"
/*
 * Build the sign words. Each raw input word is tested for being negative.
 * The flag of the first element of a pair is stored only if its qc is
 * nonzero; the flag of the second element is shifted in, as
 * (sign << 1) | flag, only if its qc is nonzero.
 */
1803 "lw %[t0], 0(%[in_int]) \n\t"
1804 "lw %[t1], 4(%[in_int]) \n\t"
1805 "lw %[t2], 8(%[in_int]) \n\t"
1806 "lw %[t3], 12(%[in_int]) \n\t"
1807 "slt %[t0], %[t0], $zero \n\t"
1808 "movn %[sign1], %[t0], %[qc1] \n\t"
1809 "slt %[t2], %[t2], $zero \n\t"
1810 "movn %[sign2], %[t2], %[qc3] \n\t"
1811 "slt %[t1], %[t1], $zero \n\t"
1812 "sll %[t0], %[sign1], 1 \n\t"
1813 "or %[t0], %[t0], %[t1] \n\t"
1814 "movn %[sign1], %[t0], %[qc2] \n\t"
1815 "slt %[t3], %[t3], $zero \n\t"
1816 "sll %[t0], %[sign2], 1 \n\t"
1817 "or %[t0], %[t0], %[t3] \n\t"
1818 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 = (qc1 > 0) + (qc2 > 0); count2 = (qc3 > 0) + (qc4 > 0). */
1819 "slt %[count1], $zero, %[qc1] \n\t"
1820 "slt %[t1], $zero, %[qc2] \n\t"
1821 "slt %[count2], $zero, %[qc3] \n\t"
1822 "slt %[t2], $zero, %[qc4] \n\t"
1823 "addu %[count1], %[count1], %[t1] \n\t"
1824 "addu %[count2], %[count2], %[t2] \n\t"
1828 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1829 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1830 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
1831 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
1832 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1834 : [in_int]
"r"(in_int)
/* First pair: code length, sign bits, and its two codebook floats. */
1844 curbits += p_bits[curidx];
1845 curbits += upair7_sign_bits[curidx];
1846 vec = &p_codes[curidx*2];
/* Second pair: same three steps with curidx2. */
1848 curbits += p_bits[curidx2];
1849 curbits += upair7_sign_bits[curidx2];
1850 vec2 = &p_codes[curidx2*2];
/*
 * Load the four inputs, take absolute values, load vec[0], vec[1],
 * vec2[0], vec2[1] into $f0..$f3. With NMSUB defined as
 * fd = -(fs*ft - fr), each nmsub.s leaves
 * di_k = fabs(in_pos[k]) - codebook_k * IQ.
 */
1854 ".set noreorder \n\t"
1856 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1857 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1858 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1859 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1860 "abs.s %[di0], %[di0] \n\t"
1861 "abs.s %[di1], %[di1] \n\t"
1862 "abs.s %[di2], %[di2] \n\t"
1863 "abs.s %[di3], %[di3] \n\t"
1864 "lwc1 $f0, 0(%[vec]) \n\t"
1865 "lwc1 $f1, 4(%[vec]) \n\t"
1866 "lwc1 $f2, 0(%[vec2]) \n\t"
1867 "lwc1 $f3, 4(%[vec2]) \n\t"
1868 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1869 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1870 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1871 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1875 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1876 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1877 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1878 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
1879 :
"$f0",
"$f1",
"$f2",
"$f3",
/* Accumulate the squared error of the four coefficients. */
1883 cost += di0 * di0 + di1 * di1
1884 + di2 * di2 + di3 * di3;
/* Weighted distortion plus bit count. */
1889 return cost * lambda + curbits;
/**
 * Band cost routine installed in slots 9 and 10 of get_band_cost_arr.
 *
 * The band is processed four coefficients per loop iteration, as two
 * pairs with separate codebook indices curidx and curidx2. The quantized
 * values qc1..qc4 are capped at 12; sign flags and positive-value counts
 * are gathered per pair. The code lengths from p_bits and the sign bit
 * counts from upair12_sign_bits are added to curbits, and the squared
 * differences between the input magnitudes and the codebook floats scaled
 * by IQ are added to cost.
 *
 * @param size   loop bound; the loop advances in steps of 4
 * @param lambda weight applied to the accumulated squared error
 * @return cost * lambda + curbits
 */
1892 static float get_band_cost_UPAIR12_mips(
struct AACEncContext *s,
1894 const float *scaled,
int size,
int scale_idx,
1895 int cb,
const float lambda,
const float uplim,
1902 int qc1, qc2, qc3, qc4;
/* One iteration handles in[i] .. in[i+3]. */
1908 for (i = 0; i <
size; i += 4) {
1909 const float *vec, *vec2;
1910 int curidx, curidx2;
1911 int sign1, count1, sign2, count2;
/* in_int and in_pos alias the same four inputs, as raw words and as floats. */
1912 int *in_int = (
int *)&in[i];
1913 float *in_pos = (
float *)&in[i];
1914 float di0, di1, di2, di3;
/* Cap qc1..qc4 at 12 and start both sign accumulators at zero. */
1924 ".set noreorder \n\t"
1926 "ori %[t4], $zero, 12 \n\t"
1927 "ori %[sign1], $zero, 0 \n\t"
1928 "ori %[sign2], $zero, 0 \n\t"
1929 "slt %[t0], %[t4], %[qc1] \n\t"
1930 "slt %[t1], %[t4], %[qc2] \n\t"
1931 "slt %[t2], %[t4], %[qc3] \n\t"
1932 "slt %[t3], %[t4], %[qc4] \n\t"
1933 "movn %[qc1], %[t4], %[t0] \n\t"
1934 "movn %[qc2], %[t4], %[t1] \n\t"
1935 "movn %[qc3], %[t4], %[t2] \n\t"
1936 "movn %[qc4], %[t4], %[t3] \n\t"
/*
 * Build the sign words. Each raw input word is tested for being negative.
 * The flag of the first element of a pair is stored only if its qc is
 * nonzero; the flag of the second element is shifted in, as
 * (sign << 1) | flag, only if its qc is nonzero.
 */
1937 "lw %[t0], 0(%[in_int]) \n\t"
1938 "lw %[t1], 4(%[in_int]) \n\t"
1939 "lw %[t2], 8(%[in_int]) \n\t"
1940 "lw %[t3], 12(%[in_int]) \n\t"
1941 "slt %[t0], %[t0], $zero \n\t"
1942 "movn %[sign1], %[t0], %[qc1] \n\t"
1943 "slt %[t2], %[t2], $zero \n\t"
1944 "movn %[sign2], %[t2], %[qc3] \n\t"
1945 "slt %[t1], %[t1], $zero \n\t"
1946 "sll %[t0], %[sign1], 1 \n\t"
1947 "or %[t0], %[t0], %[t1] \n\t"
1948 "movn %[sign1], %[t0], %[qc2] \n\t"
1949 "slt %[t3], %[t3], $zero \n\t"
1950 "sll %[t0], %[sign2], 1 \n\t"
1951 "or %[t0], %[t0], %[t3] \n\t"
1952 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 = (qc1 > 0) + (qc2 > 0); count2 = (qc3 > 0) + (qc4 > 0). */
1953 "slt %[count1], $zero, %[qc1] \n\t"
1954 "slt %[t1], $zero, %[qc2] \n\t"
1955 "slt %[count2], $zero, %[qc3] \n\t"
1956 "slt %[t2], $zero, %[qc4] \n\t"
1957 "addu %[count1], %[count1], %[t1] \n\t"
1958 "addu %[count2], %[count2], %[t2] \n\t"
1962 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1963 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1964 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
1965 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
1966 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1968 : [in_int]
"r"(in_int)
/* Code lengths and sign bits of both pairs, then their codebook floats. */
1978 curbits += p_bits[curidx];
1979 curbits += p_bits[curidx2];
1980 curbits += upair12_sign_bits[curidx];
1981 curbits += upair12_sign_bits[curidx2];
1982 vec = &p_codes[curidx*2];
1983 vec2 = &p_codes[curidx2*2];
/*
 * Load the four inputs, take absolute values, load vec[0], vec[1],
 * vec2[0], vec2[1] into $f0..$f3. With NMSUB defined as
 * fd = -(fs*ft - fr), each nmsub.s leaves
 * di_k = fabs(in_pos[k]) - codebook_k * IQ.
 */
1987 ".set noreorder \n\t"
1989 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1990 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1991 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1992 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1993 "abs.s %[di0], %[di0] \n\t"
1994 "abs.s %[di1], %[di1] \n\t"
1995 "abs.s %[di2], %[di2] \n\t"
1996 "abs.s %[di3], %[di3] \n\t"
1997 "lwc1 $f0, 0(%[vec]) \n\t"
1998 "lwc1 $f1, 4(%[vec]) \n\t"
1999 "lwc1 $f2, 0(%[vec2]) \n\t"
2000 "lwc1 $f3, 4(%[vec2]) \n\t"
2001 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
2002 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
2003 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
2004 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
2008 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
2009 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
2010 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
2011 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
2012 :
"$f0",
"$f1",
"$f2",
"$f3",
/* Accumulate the squared error of the four coefficients. */
2016 cost += di0 * di0 + di1 * di1
2017 + di2 * di2 + di3 * di3;
/* Weighted distortion plus bit count. */
2022 return cost * lambda + curbits;
/**
 * Band cost routine installed in slot 11 of get_band_cost_arr.
 *
 * The band is processed four coefficients per loop iteration, as two
 * pairs with separate codebook indices curidx and curidx2. Quantized
 * values above 15 are replaced by 16 and flagged in cond0..cond3; for the
 * flagged values extra bits derived from av_log2 of the clipped value are
 * added to curbits on top of the p_bits and esc_sign_bits table entries.
 * The squared differences between the input magnitudes and their
 * reconstructions are added to cost.
 *
 * @param size   loop bound; the loop advances in steps of 4
 * @param lambda weight applied to the accumulated squared error
 * @return cost * lambda + curbits
 */
2025 static float get_band_cost_ESC_mips(
struct AACEncContext *s,
2027 const float *scaled,
int size,
int scale_idx,
2028 int cb,
const float lambda,
const float uplim,
/* Magnitude at or above which the reconstruction is pinned to this value. */
2033 const float CLIPPED_ESCAPE = 165140.0f * IQ;
2036 int qc1, qc2, qc3, qc4;
/* One iteration handles in[i] .. in[i+3]. */
2042 for (i = 0; i <
size; i += 4) {
2043 const float *vec, *vec2;
2044 int curidx, curidx2;
2046 float di1, di2, di3, di4;
2047 int cond0, cond1, cond2, cond3;
/*
 * c1..c4: qc shifted left by 18 with shll_s.w, then shifted right
 * logically by 18.
 * NOTE(review): assumes shll_s.w saturates at 0x7FFFFFFF as in the MIPS
 * DSP ASE, which would cap c at 8191 for non-negative qc; confirm.
 * cond0..cond3: 1 where qc exceeds 15 (t6), else 0; those qc values are
 * then replaced by 16 (t7).
 */
2058 ".set noreorder \n\t"
2060 "ori %[t6], $zero, 15 \n\t"
2061 "ori %[t7], $zero, 16 \n\t"
2062 "shll_s.w %[c1], %[qc1], 18 \n\t"
2063 "shll_s.w %[c2], %[qc2], 18 \n\t"
2064 "shll_s.w %[c3], %[qc3], 18 \n\t"
2065 "shll_s.w %[c4], %[qc4], 18 \n\t"
2066 "srl %[c1], %[c1], 18 \n\t"
2067 "srl %[c2], %[c2], 18 \n\t"
2068 "srl %[c3], %[c3], 18 \n\t"
2069 "srl %[c4], %[c4], 18 \n\t"
2070 "slt %[cond0], %[t6], %[qc1] \n\t"
2071 "slt %[cond1], %[t6], %[qc2] \n\t"
2072 "slt %[cond2], %[t6], %[qc3] \n\t"
2073 "slt %[cond3], %[t6], %[qc4] \n\t"
2074 "movn %[qc1], %[t7], %[cond0] \n\t"
2075 "movn %[qc2], %[t7], %[cond1] \n\t"
2076 "movn %[qc3], %[t7], %[cond2] \n\t"
2077 "movn %[qc4], %[t7], %[cond3] \n\t"
2081 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
2082 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
2083 [cond0]
"=&r"(cond0), [cond1]
"=&r"(cond1),
2084 [cond2]
"=&r"(cond2), [cond3]
"=&r"(cond3),
2085 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
2086 [c3]
"=&r"(c3), [c4]
"=&r"(c4),
2087 [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
/* First pair: code length, sign bits, and its two codebook floats. */
2096 curbits += p_bits[curidx];
2097 curbits += esc_sign_bits[curidx];
2098 vec = &p_codes[curidx*2];
/* Second pair: same three steps with curidx2. */
2100 curbits += p_bits[curidx2];
2101 curbits += esc_sign_bits[curidx2];
2102 vec2 = &p_codes[curidx2*2];
/*
 * Extra bits for flagged values. cond is 0 or 1, so -cond is either zero
 * or an all-ones mask: the term is added only where cond is set.
 */
2104 curbits += (
av_log2(c1) * 2 - 3) & (-cond0);
2105 curbits += (
av_log2(c2) * 2 - 3) & (-cond1);
2106 curbits += (
av_log2(c3) * 2 - 3) & (-cond2);
2107 curbits += (
av_log2(c4) * 2 - 3) & (-cond3);
/* Magnitudes of the inputs. */
2110 t2 = fabsf(in[i+1]);
2111 t3 = fabsf(in[i+2]);
2112 t4 = fabsf(in[i+3]);
/*
 * Per coefficient, one of three reconstructions is subtracted from the
 * magnitude: CLIPPED_ESCAPE when the magnitude reaches it,
 * c * cbrtf(c) * IQ, or the codebook float times IQ.
 * NOTE(review): presumably cond0..cond3 select between the latter two;
 * confirm against the full branch structure.
 */
2115 if (t1 >= CLIPPED_ESCAPE) {
2116 di1 = t1 - CLIPPED_ESCAPE;
2118 di1 = t1 - c1 *
cbrtf(c1) * IQ;
2121 di1 = t1 - vec[0] * IQ;
2124 if (t2 >= CLIPPED_ESCAPE) {
2125 di2 = t2 - CLIPPED_ESCAPE;
2127 di2 = t2 - c2 *
cbrtf(c2) * IQ;
2130 di2 = t2 - vec[1] * IQ;
2133 if (t3 >= CLIPPED_ESCAPE) {
2134 di3 = t3 - CLIPPED_ESCAPE;
2136 di3 = t3 - c3 *
cbrtf(c3) * IQ;
2139 di3 = t3 - vec2[0] * IQ;
2142 if (t4 >= CLIPPED_ESCAPE) {
2143 di4 = t4 - CLIPPED_ESCAPE;
2145 di4 = t4 - c4 *
cbrtf(c4) * IQ;
2148 di4 = t4 - vec2[1]*IQ;
/* Accumulate the squared error of the four coefficients. */
2150 cost += di1 * di1 + di2 * di2
2151 + di3 * di3 + di4 * di4;
/* Weighted distortion plus bit count. */
2156 return cost * lambda + curbits;
/**
 * Table of band cost routines. The get_band_cost macro indexes it with cb.
 *
 * Slot layout as initialized below:
 *   0       ZERO
 *   1, 2    SQUAD
 *   3, 4    UQUAD
 *   5, 6    SPAIR
 *   7, 8    UPAIR7
 *   9, 10   UPAIR12
 *   11      ESC
 *   12      NONE
 *   13..15  ZERO
 */
2159 static float (*
const get_band_cost_arr[])(
struct AACEncContext *
s,
2161 const float *scaled,
int size,
int scale_idx,
2162 int cb,
const float lambda,
const float uplim,
2164 get_band_cost_ZERO_mips,
2165 get_band_cost_SQUAD_mips,
2166 get_band_cost_SQUAD_mips,
2167 get_band_cost_UQUAD_mips,
2168 get_band_cost_UQUAD_mips,
2169 get_band_cost_SPAIR_mips,
2170 get_band_cost_SPAIR_mips,
2171 get_band_cost_UPAIR7_mips,
2172 get_band_cost_UPAIR7_mips,
2173 get_band_cost_UPAIR12_mips,
2174 get_band_cost_UPAIR12_mips,
2175 get_band_cost_ESC_mips,
2176 get_band_cost_NONE_mips,
2177 get_band_cost_ZERO_mips,
2178 get_band_cost_ZERO_mips,
2179 get_band_cost_ZERO_mips,
/*
 * Calls the routine stored in get_band_cost_arr[cb], forwarding all ten
 * arguments unchanged. cb appears twice in the expansion: once as the
 * table index and once as an argument, so it should be free of side
 * effects.
 */
2182 #define get_band_cost( \
2183 s, pb, in, scaled, size, scale_idx, cb, \
2184 lambda, uplim, bits) \
2185 get_band_cost_arr[cb]( \
2186 s, pb, in, scaled, size, scale_idx, cb, \
2187 lambda, uplim, bits)
2190 const float *scaled,
int size,
int scale_idx,
2191 int cb,
const float lambda,
const float uplim,
2194 return get_band_cost(s,
NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
/**
 * Scalefactor search over the bands of one channel element (sce).
 *
 * The statements below show: a bit budget destbits capped at 5800,
 * per-band thresholds stored in uplims, per-band distortion stored in
 * dists, scalefactor indices in sce->sf_idx compared against the bounds
 * 60 and 218, and an outer do-while that repeats while fflag is set and
 * its is below 10.
 * NOTE(review): presumably the MIPS counterpart of the generic two-loop
 * quantizer search; confirm against the generic coder.
 */
2197 static void search_for_quantizers_twoloop_mips(
AVCodecContext *avctx,
2202 int start = 0, i, w, w2,
g;
/* Per-band distortion and threshold arrays, indexed as w*16+g. */
2204 float dists[128] = { 0 }, uplims[128];
2206 int fflag, minscaler;
/* Never aim for more than 5800 bits. */
2211 destbits =
FFMIN(destbits, 5800);
/* Mark the band as not coded. */
2220 sce->
zeroes[(w+w2)*16+g] = 1;
/* Stored threshold is the band value uplim multiplied by 512. */
2225 uplims[w*16+
g] = uplim *512;
/* Running minimum of uplim. */
2228 minthr =
FFMIN(minthr, uplim);
2234 if (sce->
zeroes[w*16+g]) {
2257 minscaler = sce->
sf_idx[0];
/* Step is 32 while its is zero and 1 afterwards. */
2258 qstep = its ? 1 : 32;
/* Bands that are zeroed or whose index is already 218 or more are tested separately. */
2273 if (sce->
zeroes[w*16+g] || sce->
sf_idx[w*16+g] >= 218) {
/* Running minimum of the scalefactor indices. */
2277 minscaler =
FFMIN(minscaler, sce->
sf_idx[w*16+g]);
/* Add up the bit counts reported by quantize_band_cost_bits. */
2281 bits += quantize_band_cost_bits(s, coefs + w2*128,
2309 if (sce->
zeroes[w*16+g] || sce->
sf_idx[w*16+g] >= 218) {
2313 minscaler =
FFMIN(minscaler, sce->
sf_idx[w*16+g]);
/* Stored per-band value is dist with the bit count subtracted. */
2327 dists[w*16+
g] = dist -
bits;
/* Over budget: visit indices below 218 - qstep; otherwise visit indices above 60 - qstep. */
2337 if (tbits > destbits) {
2338 for (i = 0; i < 128; i++)
2339 if (sce->
sf_idx[i] < 218 - qstep)
2342 for (i = 0; i < 128; i++)
2343 if (sce->
sf_idx[i] > 60 - qstep)
/* Special case once qstep has reached zero while still more than 2 percent over budget. */
2347 if (!qstep && tbits > destbits*1.02 && sce->
sf_idx[0] < 217)
/* Remember the index so a change can be detected below. */
2355 int prevsc = sce->
sf_idx[w*16+
g];
/* Band distortion above its threshold and index still above 60. */
2356 if (dists[w*16+g] > uplims[w*16+g] && sce->
sf_idx[w*16+g] > 60) {
2364 if (sce->
sf_idx[w*16+g] != prevsc)
/* Repeat while fflag is set, for at most 10 passes. */
2370 }
while (fflag && its < 10);
2375 int start = 0, i, w, w2,
g;
2376 float M[128],
S[128];
2385 float dist1 = 0.0f, dist2 = 0.0f;
2392 M[i ] = (sce0->
coeffs[start+w2*128+i ]
2393 + sce1->
coeffs[start+w2*128+i ]) * 0.5;
2394 M[i+1] = (sce0->
coeffs[start+w2*128+i+1]
2395 + sce1->
coeffs[start+w2*128+i+1]) * 0.5;
2396 M[i+2] = (sce0->
coeffs[start+w2*128+i+2]
2397 + sce1->
coeffs[start+w2*128+i+2]) * 0.5;
2398 M[i+3] = (sce0->
coeffs[start+w2*128+i+3]
2399 + sce1->
coeffs[start+w2*128+i+3]) * 0.5;
2402 - sce1->
coeffs[start+w2*128+i ];
2404 - sce1->
coeffs[start+w2*128+i+1];
2406 - sce1->
coeffs[start+w2*128+i+2];
2408 - sce1->
coeffs[start+w2*128+i+3];
2417 sce0->
sf_idx[(w+w2)*16+g],
2423 sce1->
sf_idx[(w+w2)*16+g],
2429 sce0->
sf_idx[(w+w2)*16+g],
2435 sce1->
sf_idx[(w+w2)*16+g],
2439 cpe->
ms_mask[w*16+
g] = dist2 < dist1;
2448 int win,
int group_len,
const float lambda)
2455 const int run_esc = (1 <<
run_bits) - 1;
2456 int idx, ppos,
count;
2457 int stackrun[120], stackcb[120], stack_len;
2463 for (cb = 0; cb < 12; cb++) {
2464 path[0][
cb].
cost = run_bits+4;
2466 path[0][
cb].
run = 0;
2468 for (swb = 0; swb < max_sfb; swb++) {
2470 if (sce->
zeroes[win*16 + swb]) {
2471 float cost_stay_here = path[swb][0].
cost;
2472 float cost_get_here = next_minbits + run_bits + 4;
2476 if (cost_get_here < cost_stay_here) {
2477 path[swb+1][0].
prev_idx = next_mincb;
2478 path[swb+1][0].
cost = cost_get_here;
2479 path[swb+1][0].
run = 1;
2482 path[swb+1][0].
cost = cost_stay_here;
2483 path[swb+1][0].
run = path[swb][0].
run + 1;
2485 next_minbits = path[swb+1][0].
cost;
2487 for (cb = 1; cb < 12; cb++) {
2488 path[swb+1][
cb].
cost = 61450;
2490 path[swb+1][
cb].
run = 0;
2493 float minbits = next_minbits;
2494 int mincb = next_mincb;
2495 int startcb = sce->
band_type[win*16+swb];
2498 for (cb = 0; cb < startcb; cb++) {
2499 path[swb+1][
cb].
cost = 61450;
2501 path[swb+1][
cb].
run = 0;
2503 for (cb = startcb; cb < 12; cb++) {
2504 float cost_stay_here, cost_get_here;
2506 for (w = 0; w < group_len; w++) {
2507 bits += quantize_band_cost_bits(s, sce->
coeffs + start + w*128,
2508 s->
scoefs + start + w*128, size,
2509 sce->
sf_idx[(win+w)*16+swb], cb,
2513 cost_get_here = minbits + bits + run_bits + 4;
2517 if (cost_get_here < cost_stay_here) {
2519 path[swb+1][
cb].
cost = cost_get_here;
2520 path[swb+1][
cb].
run = 1;
2523 path[swb+1][
cb].
cost = cost_stay_here;
2526 if (path[swb+1][cb].cost < next_minbits) {
2527 next_minbits = path[swb+1][
cb].
cost;
2537 for (cb = 1; cb < 12; cb++)
2538 if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
2544 stackrun[stack_len] = path[ppos][
cb].
run;
2545 stackcb [stack_len] =
cb;
2547 ppos -= path[ppos][
cb].
run;
2552 for (i = stack_len - 1; i >= 0; i--) {
2554 count = stackrun[i];
2555 memset(sce->
zeroes + win*16 + start, !stackcb[i], count);
2556 for (j = 0; j <
count; j++) {
2560 while (count >= run_esc) {
static const uint8_t *const run_value_bits[2]
static void abs_pow34_v(float *out, const float *in, const int size)
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
#define SCALE_DIFF_ZERO
codebook index corresponding to zero scalefactor indices difference
#define quantize_and_encode_band_cost(s, pb, in, quant, scaled, size, scale_idx, cb, lambda, uplim, bits, rtz)
FFPsyBand psy_bands[PSY_MAX_BANDS]
channel bands information
#define SCALE_MAX_DIFF
maximum scalefactor difference allowed by standard
AACCoefficientsEncoder * coder
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
int prev_idx
pointer to the previous path point
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
#define av_assert0(cond)
assert() equivalent, that is always enabled.
static double cb(void *priv, double x, double y)
AACEncOptions options
encoding options
SingleChannelElement ch[2]
float ff_aac_pow34sf_tab[428]
const uint8_t ff_aac_scalefactor_bits[121]
static const uint8_t run_bits[7][16]
void ff_aac_coder_init_mips(AACEncContext *c)
single band psychoacoustic information
float coeffs[1024]
coefficients for IMDCT, maybe processed
static const uint8_t run_value_bits_short[16]
bits needed to code codebook run value for short windows
uint8_t max_sfb
number of scalefactor bands per group
int num_swb
number of scalefactor window bands
Libavcodec external API header.
void(* search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, SingleChannelElement *sce, const float lambda)
const float *const ff_aac_codebook_vectors[]
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
#define SCALE_DIV_512
scalefactor difference that corresponds to scale difference in 512 times
int bit_rate
the average bitrate
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
const uint8_t *const ff_aac_spectral_bits[11]
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
AAC definitions and structures.
void(* search_for_ms)(struct AACEncContext *s, ChannelElement *cpe)
static int find_min_book(float maxval, int sf)
int sample_rate
samples per second
main external API structure.
IndividualChannelStream ics
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> in
static av_always_inline float cbrtf(float x)
structure used in optimal codebook search
Replacements for frequently missing libm functions.
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
static float(*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits)
uint8_t zeroes[128]
band is not coded (used by encoder)
int sf_idx[128]
scalefactor indices (used by encoder)
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
Single Channel Element - used for both SCE and LFE elements.
ChannelElement * cpe
channel elements
float ff_aac_pow2sf_tab[428]
channel element - generic struct for SCE/CPE/CCE/LFE
const uint16_t *const ff_aac_spectral_codes[11]
int channels
number of audio channels
static const uint8_t run_value_bits_long[64]
bits needed to code codebook run value for long windows
FFPsyChannel * ch
single channel information
enum BandType band_type[128]
band types
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> out
static float find_max_val(int group_len, int swb_size, const float *scaled)
float scoefs[1024]
scaled coefficients
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, int rtz)