doxygen/2.8/aaccoder__mips_8c_source.html

 /*

  * Copyright (c) 2012

  *      MIPS Technologies, Inc., California.

  *

  * Redistribution and use in source and binary forms, with or without

  * modification, are permitted provided that the following conditions

  * are met:

  * 1. Redistributions of source code must retain the above copyright

  *    notice, this list of conditions and the following disclaimer.

  * 2. Redistributions in binary form must reproduce the above copyright

  *    notice, this list of conditions and the following disclaimer in the

  *    documentation and/or other materials provided with the distribution.

  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its

  *    contributors may be used to endorse or promote products derived from

  *    this software without specific prior written permission.

  *

  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND

  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

  * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE

  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

  * SUCH DAMAGE.

  *

  * Author:  Stanislav Ocovaj (socovaj@mips.com)

  *          Szabolcs Pal     (sabolc@mips.com)

  *

  * AAC coefficients encoder optimized for MIPS floating-point architecture

  *

  * This file is part of FFmpeg.

  *

  * FFmpeg is free software; you can redistribute it and/or

  * modify it under the terms of the GNU Lesser General Public

  * License as published by the Free Software Foundation; either

  * version 2.1 of the License, or (at your option) any later version.

  *

  * FFmpeg is distributed in the hope that it will be useful,

  * but WITHOUT ANY WARRANTY; without even the implied warranty of

  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

  * Lesser General Public License for more details.

  *

  * You should have received a copy of the GNU Lesser General Public

  * License along with FFmpeg; if not, write to the Free Software

  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

  */


 /**

  * @file

  * Reference: libavcodec/aaccoder.c

  */


 #include "libavutil/libm.h"


 #include <float.h>

 #include "libavutil/mathematics.h"

 #include "libavcodec/avcodec.h"

 #include "libavcodec/put_bits.h"

 #include "libavcodec/aac.h"

 #include "libavcodec/aacenc.h"

 #include "libavcodec/aactab.h"


 #if HAVE_INLINE_ASM

 /**
  * Record holding a predecessor index, a cost and a run length.
  *
  * NOTE(review): presumably one node of a codebook path search, mirroring the
  * reference coder; it is not used in the part of the file reviewed here --
  * confirm against its users.
  */
 typedef struct BandCodingPath {

     int prev_idx;   /* index of the previous entry (meaning inferred from the name; TODO confirm) */

     float cost;     /* cost associated with this entry */

     int run;        /* run length associated with this entry */

 } BandCodingPath;


 /*
  * 64-entry lookup table: entries 0..30 are 5, entries 31..62 are 10 and
  * entry 63 is 15.
  * NOTE(review): presumably the number of bits needed to code a run length in
  * long windows (5-bit field, extended each time the field saturates) --
  * confirm against the code that indexes it.
  */
 static const uint8_t run_value_bits_long[64] = {

      5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,

      5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,

     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,

     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15

 };


 /*
  * 16-entry lookup table: entries 0..6 are 3, entries 7..14 are 6 and
  * entry 15 is 9.
  * NOTE(review): presumably the short-window counterpart of
  * run_value_bits_long (3-bit field) -- confirm against its users.
  */
 static const uint8_t run_value_bits_short[16] = {

     3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9

 };


 /* Selector for the two tables above: index 0 is the long table, index 1 the short one. */
 static const uint8_t * const run_value_bits[2] = {

     run_value_bits_long, run_value_bits_short

 };


 /*
  * 81 (= 3^4) entries. Each entry equals the number of non-zero digits in the
  * four-digit base-3 representation of its index, i.e. a value in 0..4.
  * NOTE(review): presumably the number of sign bits that accompany an unsigned
  * quad codeword; the table is not indexed in the part of the file reviewed
  * here -- confirm against its users.
  */
 static const uint8_t uquad_sign_bits[81] = {

     0, 1, 1, 1, 2, 2, 1, 2, 2,

     1, 2, 2, 2, 3, 3, 2, 3, 3,

     1, 2, 2, 2, 3, 3, 2, 3, 3,

     1, 2, 2, 2, 3, 3, 2, 3, 3,

     2, 3, 3, 3, 4, 4, 3, 4, 4,

     2, 3, 3, 3, 4, 4, 3, 4, 4,

     1, 2, 2, 2, 3, 3, 2, 3, 3,

     2, 3, 3, 3, 4, 4, 3, 4, 4,

     2, 3, 3, 3, 4, 4, 3, 4, 4

 };


 /*
  * 64 (= 8 * 8) entries laid out as an 8x8 grid. Entry [8 * a + b] equals the
  * number of non-zero values among a and b (0, 1 or 2).
  * NOTE(review): presumably the number of sign bits that accompany an unsigned
  * pair codeword with values 0..7 -- confirm against its users.
  */
 static const uint8_t upair7_sign_bits[64] = {

     0, 1, 1, 1, 1, 1, 1, 1,

     1, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2,

 };


 /*
  * 169 (= 13 * 13) entries laid out as a 13x13 grid. Entry [13 * a + b] equals
  * the number of non-zero values among a and b (0, 1 or 2).
  * NOTE(review): presumably the number of sign bits that accompany an unsigned
  * pair codeword with values 0..12 -- confirm against its users.
  */
 static const uint8_t upair12_sign_bits[169] = {

     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2

 };


 /*
  * 289 (= 17 * 17) entries laid out as a 17x17 grid. Entry [17 * a + b] equals
  * the number of non-zero values among a and b (0, 1 or 2).
  * NOTE(review): presumably the number of sign bits that accompany a pair
  * codeword of the escape codebook (values 0..16) -- confirm against its users.
  */
 static const uint8_t esc_sign_bits[289] = {

     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2

 };


 /*
  * Rounding offsets added to a scaled coefficient before it is truncated to int.
  * ROUND_STANDARD is the offset used by the SQUAD/UQUAD/SPAIR/UPAIR7/UPAIR12
  * encoders below.
  */
 #define ROUND_STANDARD 0.4054f

 /* Smaller offset; not referenced in the part of the file reviewed here. */
 #define ROUND_TO_ZERO 0.1054f


 static void abs_pow34_v(float *out, const float *in, const int size) {

 #ifndef USE_REALLY_FULL_SEARCH

     int i;

     float a, b, c, d;

     float ax, bx, cx, dx;


     for (i = 0; i < size; i += 4) {

         a = fabsf(in[i  ]);

         b = fabsf(in[i+1]);

         c = fabsf(in[i+2]);

         d = fabsf(in[i+3]);


         ax = sqrtf(a);

         bx = sqrtf(b);

         cx = sqrtf(c);

         dx = sqrtf(d);


         a = a * ax;

         b = b * bx;

         c = c * cx;

         d = d * dx;


         out[i  ] = sqrtf(a);

         out[i+1] = sqrtf(b);

         out[i+2] = sqrtf(c);

         out[i+3] = sqrtf(d);

     }

 #endif /* USE_REALLY_FULL_SEARCH */

 }


 /**
  * Return the largest value found in a group of windows, or 0.0f when no
  * element exceeds it.
  *
  * Consecutive windows are 128 floats apart in @p scaled; within each window
  * the first @p swb_size elements are examined.
  *
  * @param group_len  number of windows to scan
  * @param swb_size   number of elements examined per window
  * @param scaled     base of the data to scan
  * @return the maximum of 0.0f and all examined elements
  */
 static float find_max_val(int group_len, int swb_size, const float *scaled) {
     float peak = 0.0f;
     int win = 0;

     while (win < group_len) {
         const int base = win * 128;   /* offset of the first element of this window */
         int k = 0;

         while (k < swb_size) {
             peak = FFMAX(peak, scaled[base + k]);
             k++;
         }
         win++;
     }

     return peak;
 }


 /**
  * Map a maximum scaled value and a scalefactor index to a codebook number.
  *
  * The value is multiplied by Q^(3/4), where Q is read from ff_aac_pow2sf_tab
  * for the given scalefactor, offset by 0.4054 and truncated to an integer.
  * That integer selects the result:
  *   0 -> 0, 1 -> 1, 2 -> 3, up to 4 -> 5, up to 7 -> 7, up to 12 -> 9,
  *   anything larger -> 11.
  *
  * @param maxval  maximum scaled value of the band
  * @param sf      scalefactor index
  * @return the selected codebook number
  */
 static int find_min_book(float maxval, int sf) {
     const float q     = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
     const float q34   = sqrtf(q * sqrtf(q));
     const int   level = maxval * q34 + 0.4054f;

     if (level == 0)
         return 0;
     if (level == 1)
         return 1;
     if (level == 2)
         return 3;
     if (level <= 4)
         return 5;
     if (level <= 7)
         return 7;
     if (level <= 12)
         return 9;
     return 11;
 }


 /*
  * Functions developed from the template function and optimized for
  * quantizing and encoding a band.
  */

 /**
  * Quantize a band four coefficients at a time and write one codeword per
  * group of four, with every quantized value limited to -1, 0 or 1.
  *
  * The caller-supplied @p scaled pointer is ignored: the function fills
  * s->scoefs through abs_pow34_v() and reads the scaled magnitudes from there.
  * @p out, @p lambda, @p uplim, @p bits and @p ROUNDING are not used in this
  * function; ROUND_STANDARD is always the rounding offset.
  *
  * @param s          encoder context; s->scoefs is overwritten as scratch
  * @param pb         bit writer that receives the codewords
  * @param in         input coefficients; in[i..i+3] is read for i = 0, 4, ...
  *                   while i < @p size
  * @param size       number of coefficients to process
  * @param scale_idx  index used to pick the Q34 factor from ff_aac_pow34sf_tab
  * @param cb         codebook number; the tables of entry cb-1 are used
  */
 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,

                                                      PutBitContext *pb, const float *in, float *out,

                                                      const float *scaled, int size, int scale_idx,

                                                      int cb, const float lambda, const float uplim,

                                                      int *bits, const float ROUNDING)

 {

     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];

     int i;

     int qc1, qc2, qc3, qc4;


     /* Codeword length and codeword tables of the selected codebook. */
     uint8_t  *p_bits  = (uint8_t  *)ff_aac_spectral_bits[cb-1];

     uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];


     /* Recompute the scaled magnitudes into the context scratch buffer. */
     abs_pow34_v(s->scoefs, in, size);

     scaled = s->scoefs;

     for (i = 0; i < size; i += 4) {

         int curidx;

         /* Raw 32-bit view of in[i..i+3]; only read by the asm below. */
         int *in_int = (int *)&in[i];

         int t0, t1, t2, t3, t4, t5, t6, t7;


         /* Float-to-int conversion truncates, so the offset implements rounding. */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;

         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;

         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;

         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;


         __asm__ volatile (

             ".set push                      \n\t"

             ".set noreorder                 \n\t"


             /* qcN = (qcN > 0): reduce each magnitude to 0 or 1 */
             "slt    %[qc1], $zero,  %[qc1]  \n\t"

             "slt    %[qc2], $zero,  %[qc2]  \n\t"

             "slt    %[qc3], $zero,  %[qc3]  \n\t"

             "slt    %[qc4], $zero,  %[qc4]  \n\t"

             /* load the raw words of in[i..i+3] */
             "lw     %[t0],  0(%[in_int])    \n\t"

             "lw     %[t1],  4(%[in_int])    \n\t"

             "lw     %[t2],  8(%[in_int])    \n\t"

             "lw     %[t3],  12(%[in_int])   \n\t"

             /* tN = bit 31 of the word (the sign bit of the input) */
             "srl    %[t0],  %[t0],  31      \n\t"

             "srl    %[t1],  %[t1],  31      \n\t"

             "srl    %[t2],  %[t2],  31      \n\t"

             "srl    %[t3],  %[t3],  31      \n\t"

             /* t4..t7 = -qc1..-qc4 */
             "subu   %[t4],  $zero,  %[qc1]  \n\t"

             "subu   %[t5],  $zero,  %[qc2]  \n\t"

             "subu   %[t6],  $zero,  %[qc3]  \n\t"

             "subu   %[t7],  $zero,  %[qc4]  \n\t"

             /* where the sign bit is set, replace qcN by its negation */
             "movn   %[qc1], %[t4],  %[t0]   \n\t"

             "movn   %[qc2], %[t5],  %[t1]   \n\t"

             "movn   %[qc3], %[t6],  %[t2]   \n\t"

             "movn   %[qc4], %[t7],  %[t3]   \n\t"


             ".set pop                       \n\t"


             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),

               [qc3]"+r"(qc3), [qc4]"+r"(qc4),

               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

               [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)

             : [in_int]"r"(in_int)

             : "memory"

         );


         /*
          * Base-3 index built from the four values in -1..1; the +40
          * (= 27 + 9 + 3 + 1) shifts the result into the range 0..80.
          */
         curidx = qc1;

         curidx *= 3;

         curidx += qc2;

         curidx *= 3;

         curidx += qc3;

         curidx *= 3;

         curidx += qc4;

         curidx += 40;


         put_bits(pb, p_bits[curidx], p_codes[curidx]);

     }

 }


 /**
  * Quantize a band four coefficients at a time and write, per group of four,
  * one codeword for the magnitudes (each limited to at most 2) followed by
  * one sign bit for every non-zero magnitude.
  *
  * The caller-supplied @p scaled pointer is ignored: the function fills
  * s->scoefs through abs_pow34_v() and reads the scaled magnitudes from there.
  * @p out, @p lambda, @p uplim, @p bits and @p ROUNDING are not used in this
  * function; ROUND_STANDARD is always the rounding offset.
  *
  * @param s          encoder context; s->scoefs is overwritten as scratch
  * @param pb         bit writer that receives the codewords
  * @param in         input coefficients; in[i..i+3] is read for i = 0, 4, ...
  *                   while i < @p size
  * @param size       number of coefficients to process
  * @param scale_idx  index used to pick the Q34 factor from ff_aac_pow34sf_tab
  * @param cb         codebook number; the tables of entry cb-1 are used
  */
 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,

                                                      PutBitContext *pb, const float *in, float *out,

                                                      const float *scaled, int size, int scale_idx,

                                                      int cb, const float lambda, const float uplim,

                                                      int *bits, const float ROUNDING)

 {

     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];

     int i;

     int qc1, qc2, qc3, qc4;


     /* Codeword length and codeword tables of the selected codebook. */
     uint8_t  *p_bits  = (uint8_t  *)ff_aac_spectral_bits[cb-1];

     uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];


     /* Recompute the scaled magnitudes into the context scratch buffer. */
     abs_pow34_v(s->scoefs, in, size);

     scaled = s->scoefs;

     for (i = 0; i < size; i += 4) {

         int curidx, sign, count;

         /* Raw 32-bit view of in[i..i+3]; only read by the asm below. */
         int *in_int = (int *)&in[i];

         uint8_t v_bits;

         unsigned int v_codes;

         int t0, t1, t2, t3, t4;


         /* Float-to-int conversion truncates, so the offset implements rounding. */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;

         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;

         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;

         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;


         __asm__ volatile (

             ".set push                              \n\t"

             ".set noreorder                         \n\t"


             /* t4 = 2 (upper limit), sign accumulator = 0 */
             "ori    %[t4],      $zero,      2       \n\t"

             "ori    %[sign],    $zero,      0       \n\t"

             /* tN = (qcN > 2) */
             "slt    %[t0],      %[t4],      %[qc1]  \n\t"

             "slt    %[t1],      %[t4],      %[qc2]  \n\t"

             "slt    %[t2],      %[t4],      %[qc3]  \n\t"

             "slt    %[t3],      %[t4],      %[qc4]  \n\t"

             /* clamp: qcN = 2 where it exceeded the limit */
             "movn   %[qc1],     %[t4],      %[t0]   \n\t"

             "movn   %[qc2],     %[t4],      %[t1]   \n\t"

             "movn   %[qc3],     %[t4],      %[t2]   \n\t"

             "movn   %[qc4],     %[t4],      %[t3]   \n\t"

             /* load the raw words of in[i..i+3] */
             "lw     %[t0],      0(%[in_int])        \n\t"

             "lw     %[t1],      4(%[in_int])        \n\t"

             "lw     %[t2],      8(%[in_int])        \n\t"

             "lw     %[t3],      12(%[in_int])       \n\t"

             /* t0 = 1 if word 0 is negative as an integer (sign bit set) */
             "slt    %[t0],      %[t0],      $zero   \n\t"

             /* if qc1 != 0: sign = t0 */
             "movn   %[sign],    %[t0],      %[qc1]  \n\t"

             /* t1..t3 = sign flags of words 1..3 */
             "slt    %[t1],      %[t1],      $zero   \n\t"

             "slt    %[t2],      %[t2],      $zero   \n\t"

             "slt    %[t3],      %[t3],      $zero   \n\t"

             /* if qc2 != 0: sign = (sign << 1) | t1 */
             "sll    %[t0],      %[sign],    1       \n\t"

             "or     %[t0],      %[t0],      %[t1]   \n\t"

             "movn   %[sign],    %[t0],      %[qc2]  \n\t"

             /* t4 = (qc1 > 0), t1 = (qc2 > 0), count = (qc3 > 0) */
             "slt    %[t4],      $zero,      %[qc1]  \n\t"

             "slt    %[t1],      $zero,      %[qc2]  \n\t"

             "slt    %[count],   $zero,      %[qc3]  \n\t"

             /* if qc3 != 0: sign = (sign << 1) | t2 */
             "sll    %[t0],      %[sign],    1       \n\t"

             "or     %[t0],      %[t0],      %[t2]   \n\t"

             "movn   %[sign],    %[t0],      %[qc3]  \n\t"

             /* t2 = (qc4 > 0); accumulate the non-zero flags into count */
             "slt    %[t2],      $zero,      %[qc4]  \n\t"

             "addu   %[count],   %[count],   %[t4]   \n\t"

             "addu   %[count],   %[count],   %[t1]   \n\t"

             /* if qc4 != 0: sign = (sign << 1) | t3 */
             "sll    %[t0],      %[sign],    1       \n\t"

             "or     %[t0],      %[t0],      %[t3]   \n\t"

             "movn   %[sign],    %[t0],      %[qc4]  \n\t"

             /* count = number of qcN that are greater than zero */
             "addu   %[count],   %[count],   %[t2]   \n\t"


             ".set pop                               \n\t"


             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),

               [qc3]"+r"(qc3), [qc4]"+r"(qc4),

               [sign]"=&r"(sign), [count]"=&r"(count),

               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

               [t4]"=&r"(t4)

             : [in_int]"r"(in_int)

             : "memory"

         );


         /* Base-3 index built from the four clamped magnitudes. */
         curidx = qc1;

         curidx *= 3;

         curidx += qc2;

         curidx *= 3;

         curidx += qc3;

         curidx *= 3;

         curidx += qc4;


         /* Append the low `count` bits of the sign accumulator to the codeword. */
         v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));

         v_bits  = p_bits[curidx] + count;

         put_bits(pb, v_bits, v_codes);

     }

 }


 /**
  * Quantize a band four coefficients at a time as two signed pairs (each
  * value limited to -4..4) and write both pair codewords with a single
  * put_bits() call.
  *
  * The caller-supplied @p scaled pointer is ignored: the function fills
  * s->scoefs through abs_pow34_v() and reads the scaled magnitudes from there.
  * @p out, @p lambda, @p uplim, @p bits and @p ROUNDING are not used in this
  * function; ROUND_STANDARD is always the rounding offset.
  *
  * @param s          encoder context; s->scoefs is overwritten as scratch
  * @param pb         bit writer that receives the codewords
  * @param in         input coefficients; in[i..i+3] is read for i = 0, 4, ...
  *                   while i < @p size
  * @param size       number of coefficients to process
  * @param scale_idx  index used to pick the Q34 factor from ff_aac_pow34sf_tab
  * @param cb         codebook number; the tables of entry cb-1 are used
  */
 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,

                                                      PutBitContext *pb, const float *in, float *out,

                                                      const float *scaled, int size, int scale_idx,

                                                      int cb, const float lambda, const float uplim,

                                                      int *bits, const float ROUNDING)

 {

     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];

     int i;

     int qc1, qc2, qc3, qc4;


     /* Codeword length and codeword tables of the selected codebook. */
     uint8_t  *p_bits  = (uint8_t  *)ff_aac_spectral_bits[cb-1];

     uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];


     /* Recompute the scaled magnitudes into the context scratch buffer. */
     abs_pow34_v(s->scoefs, in, size);

     scaled = s->scoefs;

     for (i = 0; i < size; i += 4) {

         int curidx, curidx2;

         /* Raw 32-bit view of in[i..i+3]; only read by the asm below. */
         int *in_int = (int *)&in[i];

         uint8_t v_bits;

         unsigned int v_codes;

         int t0, t1, t2, t3, t4, t5, t6, t7;


         /* Float-to-int conversion truncates, so the offset implements rounding. */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;

         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;

         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;

         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;


         __asm__ volatile (

             ".set push                      \n\t"

             ".set noreorder                 \n\t"


             /* t4 = 4 (upper limit); tN = (qcN > 4) */
             "ori    %[t4],  $zero,  4       \n\t"

             "slt    %[t0],  %[t4],  %[qc1]  \n\t"

             "slt    %[t1],  %[t4],  %[qc2]  \n\t"

             "slt    %[t2],  %[t4],  %[qc3]  \n\t"

             "slt    %[t3],  %[t4],  %[qc4]  \n\t"

             /* clamp: qcN = 4 where it exceeded the limit */
             "movn   %[qc1], %[t4],  %[t0]   \n\t"

             "movn   %[qc2], %[t4],  %[t1]   \n\t"

             "movn   %[qc3], %[t4],  %[t2]   \n\t"

             "movn   %[qc4], %[t4],  %[t3]   \n\t"

             /* load the raw words of in[i..i+3] */
             "lw     %[t0],  0(%[in_int])    \n\t"

             "lw     %[t1],  4(%[in_int])    \n\t"

             "lw     %[t2],  8(%[in_int])    \n\t"

             "lw     %[t3],  12(%[in_int])   \n\t"

             /* tN = bit 31 of the word (the sign bit of the input) */
             "srl    %[t0],  %[t0],  31      \n\t"

             "srl    %[t1],  %[t1],  31      \n\t"

             "srl    %[t2],  %[t2],  31      \n\t"

             "srl    %[t3],  %[t3],  31      \n\t"

             /* t4..t7 = -qc1..-qc4 */
             "subu   %[t4],  $zero,  %[qc1]  \n\t"

             "subu   %[t5],  $zero,  %[qc2]  \n\t"

             "subu   %[t6],  $zero,  %[qc3]  \n\t"

             "subu   %[t7],  $zero,  %[qc4]  \n\t"

             /* where the sign bit is set, replace qcN by its negation */
             "movn   %[qc1], %[t4],  %[t0]   \n\t"

             "movn   %[qc2], %[t5],  %[t1]   \n\t"

             "movn   %[qc3], %[t6],  %[t2]   \n\t"

             "movn   %[qc4], %[t7],  %[t3]   \n\t"


             ".set pop                       \n\t"


             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),

               [qc3]"+r"(qc3), [qc4]"+r"(qc4),

               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

               [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)

             : [in_int]"r"(in_int)

             : "memory"

         );


         /* Base-9 pair index; +40 (= 9 * 4 + 4) shifts values in -4..4 to 0..80. */
         curidx = 9 * qc1;

         curidx += qc2 + 40;


         curidx2 = 9 * qc3;

         curidx2 += qc4 + 40;


         /* First pair's codeword goes in the high bits, second pair's in the low bits. */
         v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);

         v_bits  = p_bits[curidx] + p_bits[curidx2];

         put_bits(pb, v_bits, v_codes);

     }

 }


 /**
  * Quantize a band four coefficients at a time as two unsigned pairs (each
  * magnitude limited to at most 7) and write, for each pair, its codeword
  * followed by one sign bit per non-zero magnitude.
  *
  * The caller-supplied @p scaled pointer is ignored: the function fills
  * s->scoefs through abs_pow34_v() and reads the scaled magnitudes from there.
  * @p out, @p lambda, @p uplim, @p bits and @p ROUNDING are not used in this
  * function; ROUND_STANDARD is always the rounding offset.
  *
  * @param s          encoder context; s->scoefs is overwritten as scratch
  * @param pb         bit writer that receives the codewords
  * @param in         input coefficients; in[i..i+3] is read for i = 0, 4, ...
  *                   while i < @p size
  * @param size       number of coefficients to process
  * @param scale_idx  index used to pick the Q34 factor from ff_aac_pow34sf_tab
  * @param cb         codebook number; the tables of entry cb-1 are used
  */
 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
                                                       PutBitContext *pb, const float *in, float *out,
                                                       const float *scaled, int size, int scale_idx,
                                                       int cb, const float lambda, const float uplim,
                                                       int *bits, const float ROUNDING)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     int i;
     int qc1, qc2, qc3, qc4;

     /* Codeword length and codeword tables of the selected codebook. */
     uint8_t  *p_bits  = (uint8_t*) ff_aac_spectral_bits[cb-1];
     uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];

     /* Recompute the scaled magnitudes into the context scratch buffer. */
     abs_pow34_v(s->scoefs, in, size);
     scaled = s->scoefs;
     for (i = 0; i < size; i += 4) {
         int curidx, sign1, count1, sign2, count2;
         /* Raw 32-bit view of in[i..i+3]; only read by the asm below. */
         int *in_int = (int *)&in[i];
         uint8_t v_bits;
         unsigned int v_codes;
         int t0, t1, t2, t3, t4;

         /* Float-to-int conversion truncates, so the offset implements rounding. */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

         __asm__ volatile (
             ".set push                              \n\t"
             ".set noreorder                         \n\t"

             /* t4 = 7 (upper limit), both sign accumulators = 0 */
             "ori    %[t4],      $zero,      7       \n\t"
             "ori    %[sign1],   $zero,      0       \n\t"
             "ori    %[sign2],   $zero,      0       \n\t"
             /* tN = (qcN > 7) */
             "slt    %[t0],      %[t4],      %[qc1]  \n\t"
             "slt    %[t1],      %[t4],      %[qc2]  \n\t"
             "slt    %[t2],      %[t4],      %[qc3]  \n\t"
             "slt    %[t3],      %[t4],      %[qc4]  \n\t"
             /* clamp: qcN = 7 where it exceeded the limit */
             "movn   %[qc1],     %[t4],      %[t0]   \n\t"
             "movn   %[qc2],     %[t4],      %[t1]   \n\t"
             "movn   %[qc3],     %[t4],      %[t2]   \n\t"
             "movn   %[qc4],     %[t4],      %[t3]   \n\t"
             /* load the raw words of in[i..i+3] */
             "lw     %[t0],      0(%[in_int])        \n\t"
             "lw     %[t1],      4(%[in_int])        \n\t"
             "lw     %[t2],      8(%[in_int])        \n\t"
             "lw     %[t3],      12(%[in_int])       \n\t"
             /* if qc1 != 0: sign1 = sign flag of word 0 */
             "slt    %[t0],      %[t0],      $zero   \n\t"
             "movn   %[sign1],   %[t0],      %[qc1]  \n\t"
             /* if qc3 != 0: sign2 = sign flag of word 2 */
             "slt    %[t2],      %[t2],      $zero   \n\t"
             "movn   %[sign2],   %[t2],      %[qc3]  \n\t"
             /* if qc2 != 0: sign1 = (sign1 << 1) | sign flag of word 1 */
             "slt    %[t1],      %[t1],      $zero   \n\t"
             "sll    %[t0],      %[sign1],   1       \n\t"
             "or     %[t0],      %[t0],      %[t1]   \n\t"
             "movn   %[sign1],   %[t0],      %[qc2]  \n\t"
             /* if qc4 != 0: sign2 = (sign2 << 1) | sign flag of word 3 */
             "slt    %[t3],      %[t3],      $zero   \n\t"
             "sll    %[t0],      %[sign2],   1       \n\t"
             "or     %[t0],      %[t0],      %[t3]   \n\t"
             "movn   %[sign2],   %[t0],      %[qc4]  \n\t"
             /* countK = number of values in pair K that are greater than zero */
             "slt    %[count1],  $zero,      %[qc1]  \n\t"
             "slt    %[t1],      $zero,      %[qc2]  \n\t"
             "slt    %[count2],  $zero,      %[qc3]  \n\t"
             "slt    %[t2],      $zero,      %[qc4]  \n\t"
             "addu   %[count1],  %[count1],  %[t1]   \n\t"
             "addu   %[count2],  %[count2],  %[t2]   \n\t"

             ".set pop                               \n\t"

             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
               [qc3]"+r"(qc3), [qc4]"+r"(qc4),
               [sign1]"=&r"(sign1), [count1]"=&r"(count1),
               [sign2]"=&r"(sign2), [count2]"=&r"(count2),
               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
               [t4]"=&r"(t4)
             : [in_int]"r"(in_int)
             /*
              * Every scratch value is a compiler-allocated operand above and
              * the asm names no hard register, so only "memory" is listed
              * (matching the sibling encoders in this file).
              */
             : "memory"
         );

         /* First pair: index 8 * qc1 + qc2, codeword followed by its sign bits. */
         curidx  = 8 * qc1;
         curidx += qc2;

         v_codes = (p_codes[curidx] << count1) | sign1;
         v_bits  = p_bits[curidx] + count1;
         put_bits(pb, v_bits, v_codes);

         /* Second pair: index 8 * qc3 + qc4, codeword followed by its sign bits. */
         curidx  = 8 * qc3;
         curidx += qc4;

         v_codes = (p_codes[curidx] << count2) | sign2;
         v_bits  = p_bits[curidx] + count2;
         put_bits(pb, v_bits, v_codes);
     }
 }


 /**
  * Quantize a band four coefficients at a time as two unsigned pairs (each
  * magnitude limited to at most 12) and write, for each pair, its codeword
  * followed by one sign bit per non-zero magnitude.
  *
  * The caller-supplied @p scaled pointer is ignored: the function fills
  * s->scoefs through abs_pow34_v() and reads the scaled magnitudes from there.
  * @p out, @p lambda, @p uplim, @p bits and @p ROUNDING are not used in this
  * function; ROUND_STANDARD is always the rounding offset.
  *
  * @param s          encoder context; s->scoefs is overwritten as scratch
  * @param pb         bit writer that receives the codewords
  * @param in         input coefficients; in[i..i+3] is read for i = 0, 4, ...
  *                   while i < @p size
  * @param size       number of coefficients to process
  * @param scale_idx  index used to pick the Q34 factor from ff_aac_pow34sf_tab
  * @param cb         codebook number; the tables of entry cb-1 are used
  */
 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,

                                                        PutBitContext *pb, const float *in, float *out,

                                                        const float *scaled, int size, int scale_idx,

                                                        int cb, const float lambda, const float uplim,

                                                        int *bits, const float ROUNDING)

 {

     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];

     int i;

     int qc1, qc2, qc3, qc4;


     /* Codeword length and codeword tables of the selected codebook. */
     uint8_t  *p_bits  = (uint8_t*) ff_aac_spectral_bits[cb-1];

     uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];


     /* Recompute the scaled magnitudes into the context scratch buffer. */
     abs_pow34_v(s->scoefs, in, size);

     scaled = s->scoefs;

     for (i = 0; i < size; i += 4) {

         int curidx, sign1, count1, sign2, count2;

         /* Raw 32-bit view of in[i..i+3]; only read by the asm below. */
         int *in_int = (int *)&in[i];

         uint8_t v_bits;

         unsigned int v_codes;

         int t0, t1, t2, t3, t4;


         /* Float-to-int conversion truncates, so the offset implements rounding. */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;

         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;

         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;

         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;


         __asm__ volatile (

             ".set push                              \n\t"

             ".set noreorder                         \n\t"


             /* t4 = 12 (upper limit), both sign accumulators = 0 */
             "ori    %[t4],      $zero,      12      \n\t"

             "ori    %[sign1],   $zero,      0       \n\t"

             "ori    %[sign2],   $zero,      0       \n\t"

             /* tN = (qcN > 12) */
             "slt    %[t0],      %[t4],      %[qc1]  \n\t"

             "slt    %[t1],      %[t4],      %[qc2]  \n\t"

             "slt    %[t2],      %[t4],      %[qc3]  \n\t"

             "slt    %[t3],      %[t4],      %[qc4]  \n\t"

             /* clamp: qcN = 12 where it exceeded the limit */
             "movn   %[qc1],     %[t4],      %[t0]   \n\t"

             "movn   %[qc2],     %[t4],      %[t1]   \n\t"

             "movn   %[qc3],     %[t4],      %[t2]   \n\t"

             "movn   %[qc4],     %[t4],      %[t3]   \n\t"

             /* load the raw words of in[i..i+3] */
             "lw     %[t0],      0(%[in_int])        \n\t"

             "lw     %[t1],      4(%[in_int])        \n\t"

             "lw     %[t2],      8(%[in_int])        \n\t"

             "lw     %[t3],      12(%[in_int])       \n\t"

             /* if qc1 != 0: sign1 = sign flag of word 0 */
             "slt    %[t0],      %[t0],      $zero   \n\t"

             "movn   %[sign1],   %[t0],      %[qc1]  \n\t"

             /* if qc3 != 0: sign2 = sign flag of word 2 */
             "slt    %[t2],      %[t2],      $zero   \n\t"

             "movn   %[sign2],   %[t2],      %[qc3]  \n\t"

             /* if qc2 != 0: sign1 = (sign1 << 1) | sign flag of word 1 */
             "slt    %[t1],      %[t1],      $zero   \n\t"

             "sll    %[t0],      %[sign1],   1       \n\t"

             "or     %[t0],      %[t0],      %[t1]   \n\t"

             "movn   %[sign1],   %[t0],      %[qc2]  \n\t"

             /* if qc4 != 0: sign2 = (sign2 << 1) | sign flag of word 3 */
             "slt    %[t3],      %[t3],      $zero   \n\t"

             "sll    %[t0],      %[sign2],   1       \n\t"

             "or     %[t0],      %[t0],      %[t3]   \n\t"

             "movn   %[sign2],   %[t0],      %[qc4]  \n\t"

             /* countK = number of values in pair K that are greater than zero */
             "slt    %[count1],  $zero,      %[qc1]  \n\t"

             "slt    %[t1],      $zero,      %[qc2]  \n\t"

             "slt    %[count2],  $zero,      %[qc3]  \n\t"

             "slt    %[t2],      $zero,      %[qc4]  \n\t"

             "addu   %[count1],  %[count1],  %[t1]   \n\t"

             "addu   %[count2],  %[count2],  %[t2]   \n\t"


             ".set pop                               \n\t"


             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),

               [qc3]"+r"(qc3), [qc4]"+r"(qc4),

               [sign1]"=&r"(sign1), [count1]"=&r"(count1),

               [sign2]"=&r"(sign2), [count2]"=&r"(count2),

               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

               [t4]"=&r"(t4)

             : [in_int]"r"(in_int)

             : "memory"

         );


         /* First pair: index 13 * qc1 + qc2, codeword followed by its sign bits. */
         curidx  = 13 * qc1;

         curidx += qc2;


         v_codes = (p_codes[curidx] << count1) | sign1;

         v_bits  = p_bits[curidx] + count1;

         put_bits(pb, v_bits, v_codes);


         /* Second pair: index 13 * qc3 + qc4, codeword followed by its sign bits. */
         curidx  = 13 * qc3;

         curidx += qc4;


         v_codes = (p_codes[curidx] << count2) | sign2;

         v_bits  = p_bits[curidx] + count2;

         put_bits(pb, v_bits, v_codes);

     }

 }


 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,

                                                    PutBitContext *pb, const float *in, float *out,

                                                    const float *scaled, int size, int scale_idx,

                                                    int cb, const float lambda, const float uplim,

                                                    int *bits, const float ROUNDING)

 {

     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];

     int i;

     int qc1, qc2, qc3, qc4;


     uint8_t  *p_bits    = (uint8_t* )ff_aac_spectral_bits[cb-1];

     uint16_t *p_codes   = (uint16_t*)ff_aac_spectral_codes[cb-1];

     float    *p_vectors = (float*   )ff_aac_codebook_vectors[cb-1];


     abs_pow34_v(s->scoefs, in, size);

     scaled = s->scoefs;


     if (cb < 11) {

         for (i = 0; i < size; i += 4) {

             int curidx, curidx2, sign1, count1, sign2, count2;

             int *in_int = (int *)&in[i];

             uint8_t v_bits;

             unsigned int v_codes;

             int t0, t1, t2, t3, t4;


             qc1 = scaled[i  ] * Q34 + ROUNDING;

             qc2 = scaled[i+1] * Q34 + ROUNDING;

             qc3 = scaled[i+2] * Q34 + ROUNDING;

             qc4 = scaled[i+3] * Q34 + ROUNDING;


             __asm__ volatile (

                 ".set push                                  \n\t"

                 ".set noreorder                             \n\t"


                 "ori        %[t4],      $zero,      16      \n\t"

                 "ori        %[sign1],   $zero,      0       \n\t"

                 "ori        %[sign2],   $zero,      0       \n\t"

                 "slt        %[t0],      %[t4],      %[qc1]  \n\t"

                 "slt        %[t1],      %[t4],      %[qc2]  \n\t"

                 "slt        %[t2],      %[t4],      %[qc3]  \n\t"

                 "slt        %[t3],      %[t4],      %[qc4]  \n\t"

                 "movn       %[qc1],     %[t4],      %[t0]   \n\t"

                 "movn       %[qc2],     %[t4],      %[t1]   \n\t"

                 "movn       %[qc3],     %[t4],      %[t2]   \n\t"

                 "movn       %[qc4],     %[t4],      %[t3]   \n\t"

                 "lw         %[t0],      0(%[in_int])        \n\t"

                 "lw         %[t1],      4(%[in_int])        \n\t"

                 "lw         %[t2],      8(%[in_int])        \n\t"

                 "lw         %[t3],      12(%[in_int])       \n\t"

                 "slt        %[t0],      %[t0],      $zero   \n\t"

                 "movn       %[sign1],   %[t0],      %[qc1]  \n\t"

                 "slt        %[t2],      %[t2],      $zero   \n\t"

                 "movn       %[sign2],   %[t2],      %[qc3]  \n\t"

                 "slt        %[t1],      %[t1],      $zero   \n\t"

                 "sll        %[t0],      %[sign1],   1       \n\t"

                 "or         %[t0],      %[t0],      %[t1]   \n\t"

                 "movn       %[sign1],   %[t0],      %[qc2]  \n\t"

                 "slt        %[t3],      %[t3],      $zero   \n\t"

                 "sll        %[t0],      %[sign2],   1       \n\t"

                 "or         %[t0],      %[t0],      %[t3]   \n\t"

                 "movn       %[sign2],   %[t0],      %[qc4]  \n\t"

                 "slt        %[count1],  $zero,      %[qc1]  \n\t"

                 "slt        %[t1],      $zero,      %[qc2]  \n\t"

                 "slt        %[count2],  $zero,      %[qc3]  \n\t"

                 "slt        %[t2],      $zero,      %[qc4]  \n\t"

                 "addu       %[count1],  %[count1],  %[t1]   \n\t"

                 "addu       %[count2],  %[count2],  %[t2]   \n\t"


                 ".set pop                                   \n\t"


                 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),

                   [qc3]"+r"(qc3), [qc4]"+r"(qc4),

                   [sign1]"=&r"(sign1), [count1]"=&r"(count1),

                   [sign2]"=&r"(sign2), [count2]"=&r"(count2),

                   [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

                   [t4]"=&r"(t4)

                 : [in_int]"r"(in_int)

                 : "memory"

             );


             curidx = 17 * qc1;

             curidx += qc2;

             curidx2 = 17 * qc3;

             curidx2 += qc4;


             v_codes = (p_codes[curidx] << count1) | sign1;

             v_bits  = p_bits[curidx] + count1;

             put_bits(pb, v_bits, v_codes);


             v_codes = (p_codes[curidx2] << count2) | sign2;

             v_bits  = p_bits[curidx2] + count2;

             put_bits(pb, v_bits, v_codes);

         }

     } else {

         for (i = 0; i < size; i += 4) {

             int curidx, curidx2, sign1, count1, sign2, count2;

             int *in_int = (int *)&in[i];

             uint8_t v_bits;

             unsigned int v_codes;

             int c1, c2, c3, c4;

             int t0, t1, t2, t3, t4;


             qc1 = scaled[i  ] * Q34 + ROUNDING;

             qc2 = scaled[i+1] * Q34 + ROUNDING;

             qc3 = scaled[i+2] * Q34 + ROUNDING;

             qc4 = scaled[i+3] * Q34 + ROUNDING;


             __asm__ volatile (

                 ".set push                                  \n\t"

                 ".set noreorder                             \n\t"


                 "ori        %[t4],      $zero,      16      \n\t"

                 "ori        %[sign1],   $zero,      0       \n\t"

                 "ori        %[sign2],   $zero,      0       \n\t"

                 "shll_s.w   %[c1],      %[qc1],     18      \n\t"

                 "shll_s.w   %[c2],      %[qc2],     18      \n\t"

                 "shll_s.w   %[c3],      %[qc3],     18      \n\t"

                 "shll_s.w   %[c4],      %[qc4],     18      \n\t"

                 "srl        %[c1],      %[c1],      18      \n\t"

                 "srl        %[c2],      %[c2],      18      \n\t"

                 "srl        %[c3],      %[c3],      18      \n\t"

                 "srl        %[c4],      %[c4],      18      \n\t"

                 "slt        %[t0],      %[t4],      %[qc1]  \n\t"

                 "slt        %[t1],      %[t4],      %[qc2]  \n\t"

                 "slt        %[t2],      %[t4],      %[qc3]  \n\t"

                 "slt        %[t3],      %[t4],      %[qc4]  \n\t"

                 "movn       %[qc1],     %[t4],      %[t0]   \n\t"

                 "movn       %[qc2],     %[t4],      %[t1]   \n\t"

                 "movn       %[qc3],     %[t4],      %[t2]   \n\t"

                 "movn       %[qc4],     %[t4],      %[t3]   \n\t"

                 "lw         %[t0],      0(%[in_int])        \n\t"

                 "lw         %[t1],      4(%[in_int])        \n\t"

                 "lw         %[t2],      8(%[in_int])        \n\t"

                 "lw         %[t3],      12(%[in_int])       \n\t"

                 "slt        %[t0],      %[t0],      $zero   \n\t"

                 "movn       %[sign1],   %[t0],      %[qc1]  \n\t"

                 "slt        %[t2],      %[t2],      $zero   \n\t"

                 "movn       %[sign2],   %[t2],      %[qc3]  \n\t"

                 "slt        %[t1],      %[t1],      $zero   \n\t"

                 "sll        %[t0],      %[sign1],   1       \n\t"

                 "or         %[t0],      %[t0],      %[t1]   \n\t"

                 "movn       %[sign1],   %[t0],      %[qc2]  \n\t"

                 "slt        %[t3],      %[t3],      $zero   \n\t"

                 "sll        %[t0],      %[sign2],   1       \n\t"

                 "or         %[t0],      %[t0],      %[t3]   \n\t"

                 "movn       %[sign2],   %[t0],      %[qc4]  \n\t"

                 "slt        %[count1],  $zero,      %[qc1]  \n\t"

                 "slt        %[t1],      $zero,      %[qc2]  \n\t"

                 "slt        %[count2],  $zero,      %[qc3]  \n\t"

                 "slt        %[t2],      $zero,      %[qc4]  \n\t"

                 "addu       %[count1],  %[count1],  %[t1]   \n\t"

                 "addu       %[count2],  %[count2],  %[t2]   \n\t"


                 ".set pop                                   \n\t"


                 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),

                   [qc3]"+r"(qc3), [qc4]"+r"(qc4),

                   [sign1]"=&r"(sign1), [count1]"=&r"(count1),

                   [sign2]"=&r"(sign2), [count2]"=&r"(count2),

                   [c1]"=&r"(c1), [c2]"=&r"(c2),

                   [c3]"=&r"(c3), [c4]"=&r"(c4),

                   [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

                   [t4]"=&r"(t4)

                 : [in_int]"r"(in_int)

                 : "memory"

             );


             curidx = 17 * qc1;

             curidx += qc2;


             curidx2 = 17 * qc3;

             curidx2 += qc4;


             v_codes = (p_codes[curidx] << count1) | sign1;

             v_bits  = p_bits[curidx] + count1;

             put_bits(pb, v_bits, v_codes);


             if (p_vectors[curidx*2  ] == 64.0f) {

                 int len = av_log2(c1);

                 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));

                 put_bits(pb, len * 2 - 3, v_codes);

             }

             if (p_vectors[curidx*2+1] == 64.0f) {

                 int len = av_log2(c2);

                 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));

                 put_bits(pb, len*2-3, v_codes);

             }


             v_codes = (p_codes[curidx2] << count2) | sign2;

             v_bits  = p_bits[curidx2] + count2;

             put_bits(pb, v_bits, v_codes);


             if (p_vectors[curidx2*2  ] == 64.0f) {

                 int len = av_log2(c3);

                 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));

                 put_bits(pb, len* 2 - 3, v_codes);

             }

             if (p_vectors[curidx2*2+1] == 64.0f) {

                 int len = av_log2(c4);

                 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));

                 put_bits(pb, len * 2 - 3, v_codes);

             }

         }

     }

 }


 /**
  * Quantize-and-encode handler that must never be reached.
  *
  * The body is an unconditional av_assert0(0); none of the arguments are read.
  * It fills index 12 of quantize_and_encode_band_cost_arr, an index the table
  * marks as a codebook that does not exist.
  */
 static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
                                                          PutBitContext *pb, const float *in, float *out,
                                                          const float *scaled, int size, int scale_idx,
                                                          int cb, const float lambda, const float uplim,
                                                          int *bits, const float ROUNDING) {
     av_assert0(0);
 }


 /**
  * Quantize-and-encode handler that writes nothing to the bitstream.
  *
  * Stores 0 in *bits when bits is non-NULL. When out is non-NULL, out[] is
  * filled with 0.0f four elements at a time, starting at index 0 and
  * continuing while the first index of the group is below size; a size that
  * is not a multiple of 4 is therefore rounded up to the next multiple.
  *
  * s, pb, in, scaled, scale_idx, cb, lambda, uplim and ROUNDING are not used.
  */
 static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
                                                     PutBitContext *pb, const float *in, float *out,
                                                     const float *scaled, int size, int scale_idx,
                                                     int cb, const float lambda, const float uplim,
                                                     int *bits, const float ROUNDING)
 {
     int pos = 0;

     if (bits)
         *bits = 0;

     /* Nothing to clear when the caller did not ask for reconstructed output. */
     if (!out)
         return;

     while (pos < size) {
         float *quad = out + pos;

         quad[0] = 0.0f;
         quad[1] = 0.0f;
         quad[2] = 0.0f;
         quad[3] = 0.0f;
         pos += 4;
     }
 }


 /*
  * Quantize-and-encode routines, one per table index.
  * The quantize_and_encode_band_cost() macro picks an entry with [cb].
  */
 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
                                                          PutBitContext *pb, const float *in, float *out,
                                                          const float *scaled, int size, int scale_idx,
                                                          int cb, const float lambda, const float uplim,
                                                          int *bits, const float ROUNDING) = {
     [ 0] = quantize_and_encode_band_cost_ZERO_mips,
     [ 1] = quantize_and_encode_band_cost_SQUAD_mips,
     [ 2] = quantize_and_encode_band_cost_SQUAD_mips,
     [ 3] = quantize_and_encode_band_cost_UQUAD_mips,
     [ 4] = quantize_and_encode_band_cost_UQUAD_mips,
     [ 5] = quantize_and_encode_band_cost_SPAIR_mips,
     [ 6] = quantize_and_encode_band_cost_SPAIR_mips,
     [ 7] = quantize_and_encode_band_cost_UPAIR7_mips,
     [ 8] = quantize_and_encode_band_cost_UPAIR7_mips,
     [ 9] = quantize_and_encode_band_cost_UPAIR12_mips,
     [10] = quantize_and_encode_band_cost_UPAIR12_mips,
     [11] = quantize_and_encode_band_cost_ESC_mips,
     [12] = quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
     [13] = quantize_and_encode_band_cost_ZERO_mips,
     [14] = quantize_and_encode_band_cost_ZERO_mips,
     [15] = quantize_and_encode_band_cost_ZERO_mips,
 };


 /*
  * Call the routine stored at index cb of quantize_and_encode_band_cost_arr,
  * forwarding every argument unchanged. cb appears twice in the expansion
  * (as the table index and as a call argument), and the index is not
  * range-checked here.
  */
 #define quantize_and_encode_band_cost(                                       \
                                 s, pb, in, out, scaled, size, scale_idx, cb, \
                                 lambda, uplim, bits, ROUNDING)               \
     quantize_and_encode_band_cost_arr[cb](                                   \
                                 s, pb, in, out, scaled, size, scale_idx, cb, \
                                 lambda, uplim, bits, ROUNDING)


 /**
  * Quantize and encode one band through the per-codebook dispatch table.
  *
  * Forwards to quantize_and_encode_band_cost() with scaled = NULL,
  * uplim = INFINITY and bits = NULL. The rounding constant is ROUND_TO_ZERO
  * when rtz is non-zero and ROUND_STANDARD otherwise.
  *
  * @param s         encoder context, passed through
  * @param pb        bit writer, passed through
  * @param in        input coefficients, passed through
  * @param out       output buffer, passed through
  * @param size      number of coefficients, passed through
  * @param scale_idx scalefactor index, passed through
  * @param cb        table index selecting the routine; also passed through
  * @param lambda    passed through
  * @param rtz       non-zero selects ROUND_TO_ZERO
  */
 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
                                           const float *in, float *out, int size, int scale_idx,
                                           int cb, const float lambda, int rtz)
 {
     float rounding = ROUND_STANDARD;

     if (rtz)
         rounding = ROUND_TO_ZERO;

     quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb,
                                   lambda, INFINITY, NULL, rounding);
 }


 /**

  * Functions developed from template function and optimized for getting the number of bits

  */

 /**
  * Bit-count handler that always reports zero.
  *
  * No argument is read and *bits is not written.
  *
  * @return 0
  */
 static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
                                         PutBitContext *pb,
                                         const float *in, const float *scaled,
                                         int size, int scale_idx, int cb,
                                         const float lambda, const float uplim,
                                         int *bits)
 {
     return 0.0f;
 }


 /**
  * Bit-count handler that must never be reached.
  *
  * Triggers av_assert0(0) unconditionally; no argument is read. The return
  * statement only exists to give the function a value after the assert.
  *
  * @return 0
  */
 static float get_band_numbits_NONE_mips(struct AACEncContext *s,
                                         PutBitContext *pb,
                                         const float *in, const float *scaled,
                                         int size, int scale_idx, int cb,
                                         const float lambda, const float uplim,
                                         int *bits)
 {
     av_assert0(0);
     return 0.0f;
 }


 /**
  * Accumulate ff_aac_spectral_bits[cb-1] entries for a band, one lookup per
  * group of four coefficients, with signed per-coefficient values.
  *
  * Per group: each scaled[] * Q34 + ROUND_STANDARD is truncated to int,
  * reduced to 0 or 1 (1 when positive), negated when the sign bit of the
  * matching in[] word is set, and the four results are combined in base 3
  * with an offset of 40 to form the table index.
  *
  * @param s         unused
  * @param pb        unused
  * @param in        input coefficients; only their sign bits are read
  * @param scaled    values multiplied by Q34 before truncation
  * @param size      number of coefficients; the loop advances by 4
  * @param scale_idx selects Q34 from ff_aac_pow34sf_tab
  * @param cb        codebook number; table cb-1 is used
  * @param lambda    unused
  * @param uplim     unused
  * @param bits      unused; never written
  * @return sum of the looked-up table entries, converted to float
  */
 static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
                                          PutBitContext *pb, const float *in,
                                          const float *scaled, int size, int scale_idx,
                                          int cb, const float lambda, const float uplim,
                                          int *bits)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     int i;
     int qc1, qc2, qc3, qc4;
     int curbits = 0;

     uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];

     for (i = 0; i < size; i += 4) {
         int curidx;
         /* Integer view of in[i..i+3]; the asm loads these words to test bit 31. */
         int *in_int = (int *)&in[i];
         int t0, t1, t2, t3, t4, t5, t6, t7;

         /* Float-to-int conversion truncates the rounded product. */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

         __asm__ volatile (
             ".set push                      \n\t"
             ".set noreorder                 \n\t"

             /* qcN = (0 < qcN): any positive value becomes 1, otherwise 0 */
             "slt    %[qc1], $zero,  %[qc1]  \n\t"
             "slt    %[qc2], $zero,  %[qc2]  \n\t"
             "slt    %[qc3], $zero,  %[qc3]  \n\t"
             "slt    %[qc4], $zero,  %[qc4]  \n\t"
             /* t0..t3 = raw words of in[i..i+3] */
             "lw     %[t0],  0(%[in_int])    \n\t"
             "lw     %[t1],  4(%[in_int])    \n\t"
             "lw     %[t2],  8(%[in_int])    \n\t"
             "lw     %[t3],  12(%[in_int])   \n\t"
             /* keep only bit 31 (the sign bit) of each word */
             "srl    %[t0],  %[t0],  31      \n\t"
             "srl    %[t1],  %[t1],  31      \n\t"
             "srl    %[t2],  %[t2],  31      \n\t"
             "srl    %[t3],  %[t3],  31      \n\t"
             /* t4..t7 = -qcN */
             "subu   %[t4],  $zero,  %[qc1]  \n\t"
             "subu   %[t5],  $zero,  %[qc2]  \n\t"
             "subu   %[t6],  $zero,  %[qc3]  \n\t"
             "subu   %[t7],  $zero,  %[qc4]  \n\t"
             /* replace qcN by -qcN when the sign bit was set */
             "movn   %[qc1], %[t4],  %[t0]   \n\t"
             "movn   %[qc2], %[t5],  %[t1]   \n\t"
             "movn   %[qc3], %[t6],  %[t2]   \n\t"
             "movn   %[qc4], %[t7],  %[t3]   \n\t"

             ".set pop                       \n\t"

             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
               [qc3]"+r"(qc3), [qc4]"+r"(qc4),
               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
               [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
             : [in_int]"r"(in_int)
             : "memory"
         );

         /* curidx = ((qc1 * 3 + qc2) * 3 + qc3) * 3 + qc4 + 40 */
         curidx = qc1;
         curidx *= 3;
         curidx += qc2;
         curidx *= 3;
         curidx += qc3;
         curidx *= 3;
         curidx += qc4;
         curidx += 40;

         curbits += p_bits[curidx];
     }
     return curbits;
 }


 /**
  * Accumulate table entries for a band, one lookup per group of four
  * coefficients, using unsigned per-coefficient values.
  *
  * Per group: each scaled[] * Q34 + ROUND_STANDARD is truncated to int and
  * limited to at most 2; the four results are combined in base 3 to index
  * both ff_aac_spectral_bits[cb-1] and uquad_sign_bits, whose entries are
  * added to the running total.
  *
  * @param s         unused
  * @param pb        unused
  * @param in        unused
  * @param scaled    values multiplied by Q34 before truncation
  * @param size      number of coefficients; the loop advances by 4
  * @param scale_idx selects Q34 from ff_aac_pow34sf_tab
  * @param cb        codebook number; table cb-1 is used
  * @param lambda    unused
  * @param uplim     unused
  * @param bits      unused; never written
  * @return sum of the looked-up entries, converted to float
  */
 static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
                                          PutBitContext *pb, const float *in,
                                          const float *scaled, int size, int scale_idx,
                                          int cb, const float lambda, const float uplim,
                                          int *bits)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     int i;
     int curbits = 0;
     int qc1, qc2, qc3, qc4;

     uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];

     for (i = 0; i < size; i += 4) {
         int curidx;
         int t0, t1, t2, t3, t4;

         /* Float-to-int conversion truncates the rounded product. */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

         __asm__ volatile (
             ".set push                      \n\t"
             ".set noreorder                 \n\t"

             /* t4 = 2, the upper limit */
             "ori    %[t4],  $zero,  2       \n\t"
             /* tN = (2 < qcN) */
             "slt    %[t0],  %[t4],  %[qc1]  \n\t"
             "slt    %[t1],  %[t4],  %[qc2]  \n\t"
             "slt    %[t2],  %[t4],  %[qc3]  \n\t"
             "slt    %[t3],  %[t4],  %[qc4]  \n\t"
             /* qcN = 2 when it exceeded the limit */
             "movn   %[qc1], %[t4],  %[t0]   \n\t"
             "movn   %[qc2], %[t4],  %[t1]   \n\t"
             "movn   %[qc3], %[t4],  %[t2]   \n\t"
             "movn   %[qc4], %[t4],  %[t3]   \n\t"

             ".set pop                       \n\t"

             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
               [qc3]"+r"(qc3), [qc4]"+r"(qc4),
               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
               [t4]"=&r"(t4)
         );

         /* curidx = ((qc1 * 3 + qc2) * 3 + qc3) * 3 + qc4 */
         curidx = qc1;
         curidx *= 3;
         curidx += qc2;
         curidx *= 3;
         curidx += qc3;
         curidx *= 3;
         curidx += qc4;

         curbits += p_bits[curidx];
         curbits += uquad_sign_bits[curidx];
     }
     return curbits;
 }


 /**
  * Accumulate ff_aac_spectral_bits[cb-1] entries for a band, two lookups per
  * group of four coefficients, with signed per-coefficient values.
  *
  * Per group: each scaled[] * Q34 + ROUND_STANDARD is truncated to int,
  * limited to at most 4 and negated when the sign bit of the matching in[]
  * word is set. Coefficients 1-2 and 3-4 each form an index
  * 9 * first + second + 40.
  *
  * @param s         unused
  * @param pb        unused
  * @param in        input coefficients; only their sign bits are read
  * @param scaled    values multiplied by Q34 before truncation
  * @param size      number of coefficients; the loop advances by 4
  * @param scale_idx selects Q34 from ff_aac_pow34sf_tab
  * @param cb        codebook number; table cb-1 is used
  * @param lambda    unused
  * @param uplim     unused
  * @param bits      unused; never written
  * @return sum of the looked-up table entries, converted to float
  */
 static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
                                          PutBitContext *pb, const float *in,
                                          const float *scaled, int size, int scale_idx,
                                          int cb, const float lambda, const float uplim,
                                          int *bits)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     int i;
     int qc1, qc2, qc3, qc4;
     int curbits = 0;

     uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];

     for (i = 0; i < size; i += 4) {
         int curidx, curidx2;
         /* Integer view of in[i..i+3]; the asm loads these words to test bit 31. */
         int *in_int = (int *)&in[i];
         int t0, t1, t2, t3, t4, t5, t6, t7;

         /* Float-to-int conversion truncates the rounded product. */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

         __asm__ volatile (
             ".set push                      \n\t"
             ".set noreorder                 \n\t"

             /* t4 = 4, the upper limit */
             "ori    %[t4],  $zero,  4       \n\t"
             /* tN = (4 < qcN) */
             "slt    %[t0],  %[t4],  %[qc1]  \n\t"
             "slt    %[t1],  %[t4],  %[qc2]  \n\t"
             "slt    %[t2],  %[t4],  %[qc3]  \n\t"
             "slt    %[t3],  %[t4],  %[qc4]  \n\t"
             /* qcN = 4 when it exceeded the limit */
             "movn   %[qc1], %[t4],  %[t0]   \n\t"
             "movn   %[qc2], %[t4],  %[t1]   \n\t"
             "movn   %[qc3], %[t4],  %[t2]   \n\t"
             "movn   %[qc4], %[t4],  %[t3]   \n\t"
             /* t0..t3 = raw words of in[i..i+3] */
             "lw     %[t0],  0(%[in_int])    \n\t"
             "lw     %[t1],  4(%[in_int])    \n\t"
             "lw     %[t2],  8(%[in_int])    \n\t"
             "lw     %[t3],  12(%[in_int])   \n\t"
             /* keep only bit 31 (the sign bit) of each word */
             "srl    %[t0],  %[t0],  31      \n\t"
             "srl    %[t1],  %[t1],  31      \n\t"
             "srl    %[t2],  %[t2],  31      \n\t"
             "srl    %[t3],  %[t3],  31      \n\t"
             /* t4..t7 = -qcN (t4 no longer holds the limit from here on) */
             "subu   %[t4],  $zero,  %[qc1]  \n\t"
             "subu   %[t5],  $zero,  %[qc2]  \n\t"
             "subu   %[t6],  $zero,  %[qc3]  \n\t"
             "subu   %[t7],  $zero,  %[qc4]  \n\t"
             /* replace qcN by -qcN when the sign bit was set */
             "movn   %[qc1], %[t4],  %[t0]   \n\t"
             "movn   %[qc2], %[t5],  %[t1]   \n\t"
             "movn   %[qc3], %[t6],  %[t2]   \n\t"
             "movn   %[qc4], %[t7],  %[t3]   \n\t"

             ".set pop                       \n\t"

             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
               [qc3]"+r"(qc3), [qc4]"+r"(qc4),
               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
               [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
             : [in_int]"r"(in_int)
             : "memory"
         );

         /* One index per pair: 9 * first + second + 40 */
         curidx  = 9 * qc1;
         curidx += qc2 + 40;

         curidx2  = 9 * qc3;
         curidx2 += qc4 + 40;

         curbits += p_bits[curidx] + p_bits[curidx2];
     }
     return curbits;
 }


 /**
  * Accumulate table entries for a band, two lookups per group of four
  * coefficients, using unsigned per-coefficient values limited to 7.
  *
  * Per group: each scaled[] * Q34 + ROUND_STANDARD is truncated to int and
  * limited to at most 7. Coefficients 1-2 and 3-4 each form an index
  * 8 * first + second into both ff_aac_spectral_bits[cb-1] and
  * upair7_sign_bits; all four entries are added to the running total.
  *
  * @param s         unused
  * @param pb        unused
  * @param in        unused
  * @param scaled    values multiplied by Q34 before truncation
  * @param size      number of coefficients; the loop advances by 4
  * @param scale_idx selects Q34 from ff_aac_pow34sf_tab
  * @param cb        codebook number; table cb-1 is used
  * @param lambda    unused
  * @param uplim     unused
  * @param bits      unused; never written
  * @return sum of the looked-up entries, converted to float
  */
 static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
                                           PutBitContext *pb, const float *in,
                                           const float *scaled, int size, int scale_idx,
                                           int cb, const float lambda, const float uplim,
                                           int *bits)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     int i;
     int qc1, qc2, qc3, qc4;
     int curbits = 0;

     uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];

     for (i = 0; i < size; i += 4) {
         int curidx, curidx2;
         int t0, t1, t2, t3, t4;

         /* Float-to-int conversion truncates the rounded product. */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

         __asm__ volatile (
             ".set push                      \n\t"
             ".set noreorder                 \n\t"

             /* t4 = 7, the upper limit */
             "ori    %[t4],  $zero,  7       \n\t"
             /* tN = (7 < qcN) */
             "slt    %[t0],  %[t4],  %[qc1]  \n\t"
             "slt    %[t1],  %[t4],  %[qc2]  \n\t"
             "slt    %[t2],  %[t4],  %[qc3]  \n\t"
             "slt    %[t3],  %[t4],  %[qc4]  \n\t"
             /* qcN = 7 when it exceeded the limit */
             "movn   %[qc1], %[t4],  %[t0]   \n\t"
             "movn   %[qc2], %[t4],  %[t1]   \n\t"
             "movn   %[qc3], %[t4],  %[t2]   \n\t"
             "movn   %[qc4], %[t4],  %[t3]   \n\t"

             ".set pop                       \n\t"

             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
               [qc3]"+r"(qc3), [qc4]"+r"(qc4),
               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
               [t4]"=&r"(t4)
         );

         /* One index per pair: 8 * first + second */
         curidx  = 8 * qc1;
         curidx += qc2;

         curidx2  = 8 * qc3;
         curidx2 += qc4;

         curbits += p_bits[curidx] +
                    upair7_sign_bits[curidx] +
                    p_bits[curidx2] +
                    upair7_sign_bits[curidx2];
     }
     return curbits;
 }


 /**
  * Accumulate table entries for a band, two lookups per group of four
  * coefficients, using unsigned per-coefficient values limited to 12.
  *
  * Per group: each scaled[] * Q34 + ROUND_STANDARD is truncated to int and
  * limited to at most 12. Coefficients 1-2 and 3-4 each form an index
  * 13 * first + second into both ff_aac_spectral_bits[cb-1] and
  * upair12_sign_bits; all four entries are added to the running total.
  *
  * @param s         unused
  * @param pb        unused
  * @param in        unused
  * @param scaled    values multiplied by Q34 before truncation
  * @param size      number of coefficients; the loop advances by 4
  * @param scale_idx selects Q34 from ff_aac_pow34sf_tab
  * @param cb        codebook number; table cb-1 is used
  * @param lambda    unused
  * @param uplim     unused
  * @param bits      unused; never written
  * @return sum of the looked-up entries, converted to float
  */
 static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
                                            PutBitContext *pb, const float *in,
                                            const float *scaled, int size, int scale_idx,
                                            int cb, const float lambda, const float uplim,
                                            int *bits)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     int i;
     int qc1, qc2, qc3, qc4;
     int curbits = 0;

     uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];

     for (i = 0; i < size; i += 4) {
         int curidx, curidx2;
         int t0, t1, t2, t3, t4;

         /* Float-to-int conversion truncates the rounded product. */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

         __asm__ volatile (
             ".set push                      \n\t"
             ".set noreorder                 \n\t"

             /* t4 = 12, the upper limit */
             "ori    %[t4],  $zero,  12      \n\t"
             /* tN = (12 < qcN) */
             "slt    %[t0],  %[t4],  %[qc1]  \n\t"
             "slt    %[t1],  %[t4],  %[qc2]  \n\t"
             "slt    %[t2],  %[t4],  %[qc3]  \n\t"
             "slt    %[t3],  %[t4],  %[qc4]  \n\t"
             /* qcN = 12 when it exceeded the limit */
             "movn   %[qc1], %[t4],  %[t0]   \n\t"
             "movn   %[qc2], %[t4],  %[t1]   \n\t"
             "movn   %[qc3], %[t4],  %[t2]   \n\t"
             "movn   %[qc4], %[t4],  %[t3]   \n\t"

             ".set pop                       \n\t"

             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
               [qc3]"+r"(qc3), [qc4]"+r"(qc4),
               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
               [t4]"=&r"(t4)
         );

         /* One index per pair: 13 * first + second */
         curidx  = 13 * qc1;
         curidx += qc2;

         curidx2  = 13 * qc3;
         curidx2 += qc4;

         curbits += p_bits[curidx] +
                    p_bits[curidx2] +
                    upair12_sign_bits[curidx] +
                    upair12_sign_bits[curidx2];
     }
     return curbits;
 }


 /**
  * Accumulate table entries plus an extra per-coefficient term for a band,
  * two lookups per group of four coefficients.
  *
  * Per group: each scaled[] * Q34 + ROUND_STANDARD is truncated to int (qc).
  * A copy c of qc is passed through a saturating shift pair, and qc itself
  * is replaced by 16 when it is above 15. Coefficients 1-2 and 3-4 each form
  * an index 17 * first + second into both ff_aac_spectral_bits[cb-1] and
  * esc_sign_bits. For every coefficient that was above 15, an extra
  * 2 * floor(log2(c)) - 3 is added; for the others the extra term is 0.
  *
  * @param s         unused
  * @param pb        unused
  * @param in        unused
  * @param scaled    values multiplied by Q34 before truncation
  * @param size      number of coefficients; the loop advances by 4
  * @param scale_idx selects Q34 from ff_aac_pow34sf_tab
  * @param cb        codebook number; table cb-1 is used
  * @param lambda    unused
  * @param uplim     unused
  * @param bits      unused; never written
  * @return accumulated total, converted to float
  */
 static float get_band_numbits_ESC_mips(struct AACEncContext *s,
                                        PutBitContext *pb, const float *in,
                                        const float *scaled, int size, int scale_idx,
                                        int cb, const float lambda, const float uplim,
                                        int *bits)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     int i;
     int qc1, qc2, qc3, qc4;
     int curbits = 0;

     uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];

     for (i = 0; i < size; i += 4) {
         int curidx, curidx2;
         int cond0, cond1, cond2, cond3;
         int c1, c2, c3, c4;
         int t4, t5;

         /* Float-to-int conversion truncates the rounded product. */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

         __asm__ volatile (
             ".set push                                  \n\t"
             ".set noreorder                             \n\t"

             /* t4 = 15 (comparison threshold), t5 = 16 (replacement value) */
             "ori        %[t4],      $zero,  15          \n\t"
             "ori        %[t5],      $zero,  16          \n\t"
             /*
              * cN = qcN shifted left by 18 with saturation, then shifted back
              * right by 18.
              * NOTE(review): for non-negative qcN this should cap cN at 13 bits
              * (8191) - confirm against the shll_s.w definition.
              */
             "shll_s.w   %[c1],      %[qc1], 18          \n\t"
             "shll_s.w   %[c2],      %[qc2], 18          \n\t"
             "shll_s.w   %[c3],      %[qc3], 18          \n\t"
             "shll_s.w   %[c4],      %[qc4], 18          \n\t"
             "srl        %[c1],      %[c1],  18          \n\t"
             "srl        %[c2],      %[c2],  18          \n\t"
             "srl        %[c3],      %[c3],  18          \n\t"
             "srl        %[c4],      %[c4],  18          \n\t"
             /* condN = (15 < qcN) */
             "slt        %[cond0],   %[t4],  %[qc1]      \n\t"
             "slt        %[cond1],   %[t4],  %[qc2]      \n\t"
             "slt        %[cond2],   %[t4],  %[qc3]      \n\t"
             "slt        %[cond3],   %[t4],  %[qc4]      \n\t"
             /* qcN = 16 when it was above 15 */
             "movn       %[qc1],     %[t5],  %[cond0]    \n\t"
             "movn       %[qc2],     %[t5],  %[cond1]    \n\t"
             "movn       %[qc3],     %[t5],  %[cond2]    \n\t"
             "movn       %[qc4],     %[t5],  %[cond3]    \n\t"
             /* t5 = 31; cN = 31 - clz(cN), the index of the highest set bit */
             "ori        %[t5],      $zero,  31          \n\t"
             "clz        %[c1],      %[c1]               \n\t"
             "clz        %[c2],      %[c2]               \n\t"
             "clz        %[c3],      %[c3]               \n\t"
             "clz        %[c4],      %[c4]               \n\t"
             "subu       %[c1],      %[t5],  %[c1]       \n\t"
             "subu       %[c2],      %[t5],  %[c2]       \n\t"
             "subu       %[c3],      %[t5],  %[c3]       \n\t"
             "subu       %[c4],      %[t5],  %[c4]       \n\t"
             /* cN = 2 * cN - 3 */
             "sll        %[c1],      %[c1],  1           \n\t"
             "sll        %[c2],      %[c2],  1           \n\t"
             "sll        %[c3],      %[c3],  1           \n\t"
             "sll        %[c4],      %[c4],  1           \n\t"
             "addiu      %[c1],      %[c1],  -3          \n\t"
             "addiu      %[c2],      %[c2],  -3          \n\t"
             "addiu      %[c3],      %[c3],  -3          \n\t"
             "addiu      %[c4],      %[c4],  -3          \n\t"
             /* condN = -condN: 0 stays 0, 1 becomes an all-ones mask */
             "subu       %[cond0],   $zero,  %[cond0]    \n\t"
             "subu       %[cond1],   $zero,  %[cond1]    \n\t"
             "subu       %[cond2],   $zero,  %[cond2]    \n\t"
             "subu       %[cond3],   $zero,  %[cond3]    \n\t"
             /* keep cN only for coefficients that were above 15, else 0 */
             "and        %[c1],      %[c1],  %[cond0]    \n\t"
             "and        %[c2],      %[c2],  %[cond1]    \n\t"
             "and        %[c3],      %[c3],  %[cond2]    \n\t"
             "and        %[c4],      %[c4],  %[cond3]    \n\t"

             ".set pop                                   \n\t"

             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
               [qc3]"+r"(qc3), [qc4]"+r"(qc4),
               [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
               [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
               [c1]"=&r"(c1), [c2]"=&r"(c2),
               [c3]"=&r"(c3), [c4]"=&r"(c4),
               [t4]"=&r"(t4), [t5]"=&r"(t5)
         );

         /* One index per pair: 17 * first + second */
         curidx = 17 * qc1;
         curidx += qc2;

         curidx2 = 17 * qc3;
         curidx2 += qc4;

         curbits += p_bits[curidx];
         curbits += esc_sign_bits[curidx];
         curbits += p_bits[curidx2];
         curbits += esc_sign_bits[curidx2];

         /* Extra term computed in the asm for each coefficient above 15. */
         curbits += c1;
         curbits += c2;
         curbits += c3;
         curbits += c4;
     }
     return curbits;
 }


 /*
  * Bit-count routines, one per table index.
  * The get_band_numbits() macro picks an entry with [cb].
  */
 static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
                                              PutBitContext *pb, const float *in,
                                              const float *scaled, int size, int scale_idx,
                                              int cb, const float lambda, const float uplim,
                                              int *bits) = {
     [ 0] = get_band_numbits_ZERO_mips,
     [ 1] = get_band_numbits_SQUAD_mips,
     [ 2] = get_band_numbits_SQUAD_mips,
     [ 3] = get_band_numbits_UQUAD_mips,
     [ 4] = get_band_numbits_UQUAD_mips,
     [ 5] = get_band_numbits_SPAIR_mips,
     [ 6] = get_band_numbits_SPAIR_mips,
     [ 7] = get_band_numbits_UPAIR7_mips,
     [ 8] = get_band_numbits_UPAIR7_mips,
     [ 9] = get_band_numbits_UPAIR12_mips,
     [10] = get_band_numbits_UPAIR12_mips,
     [11] = get_band_numbits_ESC_mips,
     [12] = get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
     [13] = get_band_numbits_ZERO_mips,
     [14] = get_band_numbits_ZERO_mips,
     [15] = get_band_numbits_ZERO_mips,
 };


 /*
  * Call the routine stored at index cb of get_band_numbits_arr, forwarding
  * every argument unchanged. cb appears twice in the expansion (as the table
  * index and as a call argument), and the index is not range-checked here.
  */
 #define get_band_numbits(                                  \
                                 s, pb, in, scaled, size, scale_idx, cb, \
                                 lambda, uplim, bits)                    \
     get_band_numbits_arr[cb](                              \
                                 s, pb, in, scaled, size, scale_idx, cb, \
                                 lambda, uplim, bits)


 /**
  * Return the bit count for one band from the per-codebook dispatch table.
  *
  * Forwards all arguments to get_band_numbits() with the bit writer set to
  * NULL and returns its result unchanged.
  *
  * @param s         encoder context, passed through
  * @param in        input coefficients, passed through
  * @param scaled    scaled coefficients, passed through
  * @param size      number of coefficients, passed through
  * @param scale_idx scalefactor index, passed through
  * @param cb        table index selecting the routine; also passed through
  * @param lambda    passed through
  * @param uplim     passed through
  * @param bits      passed through
  * @return value returned by the selected routine
  */
 static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
                                      int *bits)
 {
     const float nbits = get_band_numbits(s, NULL, in, scaled, size, scale_idx,
                                          cb, lambda, uplim, bits);

     return nbits;
 }


 /**

  * Functions developed from template function and optimized for getting the band cost

  */

 #if HAVE_MIPSFPU

 /**
  * Band-cost handler that charges the full energy of the input.
  *
  * Adds in[k] * in[k] to a running float sum, four elements per iteration
  * starting at index 0 and continuing while the first index of the group is
  * below size (a size that is not a multiple of 4 is therefore rounded up).
  * Stores 0 in *bits when bits is non-NULL.
  *
  * s, pb, scaled, scale_idx, cb and uplim are not used.
  *
  * @return the sum of squares multiplied by lambda
  */
 static float get_band_cost_ZERO_mips(struct AACEncContext *s,
                                      PutBitContext *pb, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
                                      int *bits)
 {
     float energy = 0;
     int   pos    = 0;

     while (pos < size) {
         const float *quad = in + pos;

         /* Accumulate one product at a time to keep the summation order. */
         energy += quad[0] * quad[0];
         energy += quad[1] * quad[1];
         energy += quad[2] * quad[2];
         energy += quad[3] * quad[3];
         pos += 4;
     }

     if (bits)
         *bits = 0;

     return energy * lambda;
 }


 /**
  * Band-cost handler that must never be reached.
  *
  * Triggers av_assert0(0) unconditionally; no argument is read. The return
  * statement only exists to give the function a value after the assert.
  *
  * @return 0
  */
 static float get_band_cost_NONE_mips(struct AACEncContext *s,
                                      PutBitContext *pb,
                                      const float *in, const float *scaled,
                                      int size, int scale_idx, int cb,
                                      const float lambda, const float uplim,
                                      int *bits)
 {
     av_assert0(0);
     return 0.0f;
 }


 static float get_band_cost_SQUAD_mips(struct AACEncContext *s,

                                       PutBitContext *pb, const float *in,

                                       const float *scaled, int size, int scale_idx,

                                       int cb, const float lambda, const float uplim,

                                       int *bits)

 {

     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];

     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];

     int i;

     float cost = 0;

     int qc1, qc2, qc3, qc4;

     int curbits = 0;


     uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];

     float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1];


     for (i = 0; i < size; i += 4) {

         const float *vec;

         int curidx;

         int   *in_int = (int   *)&in[i];

         float *in_pos = (float *)&in[i];

         float di0, di1, di2, di3;

         int t0, t1, t2, t3, t4, t5, t6, t7;


         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;

         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;

         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;

         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;


         __asm__ volatile (

             ".set push                                  \n\t"

             ".set noreorder                             \n\t"


             "slt        %[qc1], $zero,  %[qc1]          \n\t"

             "slt        %[qc2], $zero,  %[qc2]          \n\t"

             "slt        %[qc3], $zero,  %[qc3]          \n\t"

             "slt        %[qc4], $zero,  %[qc4]          \n\t"

             "lw         %[t0],  0(%[in_int])            \n\t"

             "lw         %[t1],  4(%[in_int])            \n\t"

             "lw         %[t2],  8(%[in_int])            \n\t"

             "lw         %[t3],  12(%[in_int])           \n\t"

             "srl        %[t0],  %[t0],  31              \n\t"

             "srl        %[t1],  %[t1],  31              \n\t"

             "srl        %[t2],  %[t2],  31              \n\t"

             "srl        %[t3],  %[t3],  31              \n\t"

             "subu       %[t4],  $zero,  %[qc1]          \n\t"

             "subu       %[t5],  $zero,  %[qc2]          \n\t"

             "subu       %[t6],  $zero,  %[qc3]          \n\t"

             "subu       %[t7],  $zero,  %[qc4]          \n\t"

             "movn       %[qc1], %[t4],  %[t0]           \n\t"

             "movn       %[qc2], %[t5],  %[t1]           \n\t"

             "movn       %[qc3], %[t6],  %[t2]           \n\t"

             "movn       %[qc4], %[t7],  %[t3]           \n\t"


             ".set pop                                   \n\t"


             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),

               [qc3]"+r"(qc3), [qc4]"+r"(qc4),

               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

               [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)

             : [in_int]"r"(in_int)

             : "memory"

         );


         curidx = qc1;

         curidx *= 3;

         curidx += qc2;

         curidx *= 3;

         curidx += qc3;

         curidx *= 3;

         curidx += qc4;

         curidx += 40;


         curbits += p_bits[curidx];

         vec     = &p_codes[curidx*4];


         __asm__ volatile (

             ".set push                                  \n\t"

             ".set noreorder                             \n\t"


             "lwc1       $f0,    0(%[in_pos])            \n\t"

             "lwc1       $f1,    0(%[vec])               \n\t"

             "lwc1       $f2,    4(%[in_pos])            \n\t"

             "lwc1       $f3,    4(%[vec])               \n\t"

             "lwc1       $f4,    8(%[in_pos])            \n\t"

             "lwc1       $f5,    8(%[vec])               \n\t"

             "lwc1       $f6,    12(%[in_pos])           \n\t"

             "lwc1       $f7,    12(%[vec])              \n\t"

             "nmsub.s    %[di0], $f0,    $f1,    %[IQ]   \n\t"

             "nmsub.s    %[di1], $f2,    $f3,    %[IQ]   \n\t"

             "nmsub.s    %[di2], $f4,    $f5,    %[IQ]   \n\t"

             "nmsub.s    %[di3], $f6,    $f7,    %[IQ]   \n\t"


             ".set pop                                   \n\t"


             : [di0]"=&f"(di0), [di1]"=&f"(di1),

               [di2]"=&f"(di2), [di3]"=&f"(di3)

             : [in_pos]"r"(in_pos), [vec]"r"(vec),

               [IQ]"f"(IQ)

             : "$f0", "$f1", "$f2", "$f3",

               "$f4", "$f5", "$f6", "$f7",

               "memory"

         );


         cost += di0 * di0 + di1 * di1

                 + di2 * di2 + di3 * di3;

     }


     if (bits)

         *bits = curbits;

     return cost * lambda + curbits;

 }


 /**
  * Estimate the cost of coding one band with an unsigned 4-tuple codebook
  * (MIPS FPU inline-assembly version).
  *
  * Coefficients are consumed four at a time. Each group is quantized from
  * scaled[], every level is clamped to at most 2, and the four levels form a
  * base-3 index into the bit-length and codebook-vector tables selected by
  * cb. The squared difference between |in[]| and the selected codebook
  * vector times IQ is accumulated as distortion.
  *
  * @param s         not used by this function
  * @param pb        not used by this function
  * @param in        input coefficients; only their absolute values are used
  * @param scaled    values multiplied by Q34 to obtain the quantized levels
  *                  (presumably |in|^(3/4) -- confirm against the caller)
  * @param size      number of coefficients; the loop advances by 4, so this
  *                  is presumably a multiple of 4
  * @param scale_idx index used to look up Q34 and IQ
  * @param cb        codebook number; the tables are indexed with cb-1
  * @param lambda    weight applied to the accumulated squared error
  * @param uplim     not used by this function
  * @param bits      if non-NULL, receives the accumulated bit count
  * @return cost * lambda + curbits
  */
 static float get_band_cost_UQUAD_mips(struct AACEncContext *s,

                                       PutBitContext *pb, const float *in,

                                       const float *scaled, int size, int scale_idx,

                                       int cb, const float lambda, const float uplim,

                                       int *bits)

 {

     /* Q34 scales scaled[] before rounding; IQ scales codebook values back. */
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];

     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];

     int i;

     float cost = 0;

     int curbits = 0;

     int qc1, qc2, qc3, qc4;


     /* Per-codebook tables: code lengths and reconstruction vectors. */
     uint8_t *p_bits  = (uint8_t*)ff_aac_spectral_bits[cb-1];

     float   *p_codes = (float  *)ff_aac_codebook_vectors[cb-1];


     for (i = 0; i < size; i += 4) {

         const float *vec;

         int curidx;

         float *in_pos = (float *)&in[i];

         float di0, di1, di2, di3;

         int t0, t1, t2, t3, t4;


         /* Quantize: the float-to-int assignment truncates after adding the
          * rounding offset ROUND_STANDARD (defined elsewhere in the file). */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;

         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;

         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;

         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;


         /* Branch-free clamp: t4 = 2; tN = (2 < qcN); movn then overwrites
          * qcN with 2 whenever tN is non-zero, i.e. qcN = min(qcN, 2). */
         __asm__ volatile (

             ".set push                                  \n\t"

             ".set noreorder                             \n\t"


             "ori        %[t4],  $zero,  2               \n\t"

             "slt        %[t0],  %[t4],  %[qc1]          \n\t"

             "slt        %[t1],  %[t4],  %[qc2]          \n\t"

             "slt        %[t2],  %[t4],  %[qc3]          \n\t"

             "slt        %[t3],  %[t4],  %[qc4]          \n\t"

             "movn       %[qc1], %[t4],  %[t0]           \n\t"

             "movn       %[qc2], %[t4],  %[t1]           \n\t"

             "movn       %[qc3], %[t4],  %[t2]           \n\t"

             "movn       %[qc4], %[t4],  %[t3]           \n\t"


             ".set pop                                   \n\t"


             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),

               [qc3]"+r"(qc3), [qc4]"+r"(qc4),

               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

               [t4]"=&r"(t4)

         );


         /* Base-3 index: ((qc1*3 + qc2)*3 + qc3)*3 + qc4. */
         curidx = qc1;

         curidx *= 3;

         curidx += qc2;

         curidx *= 3;

         curidx += qc3;

         curidx *= 3;

         curidx += qc4;


         /* Code length plus the per-entry value from uquad_sign_bits
          * (table defined elsewhere; presumably the number of sign bits). */
         curbits += p_bits[curidx];

         curbits += uquad_sign_bits[curidx];

         /* Each codebook entry holds four floats. */
         vec     = &p_codes[curidx*4];


         /* diN = |in[i+N]| - vec[N] * IQ.
          * nmsub.s fd, fr, fs, ft computes fr - fs*ft (MIPS ISA); the
          * codebook values are staged in $f0..$f3, hence the clobbers. */
         __asm__ volatile (

             ".set push                                  \n\t"

             ".set noreorder                             \n\t"


             "lwc1       %[di0], 0(%[in_pos])            \n\t"

             "lwc1       %[di1], 4(%[in_pos])            \n\t"

             "lwc1       %[di2], 8(%[in_pos])            \n\t"

             "lwc1       %[di3], 12(%[in_pos])           \n\t"

             "abs.s      %[di0], %[di0]                  \n\t"

             "abs.s      %[di1], %[di1]                  \n\t"

             "abs.s      %[di2], %[di2]                  \n\t"

             "abs.s      %[di3], %[di3]                  \n\t"

             "lwc1       $f0,    0(%[vec])               \n\t"

             "lwc1       $f1,    4(%[vec])               \n\t"

             "lwc1       $f2,    8(%[vec])               \n\t"

             "lwc1       $f3,    12(%[vec])              \n\t"

             "nmsub.s    %[di0], %[di0], $f0,    %[IQ]   \n\t"

             "nmsub.s    %[di1], %[di1], $f1,    %[IQ]   \n\t"

             "nmsub.s    %[di2], %[di2], $f2,    %[IQ]   \n\t"

             "nmsub.s    %[di3], %[di3], $f3,    %[IQ]   \n\t"


             ".set pop                                   \n\t"


             : [di0]"=&f"(di0), [di1]"=&f"(di1),

               [di2]"=&f"(di2), [di3]"=&f"(di3)

             : [in_pos]"r"(in_pos), [vec]"r"(vec),

               [IQ]"f"(IQ)

             : "$f0", "$f1", "$f2", "$f3",

               "memory"

         );


         /* Accumulate the squared error of the four coefficients. */
         cost += di0 * di0 + di1 * di1

                 + di2 * di2 + di3 * di3;

     }


     if (bits)

         *bits = curbits;

     return cost * lambda + curbits;

 }


 /**
  * Estimate the cost of coding one band with a signed pair codebook
  * (MIPS FPU inline-assembly version).
  *
  * Four coefficients (two pairs) are handled per iteration. Each level is
  * quantized from scaled[], clamped to at most 4, and negated when the
  * corresponding in[] value has its sign bit set. Each pair then indexes
  * the bit-length and codebook-vector tables selected by cb, and the squared
  * difference between in[] and the codebook value times IQ is accumulated.
  *
  * @param s         not used by this function
  * @param pb        not used by this function
  * @param in        input coefficients (sign and value are both used)
  * @param scaled    values multiplied by Q34 to obtain the quantized levels
  *                  (presumably |in|^(3/4) -- confirm against the caller)
  * @param size      number of coefficients; the loop advances by 4, so this
  *                  is presumably a multiple of 4
  * @param scale_idx index used to look up Q34 and IQ
  * @param cb        codebook number; the tables are indexed with cb-1
  * @param lambda    weight applied to the accumulated squared error
  * @param uplim     not used by this function
  * @param bits      if non-NULL, receives the accumulated bit count
  * @return cost * lambda + curbits
  */
 static float get_band_cost_SPAIR_mips(struct AACEncContext *s,

                                       PutBitContext *pb, const float *in,

                                       const float *scaled, int size, int scale_idx,

                                       int cb, const float lambda, const float uplim,

                                       int *bits)

 {

     /* Q34 scales scaled[] before rounding; IQ scales codebook values back. */
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];

     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];

     int i;

     float cost = 0;

     int qc1, qc2, qc3, qc4;

     int curbits = 0;


     /* Per-codebook tables: code lengths and reconstruction vectors. */
     uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];

     float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1];


     for (i = 0; i < size; i += 4) {

         const float *vec, *vec2;

         int curidx, curidx2;

         /* Same address viewed two ways: in_int is only dereferenced by the
          * integer loads inside the asm below (to read the raw sign bit). */
         int   *in_int = (int   *)&in[i];

         float *in_pos = (float *)&in[i];

         float di0, di1, di2, di3;

         int t0, t1, t2, t3, t4, t5, t6, t7;


         /* Quantize: the float-to-int assignment truncates after adding the
          * rounding offset ROUND_STANDARD (defined elsewhere in the file). */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;

         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;

         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;

         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;


         /* Step 1: qcN = min(qcN, 4) via slt/movn with t4 = 4.
          * Step 2: tN = bit 31 of the raw in[] word (srl by 31).
          * Step 3: t4..t7 = -qcN; movn replaces qcN by its negation when
          *         the sign bit was set. t4 is reused once the clamp is done. */
         __asm__ volatile (

             ".set push                                  \n\t"

             ".set noreorder                             \n\t"


             "ori        %[t4],  $zero,  4               \n\t"

             "slt        %[t0],  %[t4],  %[qc1]          \n\t"

             "slt        %[t1],  %[t4],  %[qc2]          \n\t"

             "slt        %[t2],  %[t4],  %[qc3]          \n\t"

             "slt        %[t3],  %[t4],  %[qc4]          \n\t"

             "movn       %[qc1], %[t4],  %[t0]           \n\t"

             "movn       %[qc2], %[t4],  %[t1]           \n\t"

             "movn       %[qc3], %[t4],  %[t2]           \n\t"

             "movn       %[qc4], %[t4],  %[t3]           \n\t"

             "lw         %[t0],  0(%[in_int])            \n\t"

             "lw         %[t1],  4(%[in_int])            \n\t"

             "lw         %[t2],  8(%[in_int])            \n\t"

             "lw         %[t3],  12(%[in_int])           \n\t"

             "srl        %[t0],  %[t0],  31              \n\t"

             "srl        %[t1],  %[t1],  31              \n\t"

             "srl        %[t2],  %[t2],  31              \n\t"

             "srl        %[t3],  %[t3],  31              \n\t"

             "subu       %[t4],  $zero,  %[qc1]          \n\t"

             "subu       %[t5],  $zero,  %[qc2]          \n\t"

             "subu       %[t6],  $zero,  %[qc3]          \n\t"

             "subu       %[t7],  $zero,  %[qc4]          \n\t"

             "movn       %[qc1], %[t4],  %[t0]           \n\t"

             "movn       %[qc2], %[t5],  %[t1]           \n\t"

             "movn       %[qc3], %[t6],  %[t2]           \n\t"

             "movn       %[qc4], %[t7],  %[t3]           \n\t"


             ".set pop                                   \n\t"


             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),

               [qc3]"+r"(qc3), [qc4]"+r"(qc4),

               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

               [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)

             : [in_int]"r"(in_int)

             : "memory"

         );


         /* Pair index 9*a + b + 40. With levels in [-4, 4] (assuming the
          * levels were non-negative before the sign was applied) the offset
          * 40 = 9*4 + 4 makes the index start at 0. */
         curidx = 9 * qc1;

         curidx += qc2 + 40;


         curidx2 = 9 * qc3;

         curidx2 += qc4 + 40;


         curbits += p_bits[curidx];

         curbits += p_bits[curidx2];


         /* Each codebook entry holds two floats. */
         vec     = &p_codes[curidx*2];

         vec2    = &p_codes[curidx2*2];


         /* diN = in[i+N] - codebook value * IQ.
          * nmsub.s fd, fr, fs, ft computes fr - fs*ft (MIPS ISA). Inputs go
          * through $f0/$f2/$f4/$f6 and codebook values through
          * $f1/$f3/$f5/$f7, all listed as clobbers. */
         __asm__ volatile (

             ".set push                                  \n\t"

             ".set noreorder                             \n\t"


             "lwc1       $f0,    0(%[in_pos])            \n\t"

             "lwc1       $f1,    0(%[vec])               \n\t"

             "lwc1       $f2,    4(%[in_pos])            \n\t"

             "lwc1       $f3,    4(%[vec])               \n\t"

             "lwc1       $f4,    8(%[in_pos])            \n\t"

             "lwc1       $f5,    0(%[vec2])              \n\t"

             "lwc1       $f6,    12(%[in_pos])           \n\t"

             "lwc1       $f7,    4(%[vec2])              \n\t"

             "nmsub.s    %[di0], $f0,    $f1,    %[IQ]   \n\t"

             "nmsub.s    %[di1], $f2,    $f3,    %[IQ]   \n\t"

             "nmsub.s    %[di2], $f4,    $f5,    %[IQ]   \n\t"

             "nmsub.s    %[di3], $f6,    $f7,    %[IQ]   \n\t"


             ".set pop                                   \n\t"


             : [di0]"=&f"(di0), [di1]"=&f"(di1),

               [di2]"=&f"(di2), [di3]"=&f"(di3)

             : [in_pos]"r"(in_pos), [vec]"r"(vec),

               [vec2]"r"(vec2), [IQ]"f"(IQ)

             : "$f0", "$f1", "$f2", "$f3",

               "$f4", "$f5", "$f6", "$f7",

               "memory"

         );


         /* Accumulate the squared error of the four coefficients. */
         cost += di0 * di0 + di1 * di1

                 + di2 * di2 + di3 * di3;

     }


     if (bits)

         *bits = curbits;

     return cost * lambda + curbits;

 }


 /**
  * Estimate the cost of coding one band with an unsigned pair codebook whose
  * levels are clamped to 7 (MIPS FPU inline-assembly version).
  *
  * Four coefficients (two pairs) are handled per iteration. Each level is
  * quantized from scaled[] and clamped to at most 7; each pair indexes the
  * bit-length, sign-bit and codebook-vector tables, and the squared
  * difference between |in[]| and the codebook value times IQ is accumulated.
  *
  * Only the clamped levels are needed for the estimate: the distortion is
  * computed on absolute values and the sign-bit count comes from
  * upair7_sign_bits[], so no per-coefficient sign word is assembled here.
  *
  * @param s         not used by this function
  * @param pb        not used by this function
  * @param in        input coefficients; only their absolute values are used
  * @param scaled    values multiplied by Q34 to obtain the quantized levels
  *                  (presumably |in|^(3/4) -- confirm against the caller)
  * @param size      number of coefficients; the loop advances by 4, so this
  *                  is presumably a multiple of 4
  * @param scale_idx index used to look up Q34 and IQ
  * @param cb        codebook number; the tables are indexed with cb-1
  * @param lambda    weight applied to the accumulated squared error
  * @param uplim     not used by this function
  * @param bits      if non-NULL, receives the accumulated bit count
  * @return cost * lambda + curbits
  */
 static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
                                        PutBitContext *pb, const float *in,
                                        const float *scaled, int size, int scale_idx,
                                        int cb, const float lambda, const float uplim,
                                        int *bits)
 {
     /* Q34 scales scaled[] before rounding; IQ scales codebook values back. */
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     float cost = 0;
     int qc1, qc2, qc3, qc4;
     int curbits = 0;

     /* Per-codebook tables: code lengths and reconstruction vectors. */
     uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
     float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1];

     for (i = 0; i < size; i += 4) {
         const float *vec, *vec2;
         int curidx, curidx2;
         float *in_pos = (float *)&in[i];
         float di0, di1, di2, di3;
         int t0, t1, t2, t3, t4;

         /* Quantize: the float-to-int assignment truncates after adding the
          * rounding offset ROUND_STANDARD (defined elsewhere in the file). */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

         /* Branch-free clamp: t4 = 7; tN = (7 < qcN); movn then overwrites
          * qcN with 7 whenever tN is non-zero, i.e. qcN = min(qcN, 7). */
         __asm__ volatile (
             ".set push                                          \n\t"
             ".set noreorder                                     \n\t"

             "ori        %[t4],      $zero,      7               \n\t"
             "slt        %[t0],      %[t4],      %[qc1]          \n\t"
             "slt        %[t1],      %[t4],      %[qc2]          \n\t"
             "slt        %[t2],      %[t4],      %[qc3]          \n\t"
             "slt        %[t3],      %[t4],      %[qc4]          \n\t"
             "movn       %[qc1],     %[t4],      %[t0]           \n\t"
             "movn       %[qc2],     %[t4],      %[t1]           \n\t"
             "movn       %[qc3],     %[t4],      %[t2]           \n\t"
             "movn       %[qc4],     %[t4],      %[t3]           \n\t"

             ".set pop                                           \n\t"

             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
               [qc3]"+r"(qc3), [qc4]"+r"(qc4),
               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
               [t4]"=&r"(t4)
         );

         /* Pair index 8*a + b for levels in 0..7. */
         curidx = 8 * qc1;
         curidx += qc2;

         curidx2 = 8 * qc3;
         curidx2 += qc4;

         /* Code length plus the per-entry value from upair7_sign_bits
          * (table defined elsewhere; presumably the number of sign bits). */
         curbits += p_bits[curidx];
         curbits += upair7_sign_bits[curidx];
         /* Each codebook entry holds two floats. */
         vec     = &p_codes[curidx*2];

         curbits += p_bits[curidx2];
         curbits += upair7_sign_bits[curidx2];
         vec2    = &p_codes[curidx2*2];

         /* diN = |in[i+N]| - codebook value * IQ.
          * nmsub.s fd, fr, fs, ft computes fr - fs*ft (MIPS ISA); the
          * codebook values are staged in $f0..$f3, hence the clobbers. */
         __asm__ volatile (
             ".set push                                          \n\t"
             ".set noreorder                                     \n\t"

             "lwc1       %[di0],     0(%[in_pos])                \n\t"
             "lwc1       %[di1],     4(%[in_pos])                \n\t"
             "lwc1       %[di2],     8(%[in_pos])                \n\t"
             "lwc1       %[di3],     12(%[in_pos])               \n\t"
             "abs.s      %[di0],     %[di0]                      \n\t"
             "abs.s      %[di1],     %[di1]                      \n\t"
             "abs.s      %[di2],     %[di2]                      \n\t"
             "abs.s      %[di3],     %[di3]                      \n\t"
             "lwc1       $f0,        0(%[vec])                   \n\t"
             "lwc1       $f1,        4(%[vec])                   \n\t"
             "lwc1       $f2,        0(%[vec2])                  \n\t"
             "lwc1       $f3,        4(%[vec2])                  \n\t"
             "nmsub.s    %[di0],     %[di0],     $f0,    %[IQ]   \n\t"
             "nmsub.s    %[di1],     %[di1],     $f1,    %[IQ]   \n\t"
             "nmsub.s    %[di2],     %[di2],     $f2,    %[IQ]   \n\t"
             "nmsub.s    %[di3],     %[di3],     $f3,    %[IQ]   \n\t"

             ".set pop                                           \n\t"

             : [di0]"=&f"(di0), [di1]"=&f"(di1),
               [di2]"=&f"(di2), [di3]"=&f"(di3)
             : [in_pos]"r"(in_pos), [vec]"r"(vec),
               [vec2]"r"(vec2), [IQ]"f"(IQ)
             : "$f0", "$f1", "$f2", "$f3",
               "memory"
         );

         /* Accumulate the squared error of the four coefficients. */
         cost += di0 * di0 + di1 * di1
                 + di2 * di2 + di3 * di3;
     }

     if (bits)
         *bits = curbits;
     return cost * lambda + curbits;
 }


 /**
  * Estimate the cost of coding one band with an unsigned pair codebook whose
  * levels are clamped to 12 (MIPS FPU inline-assembly version).
  *
  * Four coefficients (two pairs) are handled per iteration. Each level is
  * quantized from scaled[] and clamped to at most 12; each pair indexes the
  * bit-length, sign-bit and codebook-vector tables, and the squared
  * difference between |in[]| and the codebook value times IQ is accumulated.
  *
  * Only the clamped levels are needed for the estimate: the distortion is
  * computed on absolute values and the sign-bit count comes from
  * upair12_sign_bits[], so no per-coefficient sign word is assembled here.
  *
  * @param s         not used by this function
  * @param pb        not used by this function
  * @param in        input coefficients; only their absolute values are used
  * @param scaled    values multiplied by Q34 to obtain the quantized levels
  *                  (presumably |in|^(3/4) -- confirm against the caller)
  * @param size      number of coefficients; the loop advances by 4, so this
  *                  is presumably a multiple of 4
  * @param scale_idx index used to look up Q34 and IQ
  * @param cb        codebook number; the tables are indexed with cb-1
  * @param lambda    weight applied to the accumulated squared error
  * @param uplim     not used by this function
  * @param bits      if non-NULL, receives the accumulated bit count
  * @return cost * lambda + curbits
  */
 static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits)
 {
     /* Q34 scales scaled[] before rounding; IQ scales codebook values back. */
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     float cost = 0;
     int qc1, qc2, qc3, qc4;
     int curbits = 0;

     /* Per-codebook tables: code lengths and reconstruction vectors. */
     uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
     float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1];

     for (i = 0; i < size; i += 4) {
         const float *vec, *vec2;
         int curidx, curidx2;
         float *in_pos = (float *)&in[i];
         float di0, di1, di2, di3;
         int t0, t1, t2, t3, t4;

         /* Quantize: the float-to-int assignment truncates after adding the
          * rounding offset ROUND_STANDARD (defined elsewhere in the file). */
         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

         /* Branch-free clamp: t4 = 12; tN = (12 < qcN); movn then overwrites
          * qcN with 12 whenever tN is non-zero, i.e. qcN = min(qcN, 12). */
         __asm__ volatile (
             ".set push                                          \n\t"
             ".set noreorder                                     \n\t"

             "ori        %[t4],      $zero,      12              \n\t"
             "slt        %[t0],      %[t4],      %[qc1]          \n\t"
             "slt        %[t1],      %[t4],      %[qc2]          \n\t"
             "slt        %[t2],      %[t4],      %[qc3]          \n\t"
             "slt        %[t3],      %[t4],      %[qc4]          \n\t"
             "movn       %[qc1],     %[t4],      %[t0]           \n\t"
             "movn       %[qc2],     %[t4],      %[t1]           \n\t"
             "movn       %[qc3],     %[t4],      %[t2]           \n\t"
             "movn       %[qc4],     %[t4],      %[t3]           \n\t"

             ".set pop                                           \n\t"

             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
               [qc3]"+r"(qc3), [qc4]"+r"(qc4),
               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
               [t4]"=&r"(t4)
         );

         /* Pair index 13*a + b for levels in 0..12. */
         curidx = 13 * qc1;
         curidx += qc2;

         curidx2 = 13 * qc3;
         curidx2 += qc4;

         /* Code length plus the per-entry value from upair12_sign_bits
          * (table defined elsewhere; presumably the number of sign bits). */
         curbits += p_bits[curidx];
         curbits += p_bits[curidx2];
         curbits += upair12_sign_bits[curidx];
         curbits += upair12_sign_bits[curidx2];
         /* Each codebook entry holds two floats. */
         vec     = &p_codes[curidx*2];
         vec2    = &p_codes[curidx2*2];

         /* diN = |in[i+N]| - codebook value * IQ.
          * nmsub.s fd, fr, fs, ft computes fr - fs*ft (MIPS ISA); the
          * codebook values are staged in $f0..$f3, hence the clobbers. */
         __asm__ volatile (
             ".set push                                          \n\t"
             ".set noreorder                                     \n\t"

             "lwc1       %[di0],     0(%[in_pos])                \n\t"
             "lwc1       %[di1],     4(%[in_pos])                \n\t"
             "lwc1       %[di2],     8(%[in_pos])                \n\t"
             "lwc1       %[di3],     12(%[in_pos])               \n\t"
             "abs.s      %[di0],     %[di0]                      \n\t"
             "abs.s      %[di1],     %[di1]                      \n\t"
             "abs.s      %[di2],     %[di2]                      \n\t"
             "abs.s      %[di3],     %[di3]                      \n\t"
             "lwc1       $f0,        0(%[vec])                   \n\t"
             "lwc1       $f1,        4(%[vec])                   \n\t"
             "lwc1       $f2,        0(%[vec2])                  \n\t"
             "lwc1       $f3,        4(%[vec2])                  \n\t"
             "nmsub.s    %[di0],     %[di0],     $f0,    %[IQ]   \n\t"
             "nmsub.s    %[di1],     %[di1],     $f1,    %[IQ]   \n\t"
             "nmsub.s    %[di2],     %[di2],     $f2,    %[IQ]   \n\t"
             "nmsub.s    %[di3],     %[di3],     $f3,    %[IQ]   \n\t"

             ".set pop                                           \n\t"

             : [di0]"=&f"(di0), [di1]"=&f"(di1),
               [di2]"=&f"(di2), [di3]"=&f"(di3)
             : [in_pos]"r"(in_pos), [vec]"r"(vec),
               [vec2]"r"(vec2), [IQ]"f"(IQ)
             : "$f0", "$f1", "$f2", "$f3",
               "memory"
         );

         /* Accumulate the squared error of the four coefficients. */
         cost += di0 * di0 + di1 * di1
                 + di2 * di2 + di3 * di3;
     }

     if (bits)
         *bits = curbits;
     return cost * lambda + curbits;
 }


 /**
  * Estimate the cost of coding one band with the escape pair codebook
  * (MIPS inline-assembly version).
  *
  * Four coefficients (two pairs) are handled per iteration. A quantized level
  * above 15 is flagged as an escape: its table index becomes 16 and extra
  * bits derived from av_log2() of the level (capped by the saturating shift
  * in the asm) are charged. The squared reconstruction error of |in[]| is
  * accumulated as distortion.
  *
  * @param s         not used by this function
  * @param pb        not used by this function
  * @param in        input coefficients; only their absolute values are used
  * @param scaled    values multiplied by Q34 to obtain the quantized levels
  *                  (presumably |in|^(3/4) -- confirm against the caller)
  * @param size      number of coefficients; the loop advances by 4, so this
  *                  is presumably a multiple of 4
  * @param scale_idx index used to look up Q34 and IQ
  * @param cb        codebook number; the tables are indexed with cb-1
  * @param lambda    weight applied to the accumulated squared error
  * @param uplim     not used by this function
  * @param bits      if non-NULL, receives the accumulated bit count
  * @return cost * lambda + curbits
  */
 static float get_band_cost_ESC_mips(struct AACEncContext *s,
                                     PutBitContext *pb, const float *in,
                                     const float *scaled, int size, int scale_idx,
                                     int cb, const float lambda, const float uplim,
                                     int *bits)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     /* Magnitude at or above which the error is measured against a fixed ceiling. */
     const float CLIPPED_ESCAPE = 165140.0f * IQ;
     uint8_t *p_bits  = (uint8_t*)ff_aac_spectral_bits[cb-1];
     float   *p_codes = (float*  )ff_aac_codebook_vectors[cb-1];
     float cost = 0;
     int curbits = 0;
     int qc1, qc2, qc3, qc4;
     int i;

     for (i = 0; i < size; i += 4) {
         const float *pair_a, *pair_b;   /* codebook entries of the two pairs  */
         int idx_a, idx_b;               /* table indices of the two pairs     */
         int esc0, esc1, esc2, esc3;     /* 1 when the level exceeds 15        */
         int lvl0, lvl1, lvl2, lvl3;     /* level after the saturating shift   */
         int k15, k16;                   /* asm scratch constants 15 and 16    */
         float mag0, mag1, mag2, mag3;   /* |in[i..i+3]|                       */
         float err0, err1, err2, err3;   /* reconstruction errors              */

         qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
         qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
         qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
         qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

         /* lvlN: qcN shifted left by 18 with saturation (shll_s.w, MIPS DSP
          *       ASE) and back right by 18, which caps it at 8191.
          * escN: 15 < qcN.
          * qcN : replaced by 16 when escN is set. */
         __asm__ volatile (
             ".set push                                  \n\t"
             ".set noreorder                             \n\t"

             "ori        %[t6],      $zero,  15          \n\t"
             "ori        %[t7],      $zero,  16          \n\t"
             "shll_s.w   %[c1],      %[qc1], 18          \n\t"
             "shll_s.w   %[c2],      %[qc2], 18          \n\t"
             "shll_s.w   %[c3],      %[qc3], 18          \n\t"
             "shll_s.w   %[c4],      %[qc4], 18          \n\t"
             "srl        %[c1],      %[c1],  18          \n\t"
             "srl        %[c2],      %[c2],  18          \n\t"
             "srl        %[c3],      %[c3],  18          \n\t"
             "srl        %[c4],      %[c4],  18          \n\t"
             "slt        %[cond0],   %[t6],  %[qc1]      \n\t"
             "slt        %[cond1],   %[t6],  %[qc2]      \n\t"
             "slt        %[cond2],   %[t6],  %[qc3]      \n\t"
             "slt        %[cond3],   %[t6],  %[qc4]      \n\t"
             "movn       %[qc1],     %[t7],  %[cond0]    \n\t"
             "movn       %[qc2],     %[t7],  %[cond1]    \n\t"
             "movn       %[qc3],     %[t7],  %[cond2]    \n\t"
             "movn       %[qc4],     %[t7],  %[cond3]    \n\t"

             ".set pop                                   \n\t"

             : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
               [qc3]"+r"(qc3), [qc4]"+r"(qc4),
               [cond0]"=&r"(esc0), [cond1]"=&r"(esc1),
               [cond2]"=&r"(esc2), [cond3]"=&r"(esc3),
               [c1]"=&r"(lvl0), [c2]"=&r"(lvl1),
               [c3]"=&r"(lvl2), [c4]"=&r"(lvl3),
               [t6]"=&r"(k15), [t7]"=&r"(k16)
         );

         /* Pair index 17*a + b for levels in 0..16. */
         idx_a = 17 * qc1 + qc2;
         idx_b = 17 * qc3 + qc4;

         pair_a = &p_codes[idx_a*2];
         pair_b = &p_codes[idx_b*2];

         /* Code length and per-entry sign-bit value for both pairs. */
         curbits += p_bits[idx_a];
         curbits += esc_sign_bits[idx_a];
         curbits += p_bits[idx_b];
         curbits += esc_sign_bits[idx_b];

         /* Escape sequences cost 2*log2(level) - 3 extra bits; escN is 0 or 1. */
         curbits += esc0 ? av_log2(lvl0) * 2 - 3 : 0;
         curbits += esc1 ? av_log2(lvl1) * 2 - 3 : 0;
         curbits += esc2 ? av_log2(lvl2) * 2 - 3 : 0;
         curbits += esc3 ? av_log2(lvl3) * 2 - 3 : 0;

         mag0 = fabsf(in[i  ]);
         mag1 = fabsf(in[i+1]);
         mag2 = fabsf(in[i+2]);
         mag3 = fabsf(in[i+3]);

         /* Non-escape: error against the codebook value.
          * Escape: error against the ceiling if the input is at or above it,
          * otherwise against level * cbrt(level) * IQ. */
         if (!esc0)
             err0 = mag0 - pair_a[0] * IQ;
         else if (mag0 >= CLIPPED_ESCAPE)
             err0 = mag0 - CLIPPED_ESCAPE;
         else
             err0 = mag0 - lvl0 * cbrtf(lvl0) * IQ;

         if (!esc1)
             err1 = mag1 - pair_a[1] * IQ;
         else if (mag1 >= CLIPPED_ESCAPE)
             err1 = mag1 - CLIPPED_ESCAPE;
         else
             err1 = mag1 - lvl1 * cbrtf(lvl1) * IQ;

         if (!esc2)
             err2 = mag2 - pair_b[0] * IQ;
         else if (mag2 >= CLIPPED_ESCAPE)
             err2 = mag2 - CLIPPED_ESCAPE;
         else
             err2 = mag2 - lvl2 * cbrtf(lvl2) * IQ;

         if (!esc3)
             err3 = mag3 - pair_b[1]*IQ;
         else if (mag3 >= CLIPPED_ESCAPE)
             err3 = mag3 - CLIPPED_ESCAPE;
         else
             err3 = mag3 - lvl3 * cbrtf(lvl3) * IQ;

         cost += err0 * err0 + err1 * err1
                 + err2 * err2 + err3 * err3;
     }

     if (bits)
         *bits = curbits;
     return cost * lambda + curbits;
 }


 /*
  * Band-cost estimators, one slot per codebook number 0..15.
  * Adjacent codebooks share a handler; slot 12 is routed to the NONE handler
  * and slots 13..15 to the ZERO handler.
  */
 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
                                           PutBitContext *pb, const float *in,
                                           const float *scaled, int size, int scale_idx,
                                           int cb, const float lambda, const float uplim,
                                           int *bits) = {
     [ 0] = get_band_cost_ZERO_mips,
     [ 1] = get_band_cost_SQUAD_mips,
     [ 2] = get_band_cost_SQUAD_mips,
     [ 3] = get_band_cost_UQUAD_mips,
     [ 4] = get_band_cost_UQUAD_mips,
     [ 5] = get_band_cost_SPAIR_mips,
     [ 6] = get_band_cost_SPAIR_mips,
     [ 7] = get_band_cost_UPAIR7_mips,
     [ 8] = get_band_cost_UPAIR7_mips,
     [ 9] = get_band_cost_UPAIR12_mips,
     [10] = get_band_cost_UPAIR12_mips,
     [11] = get_band_cost_ESC_mips,
     [12] = get_band_cost_NONE_mips, /* cb 12 doesn't exist */
     [13] = get_band_cost_ZERO_mips,
     [14] = get_band_cost_ZERO_mips,
     [15] = get_band_cost_ZERO_mips,
 };


 /*
  * Dispatch a band-cost query to the handler stored in get_band_cost_arr for
  * codebook cb, forwarding all arguments unchanged.
  *
  * cb is expanded twice (as the table index and as an argument), so callers
  * must pass an expression without side effects.
  *
  * Every continuation line must directly follow the previous one: an empty
  * line after a trailing backslash would end the macro definition early.
  */
 #define get_band_cost(s, pb, in, scaled, size, scale_idx, cb,             \
                       lambda, uplim, bits)                                \
     get_band_cost_arr[cb](s, pb, in, scaled, size, scale_idx, cb,         \
                           lambda, uplim, bits)


 static float quantize_band_cost(struct AACEncContext *s, const float *in,

                                 const float *scaled, int size, int scale_idx,

                                 int cb, const float lambda, const float uplim,

                                 int *bits)

 {

     return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);

 }


 /**
  * Scalefactor search for one channel.
  *
  * Outline of what the code below does:
  *  1. Sum the psy thresholds of every band over its window group, flag
  *     bands whose energy does not exceed the threshold (or whose threshold
  *     is zero) in sce->zeroes, and remember the smallest summed threshold
  *     among the remaining bands.
  *  2. Seed sce->sf_idx from the ratio of each band's limit to that minimum.
  *  3. Repeat up to 10 times: step all scalefactors up or down (step sizes
  *     32, 16, ..., 1 on the first pass, 1 afterwards) depending on whether
  *     the counted bits exceed the target, then lower the scalefactor of
  *     bands whose recorded distortion exceeds their limit, clamp, and pick
  *     the band codebook with find_min_book().
  *
  * While the step is larger than 1 only bits are counted
  * (quantize_band_cost_bits); the per-band distortion table is refreshed
  * only on step == 1 passes.
  *
  * @param avctx  supplies bit_rate, sample_rate and channels for the target
  * @param s      encoder context (psy data, scoefs scratch buffer)
  * @param sce    channel being processed; zeroes, sf_idx and band_type are
  *               written
  * @param lambda not referenced by this implementation
  */
 static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
                                                AACEncContext *s,
                                                SingleChannelElement *sce,
                                                const float lambda)
 {
     float band_dist[128] = { 0 }, band_uplim[128], band_max[128];
     float min_thr = INFINITY;
     int target_bits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
     int any_coded = 0;
     int iter = 0;
     int changed, min_sf;
     int offset, w, w2, g, i;

     target_bits = FFMIN(target_bits, 5800);

     /* Pass 1: per-band limits and zero flags. */
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
         for (g = 0; g < sce->ics.num_swb; g++) {
             const int idx = w*16 + g;
             float thr_sum = 0.0f;
             int coded = 0;

             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                 const int widx = (w + w2)*16 + g;
                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[widx];

                 thr_sum += band->threshold;
                 if (band->energy <= band->threshold || band->threshold == 0.0f)
                     sce->zeroes[widx] = 1;
                 else
                     coded = 1;
             }
             band_uplim[idx]  = thr_sum * 512;
             /* The group's first window carries the flag for the whole band. */
             sce->zeroes[idx] = !coded;
             if (coded)
                 min_thr = FFMIN(min_thr, thr_sum);
             any_coded |= coded;
         }
     }

     /* Pass 2: initial scalefactors. */
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
         for (g = 0; g < sce->ics.num_swb; g++) {
             const int idx = w*16 + g;

             if (sce->zeroes[idx])
                 sce->sf_idx[idx] = SCALE_ONE_POS;
             else
                 sce->sf_idx[idx] = SCALE_ONE_POS + FFMIN(log2f(band_uplim[idx]/min_thr)*4,59);
         }
     }

     /* Nothing to code in this channel. */
     if (!any_coded)
         return;

     abs_pow34_v(s->scoefs, sce->coeffs, 1024);

     /* Per-band maximum of the scaled coefficients, used for codebook choice. */
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
         offset = w*128;
         for (g = 0; g < sce->ics.num_swb; g++) {
             band_max[w*16 + g] = find_max_val(sce->ics.group_len[w],
                                               sce->ics.swb_sizes[g],
                                               s->scoefs + offset);
             offset += sce->ics.swb_sizes[g];
         }
     }

     do {
         int total_bits, step;

         min_sf = sce->sf_idx[0];
         step   = iter ? 1 : 32;

         /* Inner loop: move all scalefactors towards the bit target. */
         do {
             const int bits_only = step > 1;
             int prev_sf = -1;

             total_bits = 0;
             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
                 offset = w*128;
                 for (g = 0; g < sce->ics.num_swb; g++) {
                     const int idx  = w*16 + g;
                     const int size = sce->ics.swb_sizes[g];

                     if (!sce->zeroes[idx] && sce->sf_idx[idx] < 218) {
                         const float *coefs  = sce->coeffs + offset;
                         const float *scaled = s->scoefs   + offset;
                         const int sf = sce->sf_idx[idx];
                         float dist = 0.0f;
                         int bits = 0;
                         int cb, b;

                         min_sf = FFMIN(min_sf, sf);
                         cb     = find_min_book(band_max[idx], sf);
                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                             if (bits_only) {
                                 bits += quantize_band_cost_bits(s, coefs + w2*128,
                                                                 scaled + w2*128,
                                                                 size, sf, cb,
                                                                 1.0f, INFINITY, &b);
                             } else {
                                 dist += quantize_band_cost(s, coefs + w2*128,
                                                            scaled + w2*128,
                                                            size, sf, cb,
                                                            1.0f, INFINITY, &b);
                                 bits += b;
                             }
                         }
                         /* Distortion is recorded before scalefactor bits are added. */
                         if (!bits_only)
                             band_dist[idx] = dist - bits;
                         if (prev_sf != -1)
                             bits += ff_aac_scalefactor_bits[sf - prev_sf + SCALE_DIFF_ZERO];
                         total_bits += bits;
                         prev_sf     = sf;
                     }
                     offset += size;
                 }
             }

             for (i = 0; i < 128; i++) {
                 if (total_bits > target_bits) {
                     if (sce->sf_idx[i] < 218 - step)
                         sce->sf_idx[i] += step;
                 } else if (sce->sf_idx[i] > 60 - step) {
                     sce->sf_idx[i] -= step;
                 }
             }

             step >>= 1;
             /* Keep stepping by one while still more than 2% over the target. */
             if (!step && total_bits > target_bits*1.02 && sce->sf_idx[0] < 217)
                 step = 1;
         } while (step);

         /* Outer step: refine bands whose distortion exceeds their limit. */
         changed = 0;
         min_sf  = av_clip(min_sf, 60, 255 - SCALE_MAX_DIFF);
         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
             for (g = 0; g < sce->ics.num_swb; g++) {
                 const int idx    = w*16 + g;
                 const int old_sf = sce->sf_idx[idx];
                 int sf = old_sf;

                 if (band_dist[idx] > band_uplim[idx] && sf > 60)
                     sf -= find_min_book(band_max[idx], sf - 1) ? 1 : 2;
                 sf = av_clip(sf, min_sf, min_sf + SCALE_MAX_DIFF);
                 sf = FFMIN(sf, 219);
                 sce->sf_idx[idx] = sf;
                 if (sf != old_sf)
                     changed = 1;
                 sce->band_type[idx] = find_min_book(band_max[idx], sf);
             }
         }
         iter++;
     } while (changed && iter < 10);
 }


 /**
  * Choose between left/right and mid/side coding for every band of a channel
  * pair and store the choice in cpe->ms_mask.
  *
  * Returns immediately unless cpe->common_window is set. For each band that
  * is not flagged in zeroes[] in either channel, the signals
  * M = (L + R) * 0.5 and S = M - R are built for every window of the group.
  * The summed quantize_band_cost() of L and R (dist1) is compared with that
  * of M and S (dist2); ms_mask is set when dist2 is strictly smaller.
  *
  * s->scoefs is used as scratch: four consecutive 128-float slots hold the
  * abs_pow34_v() outputs for L, R, M and S.
  *
  * @param s   encoder context (psy thresholds, lambda, scratch buffer)
  * @param cpe channel pair to analyse; only ms_mask is written
  */
 static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
 {
     int start, i, w, w2, g;
     float M[128], S[128];
     float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
     SingleChannelElement *sce0 = &cpe->ch[0];
     SingleChannelElement *sce1 = &cpe->ch[1];
     if (!cpe->common_window)
         return;
     for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
         /*
          * Each window owns 128 coefficients, so a group that begins at
          * window w begins at coefficient w*128. Re-anchoring here (as
          * search_for_quantizers_twoloop_mips does) keeps the offset right
          * when an earlier group spans more than one window; carrying the
          * running band offset across groups would fall short in that case.
          */
         start = w*128;
         for (g = 0;  g < sce0->ics.num_swb; g++) {
             if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
                 float dist1 = 0.0f, dist2 = 0.0f;
                 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                     FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
                     FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
                     float minthr = FFMIN(band0->threshold, band1->threshold);
                     float maxthr = FFMAX(band0->threshold, band1->threshold);
                     const float *left  = sce0->coeffs + start + w2*128;
                     const float *right = sce1->coeffs + start + w2*128;

                     /* Unrolled by four; the loop steps i by 4 over the band. */
                     for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
                         M[i  ] = (left[i  ] + right[i  ]) * 0.5;
                         M[i+1] = (left[i+1] + right[i+1]) * 0.5;
                         M[i+2] = (left[i+2] + right[i+2]) * 0.5;
                         M[i+3] = (left[i+3] + right[i+3]) * 0.5;

                         S[i  ] =  M[i  ] - right[i  ];
                         S[i+1] =  M[i+1] - right[i+1];
                         S[i+2] =  M[i+2] - right[i+2];
                         S[i+3] =  M[i+3] - right[i+3];
                     }
                     abs_pow34_v(L34, left,  sce0->ics.swb_sizes[g]);
                     abs_pow34_v(R34, right, sce0->ics.swb_sizes[g]);
                     abs_pow34_v(M34, M,     sce0->ics.swb_sizes[g]);
                     abs_pow34_v(S34, S,     sce0->ics.swb_sizes[g]);

                     /* Cost of coding the band as independent left/right. */
                     dist1 += quantize_band_cost(s, left,
                                                 L34,
                                                 sce0->ics.swb_sizes[g],
                                                 sce0->sf_idx[(w+w2)*16+g],
                                                 sce0->band_type[(w+w2)*16+g],
                                                 s->lambda / band0->threshold, INFINITY, NULL);
                     dist1 += quantize_band_cost(s, right,
                                                 R34,
                                                 sce1->ics.swb_sizes[g],
                                                 sce1->sf_idx[(w+w2)*16+g],
                                                 sce1->band_type[(w+w2)*16+g],
                                                 s->lambda / band1->threshold, INFINITY, NULL);
                     /* Cost of coding it as mid/side: mid is weighted with the
                      * larger threshold, side with the smaller one. */
                     dist2 += quantize_band_cost(s, M,
                                                 M34,
                                                 sce0->ics.swb_sizes[g],
                                                 sce0->sf_idx[(w+w2)*16+g],
                                                 sce0->band_type[(w+w2)*16+g],
                                                 s->lambda / maxthr, INFINITY, NULL);
                     dist2 += quantize_band_cost(s, S,
                                                 S34,
                                                 sce1->ics.swb_sizes[g],
                                                 sce1->sf_idx[(w+w2)*16+g],
                                                 sce1->band_type[(w+w2)*16+g],
                                                 s->lambda / minthr, INFINITY, NULL);
                 }
                 cpe->ms_mask[w*16+g] = dist2 < dist1;
             }
             start += sce0->ics.swb_sizes[g];
         }
     }
 }

 #endif /*HAVE_MIPSFPU */


 /**
  * Pick a codebook for every scalefactor band of one window group with a
  * trellis search over bit cost, then write the resulting codebook/run
  * pairs to s->pb.
  *
  * path[swb+1][cb] holds the cost of reaching the end of band swb while
  * using codebook cb, together with the length of the current run of cb and
  * the codebook used before that run. After the forward pass the cheapest
  * final state is traced back into a stack of (codebook, run) pairs, which
  * are emitted in band order. sce->zeroes and sce->band_type are rewritten
  * from the chosen path.
  *
  * NOTE(review): path has 12 codebook columns and the first inner loop runs
  * up to sce->band_type[...], so this assumes the incoming band type never
  * exceeds 12 - confirm against callers.
  *
  * @param s         encoder context (bit writer, scoefs scratch buffer)
  * @param sce       channel being coded
  * @param win       index of the first window of the group
  * @param group_len number of windows in the group
  * @param lambda    not referenced by this implementation
  */
 static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
                                        int win, int group_len, const float lambda)
 {
     BandCodingPath path[120][12];
     int stackrun[120], stackcb[120], stack_len = 0;
     const int max_sfb   = sce->ics.max_sfb;
     const int run_bits  = sce->ics.num_windows == 1 ? 5 : 3;
     const int run_esc   = (1 << run_bits) - 1;
     const int short_win = sce->ics.num_windows == 8;
     float best_cost = INFINITY;
     int best_cb = 0;
     int offset  = win*128;
     int swb, cb, w, idx, ppos;

     abs_pow34_v(s->scoefs, sce->coeffs, 1024);

     /* Every codebook starts with the cost of opening a section. */
     for (cb = 0; cb < 12; cb++) {
         path[0][cb].cost     = run_bits+4;
         path[0][cb].prev_idx = -1;
         path[0][cb].run      = 0;
     }

     for (swb = 0; swb < max_sfb; swb++) {
         const BandCodingPath *cur  = path[swb];
         BandCodingPath       *next = path[swb + 1];
         const int size = sce->ics.swb_sizes[swb];

         if (sce->zeroes[win*16 + swb]) {
             /* Zeroed band: only codebook 0 is allowed and it costs no bits. */
             float stay  = cur[0].cost;
             float enter = best_cost + run_bits + 4;

             /* Extending the run costs extra when the run-length code grows. */
             if (   run_value_bits[short_win][cur[0].run]
                 != run_value_bits[short_win][cur[0].run + 1])
                 stay += run_bits;
             if (enter < stay) {
                 next[0].prev_idx = best_cb;
                 next[0].cost     = enter;
                 next[0].run      = 1;
             } else {
                 next[0].prev_idx = 0;
                 next[0].cost     = stay;
                 next[0].run      = cur[0].run + 1;
             }
             best_cost = next[0].cost;
             best_cb   = 0;
             for (cb = 1; cb < 12; cb++) {
                 next[cb].cost     = 61450;
                 next[cb].prev_idx = -1;
                 next[cb].run      = 0;
             }
         } else {
             const float prev_best_cost = best_cost;
             const int   prev_best_cb   = best_cb;
             const int   startcb        = sce->band_type[win*16 + swb];

             best_cost = INFINITY;
             best_cb   = 0;

             /* Codebooks below the band's current one are ruled out. */
             for (cb = 0; cb < startcb; cb++) {
                 next[cb].cost     = 61450;
                 next[cb].prev_idx = -1;
                 next[cb].run      = 0;
             }
             for (cb = startcb; cb < 12; cb++) {
                 float stay, enter;
                 float bits = 0.0f;

                 for (w = 0; w < group_len; w++)
                     bits += quantize_band_cost_bits(s, sce->coeffs + offset + w*128,
                                                     s->scoefs + offset + w*128, size,
                                                     sce->sf_idx[(win+w)*16+swb], cb,
                                                     0, INFINITY, NULL);
                 stay  = cur[cb].cost   + bits;
                 enter = prev_best_cost + bits + run_bits + 4;
                 if (   run_value_bits[short_win][cur[cb].run]
                     != run_value_bits[short_win][cur[cb].run + 1])
                     stay += run_bits;
                 if (enter < stay) {
                     next[cb].prev_idx = prev_best_cb;
                     next[cb].cost     = enter;
                     next[cb].run      = 1;
                 } else {
                     next[cb].prev_idx = cb;
                     next[cb].cost     = stay;
                     next[cb].run      = cur[cb].run + 1;
                 }
                 if (next[cb].cost < best_cost) {
                     best_cost = next[cb].cost;
                     best_cb   = cb;
                 }
             }
         }
         offset += size;
     }

     /* Cheapest final state. */
     idx = 0;
     for (cb = 1; cb < 12; cb++)
         if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
             idx = cb;

     /* Walk the path backwards, one run at a time. */
     ppos = max_sfb;
     while (ppos > 0) {
         int run;

         av_assert1(idx >= 0);
         cb  = idx;
         run = path[ppos][cb].run;
         stackrun[stack_len] = run;
         stackcb [stack_len] = cb;
         stack_len++;
         idx   = path[ppos - run + 1][cb].prev_idx;
         ppos -= run;
     }

     /* Emit the runs in band order (the stack holds them last-first). */
     offset = 0;
     while (stack_len-- > 0) {
         const int book = stackcb[stack_len];
         int count = stackrun[stack_len];
         int j;

         put_bits(&s->pb, 4, book);
         memset(sce->zeroes + win*16 + offset, !book, count);
         for (j = 0; j < count; j++)
             sce->band_type[win*16 + offset++] = book;
         /* Runs of run_esc or more are written as escape values plus a remainder. */
         for (; count >= run_esc; count -= run_esc)
             put_bits(&s->pb, run_bits, run_esc);
         put_bits(&s->pb, run_bits, count);
     }
 }

 #endif /* HAVE_INLINE_ASM */


 /**
  * Install the MIPS-specific coder callbacks on an encoder context.
  *
  * Compiles to an empty function without HAVE_INLINE_ASM. Otherwise, when
  * c->options.aac_coder equals 2 and HAVE_MIPSFPU is set, the
  * search_for_quantizers and search_for_ms callbacks of c->coder are
  * replaced; any other option value leaves the coder untouched.
  *
  * @param c encoder context whose coder table is patched in place
  */
 void ff_aac_coder_init_mips(AACEncContext *c) {
 #if HAVE_INLINE_ASM
     AACCoefficientsEncoder *e = c->coder;

     if (c->options.aac_coder != 2)
         return;

     /*
      * The two overrides below stay disabled due to failure with
      * fate-aac-pns-encode:
      *     e->quantize_and_encode_band = quantize_and_encode_band_mips;
      *     e->encode_window_bands_info = codebook_trellis_rate_mips;
      */
 #if HAVE_MIPSFPU
     e->search_for_quantizers = search_for_quantizers_twoloop_mips;
     e->search_for_ms         = search_for_ms_mips;
 #endif /* HAVE_MIPSFPU */
 #endif /* HAVE_INLINE_ASM */
 }

run_value_bits
static const uint8_t *const run_value_bits[2]
Definition: aacenctab.h:101

NULL
#define NULL
Definition: coverity.c:32

s
const char * s
Definition: avisynth_c.h:631

PutBitContext
Definition: put_bits.h:35

abs_pow34_v
static void abs_pow34_v(float *out, const float *in, const int size)
Definition: aacenc_utils.h:39

put_bits
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:167

SCALE_DIFF_ZERO
#define SCALE_DIFF_ZERO
codebook index corresponding to zero scalefactor indices difference
Definition: aac.h:152

g
const char * g
Definition: vf_curves.c:108

quantize_and_encode_band_cost
#define quantize_and_encode_band_cost(s, pb, in, quant, scaled, size, scale_idx, cb, lambda, uplim, bits, rtz)
Definition: aacenc_quantization.h:236

FFPsyChannel::psy_bands
FFPsyBand psy_bands[PSY_MAX_BANDS]
channel bands information
Definition: psymodel.h:48

SCALE_MAX_DIFF
#define SCALE_MAX_DIFF
maximum scalefactor difference allowed by standard
Definition: aac.h:151

b
const char * b
Definition: vf_curves.c:109

AACEncContext::coder
AACCoefficientsEncoder * coder
Definition: aacenc.h:98

ChannelElement::common_window
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
Definition: aac.h:273

BandCodingPath::prev_idx
int prev_idx
pointer to the previous path point
Definition: aaccoder.c:67

ChannelElement::ms_mask
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Definition: aac.h:276

AACEncContext::lambda
float lambda
Definition: aacenc.h:101

ROUND_TO_ZERO
#define ROUND_TO_ZERO
Definition: aacenc_utils.h:36

band
Definition: af_biquads.c:76

mathematics.h

t7
#define t7
Definition: regdef.h:35

av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37

cb
static double cb(void *priv, double x, double y)
Definition: vf_geq.c:97

AACEncContext::options
AACEncOptions options
encoding options
Definition: aacenc.h:82

M
#define M(a, b)
Definition: vp3dsp.c:44

AACEncContext
AAC encoder context.
Definition: aacenc.h:80

bits
uint8_t bits
Definition: crc.c:295

uint8_t
uint8_t
Definition: audio_convert.c:194

ChannelElement::ch
SingleChannelElement ch[2]
Definition: aac.h:279

ff_aac_pow34sf_tab
float ff_aac_pow34sf_tab[428]
Definition: aac_tablegen.h:33

ff_aac_scalefactor_bits
const uint8_t ff_aac_scalefactor_bits[121]
Definition: aactab.c:80

run_bits
static const uint8_t run_bits[7][16]
Definition: h264_cavlc.c:229

t0
#define t0
Definition: regdef.h:28

ff_aac_coder_init_mips
void ff_aac_coder_init_mips(AACEncContext *c)
Definition: aaccoder_mips.c:2569

c1
static const uint64_t c1
Definition: murmur3.c:49

float.h

FFPsyBand
single band psychoacoustic information
Definition: psymodel.h:37

size
ptrdiff_t size
Definition: opengl_enc.c:101

SingleChannelElement::coeffs
float coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aac.h:258

run_value_bits_short
static const uint8_t run_value_bits_short[16]
bits needed to code codebook run value for short windows
Definition: aacenctab.h:84

S
#define S(s, c, i)
Definition: flacdsp_template.c:46

IndividualChannelStream::num_windows
int num_windows
Definition: aac.h:181

t1
#define t1
Definition: regdef.h:29

IndividualChannelStream::max_sfb
uint8_t max_sfb
number of scalefactor bands per group
Definition: aac.h:172

t3
#define t3
Definition: regdef.h:31

FFPsyBand::energy
float energy
Definition: psymodel.h:39

count
GLsizei count
Definition: opengl_enc.c:109

IndividualChannelStream::num_swb
int num_swb
number of scalefactor window bands
Definition: aac.h:180

FFMAX
#define FFMAX(a, b)
Definition: common.h:79

BandCodingPath::cost
float cost
path cost
Definition: aaccoder.c:68

avcodec.h
Libavcodec external API header.

AACCoefficientsEncoder::search_for_quantizers
void(* search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, SingleChannelElement *sce, const float lambda)
Definition: aacenc.h:56

ff_aac_codebook_vectors
const float *const ff_aac_codebook_vectors[]
Definition: aactab.c:906

aacenc.h

POW_SF2_ZERO
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
Definition: aac_tablegen_decl.h:26

SCALE_DIV_512
#define SCALE_DIV_512
scalefactor difference that corresponds to scale difference in 512 times
Definition: aac.h:148

AACCoefficientsEncoder
Definition: aacenc.h:55

AVCodecContext::bit_rate
int bit_rate
the average bitrate
Definition: avcodec.h:1567

av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:53

AACEncContext::cur_channel
int cur_channel
Definition: aacenc.h:99

ff_aac_spectral_bits
const uint8_t *const ff_aac_spectral_bits[11]
Definition: aactab.c:410

FFMIN
#define FFMIN(a, b)
Definition: common.h:81

void
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)

aac.h
AAC definitions and structures.

AACEncContext::pb
PutBitContext pb
Definition: aacenc.h:83

ROUND_STANDARD
#define ROUND_STANDARD
Definition: aacenc_utils.h:35

av_log2
#define av_log2
Definition: intmath.h:100

INFINITY
#define INFINITY
Definition: math.h:27

AACCoefficientsEncoder::search_for_ms
void(* search_for_ms)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:70

find_min_book
static int find_min_book(float maxval, int sf)
Definition: aacenc_utils.h:86

AVCodecContext::sample_rate
int sample_rate
samples per second
Definition: avcodec.h:2262

AVCodecContext
main external API structure.
Definition: avcodec.h:1502

SingleChannelElement::ics
IndividualChannelStream ics
Definition: aac.h:246

in
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> in
Definition: audio_convert.c:194

cbrtf
static av_always_inline float cbrtf(float x)
Definition: libm.h:59

BandCodingPath
structure used in optimal codebook search
Definition: aaccoder.c:66

IndividualChannelStream::group_len
uint8_t group_len[8]
Definition: aac.h:176

libm.h
Replacements for frequently missing libm functions.

option
option
Definition: libkvazaar.c:224

IndividualChannelStream::swb_sizes
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:179

t5
#define t5
Definition: regdef.h:33

AACEncContext::psy
FFPsyContext psy
Definition: aacenc.h:96

quantize_and_encode_band_cost_arr
static float(*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits)
Definition: aacenc_quantization.h:188

SingleChannelElement::zeroes
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:254

SingleChannelElement::sf_idx
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:253

AACEncOptions::aac_coder
int aac_coder
Definition: aacenc.h:46

SCALE_ONE_POS
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
Definition: aac.h:149

t6
#define t6
Definition: regdef.h:34

SingleChannelElement
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:245

log2f
#define log2f(x)
Definition: libm.h:127

c
static double c[64]
Definition: vsrc_mptestsrc.c:87

AACEncContext::cpe
ChannelElement * cpe
channel elements
Definition: aacenc.h:95

ff_aac_pow2sf_tab
float ff_aac_pow2sf_tab[428]
Definition: aac_tablegen.h:32

c2
static const uint64_t c2
Definition: murmur3.c:50

ChannelElement
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:270

ff_aac_spectral_codes
const uint16_t *const ff_aac_spectral_codes[11]
Definition: aactab.c:405

t4
#define t4
Definition: regdef.h:32

len
int len
Definition: vorbis_enc_data.h:452

AVCodecContext::channels
int channels
number of audio channels
Definition: avcodec.h:2263

BandCodingPath::run
int run
Definition: aaccoder.c:69

run_value_bits_long
static const uint8_t run_value_bits_long[64]
bits needed to code codebook run value for long windows
Definition: aacenctab.h:76

FFPsyContext::ch
FFPsyChannel * ch
single channel information
Definition: psymodel.h:80

SingleChannelElement::band_type
enum BandType band_type[128]
band types
Definition: aac.h:249

out
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> out
Definition: audio_convert.c:194

find_max_val
static float find_max_val(int group_len, int swb_size, const float *scaled)
Definition: aacenc_utils.h:74

start
void INT64 start
Definition: avisynth_c.h:553

FFPsyBand::threshold
float threshold
Definition: psymodel.h:40

aactab.h
AAC data declarations.

AACEncContext::scoefs
float scoefs[1024]
scaled coefficients
Definition: aacenc.h:104

quantize_band_cost
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, int rtz)
Definition: aacenc_quantization.h:243

t2
#define t2
Definition: regdef.h:30

a
a
Definition: h264pred_template.c:468

put_bits.h
bitstream writer API