00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041 #include "diracdsp.h"
00042
/* Clipping table to [0,255] with MAX_NEG_CROP guard entries on each side;
 * contents are filled at runtime by the dsputil init code. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Squares of -256..255 (indexed with a +256 offset); filled at runtime. */
uint32_t ff_squareTbl[512] = {0, };
00045
/* Instantiate the bit-depth-templated DSP routines for 9-, 10- and 8-bit
 * samples.  BIT_DEPTH is deliberately left defined as 8 after the last
 * include so the rest of this file uses the 8-bit helpers. */
#define BIT_DEPTH 9
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 8
#include "dsputil_template.c"
00056
00057
/* SWAR helpers: the byte value 0x7f / 0x80 replicated into every byte
 * lane of a native unsigned long (~0UL/255 == 0x0101...01). */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00060
/* Standard 8x8 zigzag scan order (natural order -> scan position). */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00071
00072
00073
/* Zigzag scan variant for interlaced (2-4-8) macroblock coding. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00084
00085
/* Inverse zigzag table (16-bit entries, 16-byte aligned); filled at runtime. */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
00087
/* Alternate horizontal scan order (used e.g. by MPEG-2). */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
00098
/* Alternate vertical scan order (used e.g. by MPEG-2 interlaced coding). */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00109
00110
/* Coefficient permutation used by the simple_idct MMX implementation
 * (FF_SIMPLE_IDCT_PERM). */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
00121
/* Within-row coefficient reordering used by the SSE2 IDCT (FF_SSE2_IDCT_PERM). */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00123
/**
 * Initialize a ScanTable from a natural-order scan pattern.
 *
 * Applies the IDCT coefficient permutation to src_scantable and
 * precomputes raster_end[]: for each scan position, the largest
 * permuted index seen up to and including that position.
 *
 * @param permutation   64-entry IDCT coefficient permutation
 *                      (see ff_init_scantable_permutation())
 * @param st            table to fill; keeps a pointer to src_scantable
 * @param src_scantable scan pattern in natural (unpermuted) order
 */
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int i;
    int end;

    st->scantable= src_scantable;

    for(i=0; i<64; i++){
        int j;
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
#if ARCH_PPC
        /* inverse mapping, only consumed by the PPC/AltiVec code paths */
        st->inverse[j] = i;
#endif
    }

    /* raster_end[i] = max permuted index over scan positions 0..i */
    end=-1;
    for(i=0; i<64; i++){
        int j;
        j = st->permutated[i];
        if(j>end) end=j;
        st->raster_end[i]= end;
    }
}
00147
/**
 * Fill a 64-entry IDCT coefficient permutation for the given
 * permutation type (FF_*_IDCT_PERM), mapping natural coefficient
 * order to the order expected by the selected IDCT implementation.
 * Logs an error and leaves the table untouched for unknown types.
 */
void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                   int idct_permutation_type)
{
    int i;

    switch(idct_permutation_type){
    case FF_NO_IDCT_PERM:
        /* identity: IDCT consumes coefficients in natural order */
        for(i=0; i<64; i++)
            idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        /* swap the two low column bit pairs within each row */
        for(i=0; i<64; i++)
            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        /* full 8x8 transpose: swap row and column indices */
        for(i=0; i<64; i++)
            idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        /* partial transpose: swap only the low 2 bits of row/column */
        for(i=0; i<64; i++)
            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        /* keep rows, reorder coefficients within each row */
        for(i=0; i<64; i++)
            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
00182
/* Sum of all 256 samples of a 16x16 pixel block; consecutive rows are
 * line_size bytes apart. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
00204
/* Sum of squared sample values over a 16x16 block (the L2 "norm" of the
 * block without mean removal).  Uses the ff_squareTbl lookup table,
 * reading pixels word-at-a-time on the fast path. */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* straightforward byte-wise reference version */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
            /* NOTE(review): the casts below type-pun uint8_t* to wider
             * integer pointers; presumably the buffer alignment and the
             * compiler flags used by the project make this safe here —
             * strict-aliasing/alignment should be confirmed before reuse. */
#if HAVE_FAST_64BIT
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
00252
/* Byte-swap w 32-bit words from src into dst (buffers may be the same).
 * The bulk is processed eight words at a time; a scalar loop handles
 * the remainder. */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n = 0;

    while (n + 8 <= w) {
        dst[n]     = av_bswap32(src[n]);
        dst[n + 1] = av_bswap32(src[n + 1]);
        dst[n + 2] = av_bswap32(src[n + 2]);
        dst[n + 3] = av_bswap32(src[n + 3]);
        dst[n + 4] = av_bswap32(src[n + 4]);
        dst[n + 5] = av_bswap32(src[n + 5]);
        dst[n + 6] = av_bswap32(src[n + 6]);
        dst[n + 7] = av_bswap32(src[n + 7]);
        n += 8;
    }
    while (n < w) {
        dst[n] = av_bswap32(src[n]);
        n++;
    }
}
00270
/* Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;

    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
00276
/* Sum of squared errors between two 4-pixel-wide columns of height h.
 * v is an unused context pointer (comparator signature); ff_squareTbl+256
 * maps a signed difference in [-255,255] to its square. */
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
00293
/* Sum of squared errors between two 8-pixel-wide blocks of height h;
 * see sse4_c for the lookup-table trick. */
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        s += sq[pix1[4] - pix2[4]];
        s += sq[pix1[5] - pix2[5]];
        s += sq[pix1[6] - pix2[6]];
        s += sq[pix1[7] - pix2[7]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
00314
/* Sum of squared errors between two 16-pixel-wide blocks of height h;
 * see sse4_c for the lookup-table trick. */
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[ 0] - pix2[ 0]];
        s += sq[pix1[ 1] - pix2[ 1]];
        s += sq[pix1[ 2] - pix2[ 2]];
        s += sq[pix1[ 3] - pix2[ 3]];
        s += sq[pix1[ 4] - pix2[ 4]];
        s += sq[pix1[ 5] - pix2[ 5]];
        s += sq[pix1[ 6] - pix2[ 6]];
        s += sq[pix1[ 7] - pix2[ 7]];
        s += sq[pix1[ 8] - pix2[ 8]];
        s += sq[pix1[ 9] - pix2[ 9]];
        s += sq[pix1[10] - pix2[10]];
        s += sq[pix1[11] - pix2[11]];
        s += sq[pix1[12] - pix2[12]];
        s += sq[pix1[13] - pix2[13]];
        s += sq[pix1[14] - pix2[14]];
        s += sq[pix1[15] - pix2[15]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
00344
00345 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00346 const uint8_t *s2, int stride){
00347 int i;
00348
00349
00350 for(i=0;i<8;i++) {
00351 block[0] = s1[0] - s2[0];
00352 block[1] = s1[1] - s2[1];
00353 block[2] = s1[2] - s2[2];
00354 block[3] = s1[3] - s2[3];
00355 block[4] = s1[4] - s2[4];
00356 block[5] = s1[5] - s2[5];
00357 block[6] = s1[6] - s2[6];
00358 block[7] = s1[7] - s2[7];
00359 s1 += stride;
00360 s2 += stride;
00361 block += 8;
00362 }
00363 }
00364
00365
/* Store an 8x8 block of DCT output as pixels, clamping each value to
 * [0,255] via the ff_cropTbl lookup (cm tolerates indices in
 * [-MAX_NEG_CROP, 255+MAX_NEG_CROP]). */
void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                             int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;


    for(i=0;i<8;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];
        pixels[4] = cm[block[4]];
        pixels[5] = cm[block[5]];
        pixels[6] = cm[block[6]];
        pixels[7] = cm[block[7]];

        pixels += line_size;
        block += 8;
    }
}
00387
/* 4-pixel-wide variant of ff_put_pixels_clamped_c (block rows are still
 * 8 coefficients apart). */
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;


    for(i=0;i<4;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];

        pixels += line_size;
        block += 8;
    }
}
00405
/* 2-pixel-wide variant of ff_put_pixels_clamped_c (block rows are still
 * 8 coefficients apart). */
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;


    for(i=0;i<2;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];

        pixels += line_size;
        block += 8;
    }
}
00421
00422 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00423 uint8_t *restrict pixels,
00424 int line_size)
00425 {
00426 int i, j;
00427
00428 for (i = 0; i < 8; i++) {
00429 for (j = 0; j < 8; j++) {
00430 if (*block < -128)
00431 *pixels = 0;
00432 else if (*block > 127)
00433 *pixels = 255;
00434 else
00435 *pixels = (uint8_t)(*block + 128);
00436 block++;
00437 pixels++;
00438 }
00439 pixels += (line_size - 8);
00440 }
00441 }
00442
/* Add an 8x8 block of DCT output to existing pixels, clamping each sum
 * to [0,255] via the ff_cropTbl lookup. */
void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                             int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;


    for(i=0;i<8;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels[4] = cm[pixels[4] + block[4]];
        pixels[5] = cm[pixels[5] + block[5]];
        pixels[6] = cm[pixels[6] + block[6]];
        pixels[7] = cm[pixels[7] + block[7]];
        pixels += line_size;
        block += 8;
    }
}
00463
/* 4-pixel-wide variant of ff_add_pixels_clamped_c. */
static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                          int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;


    for(i=0;i<4;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels += line_size;
        block += 8;
    }
}
00480
/* 2-pixel-wide variant of ff_add_pixels_clamped_c. */
static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                          int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;


    for(i=0;i<2;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels += line_size;
        block += 8;
    }
}
00495
00496 static int sum_abs_dctelem_c(DCTELEM *block)
00497 {
00498 int sum=0, i;
00499 for(i=0; i<64; i++)
00500 sum+= FFABS(block[i]);
00501 return sum;
00502 }
00503
/* Fill an h-row, 16-pixel-wide region with a constant byte value;
 * rows are line_size bytes apart. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int y;

    for (y = 0; y < h; y++, row += line_size)
        memset(row, value, 16);
}
00513
/* Fill an h-row, 8-pixel-wide region with a constant byte value;
 * rows are line_size bytes apart. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int y;

    for (y = 0; y < h; y++, row += line_size)
        memset(row, value, 8);
}
00523
/* Rounded averages of 2 / 4 byte values (round half up).
 * NOTE: arguments are deliberately not parenthesized in the original;
 * callers pass simple expressions only. */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
00526
/* 1/16-pel bilinear interpolation for one 8-pixel-wide block (GMC with a
 * single motion vector): A..D are the bilinear weights derived from the
 * fractional position (x16, y16); each output pixel is the weighted sum of
 * its four source neighbours, biased by 'rounder' and scaled down by 256. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(    x16)*(16-y16);
    const int C=(16-x16)*(    y16);
    const int D=(    x16)*(    y16);
    int i;

    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
00549
/* Global motion compensation with a full affine motion model.
 * For each destination pixel of an 8-wide, h-tall block, the source
 * position is computed from the 16.16 fixed-point affine parameters
 * (ox,oy origin; dxx,dxy,dyx,dyy gradients), then bilinearly
 * interpolated with shift-bit fractional precision.  Source coordinates
 * outside [0,width]x[0,height] are clamped to the picture edge, with
 * the interpolation degenerating to 1-D or nearest-pixel as needed.
 * r is the rounding bias added before the final >>(shift*2). */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* convert to inclusive maximum coordinates for the clamping tests */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* split the 16.16 position into integer and fractional parts */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: 2-D bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, interpolate horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, interpolate vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* both outside: nearest clamped pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
00607
/* Thirdpel MC, full-pel position: plain copy, dispatched on block width
 * to the 8-bit put_pixelsN helpers instantiated from dsputil_template.c. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_8_c (dst, src, stride, height); break;
    case 4: put_pixels4_8_c (dst, src, stride, height); break;
    case 8: put_pixels8_8_c (dst, src, stride, height); break;
    case 16:put_pixels16_8_c(dst, src, stride, height); break;
    }
}
00616
/* Thirdpel MC, horizontal 1/3 position: dst ~= (2*a + b) / 3, computed
 * with the 683/2048 fixed-point approximation of 1/3. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00627
/* Thirdpel MC, horizontal 2/3 position: dst ~= (a + 2*b) / 3 via the
 * 683/2048 fixed-point approximation of 1/3. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00638
/* Thirdpel MC, vertical 1/3 position: dst ~= (2*top + bottom) / 3 via
 * the 683/2048 fixed-point approximation of 1/3. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00649
/* Thirdpel MC, (1/3, 1/3) position: 2-D weighted average of the four
 * neighbours with weights 4/3/3/2, using 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (4 * src[col] + 3 * src[col + 1]
                              + 3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00660
/* Thirdpel MC, (1/3, 2/3) position: 2-D weighted average of the four
 * neighbours with weights 3/2/4/3, using 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (3 * src[col] + 2 * src[col + 1]
                              + 4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00671
/* Thirdpel MC, vertical 2/3 position: dst ~= (top + 2*bottom) / 3 via
 * the 683/2048 fixed-point approximation of 1/3. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00682
/* Thirdpel MC, (2/3, 1/3) position: 2-D weighted average of the four
 * neighbours with weights 3/4/2/3, using 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (3 * src[col] + 4 * src[col + 1]
                              + 2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00693
/* Thirdpel MC, (2/3, 2/3) position: 2-D weighted average of the four
 * neighbours with weights 2/3/3/4, using 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (2 * src[col] + 3 * src[col + 1]
                              + 3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00704
/* Thirdpel MC, full-pel position, averaging variant: dispatch on block
 * width to the 8-bit avg_pixelsN helpers from dsputil_template.c. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_8_c (dst, src, stride, height); break;
    case 4: avg_pixels4_8_c (dst, src, stride, height); break;
    case 8: avg_pixels8_8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_8_c(dst, src, stride, height); break;
    }
}
00713
/* Averaging thirdpel MC, horizontal 1/3 position: interpolate as in
 * put_tpel_pixels_mc10_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00724
/* Averaging thirdpel MC, horizontal 2/3 position: interpolate as in
 * put_tpel_pixels_mc20_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00735
/* Averaging thirdpel MC, vertical 1/3 position: interpolate as in
 * put_tpel_pixels_mc01_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00746
/* Averaging thirdpel MC, (1/3, 1/3) position: interpolate as in
 * put_tpel_pixels_mc11_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (2731 * (4 * src[col] + 3 * src[col + 1]
                           + 3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00757
/* Averaging thirdpel MC, (1/3, 2/3) position: interpolate as in
 * put_tpel_pixels_mc12_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (2731 * (3 * src[col] + 2 * src[col + 1]
                           + 4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00768
/* Averaging thirdpel MC, vertical 2/3 position: interpolate as in
 * put_tpel_pixels_mc02_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00779
/* Averaging thirdpel MC, (2/3, 1/3) position: interpolate as in
 * put_tpel_pixels_mc21_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (2731 * (3 * src[col] + 4 * src[col + 1]
                           + 2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00790
/* Averaging thirdpel MC, (2/3, 2/3) position: interpolate as in
 * put_tpel_pixels_mc22_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (2731 * (2 * src[col] + 3 * src[col + 1]
                           + 3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00801
00802 #define QPEL_MC(r, OPNAME, RND, OP) \
00803 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00804 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00805 int i;\
00806 for(i=0; i<h; i++)\
00807 {\
00808 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00809 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00810 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00811 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00812 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00813 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00814 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00815 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00816 dst+=dstStride;\
00817 src+=srcStride;\
00818 }\
00819 }\
00820 \
00821 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00822 const int w=8;\
00823 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00824 int i;\
00825 for(i=0; i<w; i++)\
00826 {\
00827 const int src0= src[0*srcStride];\
00828 const int src1= src[1*srcStride];\
00829 const int src2= src[2*srcStride];\
00830 const int src3= src[3*srcStride];\
00831 const int src4= src[4*srcStride];\
00832 const int src5= src[5*srcStride];\
00833 const int src6= src[6*srcStride];\
00834 const int src7= src[7*srcStride];\
00835 const int src8= src[8*srcStride];\
00836 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00837 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00838 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00839 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00840 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00841 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00842 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00843 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00844 dst++;\
00845 src++;\
00846 }\
00847 }\
00848 \
00849 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00850 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00851 int i;\
00852 \
00853 for(i=0; i<h; i++)\
00854 {\
00855 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00856 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00857 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00858 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00859 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00860 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00861 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00862 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00863 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00864 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00865 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00866 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00867 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00868 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00869 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00870 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00871 dst+=dstStride;\
00872 src+=srcStride;\
00873 }\
00874 }\
00875 \
00876 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00877 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00878 int i;\
00879 const int w=16;\
00880 for(i=0; i<w; i++)\
00881 {\
00882 const int src0= src[0*srcStride];\
00883 const int src1= src[1*srcStride];\
00884 const int src2= src[2*srcStride];\
00885 const int src3= src[3*srcStride];\
00886 const int src4= src[4*srcStride];\
00887 const int src5= src[5*srcStride];\
00888 const int src6= src[6*srcStride];\
00889 const int src7= src[7*srcStride];\
00890 const int src8= src[8*srcStride];\
00891 const int src9= src[9*srcStride];\
00892 const int src10= src[10*srcStride];\
00893 const int src11= src[11*srcStride];\
00894 const int src12= src[12*srcStride];\
00895 const int src13= src[13*srcStride];\
00896 const int src14= src[14*srcStride];\
00897 const int src15= src[15*srcStride];\
00898 const int src16= src[16*srcStride];\
00899 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00900 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00901 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00902 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00903 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00904 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00905 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00906 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00907 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00908 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00909 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00910 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00911 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00912 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00913 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00914 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00915 dst++;\
00916 src++;\
00917 }\
00918 }\
00919 \
00920 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00921 uint8_t half[64];\
00922 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00923 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00924 }\
00925 \
00926 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00927 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00928 }\
00929 \
00930 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00931 uint8_t half[64];\
00932 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00933 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00934 }\
00935 \
00936 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00937 uint8_t full[16*9];\
00938 uint8_t half[64];\
00939 copy_block9(full, src, 16, stride, 9);\
00940 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00941 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00942 }\
00943 \
00944 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00945 uint8_t full[16*9];\
00946 copy_block9(full, src, 16, stride, 9);\
00947 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00948 }\
00949 \
00950 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00951 uint8_t full[16*9];\
00952 uint8_t half[64];\
00953 copy_block9(full, src, 16, stride, 9);\
00954 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00955 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00956 }\
00957 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00958 uint8_t full[16*9];\
00959 uint8_t halfH[72];\
00960 uint8_t halfV[64];\
00961 uint8_t halfHV[64];\
00962 copy_block9(full, src, 16, stride, 9);\
00963 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00964 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00965 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00966 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00967 }\
00968 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00969 uint8_t full[16*9];\
00970 uint8_t halfH[72];\
00971 uint8_t halfHV[64];\
00972 copy_block9(full, src, 16, stride, 9);\
00973 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00974 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
00975 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00976 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00977 }\
00978 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00979 uint8_t full[16*9];\
00980 uint8_t halfH[72];\
00981 uint8_t halfV[64];\
00982 uint8_t halfHV[64];\
00983 copy_block9(full, src, 16, stride, 9);\
00984 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00985 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00986 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00987 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00988 }\
00989 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00990 uint8_t full[16*9];\
00991 uint8_t halfH[72];\
00992 uint8_t halfHV[64];\
00993 copy_block9(full, src, 16, stride, 9);\
00994 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00995 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
00996 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00997 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00998 }\
00999 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01000 uint8_t full[16*9];\
01001 uint8_t halfH[72];\
01002 uint8_t halfV[64];\
01003 uint8_t halfHV[64];\
01004 copy_block9(full, src, 16, stride, 9);\
01005 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01006 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01007 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01008 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01009 }\
01010 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01011 uint8_t full[16*9];\
01012 uint8_t halfH[72];\
01013 uint8_t halfHV[64];\
01014 copy_block9(full, src, 16, stride, 9);\
01015 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01016 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01017 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01018 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01019 }\
01020 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01021 uint8_t full[16*9];\
01022 uint8_t halfH[72];\
01023 uint8_t halfV[64];\
01024 uint8_t halfHV[64];\
01025 copy_block9(full, src, 16, stride, 9);\
01026 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01027 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01028 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01029 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01030 }\
01031 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01032 uint8_t full[16*9];\
01033 uint8_t halfH[72];\
01034 uint8_t halfHV[64];\
01035 copy_block9(full, src, 16, stride, 9);\
01036 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01037 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01038 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01039 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01040 }\
01041 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01042 uint8_t halfH[72];\
01043 uint8_t halfHV[64];\
01044 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01045 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01046 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01047 }\
01048 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01049 uint8_t halfH[72];\
01050 uint8_t halfHV[64];\
01051 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01052 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01053 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01054 }\
01055 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01056 uint8_t full[16*9];\
01057 uint8_t halfH[72];\
01058 uint8_t halfV[64];\
01059 uint8_t halfHV[64];\
01060 copy_block9(full, src, 16, stride, 9);\
01061 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01062 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01063 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01064 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01065 }\
01066 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01067 uint8_t full[16*9];\
01068 uint8_t halfH[72];\
01069 copy_block9(full, src, 16, stride, 9);\
01070 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01071 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01072 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01073 }\
01074 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01075 uint8_t full[16*9];\
01076 uint8_t halfH[72];\
01077 uint8_t halfV[64];\
01078 uint8_t halfHV[64];\
01079 copy_block9(full, src, 16, stride, 9);\
01080 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01081 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01082 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01083 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01084 }\
01085 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01086 uint8_t full[16*9];\
01087 uint8_t halfH[72];\
01088 copy_block9(full, src, 16, stride, 9);\
01089 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01090 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01091 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01092 }\
01093 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01094 uint8_t halfH[72];\
01095 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01096 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01097 }\
01098 \
01099 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01100 uint8_t half[256];\
01101 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01102 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01103 }\
01104 \
01105 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01106 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01107 }\
01108 \
01109 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01110 uint8_t half[256];\
01111 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01112 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01113 }\
01114 \
01115 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01116 uint8_t full[24*17];\
01117 uint8_t half[256];\
01118 copy_block17(full, src, 24, stride, 17);\
01119 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01120 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01121 }\
01122 \
01123 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01124 uint8_t full[24*17];\
01125 copy_block17(full, src, 24, stride, 17);\
01126 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01127 }\
01128 \
01129 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01130 uint8_t full[24*17];\
01131 uint8_t half[256];\
01132 copy_block17(full, src, 24, stride, 17);\
01133 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01134 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01135 }\
01136 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01137 uint8_t full[24*17];\
01138 uint8_t halfH[272];\
01139 uint8_t halfV[256];\
01140 uint8_t halfHV[256];\
01141 copy_block17(full, src, 24, stride, 17);\
01142 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01143 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01144 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01145 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01146 }\
01147 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01148 uint8_t full[24*17];\
01149 uint8_t halfH[272];\
01150 uint8_t halfHV[256];\
01151 copy_block17(full, src, 24, stride, 17);\
01152 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01153 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01154 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01155 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01156 }\
01157 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01158 uint8_t full[24*17];\
01159 uint8_t halfH[272];\
01160 uint8_t halfV[256];\
01161 uint8_t halfHV[256];\
01162 copy_block17(full, src, 24, stride, 17);\
01163 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01164 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01165 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01166 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01167 }\
01168 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01169 uint8_t full[24*17];\
01170 uint8_t halfH[272];\
01171 uint8_t halfHV[256];\
01172 copy_block17(full, src, 24, stride, 17);\
01173 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01174 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01175 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01176 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01177 }\
01178 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01179 uint8_t full[24*17];\
01180 uint8_t halfH[272];\
01181 uint8_t halfV[256];\
01182 uint8_t halfHV[256];\
01183 copy_block17(full, src, 24, stride, 17);\
01184 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01185 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01186 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01187 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01188 }\
01189 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01190 uint8_t full[24*17];\
01191 uint8_t halfH[272];\
01192 uint8_t halfHV[256];\
01193 copy_block17(full, src, 24, stride, 17);\
01194 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01195 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01196 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01197 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01198 }\
01199 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01200 uint8_t full[24*17];\
01201 uint8_t halfH[272];\
01202 uint8_t halfV[256];\
01203 uint8_t halfHV[256];\
01204 copy_block17(full, src, 24, stride, 17);\
01205 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01206 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01207 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01208 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01209 }\
01210 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01211 uint8_t full[24*17];\
01212 uint8_t halfH[272];\
01213 uint8_t halfHV[256];\
01214 copy_block17(full, src, 24, stride, 17);\
01215 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01216 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01217 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01218 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01219 }\
01220 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01221 uint8_t halfH[272];\
01222 uint8_t halfHV[256];\
01223 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01224 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01225 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01226 }\
01227 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01228 uint8_t halfH[272];\
01229 uint8_t halfHV[256];\
01230 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01231 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01232 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01233 }\
01234 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01235 uint8_t full[24*17];\
01236 uint8_t halfH[272];\
01237 uint8_t halfV[256];\
01238 uint8_t halfHV[256];\
01239 copy_block17(full, src, 24, stride, 17);\
01240 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01241 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01242 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01243 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01244 }\
01245 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01246 uint8_t full[24*17];\
01247 uint8_t halfH[272];\
01248 copy_block17(full, src, 24, stride, 17);\
01249 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01250 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01251 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01252 }\
01253 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01254 uint8_t full[24*17];\
01255 uint8_t halfH[272];\
01256 uint8_t halfV[256];\
01257 uint8_t halfHV[256];\
01258 copy_block17(full, src, 24, stride, 17);\
01259 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01260 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01261 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01262 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01263 }\
01264 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01265 uint8_t full[24*17];\
01266 uint8_t halfH[272];\
01267 copy_block17(full, src, 24, stride, 17);\
01268 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01269 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01270 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01271 }\
01272 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01273 uint8_t halfH[272];\
01274 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01275 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01276 }
01277
/* Pixel-store operators plugged into QPEL_MC: each maps a 17.5-ish
 * fixed-point filter result b back to 8 bits via the crop table
 * ((b+16)>>5 rounds, (b+15)>>5 is the no-rounding variant), either
 * storing it (put) or averaging with the existing pixel (avg). */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the qpel MC function families; note op_avg_no_rnd is
 * defined above but no avg_no_rnd family is instantiated here. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
01291
/* The (0,0) quarter-pel position is a plain full-pel copy/average, so
 * alias the mc00 entries to the generic pixel helpers. Fixed the
 * no_rnd 16x16 alias to ff_put_pixels16x16_c: the previous
 * ff_put_pixels16x16_8_c did not match the ff_* wrapper naming used by
 * every sibling alias here (cf. put_qpel16_mc00_c just above). */
#define put_qpel8_mc00_c  ff_put_pixels8x8_c
#define avg_qpel8_mc00_c  ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
01298
01299 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01300 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01301 int i;
01302
01303 for(i=0; i<h; i++){
01304 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01305 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01306 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01307 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01308 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01309 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01310 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01311 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01312 dst+=dstStride;
01313 src+=srcStride;
01314 }
01315 }
01316
#if CONFIG_RV40_DECODER
/* RV40 (3,3) quarter-pel positions: forwarded to the plain 2-D
 * half-pel (xy2) average helpers, for 16x16 and 8x8, put and avg. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif
01331
#if CONFIG_DIRAC_DECODER
/*
 * Dirac motion-compensation wrappers. Each function receives up to
 * five source pointers (src[0..4]); these C versions use src[0] (plus
 * src[1] for _l2 and src[0..3] for _l4 averaging) and forward to the
 * generic 8-bit pixel helpers. 32-wide blocks are handled as two
 * adjacent 16-wide calls. Instantiated below for put and avg.
 */
#define DIRAC_MC(OPNAME)\
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
}
/* instantiate the put and avg flavours */
DIRAC_MC(put)
DIRAC_MC(avg)
#endif
01376
01377 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01378 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01379 int i;
01380
01381 for(i=0; i<w; i++){
01382 const int src_1= src[ -srcStride];
01383 const int src0 = src[0 ];
01384 const int src1 = src[ srcStride];
01385 const int src2 = src[2*srcStride];
01386 const int src3 = src[3*srcStride];
01387 const int src4 = src[4*srcStride];
01388 const int src5 = src[5*srcStride];
01389 const int src6 = src[6*srcStride];
01390 const int src7 = src[7*srcStride];
01391 const int src8 = src[8*srcStride];
01392 const int src9 = src[9*srcStride];
01393 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01394 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01395 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01396 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01397 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01398 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01399 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01400 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01401 src++;
01402 dst++;
01403 }
01404 }
01405
/* (1,0) quarter-pel left: average the source with its horizontal half-pel. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, hbuf, stride, stride, 8, 8);
}
01411
/* (2,0) half-pel horizontal: the h lowpass written straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01415
/* (3,0) quarter-pel right: average src+1 with the horizontal half-pel. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src + 1, hbuf, stride, stride, 8, 8);
}
01421
/* (0,2) half-pel vertical: the v lowpass written straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01425
/* (1,2): average of the vertical half-pel and the HV (h-then-v) half-pel. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t tmpH[88];
    uint8_t tmpV[64];
    uint8_t tmpHV[64];

    /* h lowpass over 11 rows starting one row above, so the v pass has context */
    wmv2_mspel8_h_lowpass(tmpH, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(tmpV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(tmpHV, tmpH + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, tmpV, tmpHV, stride, 8, 8, 8);
}
/* (3,2): like mc12 but the vertical half-pel is taken at src+1. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t tmpH[88];
    uint8_t tmpV[64];
    uint8_t tmpHV[64];

    wmv2_mspel8_h_lowpass(tmpH, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(tmpV, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(tmpHV, tmpH + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, tmpV, tmpHV, stride, 8, 8, 8);
}
/* (2,2) half-pel both axes: h lowpass then v lowpass straight to dst. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t tmpH[88];

    wmv2_mspel8_h_lowpass(tmpH, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, tmpH + 8, stride, 8, 8);
}
01449
/**
 * H.263 in-loop deblocking across a horizontal block edge (8 pixels wide).
 * src points at the first row below the edge; qscale indexes the filter
 * strength table. Adjusts the two rows on each side of the edge.
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            /* two pixels on each side of the edge */
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* piecewise-linear ramp: d1 follows d for small |d| and tapers
               back to zero once |d| reaches 2*strength (real edges pass) */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless clamp to [0,255]: bit 8 set means out of range;
               ~(p>>31) yields 255 on overflow and 0 on underflow */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker secondary correction of the outer pixel pair */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
01486
/**
 * H.263 in-loop deblocking across a vertical block edge (8 rows tall).
 * Transposed twin of h263_v_loop_filter_c: src points at the first
 * column right of the edge; adjusts two columns on each side.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            /* two pixels on each side of the edge */
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* piecewise-linear ramp, tapering to zero at |d| >= 2*strength */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless clamp to [0,255] (see h263_v_loop_filter_c) */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker secondary correction of the outer pixel pair */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
01523
/**
 * H.261 in-loop filter on an 8x8 block: separable 1-2-1 smoothing,
 * vertical pass into a temporary, then horizontal pass with rounding
 * back into src. Border rows/columns only get the one-dimensional tap.
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int blurred[64];
    int row, col;

    /* vertical 1-2-1 pass; top/bottom rows are just scaled by 4 */
    for (col = 0; col < 8; col++) {
        blurred[col]      = 4 * src[col];
        blurred[col + 56] = 4 * src[col + 7 * stride];
    }
    for (row = 1; row < 7; row++) {
        for (col = 0; col < 8; col++) {
            const int s = row * stride + col;
            blurred[row * 8 + col] = src[s - stride] + 2 * src[s] + src[s + stride];
        }
    }

    /* horizontal 1-2-1 pass with rounding, written back in place */
    for (row = 0; row < 8; row++) {
        src[row * stride]     = (blurred[row * 8]     + 2) >> 2;
        src[row * stride + 7] = (blurred[row * 8 + 7] + 2) >> 2;
        for (col = 1; col < 7; col++) {
            const int t = row * 8 + col;
            src[row * stride + col] =
                (blurred[t - 1] + 2 * blurred[t] + blurred[t + 1] + 8) >> 4;
        }
    }
}
01550
/**
 * Sum of absolute differences over a 16-wide block of height h.
 * The first (context) argument is unused in the C version.
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
01578
/* SAD of a 16-wide block against pix2 interpolated half a pel to the right. */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
01606
/* SAD of a 16-wide block against pix2 interpolated half a pel downward. */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return total;
}
01636
/* SAD of a 16-wide block against pix2 interpolated half a pel in both axes. */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                          below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return total;
}
01666
/* Sum of absolute differences over an 8-wide block of height h. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
01686
/* SAD of an 8-wide block against pix2 interpolated half a pel to the right. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
01706
/* SAD of an 8-wide block against pix2 interpolated half a pel downward. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return total;
}
01728
/* SAD of an 8-wide block against pix2 interpolated half a pel in both axes. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                          below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return total;
}
01750
/* Noise-preserving SSE for 16-pixel-wide blocks.
 * score1 is the plain sum of squared errors; score2 accumulates the
 * difference between the local 2x2 gradients of s1 and s2, so errors that
 * preserve texture cost less.  The gradient term is weighted by
 * avctx->nsse_weight when a MpegEncContext is supplied via v, otherwise
 * by the default 8. */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){  /* gradient needs the row below; skip on the last row */
            for(x=0; x<15; x++){
                /* 2x2 cross-gradient of s1 minus that of s2 */
                score2+= FFABS( s1[x  ] - s1[x  +stride]
                              - s1[x+1] + s1[x+1+stride])
                        -FFABS( s2[x  ] - s2[x  +stride]
                              - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;  /* default weight when no context */
}
01776
/* 8-pixel-wide variant of nsse16_c: plain SSE plus a weighted texture
 * (2x2 gradient) difference term.  See nsse16_c for the rationale. */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){  /* gradient needs the row below; skip on the last row */
            for(x=0; x<7; x++){
                score2+= FFABS( s1[x  ] - s1[x  +stride]
                              - s1[x+1] + s1[x+1+stride])
                        -FFABS( s2[x  ] - s2[x  +stride]
                              - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;  /* default weight when no context */
}
01802
/* Estimate the weighted squared error that would remain after adding
 * basis*scale (rounded down from BASIS_SHIFT to RECON_SHIFT precision)
 * to the residual "rem".  Each term contributes (w*b)^2 >> 4 and the
 * total is returned >> 2.
 * NOTE(review): the assert documents the assumption |b| < 512 after the
 * RECON_SHIFT, which keeps w*b within int range — confirm against the
 * weight table producers. */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        /* scaled basis added to the residual, with round-to-nearest */
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}
01817
/* Add basis*scale to the residual "rem" in place, using the same
 * round-to-nearest BASIS_SHIFT -> RECON_SHIFT conversion as
 * try_8x8basis_c above. */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
01825
01834 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01835 {
01836 int i;
01837 DCTELEM temp[64];
01838
01839 if(last<=0) return;
01840
01841
01842 for(i=0; i<=last; i++){
01843 const int j= scantable[i];
01844 temp[j]= block[j];
01845 block[j]=0;
01846 }
01847
01848 for(i=0; i<=last; i++){
01849 const int j= scantable[i];
01850 const int perm_j= permutation[j];
01851 block[perm_j]= temp[j];
01852 }
01853 }
01854
/* Comparison function that always reports a perfect (zero) score;
 * selected for FF_CMP_ZERO in ff_set_cmp() below. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
01858
/* Fill cmp[0..5] with the comparison functions selected by "type"
 * (an FF_CMP_* id; only the low byte is significant).  Index i picks
 * the block-size variant from the corresponding DSPContext table.
 * Unknown ids leave the slots zeroed (from the memset) and log an error. */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
01918
/* dst[i] += src[i] (mod 256) for i in [0, w).
 * Uses a SWAR trick to add sizeof(long) bytes per iteration: add the low
 * 7 bits of each byte, then patch the MSBs with XOR so no carry crosses
 * byte lanes.  The remaining tail bytes are handled one at a time.
 *
 * Fix: the loop bound is computed as "w - (long)sizeof(long)".  The
 * original "w - sizeof(long)" promoted w to an unsigned type, so for
 * w < sizeof(long) the bound wrapped to a huge value and the word loop
 * read and wrote out of bounds. */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i;
    /* per-word SWAR masks: 0x7f7f... and 0x8080... repeated */
    const unsigned long m7f = ~0UL / 255 * 0x7f;
    const unsigned long m80 = ~0UL / 255 * 0x80;

    for (i = 0; i <= w - (long)sizeof(long); i += sizeof(long)) {
        long a = *(long*)(src + i);
        long b = *(long*)(dst + i);
        /* per-byte (a+b) & 0xFF without inter-byte carries */
        *(long*)(dst + i) = ((a & m7f) + (b & m7f)) ^ ((a ^ b) & m80);
    }
    /* scalar tail: at most sizeof(long)-1 bytes */
    for (; i < w; i++)
        dst[i] += src[i];
}
01929
/* dst[i] = src1[i] - src2[i] (mod 256) for i in [0, w).
 * On targets without fast unaligned loads, an unaligned src2 falls back
 * to a byte loop unrolled by 8; otherwise a SWAR word loop subtracts
 * sizeof(long) bytes at once (borrow confined to each byte lane).
 *
 * Fix: the word-loop bound uses "w - (long)sizeof(long)"; the original
 * unsigned "w - sizeof(long)" wrapped for w < sizeof(long) and caused an
 * out-of-bounds word access. */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
    /* per-word SWAR masks: 0x7f7f... and 0x8080... repeated */
    const unsigned long m7f = ~0UL / 255 * 0x7f;
    const unsigned long m80 = ~0UL / 255 * 0x80;
#if !HAVE_FAST_UNALIGNED
    if ((long)src2 & (sizeof(long) - 1)) {
        /* unaligned source: plain byte loop, unrolled by 8 */
        for (i = 0; i + 7 < w; i += 8) {
            dst[i+0] = src1[i+0] - src2[i+0];
            dst[i+1] = src1[i+1] - src2[i+1];
            dst[i+2] = src1[i+2] - src2[i+2];
            dst[i+3] = src1[i+3] - src2[i+3];
            dst[i+4] = src1[i+4] - src2[i+4];
            dst[i+5] = src1[i+5] - src2[i+5];
            dst[i+6] = src1[i+6] - src2[i+6];
            dst[i+7] = src1[i+7] - src2[i+7];
        }
    } else
#endif
    for (i = 0; i <= w - (long)sizeof(long); i += sizeof(long)) {
        long a = *(long*)(src1 + i);
        long b = *(long*)(src2 + i);
        /* per-byte (a-b) & 0xFF without inter-byte borrows */
        *(long*)(dst + i) = ((a | m80) - (b & m7f)) ^ ((a ^ b ^ m80) & m80);
    }
    /* scalar tail */
    for (; i < w; i++)
        dst[i] = src1[i] - src2[i];
}
01954
/* HuffYUV median-predictor decode: reconstruct dst from the residuals in
 * "diff", predicting each pixel as the median of the left pixel, the top
 * pixel (src1) and the gradient left+top-topleft.  The left/topleft state
 * is carried in and out via the pointer arguments so rows can be chained. */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t cur     = *left;      /* running left neighbour     */
    uint8_t topleft = *left_top;  /* running top-left neighbour */

    for (i = 0; i < w; i++) {
        const uint8_t top = src1[i];
        /* predictor + residual; the uint8_t store wraps mod 256 */
        cur = mid_pred(cur, top, (cur + top - topleft) & 0xFF) + diff[i];
        topleft = top;
        dst[i]  = cur;
    }

    *left     = cur;
    *left_top = topleft;
}
01971
/* HuffYUV median-predictor encode: the inverse of
 * add_hfyu_median_prediction_c.  Writes residuals dst = src2 - pred,
 * where pred is the median of left, top (src1) and the gradient
 * left+top-topleft, with state carried through *left / *left_top. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t cur     = *left;      /* running left neighbour     */
    uint8_t topleft = *left_top;  /* running top-left neighbour */

    for (i = 0; i < w; i++) {
        const uint8_t top = src1[i];
        const int pred = mid_pred(cur, top, (cur + top - topleft) & 0xFF);
        topleft = top;
        cur     = src2[i];
        dst[i]  = cur - pred;     /* uint8_t store wraps mod 256 */
    }

    *left     = cur;
    *left_top = topleft;
}
01989
/* Left (A1) prediction decode: running sum of the residuals in src,
 * truncated to 8 bits on store.  The int accumulator "acc" is carried
 * across calls via the return value. */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i = 0;

    /* main loop unrolled by two while at least two pixels remain */
    while (i + 1 < w) {
        acc += src[i];
        dst[i] = acc;
        i++;
        acc += src[i];
        dst[i] = acc;
        i++;
    }

    /* at most one trailing pixel */
    while (i < w) {
        acc += src[i];
        dst[i] = acc;
        i++;
    }

    return acc;
}
02008
/* Byte offsets of the colour channels inside a packed 32-bit BGRA pixel,
 * chosen for the host endianness. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/* Left (A1) prediction decode over packed 32-bit BGRA pixels: each of the
 * four channels is a running sum of its residual stream, independently of
 * the others.  Channel accumulators are carried in/out through the pointer
 * arguments so consecutive calls can continue across row boundaries. */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int r,g,b,a;
    r= *red;
    g= *green;
    b= *blue;
    a= *alpha;

    for(i=0; i<w; i++){
        b+= src[4*i+B];
        g+= src[4*i+G];
        r+= src[4*i+R];
        a+= src[4*i+A];

        /* uint8_t stores truncate to 8 bits; accumulators keep full int precision */
        dst[4*i+B]= b;
        dst[4*i+G]= g;
        dst[4*i+R]= r;
        dst[4*i+A]= a;
    }

    *red= r;
    *green= g;
    *blue= b;
    *alpha= a;
}
#undef B
#undef G
#undef R
#undef A
02049
/* Butterfly helpers for the 8x8 Hadamard transforms below. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* |x+y| + |x-y|: final butterfly stage folded into the absolute sum. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))

/* SATD: 8x8 Hadamard transform of the difference (src - dst), returning
 * the sum of absolute transform coefficients.  The first loop does the
 * horizontal 8-point transform per row; the second does the vertical
 * transform per column, accumulating through BUTTERFLYA. */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* horizontal pass: stage 1 on the pixel differences */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        /* stage 2 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        /* stage 3 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical pass: stages 1 and 2 ... */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        /* ...final stage merged with the absolute-value accumulation */
        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
02109
/* Intra SATD: 8x8 Hadamard transform of the source block itself (no
 * reference), summing absolute coefficients.  The DC term
 * |temp[8*0] + temp[8*4]| added inside the loop is subtracted again at
 * the end so the score ignores the block's mean level. */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* horizontal 8-point Hadamard per row */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical pass, last stage merged with the absolute sum */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* remove the DC contribution added by the i==0 iteration above */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
02157
/* DCT-domain SAD: forward-transform the 8x8 pixel difference and return
 * the sum of the absolute transform coefficients. */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
02168
#if CONFIG_GPL
/* One 8-point integer transform in the H.264 high-profile style, built
 * from SRC()/DST() accessor macros so the same body serves both the row
 * and the column pass below. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/* SAD in the H.264 8x8 transform domain: row transform in place, then a
 * column pass whose DST() directly accumulates absolute values. */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
02221
/* Returns the largest absolute DCT coefficient of the 8x8 pixel
 * difference between src1 and src2. */
static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int sum=0, i;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}
02237
/* Quantization-error energy: DCT the pixel difference, quantize and
 * dequantize it, inverse-transform, and return the squared error against
 * the unquantized transform saved in "bak". */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;  /* second half holds the reference copy */
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;  /* force inter quantization path */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp);

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
02260
/* Rate-distortion score of an 8x8 block: quantize the DCT of the
 * difference, count the bits its VLC coding would need, then dequantize,
 * inverse-transform and measure the reconstruction SSE.  Returns
 * distortion + (bits*qscale^2*109 + 64) >> 7.
 * NOTE(review): 109/128 is presumed to be a tuned lambda constant —
 * confirm against the encoder's rate-control code. */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* work on local aligned copies so the idct_add below does not touch
     * the caller's buffers */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;  /* DC coefficient is coded separately below */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits for all but the last coefficient */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;                 /* bias into the table's index range */
                if((level&(~127)) == 0){   /* level fits the VLC table */
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;     /* escape-coded */
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);                /* last coefficient must be nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    /* reconstruct and measure distortion */
    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
02336
/* Bit-cost metric: quantize the DCT of the 8x8 difference and return the
 * number of bits its run/level VLC coding would take (no distortion
 * term; compare rd8x8_c above, which shares this counting loop). */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;  /* DC coefficient is coded separately below */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;                 /* bias into the table's index range */
                if((level&(~127)) == 0){   /* level fits the VLC table */
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;     /* escape-coded */
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);                /* last coefficient must be nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
02395
/* Intra vertical SAD: sum of |pixel - pixel_below| inside a single block,
 * a cheap measure of vertical activity.  The macro generates the 8- and
 * 16-pixel-wide variants instantiated below. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])         \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);        \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
02413
/* Vertical SAD of the difference signal between two 16-wide blocks:
 * sums |row-to-row change of (s1 - s2)|, i.e. how differently the two
 * blocks vary vertically.  Rows 0..h-1 are consumed pairwise. */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += FFABS(s1[col] - s2[col] - s1[col + stride] + s2[col + stride]);
        s1 += stride;
        s2 += stride;
    }

    return total;
}
02428
/* Squared value, shared by the vsse metrics below. */
#define SQ(a) ((a)*(a))

/* Intra vertical SSE: sum of squared differences between vertically
 * adjacent pixels inside one block.  The macro generates the 8- and
 * 16-pixel-wide variants instantiated below. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])               \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);              \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
02447
/* Vertical SSE of the difference signal between two 16-wide blocks:
 * squared row-to-row change of (s1 - s2), accumulated over all rows. */
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += SQ(s1[col] - s2[col] - s1[col + stride] + s2[col + stride]);
        s1 += stride;
        s2 += stride;
    }

    return total;
}
02462
/* Sum of squared differences between an int8 vector and an int16 vector
 * of the same length. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int sum = 0;
    int k;
    for (k = 0; k < size; k++) {
        const int d = pix1[k] - pix2[k];
        sum += d * d;
    }
    return sum;
}
02471
/* Instantiate 16x16 versions of the 8x8 metrics above: each wrapper runs
 * the 8x8 function on the four quadrants and sums the results. */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02482
/* Elementwise float product: dst[k] = src0[k] * src1[k]. */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    int k;
    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[k];
}
02488
/* Elementwise product with src1 read back-to-front:
 * dst[k] = src0[k] * src1[len-1-k]. */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int k;
    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[len - 1 - k];
}
02495
/* Fused multiply-add over vectors: dst[k] = src0[k]*src1[k] + src2[k]. */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int k;
    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[k] + src2[k];
}
02501
/* Overlap-add windowing (MDCT-style): consumes len samples from src0 and
 * src1 plus a 2*len-tap window, producing 2*len outputs.  For each k in
 * [0,len) the pair (dst[k], dst[2*len-1-k]) is a rotation of
 * (src0[k], src1[len-1-k]) by the window taps (win[k], win[2*len-1-k]). */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int k;
    for (k = 0; k < len; k++) {
        const int   m  = 2 * len - 1 - k;   /* mirrored output/window index */
        const float s0 = src0[k];
        const float s1 = src1[len - 1 - k];
        dst[k] = s0 * win[m] - s1 * win[k];
        dst[m] = s0 * win[k] + s1 * win[m];
    }
}
02518
/* Scale a float vector: dst[k] = src[k] * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k;
    for (k = 0; k < len; k++)
        dst[k] = src[k] * mul;
}
02526
/* Multiply-accumulate with a scalar: dst[k] += src[k] * mul. */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k;
    for (k = 0; k < len; k++)
        dst[k] += src[k] * mul;
}
02534
/* In-place butterfly over two non-aliasing vectors:
 * (v1[k], v2[k]) <- (v1[k]+v2[k], v1[k]-v2[k]). */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;
    for (k = 0; k < len; k++) {
        const float sum  = v1[k] + v2[k];
        const float diff = v1[k] - v2[k];
        v1[k] = sum;
        v2[k] = diff;
    }
}
02545
/* Butterfly with interleaved output: for each k, writes src0[k]+src1[k]
 * and src0[k]-src1[k] to consecutive dst slots (dst holds 2*len floats). */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int k;
    for (k = 0; k < len; k++) {
        const float a = src0[k];
        const float b = src1[k];
        dst[2 * k]     = a + b;
        dst[2 * k + 1] = a - b;
    }
}
02557
/* Plain dot product of two float vectors. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int k;

    for (k = 0; k < len; k++)
        acc += v1[k] * v2[k];

    return acc;
}
02568
/* Bit-pattern clip helper for vector_clipf_c_opposite_sign: operates on
 * raw IEEE-754 float bits.  Returns mini when a compares above it as an
 * unsigned integer, maxi when the sign-flipped value exceeds maxisign,
 * and a itself otherwise. */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    return a;
}
02577
/* Float clip for the case min < 0 < max, done on the raw IEEE-754 bit
 * patterns via clipf_c_one (negative floats order inversely as integers,
 * which the sign-flip there accounts for).  Unrolled by 8.
 * NOTE(review): the loop requires len to be a multiple of 8 — presumed
 * API contract, confirm at the callers. */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);  /* max with the sign bit flipped */
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for(i=0; i<len; i+=8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/* Clip each float in src to [min, max], unrolled by 8.  When the range
 * straddles zero the bit-trick variant above is used; otherwise av_clipf
 * is applied directly.
 * NOTE(review): len is presumed to be a multiple of 8 (both paths are
 * unrolled by 8) — confirm the DSP API contract. */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i < len; i+=8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
02613
/* Dot product of two int16 vectors, each partial product arithmetically
 * shifted right by "shift" before accumulation. */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int acc = 0;
    int k;

    for (k = 0; k < order; k++)
        acc += (v1[k] * v2[k]) >> shift;

    return acc;
}
02623
/* Returns dot(v1, v2) computed on the values of v1 BEFORE the update,
 * while simultaneously doing v1[k] += mul * v3[k] in place. */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int acc = 0;
    int k;

    for (k = 0; k < order; k++) {
        acc   += v1[k] * v2[k];   /* uses the pre-update value */
        v1[k] += mul * v3[k];
    }
    return acc;
}
02633
/* Apply a symmetric int16 window with round-to-nearest Q15 scaling:
 * samples i and len-1-i share window tap window[i].
 * NOTE(review): MUL16 is presumed to be a plain 16x16->32 multiply
 * (see mathops.h) — confirm.  len is presumed even so the len2 pairs
 * cover every sample — confirm at the callers. */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w = window[i];
        output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
02646
/* Clip each int32 sample into [min, max], unrolled by 8.
 * NOTE(review): the do/while structure requires len > 0, and since len
 * is unsigned and decremented by 8, a len that is not a multiple of 8
 * underflows and overruns the buffers — presumed API contract that
 * callers pass a positive multiple of 8; confirm. */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
02662
/* Fixed-point cosine weights for the WMV2 8-point inverse DCT
 * (shared with wmv2_idct_col below). */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565

/* One row of the WMV2 8x8 inverse DCT, in place, with >>8 scaling and
 * round-to-nearest on the outputs. */
static void wmv2_idct_row(short * b)
{
    int t1, t2;
    int a0, a1, a2, a3, a4, a5, a6, a7;

    /* odd-coefficient pairs */
    a1 = W1 * b[1] + W7 * b[7];
    a7 = W7 * b[1] - W1 * b[7];
    a5 = W5 * b[5] + W3 * b[3];
    a3 = W3 * b[5] - W5 * b[3];
    /* even-coefficient pairs */
    a2 = W2 * b[2] + W6 * b[6];
    a6 = W6 * b[2] - W2 * b[6];
    a0 = W0 * b[0] + W0 * b[4];
    a4 = W0 * b[0] - W0 * b[4];

    /* sqrt(2)/2 rotation terms (181/256 ~= 0.7071) */
    t1 = (181 * (a1 - a5 + a7 - a3) + 128) >> 8;
    t2 = (181 * (a1 - a5 - a7 + a3) + 128) >> 8;

    b[0] = (a0 + a2 + a1 + a5 + (1 << 7)) >> 8;
    b[1] = (a4 + a6 + t1      + (1 << 7)) >> 8;
    b[2] = (a4 - a6 + t2      + (1 << 7)) >> 8;
    b[3] = (a0 - a2 + a7 + a3 + (1 << 7)) >> 8;
    b[4] = (a0 - a2 - a7 - a3 + (1 << 7)) >> 8;
    b[5] = (a4 - a6 - t2      + (1 << 7)) >> 8;
    b[6] = (a4 + a6 - t1      + (1 << 7)) >> 8;
    b[7] = (a0 + a2 - a1 - a5 + (1 << 7)) >> 8;
}
/* One column (stride 8) of the WMV2 8x8 inverse DCT, in place.  Inputs
 * are pre-scaled >>3 with rounding; outputs use >>14 with rounding,
 * completing the two-pass normalization started in wmv2_idct_row. */
static void wmv2_idct_col(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    /* odd-coefficient pairs, rounded down to working precision */
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    /* even-coefficient pairs */
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;

    /* sqrt(2)/2 rotation terms (181/256 ~= 0.7071) */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
/* Full WMV2 8x8 inverse DCT: one 1-D pass over all 8 rows, then one
 * over all 8 columns, both in place. */
void ff_wmv2_idct_c(short * block){
    int k;

    for (k = 0; k < 64; k += 8)
        wmv2_idct_row(block + k);
    for (k = 0; k < 8; k++)
        wmv2_idct_col(block + k);
}
02735
02736
/* WMV2 IDCT followed by a clamped store of the result into dest. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
/* WMV2 IDCT followed by a clamped add onto the pixels already in dest. */
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
/* JPEG reference integer IDCT (j_rev_dct) + clamped store into dest. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
/* JPEG reference integer IDCT + clamped add onto dest. */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
02757
/* 4x4 reduced-resolution variants (lowres==1): j_rev_dct4 + clamped
 * store / add of the 4x4 result. */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}
02768
/* 2x2 reduced-resolution variants (lowres==2): j_rev_dct2 + clamped
 * store / add of the 2x2 result. */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
02779
/* 1x1 reduced-resolution variants (lowres==3): only the DC coefficient
 * survives, scaled (+4)>>3 and clamped through ff_cropTbl. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;  /* center of the clip table */

    dest[0] = cm[(block[0] + 4)>>3];
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;  /* center of the clip table */

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
02792
/* Do-nothing function matching the (mem, stride, h) pointer signature.
 * NOTE(review): appears to serve as a placeholder where a callable
 * function pointer is required (e.g. a prefetch slot) — confirm at the
 * assignment sites. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02794
02795
/* One-time initialization of the global lookup tables:
 * - ff_cropTbl: clamp-to-[0,255] table with MAX_NEG_CROP guard bands
 *   (zeros below, 255s above the identity middle section)
 * - ff_squareTbl: (i-256)^2 for i in [0,512)
 * - inv_zigzag_direct16: inverse of ff_zigzag_direct, stored 1-based */
av_cold void dsputil_static_init(void)
{
    int i;

    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i] = 0;
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
    }

    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);
    }

    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}
02812
/* Verify that the compiler actually honours 16-byte stack alignment for
 * LOCAL_ALIGNED_16 variables.  Returns 0 when aligned, -1 otherwise;
 * on SIMD-capable builds a one-time error is logged (guarded by the
 * static did_fail flag) since misalignment would crash SSE/AltiVec code. */
int ff_check_alignment(void){
    static int did_fail=0;
    LOCAL_ALIGNED_16(int, aligned, [4]);

    if((intptr_t)aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
02832
02833 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
02834 {
02835 int i;
02836
02837 ff_check_alignment();
02838
02839 #if CONFIG_ENCODERS
/* NOTE(review): this chunk is the tail of the DSP-context initializer; the
 * function head (signature, earlier init code, declaration of `i`) is above
 * this excerpt. Each line also carries an embedded 5-digit listing number
 * from the source extraction — it is preserved verbatim here. */

/* --- forward DCT selection (encoder-only, closed by #endif below) ---
 * 10-bit content gets the 10-bit integer "islow" fDCT unconditionally;
 * otherwise the choice follows avctx->dct_algo (fast int / FAAN / islow). */
02840 if (avctx->bits_per_raw_sample == 10) {
02841 c->fdct = ff_jpeg_fdct_islow_10;
02842 c->fdct248 = ff_fdct248_islow_10;
02843 } else {
02844 if(avctx->dct_algo==FF_DCT_FASTINT) {
02845 c->fdct = fdct_ifast;
02846 c->fdct248 = fdct_ifast248;
02847 }
02848 else if(avctx->dct_algo==FF_DCT_FAAN) {
02849 c->fdct = ff_faandct;
02850 c->fdct248 = ff_faandct248;
02851 }
02852 else {
02853 c->fdct = ff_jpeg_fdct_islow_8;
02854 c->fdct248 = ff_fdct248_islow_8;
02855 }
02856 }
02857 #endif //CONFIG_ENCODERS
02858
/* --- inverse DCT selection ---
 * lowres 1/2/3 force reduced-size reference IDCTs (4x4 / 2x2 / 1x1).
 * At full resolution, 10-bit input pins the simple_idct 10-bit variants;
 * otherwise avctx->idct_algo picks among the 8-bit implementations, with
 * simple_idct_8 as the fallback. Only the INT (jref) IDCT needs the
 * LIBMPEG2 coefficient permutation; all others use FF_NO_IDCT_PERM. */
02859 if(avctx->lowres==1){
02860 c->idct_put= ff_jref_idct4_put;
02861 c->idct_add= ff_jref_idct4_add;
02862 c->idct = j_rev_dct4;
02863 c->idct_permutation_type= FF_NO_IDCT_PERM;
02864 }else if(avctx->lowres==2){
02865 c->idct_put= ff_jref_idct2_put;
02866 c->idct_add= ff_jref_idct2_add;
02867 c->idct = j_rev_dct2;
02868 c->idct_permutation_type= FF_NO_IDCT_PERM;
02869 }else if(avctx->lowres==3){
02870 c->idct_put= ff_jref_idct1_put;
02871 c->idct_add= ff_jref_idct1_add;
02872 c->idct = j_rev_dct1;
02873 c->idct_permutation_type= FF_NO_IDCT_PERM;
02874 }else{
02875 if (avctx->bits_per_raw_sample == 10) {
02876 c->idct_put = ff_simple_idct_put_10;
02877 c->idct_add = ff_simple_idct_add_10;
02878 c->idct = ff_simple_idct_10;
02879 c->idct_permutation_type = FF_NO_IDCT_PERM;
02880 } else {
02881 if(avctx->idct_algo==FF_IDCT_INT){
02882 c->idct_put= ff_jref_idct_put;
02883 c->idct_add= ff_jref_idct_add;
02884 c->idct = j_rev_dct;
02885 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
02886 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
02887 avctx->idct_algo==FF_IDCT_VP3){
02888 c->idct_put= ff_vp3_idct_put_c;
02889 c->idct_add= ff_vp3_idct_add_c;
02890 c->idct = ff_vp3_idct_c;
02891 c->idct_permutation_type= FF_NO_IDCT_PERM;
02892 }else if(avctx->idct_algo==FF_IDCT_WMV2){
02893 c->idct_put= ff_wmv2_idct_put_c;
02894 c->idct_add= ff_wmv2_idct_add_c;
02895 c->idct = ff_wmv2_idct_c;
02896 c->idct_permutation_type= FF_NO_IDCT_PERM;
02897 }else if(avctx->idct_algo==FF_IDCT_FAAN){
02898 c->idct_put= ff_faanidct_put;
02899 c->idct_add= ff_faanidct_add;
02900 c->idct = ff_faanidct;
02901 c->idct_permutation_type= FF_NO_IDCT_PERM;
/* EA TGQ sets only idct_put — the decoder presumably never uses idct/idct_add;
 * TODO confirm against the eatgq decoder. */
02902 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
02903 c->idct_put= ff_ea_idct_put_c;
02904 c->idct_permutation_type= FF_NO_IDCT_PERM;
02905 }else{
02906 c->idct_put = ff_simple_idct_put_8;
02907 c->idct_add = ff_simple_idct_add_8;
02908 c->idct = ff_simple_idct_8;
02909 c->idct_permutation_type= FF_NO_IDCT_PERM;
02910 }
02911 }
02912 }
02913
/* --- generic C implementations of the basic pixel/block helpers --- */
02914 c->diff_pixels = diff_pixels_c;
02915 c->put_pixels_clamped = ff_put_pixels_clamped_c;
02916 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
02917 c->add_pixels_clamped = ff_add_pixels_clamped_c;
02918 c->sum_abs_dctelem = sum_abs_dctelem_c;
02919 c->gmc1 = gmc1_c;
02920 c->gmc = ff_gmc_c;
02921 c->pix_sum = pix_sum_c;
02922 c->pix_norm1 = pix_norm1_c;
02923
02924 c->fill_block_tab[0] = fill_block16_c;
02925 c->fill_block_tab[1] = fill_block8_c;
02926
02927
/* SAD tables: [0] = 16x16, [1] = 8x8; second index = h/v/hv half-pel. */
02928 c->pix_abs[0][0] = pix_abs16_c;
02929 c->pix_abs[0][1] = pix_abs16_x2_c;
02930 c->pix_abs[0][2] = pix_abs16_y2_c;
02931 c->pix_abs[0][3] = pix_abs16_xy2_c;
02932 c->pix_abs[1][0] = pix_abs8_c;
02933 c->pix_abs[1][1] = pix_abs8_x2_c;
02934 c->pix_abs[1][2] = pix_abs8_y2_c;
02935 c->pix_abs[1][3] = pix_abs8_xy2_c;
02936
/* Third-pel (tpel) MC tables; only the 9 valid mcXY positions are filled
 * (indices 3, 7 and 11-15 are left as-is). */
02937 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
02938 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
02939 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
02940 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
02941 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
02942 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
02943 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
02944 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
02945 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
02946
02947 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
02948 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
02949 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
02950 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
02951 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
02952 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
02953 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
02954 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
02955 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
02956
/* Fill all 16 quarter-pel mcXY slots of one _pixels_tab row from the
 * matching PFX##NUM##_mcXY_c functions. */
02957 #define dspfunc(PFX, IDX, NUM) \
02958 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
02959 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
02960 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
02961 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
02962 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
02963 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
02964 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
02965 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
02966 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
02967 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
02968 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
02969 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
02970 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
02971 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
02972 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
02973 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
02974
02975 dspfunc(put_qpel, 0, 16);
02976 dspfunc(put_no_rnd_qpel, 0, 16);
02977
02978 dspfunc(avg_qpel, 0, 16);
02979
02980
02981 dspfunc(put_qpel, 1, 8);
02982 dspfunc(put_no_rnd_qpel, 1, 8);
02983
02984 dspfunc(avg_qpel, 1, 8);
02985
02986
02987 #undef dspfunc
02988
/* Codec-specific sub-initializers, compiled in only when the decoders are. */
02989 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
02990 ff_mlp_init(c, avctx);
02991 #endif
02992 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
02993 ff_intrax8dsp_init(c,avctx);
02994 #endif
02995
/* WMV2/MSMPEG4 "mspel" MC table (8 positions; slot 0 is a plain copy). */
02996 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
02997 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
02998 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
02999 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
03000 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
03001 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
03002 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
03003 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
03004
/* --- comparison functions for motion estimation / RD decisions ---
 * SET_CMP_FUNC fills slot [0] (16-wide) and [1] (8x8) of a cmp table. */
03005 #define SET_CMP_FUNC(name) \
03006 c->name[0]= name ## 16_c;\
03007 c->name[1]= name ## 8x8_c;
03008
03009 SET_CMP_FUNC(hadamard8_diff)
03010 c->hadamard8_diff[4]= hadamard8_intra16_c;
03011 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
03012 SET_CMP_FUNC(dct_sad)
03013 SET_CMP_FUNC(dct_max)
03014 #if CONFIG_GPL
03015 SET_CMP_FUNC(dct264_sad)
03016 #endif
03017 c->sad[0]= pix_abs16_c;
03018 c->sad[1]= pix_abs8_c;
03019 c->sse[0]= sse16_c;
03020 c->sse[1]= sse8_c;
03021 c->sse[2]= sse4_c;
03022 SET_CMP_FUNC(quant_psnr)
03023 SET_CMP_FUNC(rd)
03024 SET_CMP_FUNC(bit)
03025 c->vsad[0]= vsad16_c;
03026 c->vsad[4]= vsad_intra16_c;
03027 c->vsad[5]= vsad_intra8_c;
03028 c->vsse[0]= vsse16_c;
03029 c->vsse[4]= vsse_intra16_c;
03030 c->vsse[5]= vsse_intra8_c;
03031 c->nsse[0]= nsse16_c;
03032 c->nsse[1]= nsse8_c;
03033 #if CONFIG_DWT
03034 ff_dsputil_init_dwt(c);
03035 #endif
03036
03037 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
03038
/* HuffYUV-style byte prediction and byte-swap helpers. */
03039 c->add_bytes= add_bytes_c;
03040 c->diff_bytes= diff_bytes_c;
03041 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
03042 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03043 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
03044 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
03045 c->bswap_buf= bswap_buf;
03046 c->bswap16_buf = bswap16_buf;
03047
/* Loop filters — runtime `if (CONFIG_*)` relies on the compiler eliding
 * the dead branch when the decoder is not configured in. */
03048 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
03049 c->h263_h_loop_filter= h263_h_loop_filter_c;
03050 c->h263_v_loop_filter= h263_v_loop_filter_c;
03051 }
03052
03053 if (CONFIG_VP3_DECODER) {
03054 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
03055 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
03056 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
03057 }
03058
03059 c->h261_loop_filter= h261_loop_filter_c;
03060
03061 c->try_8x8basis= try_8x8basis_c;
03062 c->add_8x8basis= add_8x8basis_c;
03063
/* --- audio/float vector helpers --- */
03064 #if CONFIG_VORBIS_DECODER
03065 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
03066 #endif
03067 #if CONFIG_AC3_DECODER
03068 c->ac3_downmix = ff_ac3_downmix_c;
03069 #endif
03070 c->vector_fmul = vector_fmul_c;
03071 c->vector_fmul_reverse = vector_fmul_reverse_c;
03072 c->vector_fmul_add = vector_fmul_add_c;
03073 c->vector_fmul_window = vector_fmul_window_c;
03074 c->vector_clipf = vector_clipf_c;
03075 c->scalarproduct_int16 = scalarproduct_int16_c;
03076 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
03077 c->apply_window_int16 = apply_window_int16_c;
03078 c->vector_clip_int32 = vector_clip_int32_c;
03079 c->scalarproduct_float = scalarproduct_float_c;
03080 c->butterflies_float = butterflies_float_c;
03081 c->butterflies_float_interleave = butterflies_float_interleave_c;
03082 c->vector_fmul_scalar = vector_fmul_scalar_c;
03083 c->vector_fmac_scalar = vector_fmac_scalar_c;
03084
/* shrink[i] halves the plane i times; [0] is a plain copy. */
03085 c->shrink[0]= av_image_copy_plane;
03086 c->shrink[1]= ff_shrink22;
03087 c->shrink[2]= ff_shrink44;
03088 c->shrink[3]= ff_shrink88;
03089
/* Prefetch defaults to a no-op; arch-specific init below may override it. */
03090 c->prefetch= just_return;
03091
/* Cleared here so the fallback loop at the end can detect slots that no
 * arch-specific init filled in. */
03092 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
03093 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03094
/* --- bit-depth-templated functions ---
 * FUNC/FUNCC paste a _<depth> / _<depth>_c suffix onto a base name, so the
 * macros below can select the 8/9/10-bit template instantiations from
 * dsputil_template.c (included three times near the top of the file). */
03095 #undef FUNC
03096 #undef FUNCC
03097 #define FUNC(f, depth) f ## _ ## depth
03098 #define FUNCC(f, depth) f ## _ ## depth ## _c
03099
/* Half-pel table: slots 0-3 = full / x-half / y-half / xy-half. */
03100 #define dspfunc1(PFX, IDX, NUM, depth)\
03101 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
03102 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
03103 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
03104 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
03105
/* Quarter-pel table: all 16 mcXY positions, depth-templated. */
03106 #define dspfunc2(PFX, IDX, NUM, depth)\
03107 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
03108 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
03109 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
03110 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
03111 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
03112 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
03113 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
03114 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
03115 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
03116 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
03117 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
03118 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
03119 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
03120 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
03121 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
03122 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
03123
03124
/* Installs every depth-dependent pointer for one (depth, dct-width) pair.
 * `dct` selects the DCT-coefficient width suffix (_16 or _32). */
03125 #define BIT_DEPTH_FUNCS(depth, dct)\
03126 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
03127 c->draw_edges = FUNCC(draw_edges , depth);\
03128 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
03129 c->clear_block = FUNCC(clear_block ## dct , depth);\
03130 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
03131 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
03132 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
03133 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
03134 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
03135 \
03136 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
03137 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
03138 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
03139 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
03140 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
03141 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
03142 \
03143 dspfunc1(put , 0, 16, depth);\
03144 dspfunc1(put , 1, 8, depth);\
03145 dspfunc1(put , 2, 4, depth);\
03146 dspfunc1(put , 3, 2, depth);\
03147 dspfunc1(put_no_rnd, 0, 16, depth);\
03148 dspfunc1(put_no_rnd, 1, 8, depth);\
03149 dspfunc1(avg , 0, 16, depth);\
03150 dspfunc1(avg , 1, 8, depth);\
03151 dspfunc1(avg , 2, 4, depth);\
03152 dspfunc1(avg , 3, 2, depth);\
03153 dspfunc1(avg_no_rnd, 0, 16, depth);\
03154 dspfunc1(avg_no_rnd, 1, 8, depth);\
03155 \
03156 dspfunc2(put_h264_qpel, 0, 16, depth);\
03157 dspfunc2(put_h264_qpel, 1, 8, depth);\
03158 dspfunc2(put_h264_qpel, 2, 4, depth);\
03159 dspfunc2(put_h264_qpel, 3, 2, depth);\
03160 dspfunc2(avg_h264_qpel, 0, 16, depth);\
03161 dspfunc2(avg_h264_qpel, 1, 8, depth);\
03162 dspfunc2(avg_h264_qpel, 2, 4, depth);
03163
/* Pick the template instantiation matching the stream bit depth; unknown
 * depths log a debug message and deliberately fall through to 8-bit. */
03164 switch (avctx->bits_per_raw_sample) {
03165 case 9:
03166 if (c->dct_bits == 32) {
03167 BIT_DEPTH_FUNCS(9, _32);
03168 } else {
03169 BIT_DEPTH_FUNCS(9, _16);
03170 }
03171 break;
03172 case 10:
03173 if (c->dct_bits == 32) {
03174 BIT_DEPTH_FUNCS(10, _32);
03175 } else {
03176 BIT_DEPTH_FUNCS(10, _16);
03177 }
03178 break;
03179 default:
03180 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
/* fall through: 8-bit functions are the default for any other depth */
03181 case 8:
03182 BIT_DEPTH_FUNCS(8, _16);
03183 break;
03184 }
03185
03186
/* --- arch-specific overrides (each may replace any pointer set above) --- */
03187 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
03188 if (ARCH_ARM) dsputil_init_arm (c, avctx);
03189 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
03190 if (HAVE_VIS) dsputil_init_vis (c, avctx);
03191 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
03192 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
03193 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
03194 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
03195 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
03196
/* Any 2-tap qpel slot left NULL (memset above, not filled by arch init)
 * falls back to the corresponding h264 qpel function. */
03197 for(i=0; i<64; i++){
03198 if(!c->put_2tap_qpel_pixels_tab[0][i])
03199 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
03200 if(!c->avg_2tap_qpel_pixels_tab[0][i])
03201 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
03202 }
03203
/* Build the final coefficient permutation table for the chosen IDCT. */
03204 ff_init_scantable_permutation(c->idct_permutation,
03205 c->idct_permutation_type);
03206 }