00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041 #include "diracdsp.h"
00042
/* Clipping table to [0,255] with MAX_NEG_CROP guard entries on each side;
 * contents are filled at runtime by the dsputil init code. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Squares of -256..255 (indexed with a +256 offset); filled at runtime. */
uint32_t ff_squareTbl[512] = {0, };
00045
/* Instantiate the bit-depth-templated DSP routines for 9-, 10- and 8-bit
 * samples.  BIT_DEPTH is deliberately left defined as 8 after the last
 * include so the rest of this file uses the 8-bit helpers. */
#define BIT_DEPTH 9
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 8
#include "dsputil_template.c"
00056
00057
/* SWAR helpers: the byte value 0x7f / 0x80 replicated into every byte
 * lane of a native unsigned long (~0UL/255 == 0x0101...01). */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00060
/* Standard 8x8 zigzag scan order (natural order -> scan position). */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00071
00072
00073
/* Zigzag scan variant for interlaced (2-4-8) macroblock coding. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00084
00085
/* Inverse zigzag table (16-bit entries, 16-byte aligned); filled at runtime. */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
00087
/* Alternate horizontal scan order (used e.g. by MPEG-2). */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
00098
/* Alternate vertical scan order (used e.g. by MPEG-2 interlaced coding). */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00109
00110
/* Coefficient permutation used by the simple_idct MMX implementation
 * (FF_SIMPLE_IDCT_PERM). */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
00121
/* Within-row coefficient reordering used by the SSE2 IDCT (FF_SSE2_IDCT_PERM). */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00123
/**
 * Initialize a ScanTable from a natural-order scan pattern.
 *
 * Applies the IDCT coefficient permutation to src_scantable and
 * precomputes raster_end[]: for each scan position, the largest
 * permuted index seen up to and including that position.
 *
 * @param permutation   64-entry IDCT coefficient permutation
 *                      (see ff_init_scantable_permutation())
 * @param st            table to fill; keeps a pointer to src_scantable
 * @param src_scantable scan pattern in natural (unpermuted) order
 */
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int i;
    int end;

    st->scantable= src_scantable;

    for(i=0; i<64; i++){
        int j;
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
#if ARCH_PPC
        /* inverse mapping, only consumed by the PPC/AltiVec code paths */
        st->inverse[j] = i;
#endif
    }

    /* raster_end[i] = max permuted index over scan positions 0..i */
    end=-1;
    for(i=0; i<64; i++){
        int j;
        j = st->permutated[i];
        if(j>end) end=j;
        st->raster_end[i]= end;
    }
}
00147
/**
 * Fill a 64-entry IDCT coefficient permutation for the given
 * permutation type (FF_*_IDCT_PERM), mapping natural coefficient
 * order to the order expected by the selected IDCT implementation.
 * Logs an error and leaves the table untouched for unknown types.
 */
void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                   int idct_permutation_type)
{
    int i;

    switch(idct_permutation_type){
    case FF_NO_IDCT_PERM:
        /* identity: IDCT consumes coefficients in natural order */
        for(i=0; i<64; i++)
            idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        /* swap the two low column bit pairs within each row */
        for(i=0; i<64; i++)
            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        /* full 8x8 transpose: swap row and column indices */
        for(i=0; i<64; i++)
            idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        /* partial transpose: swap only the low 2 bits of row/column */
        for(i=0; i<64; i++)
            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        /* keep rows, reorder coefficients within each row */
        for(i=0; i<64; i++)
            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
00182
/* Sum of all 256 samples of a 16x16 pixel block; consecutive rows are
 * line_size bytes apart. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
00204
/* Sum of squared sample values over a 16x16 block (the L2 "norm" of the
 * block without mean removal).  Uses the ff_squareTbl lookup table,
 * reading pixels word-at-a-time on the fast path. */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* straightforward byte-wise reference version */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
            /* NOTE(review): the casts below type-pun uint8_t* to wider
             * integer pointers; presumably the buffer alignment and the
             * compiler flags used by the project make this safe here —
             * strict-aliasing/alignment should be confirmed before reuse. */
#if HAVE_FAST_64BIT
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
00252
/* Byte-swap w 32-bit words from src into dst (buffers may be the same).
 * The bulk is processed eight words at a time; a scalar loop handles
 * the remainder. */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n = 0;

    while (n + 8 <= w) {
        dst[n]     = av_bswap32(src[n]);
        dst[n + 1] = av_bswap32(src[n + 1]);
        dst[n + 2] = av_bswap32(src[n + 2]);
        dst[n + 3] = av_bswap32(src[n + 3]);
        dst[n + 4] = av_bswap32(src[n + 4]);
        dst[n + 5] = av_bswap32(src[n + 5]);
        dst[n + 6] = av_bswap32(src[n + 6]);
        dst[n + 7] = av_bswap32(src[n + 7]);
        n += 8;
    }
    while (n < w) {
        dst[n] = av_bswap32(src[n]);
        n++;
    }
}
00270
/* Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;

    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
00276
/* Sum of squared errors between two 4-pixel-wide columns of height h.
 * v is an unused context pointer (comparator signature); ff_squareTbl+256
 * maps a signed difference in [-255,255] to its square. */
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
00293
/* Sum of squared errors between two 8-pixel-wide blocks of height h;
 * see sse4_c for the lookup-table trick. */
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        s += sq[pix1[4] - pix2[4]];
        s += sq[pix1[5] - pix2[5]];
        s += sq[pix1[6] - pix2[6]];
        s += sq[pix1[7] - pix2[7]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
00314
/* Sum of squared errors between two 16-pixel-wide blocks of height h;
 * see sse4_c for the lookup-table trick. */
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[ 0] - pix2[ 0]];
        s += sq[pix1[ 1] - pix2[ 1]];
        s += sq[pix1[ 2] - pix2[ 2]];
        s += sq[pix1[ 3] - pix2[ 3]];
        s += sq[pix1[ 4] - pix2[ 4]];
        s += sq[pix1[ 5] - pix2[ 5]];
        s += sq[pix1[ 6] - pix2[ 6]];
        s += sq[pix1[ 7] - pix2[ 7]];
        s += sq[pix1[ 8] - pix2[ 8]];
        s += sq[pix1[ 9] - pix2[ 9]];
        s += sq[pix1[10] - pix2[10]];
        s += sq[pix1[11] - pix2[11]];
        s += sq[pix1[12] - pix2[12]];
        s += sq[pix1[13] - pix2[13]];
        s += sq[pix1[14] - pix2[14]];
        s += sq[pix1[15] - pix2[15]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
00344
00345 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00346 const uint8_t *s2, int stride){
00347 int i;
00348
00349
00350 for(i=0;i<8;i++) {
00351 block[0] = s1[0] - s2[0];
00352 block[1] = s1[1] - s2[1];
00353 block[2] = s1[2] - s2[2];
00354 block[3] = s1[3] - s2[3];
00355 block[4] = s1[4] - s2[4];
00356 block[5] = s1[5] - s2[5];
00357 block[6] = s1[6] - s2[6];
00358 block[7] = s1[7] - s2[7];
00359 s1 += stride;
00360 s2 += stride;
00361 block += 8;
00362 }
00363 }
00364
00365
/* Store an 8x8 block of DCT output as pixels, clamping each value to
 * [0,255] via the ff_cropTbl lookup (cm tolerates indices in
 * [-MAX_NEG_CROP, 255+MAX_NEG_CROP]). */
void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                             int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;


    for(i=0;i<8;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];
        pixels[4] = cm[block[4]];
        pixels[5] = cm[block[5]];
        pixels[6] = cm[block[6]];
        pixels[7] = cm[block[7]];

        pixels += line_size;
        block += 8;
    }
}
00387
/* 4-pixel-wide variant of ff_put_pixels_clamped_c (block rows are still
 * 8 coefficients apart). */
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;


    for(i=0;i<4;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];

        pixels += line_size;
        block += 8;
    }
}
00405
/* 2-pixel-wide variant of ff_put_pixels_clamped_c (block rows are still
 * 8 coefficients apart). */
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;


    for(i=0;i<2;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];

        pixels += line_size;
        block += 8;
    }
}
00421
00422 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00423 uint8_t *restrict pixels,
00424 int line_size)
00425 {
00426 int i, j;
00427
00428 for (i = 0; i < 8; i++) {
00429 for (j = 0; j < 8; j++) {
00430 if (*block < -128)
00431 *pixels = 0;
00432 else if (*block > 127)
00433 *pixels = 255;
00434 else
00435 *pixels = (uint8_t)(*block + 128);
00436 block++;
00437 pixels++;
00438 }
00439 pixels += (line_size - 8);
00440 }
00441 }
00442
/* Add an 8x8 block of DCT output to existing pixels, clamping each sum
 * to [0,255] via the ff_cropTbl lookup. */
void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                             int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;


    for(i=0;i<8;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels[4] = cm[pixels[4] + block[4]];
        pixels[5] = cm[pixels[5] + block[5]];
        pixels[6] = cm[pixels[6] + block[6]];
        pixels[7] = cm[pixels[7] + block[7]];
        pixels += line_size;
        block += 8;
    }
}
00463
/* 4-pixel-wide variant of ff_add_pixels_clamped_c. */
static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                          int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;


    for(i=0;i<4;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels += line_size;
        block += 8;
    }
}
00480
/* 2-pixel-wide variant of ff_add_pixels_clamped_c. */
static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                          int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;


    for(i=0;i<2;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels += line_size;
        block += 8;
    }
}
00495
00496 static int sum_abs_dctelem_c(DCTELEM *block)
00497 {
00498 int sum=0, i;
00499 for(i=0; i<64; i++)
00500 sum+= FFABS(block[i]);
00501 return sum;
00502 }
00503
/* Fill an h-row, 16-pixel-wide region with a constant byte value;
 * rows are line_size bytes apart. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int y;

    for (y = 0; y < h; y++, row += line_size)
        memset(row, value, 16);
}
00513
/* Fill an h-row, 8-pixel-wide region with a constant byte value;
 * rows are line_size bytes apart. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int y;

    for (y = 0; y < h; y++, row += line_size)
        memset(row, value, 8);
}
00523
/* Rounded averages of 2 / 4 byte values (round half up).
 * NOTE: arguments are deliberately not parenthesized in the original;
 * callers pass simple expressions only. */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
00526
/* 1/16-pel bilinear interpolation for one 8-pixel-wide block (GMC with a
 * single motion vector): A..D are the bilinear weights derived from the
 * fractional position (x16, y16); each output pixel is the weighted sum of
 * its four source neighbours, biased by 'rounder' and scaled down by 256. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(    x16)*(16-y16);
    const int C=(16-x16)*(    y16);
    const int D=(    x16)*(    y16);
    int i;

    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
00549
/* Global motion compensation with a full affine motion model.
 * For each destination pixel of an 8-wide, h-tall block, the source
 * position is computed from the 16.16 fixed-point affine parameters
 * (ox,oy origin; dxx,dxy,dyx,dyy gradients), then bilinearly
 * interpolated with shift-bit fractional precision.  Source coordinates
 * outside [0,width]x[0,height] are clamped to the picture edge, with
 * the interpolation degenerating to 1-D or nearest-pixel as needed.
 * r is the rounding bias added before the final >>(shift*2). */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* convert to inclusive maximum coordinates for the clamping tests */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* split the 16.16 position into integer and fractional parts */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: 2-D bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, interpolate horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, interpolate vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* both outside: nearest clamped pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
00607
/* Thirdpel MC, full-pel position: plain copy, dispatched on block width
 * to the 8-bit put_pixelsN helpers instantiated from dsputil_template.c. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_8_c (dst, src, stride, height); break;
    case 4: put_pixels4_8_c (dst, src, stride, height); break;
    case 8: put_pixels8_8_c (dst, src, stride, height); break;
    case 16:put_pixels16_8_c(dst, src, stride, height); break;
    }
}
00616
/* Thirdpel MC, horizontal 1/3 position: dst ~= (2*a + b) / 3, computed
 * with the 683/2048 fixed-point approximation of 1/3. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00627
/* Thirdpel MC, horizontal 2/3 position: dst ~= (a + 2*b) / 3 via the
 * 683/2048 fixed-point approximation of 1/3. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00638
/* Thirdpel MC, vertical 1/3 position: dst ~= (2*top + bottom) / 3 via
 * the 683/2048 fixed-point approximation of 1/3. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00649
/* Thirdpel MC, (1/3, 1/3) position: 2-D weighted average of the four
 * neighbours with weights 4/3/3/2, using 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (4 * src[col] + 3 * src[col + 1]
                              + 3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00660
/* Thirdpel MC, (1/3, 2/3) position: 2-D weighted average of the four
 * neighbours with weights 3/2/4/3, using 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (3 * src[col] + 2 * src[col + 1]
                              + 4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00671
/* Thirdpel MC, vertical 2/3 position: dst ~= (top + 2*bottom) / 3 via
 * the 683/2048 fixed-point approximation of 1/3. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00682
/* Thirdpel MC, (2/3, 1/3) position: 2-D weighted average of the four
 * neighbours with weights 3/4/2/3, using 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (3 * src[col] + 4 * src[col + 1]
                              + 2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00693
/* Thirdpel MC, (2/3, 2/3) position: 2-D weighted average of the four
 * neighbours with weights 2/3/3/4, using 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (2 * src[col] + 3 * src[col + 1]
                              + 3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00704
/* Thirdpel MC, full-pel position, averaging variant: dispatch on block
 * width to the 8-bit avg_pixelsN helpers from dsputil_template.c. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_8_c (dst, src, stride, height); break;
    case 4: avg_pixels4_8_c (dst, src, stride, height); break;
    case 8: avg_pixels8_8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_8_c(dst, src, stride, height); break;
    }
}
00713
/* Averaging thirdpel MC, horizontal 1/3 position: interpolate as in
 * put_tpel_pixels_mc10_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00724
/* Averaging thirdpel MC, horizontal 2/3 position: interpolate as in
 * put_tpel_pixels_mc20_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00735
/* Averaging thirdpel MC, vertical 1/3 position: interpolate as in
 * put_tpel_pixels_mc01_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00746
/* Averaging thirdpel MC, (1/3, 1/3) position: interpolate as in
 * put_tpel_pixels_mc11_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (2731 * (4 * src[col] + 3 * src[col + 1]
                           + 3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00757
/* Averaging thirdpel MC, (1/3, 2/3) position: interpolate as in
 * put_tpel_pixels_mc12_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (2731 * (3 * src[col] + 2 * src[col + 1]
                           + 4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00768
/* Averaging thirdpel MC, vertical 2/3 position: interpolate as in
 * put_tpel_pixels_mc02_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00779
/* Averaging thirdpel MC, (2/3, 1/3) position: interpolate as in
 * put_tpel_pixels_mc21_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (2731 * (3 * src[col] + 4 * src[col + 1]
                           + 2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00790
/* Averaging thirdpel MC, (2/3, 2/3) position: interpolate as in
 * put_tpel_pixels_mc22_c, then round-average with the existing dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            int p = (2731 * (2 * src[col] + 3 * src[col + 1]
                           + 3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00801
00802 #define QPEL_MC(r, OPNAME, RND, OP) \
00803 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00804 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00805 int i;\
00806 for(i=0; i<h; i++)\
00807 {\
00808 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00809 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00810 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00811 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00812 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00813 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00814 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00815 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00816 dst+=dstStride;\
00817 src+=srcStride;\
00818 }\
00819 }\
00820 \
00821 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00822 const int w=8;\
00823 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00824 int i;\
00825 for(i=0; i<w; i++)\
00826 {\
00827 const int src0= src[0*srcStride];\
00828 const int src1= src[1*srcStride];\
00829 const int src2= src[2*srcStride];\
00830 const int src3= src[3*srcStride];\
00831 const int src4= src[4*srcStride];\
00832 const int src5= src[5*srcStride];\
00833 const int src6= src[6*srcStride];\
00834 const int src7= src[7*srcStride];\
00835 const int src8= src[8*srcStride];\
00836 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00837 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00838 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00839 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00840 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00841 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00842 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00843 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00844 dst++;\
00845 src++;\
00846 }\
00847 }\
00848 \
00849 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00850 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00851 int i;\
00852 \
00853 for(i=0; i<h; i++)\
00854 {\
00855 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00856 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00857 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00858 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00859 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00860 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00861 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00862 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00863 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00864 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00865 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00866 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00867 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00868 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00869 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00870 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00871 dst+=dstStride;\
00872 src+=srcStride;\
00873 }\
00874 }\
00875 \
00876 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00877 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00878 int i;\
00879 const int w=16;\
00880 for(i=0; i<w; i++)\
00881 {\
00882 const int src0= src[0*srcStride];\
00883 const int src1= src[1*srcStride];\
00884 const int src2= src[2*srcStride];\
00885 const int src3= src[3*srcStride];\
00886 const int src4= src[4*srcStride];\
00887 const int src5= src[5*srcStride];\
00888 const int src6= src[6*srcStride];\
00889 const int src7= src[7*srcStride];\
00890 const int src8= src[8*srcStride];\
00891 const int src9= src[9*srcStride];\
00892 const int src10= src[10*srcStride];\
00893 const int src11= src[11*srcStride];\
00894 const int src12= src[12*srcStride];\
00895 const int src13= src[13*srcStride];\
00896 const int src14= src[14*srcStride];\
00897 const int src15= src[15*srcStride];\
00898 const int src16= src[16*srcStride];\
00899 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00900 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00901 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00902 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00903 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00904 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00905 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00906 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00907 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00908 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00909 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00910 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00911 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00912 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00913 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00914 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00915 dst++;\
00916 src++;\
00917 }\
00918 }\
00919 \
00920 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00921 uint8_t half[64];\
00922 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00923 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00924 }\
00925 \
00926 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00927 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00928 }\
00929 \
00930 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00931 uint8_t half[64];\
00932 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00933 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00934 }\
00935 \
00936 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00937 uint8_t full[16*9];\
00938 uint8_t half[64];\
00939 copy_block9(full, src, 16, stride, 9);\
00940 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00941 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00942 }\
00943 \
00944 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00945 uint8_t full[16*9];\
00946 copy_block9(full, src, 16, stride, 9);\
00947 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00948 }\
00949 \
00950 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00951 uint8_t full[16*9];\
00952 uint8_t half[64];\
00953 copy_block9(full, src, 16, stride, 9);\
00954 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00955 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00956 }\
00957 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00958 uint8_t full[16*9];\
00959 uint8_t halfH[72];\
00960 uint8_t halfV[64];\
00961 uint8_t halfHV[64];\
00962 copy_block9(full, src, 16, stride, 9);\
00963 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00964 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00965 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00966 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00967 }\
00968 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00969 uint8_t full[16*9];\
00970 uint8_t halfH[72];\
00971 uint8_t halfHV[64];\
00972 copy_block9(full, src, 16, stride, 9);\
00973 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00974 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
00975 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00976 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00977 }\
00978 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00979 uint8_t full[16*9];\
00980 uint8_t halfH[72];\
00981 uint8_t halfV[64];\
00982 uint8_t halfHV[64];\
00983 copy_block9(full, src, 16, stride, 9);\
00984 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00985 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00986 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00987 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00988 }\
00989 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00990 uint8_t full[16*9];\
00991 uint8_t halfH[72];\
00992 uint8_t halfHV[64];\
00993 copy_block9(full, src, 16, stride, 9);\
00994 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00995 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
00996 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00997 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00998 }\
00999 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01000 uint8_t full[16*9];\
01001 uint8_t halfH[72];\
01002 uint8_t halfV[64];\
01003 uint8_t halfHV[64];\
01004 copy_block9(full, src, 16, stride, 9);\
01005 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01006 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01007 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01008 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01009 }\
01010 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01011 uint8_t full[16*9];\
01012 uint8_t halfH[72];\
01013 uint8_t halfHV[64];\
01014 copy_block9(full, src, 16, stride, 9);\
01015 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01016 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01017 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01018 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01019 }\
01020 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01021 uint8_t full[16*9];\
01022 uint8_t halfH[72];\
01023 uint8_t halfV[64];\
01024 uint8_t halfHV[64];\
01025 copy_block9(full, src, 16, stride, 9);\
01026 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01027 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01028 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01029 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01030 }\
01031 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01032 uint8_t full[16*9];\
01033 uint8_t halfH[72];\
01034 uint8_t halfHV[64];\
01035 copy_block9(full, src, 16, stride, 9);\
01036 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01037 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01038 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01039 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01040 }\
01041 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01042 uint8_t halfH[72];\
01043 uint8_t halfHV[64];\
01044 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01045 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01046 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01047 }\
01048 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01049 uint8_t halfH[72];\
01050 uint8_t halfHV[64];\
01051 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01052 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01053 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01054 }\
01055 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01056 uint8_t full[16*9];\
01057 uint8_t halfH[72];\
01058 uint8_t halfV[64];\
01059 uint8_t halfHV[64];\
01060 copy_block9(full, src, 16, stride, 9);\
01061 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01062 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01063 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01064 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01065 }\
01066 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01067 uint8_t full[16*9];\
01068 uint8_t halfH[72];\
01069 copy_block9(full, src, 16, stride, 9);\
01070 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01071 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01072 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01073 }\
01074 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01075 uint8_t full[16*9];\
01076 uint8_t halfH[72];\
01077 uint8_t halfV[64];\
01078 uint8_t halfHV[64];\
01079 copy_block9(full, src, 16, stride, 9);\
01080 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01081 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01082 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01083 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01084 }\
01085 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01086 uint8_t full[16*9];\
01087 uint8_t halfH[72];\
01088 copy_block9(full, src, 16, stride, 9);\
01089 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01090 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01091 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01092 }\
01093 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01094 uint8_t halfH[72];\
01095 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01096 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01097 }\
01098 \
01099 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01100 uint8_t half[256];\
01101 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01102 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01103 }\
01104 \
01105 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01106 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01107 }\
01108 \
01109 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01110 uint8_t half[256];\
01111 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01112 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01113 }\
01114 \
01115 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01116 uint8_t full[24*17];\
01117 uint8_t half[256];\
01118 copy_block17(full, src, 24, stride, 17);\
01119 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01120 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01121 }\
01122 \
01123 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01124 uint8_t full[24*17];\
01125 copy_block17(full, src, 24, stride, 17);\
01126 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01127 }\
01128 \
01129 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01130 uint8_t full[24*17];\
01131 uint8_t half[256];\
01132 copy_block17(full, src, 24, stride, 17);\
01133 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01134 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01135 }\
01136 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01137 uint8_t full[24*17];\
01138 uint8_t halfH[272];\
01139 uint8_t halfV[256];\
01140 uint8_t halfHV[256];\
01141 copy_block17(full, src, 24, stride, 17);\
01142 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01143 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01144 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01145 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01146 }\
01147 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01148 uint8_t full[24*17];\
01149 uint8_t halfH[272];\
01150 uint8_t halfHV[256];\
01151 copy_block17(full, src, 24, stride, 17);\
01152 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01153 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01154 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01155 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01156 }\
01157 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01158 uint8_t full[24*17];\
01159 uint8_t halfH[272];\
01160 uint8_t halfV[256];\
01161 uint8_t halfHV[256];\
01162 copy_block17(full, src, 24, stride, 17);\
01163 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01164 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01165 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01166 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01167 }\
01168 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01169 uint8_t full[24*17];\
01170 uint8_t halfH[272];\
01171 uint8_t halfHV[256];\
01172 copy_block17(full, src, 24, stride, 17);\
01173 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01174 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01175 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01176 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01177 }\
01178 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01179 uint8_t full[24*17];\
01180 uint8_t halfH[272];\
01181 uint8_t halfV[256];\
01182 uint8_t halfHV[256];\
01183 copy_block17(full, src, 24, stride, 17);\
01184 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01185 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01186 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01187 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01188 }\
01189 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01190 uint8_t full[24*17];\
01191 uint8_t halfH[272];\
01192 uint8_t halfHV[256];\
01193 copy_block17(full, src, 24, stride, 17);\
01194 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01195 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01196 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01197 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01198 }\
01199 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01200 uint8_t full[24*17];\
01201 uint8_t halfH[272];\
01202 uint8_t halfV[256];\
01203 uint8_t halfHV[256];\
01204 copy_block17(full, src, 24, stride, 17);\
01205 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01206 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01207 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01208 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01209 }\
01210 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01211 uint8_t full[24*17];\
01212 uint8_t halfH[272];\
01213 uint8_t halfHV[256];\
01214 copy_block17(full, src, 24, stride, 17);\
01215 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01216 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01217 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01218 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01219 }\
01220 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01221 uint8_t halfH[272];\
01222 uint8_t halfHV[256];\
01223 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01224 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01225 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01226 }\
01227 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01228 uint8_t halfH[272];\
01229 uint8_t halfHV[256];\
01230 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01231 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01232 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01233 }\
01234 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01235 uint8_t full[24*17];\
01236 uint8_t halfH[272];\
01237 uint8_t halfV[256];\
01238 uint8_t halfHV[256];\
01239 copy_block17(full, src, 24, stride, 17);\
01240 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01241 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01242 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01243 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01244 }\
01245 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01246 uint8_t full[24*17];\
01247 uint8_t halfH[272];\
01248 copy_block17(full, src, 24, stride, 17);\
01249 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01250 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01251 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01252 }\
01253 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01254 uint8_t full[24*17];\
01255 uint8_t halfH[272];\
01256 uint8_t halfV[256];\
01257 uint8_t halfHV[256];\
01258 copy_block17(full, src, 24, stride, 17);\
01259 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01260 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01261 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01262 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01263 }\
01264 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01265 uint8_t full[24*17];\
01266 uint8_t halfH[272];\
01267 copy_block17(full, src, 24, stride, 17);\
01268 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01269 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01270 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01271 }\
01272 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01273 uint8_t halfH[272];\
01274 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01275 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01276 }
01277
/* Pixel-store operators plugged into QPEL_MC: each maps a 17.5-ish
 * fixed-point filter result b back to 8 bits via the crop table
 * ((b+16)>>5 rounds, (b+15)>>5 is the no-rounding variant), either
 * storing it (put) or averaging with the existing pixel (avg). */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the qpel MC function families; note op_avg_no_rnd is
 * defined above but no avg_no_rnd family is instantiated here. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
01291
/* The (0,0) quarter-pel position is a plain full-pel copy/average, so
 * alias the mc00 entries to the generic pixel helpers. Fixed the
 * no_rnd 16x16 alias to ff_put_pixels16x16_c: the previous
 * ff_put_pixels16x16_8_c did not match the ff_* wrapper naming used by
 * every sibling alias here (cf. put_qpel16_mc00_c just above). */
#define put_qpel8_mc00_c  ff_put_pixels8x8_c
#define avg_qpel8_mc00_c  ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
01298
01299 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01300 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01301 int i;
01302
01303 for(i=0; i<h; i++){
01304 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01305 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01306 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01307 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01308 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01309 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01310 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01311 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01312 dst+=dstStride;
01313 src+=srcStride;
01314 }
01315 }
01316
#if CONFIG_RV40_DECODER
/* RV40 (3,3) quarter-pel positions: forwarded to the plain 2-D
 * half-pel (xy2) average helpers, for 16x16 and 8x8, put and avg. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif
01331
#if CONFIG_DIRAC_DECODER
/*
 * Dirac motion-compensation wrappers. Each function receives up to
 * five source pointers (src[0..4]); these C versions use src[0] (plus
 * src[1] for _l2 and src[0..3] for _l4 averaging) and forward to the
 * generic 8-bit pixel helpers. 32-wide blocks are handled as two
 * adjacent 16-wide calls. Instantiated below for put and avg.
 */
#define DIRAC_MC(OPNAME)\
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
}
/* instantiate the put and avg flavours */
DIRAC_MC(put)
DIRAC_MC(avg)
#endif
01376
01377 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01378 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01379 int i;
01380
01381 for(i=0; i<w; i++){
01382 const int src_1= src[ -srcStride];
01383 const int src0 = src[0 ];
01384 const int src1 = src[ srcStride];
01385 const int src2 = src[2*srcStride];
01386 const int src3 = src[3*srcStride];
01387 const int src4 = src[4*srcStride];
01388 const int src5 = src[5*srcStride];
01389 const int src6 = src[6*srcStride];
01390 const int src7 = src[7*srcStride];
01391 const int src8 = src[8*srcStride];
01392 const int src9 = src[9*srcStride];
01393 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01394 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01395 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01396 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01397 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01398 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01399 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01400 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01401 src++;
01402 dst++;
01403 }
01404 }
01405
/* (1,0) quarter-pel left: average the source with its horizontal half-pel. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, hbuf, stride, stride, 8, 8);
}
01411
/* (2,0) half-pel horizontal: the h lowpass written straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01415
/* (3,0) quarter-pel right: average src+1 with the horizontal half-pel. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src + 1, hbuf, stride, stride, 8, 8);
}
01421
/* (0,2) half-pel vertical: the v lowpass written straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01425
/* (1,2): average of the vertical half-pel and the HV (h-then-v) half-pel. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t tmpH[88];
    uint8_t tmpV[64];
    uint8_t tmpHV[64];

    /* h lowpass over 11 rows starting one row above, so the v pass has context */
    wmv2_mspel8_h_lowpass(tmpH, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(tmpV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(tmpHV, tmpH + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, tmpV, tmpHV, stride, 8, 8, 8);
}
/* (3,2): like mc12 but the vertical half-pel is taken at src+1. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t tmpH[88];
    uint8_t tmpV[64];
    uint8_t tmpHV[64];

    wmv2_mspel8_h_lowpass(tmpH, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(tmpV, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(tmpHV, tmpH + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, tmpV, tmpHV, stride, 8, 8, 8);
}
/* (2,2) half-pel both axes: h lowpass then v lowpass straight to dst. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t tmpH[88];

    wmv2_mspel8_h_lowpass(tmpH, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, tmpH + 8, stride, 8, 8);
}
01449
/**
 * H.263 in-loop deblocking across a horizontal block edge (8 pixels wide).
 * src points at the first row below the edge; qscale indexes the filter
 * strength table. Adjusts the two rows on each side of the edge.
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            /* two pixels on each side of the edge */
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* piecewise-linear ramp: d1 follows d for small |d| and tapers
               back to zero once |d| reaches 2*strength (real edges pass) */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless clamp to [0,255]: bit 8 set means out of range;
               ~(p>>31) yields 255 on overflow and 0 on underflow */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker secondary correction of the outer pixel pair */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
01486
/**
 * H.263 in-loop deblocking across a vertical block edge (8 rows tall).
 * Transposed twin of h263_v_loop_filter_c: src points at the first
 * column right of the edge; adjusts two columns on each side.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            /* two pixels on each side of the edge */
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* piecewise-linear ramp, tapering to zero at |d| >= 2*strength */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless clamp to [0,255] (see h263_v_loop_filter_c) */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker secondary correction of the outer pixel pair */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
01523
/**
 * H.261 in-loop filter on an 8x8 block: separable 1-2-1 smoothing,
 * vertical pass into a temporary, then horizontal pass with rounding
 * back into src. Border rows/columns only get the one-dimensional tap.
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int blurred[64];
    int row, col;

    /* vertical 1-2-1 pass; top/bottom rows are just scaled by 4 */
    for (col = 0; col < 8; col++) {
        blurred[col]      = 4 * src[col];
        blurred[col + 56] = 4 * src[col + 7 * stride];
    }
    for (row = 1; row < 7; row++) {
        for (col = 0; col < 8; col++) {
            const int s = row * stride + col;
            blurred[row * 8 + col] = src[s - stride] + 2 * src[s] + src[s + stride];
        }
    }

    /* horizontal 1-2-1 pass with rounding, written back in place */
    for (row = 0; row < 8; row++) {
        src[row * stride]     = (blurred[row * 8]     + 2) >> 2;
        src[row * stride + 7] = (blurred[row * 8 + 7] + 2) >> 2;
        for (col = 1; col < 7; col++) {
            const int t = row * 8 + col;
            src[row * stride + col] =
                (blurred[t - 1] + 2 * blurred[t] + blurred[t + 1] + 8) >> 4;
        }
    }
}
01550
/**
 * Sum of absolute differences over a 16-wide block of height h.
 * The first (context) argument is unused in the C version.
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
01578
/* SAD of a 16-wide block against pix2 interpolated half a pel to the right. */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
01606
/* SAD of a 16-wide block against pix2 interpolated half a pel downward. */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return total;
}
01636
/* SAD of a 16-wide block against pix2 interpolated half a pel in both axes. */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                          below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return total;
}
01666
/* Sum of absolute differences over an 8-wide block of height h. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
01686
/* SAD of an 8-wide block against pix2 interpolated half a pel to the right. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
01706
/* SAD of an 8-wide block against pix2 interpolated half a pel downward. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return total;
}
01728
/* SAD of an 8-wide block against pix2 interpolated half a pel in both axes. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                          below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return total;
}
01750
/* Noise-preserving SSE for 16-pixel-wide blocks.
 * score1 is the plain sum of squared errors; score2 accumulates the
 * difference between the local 2x2 gradients of s1 and s2, so errors that
 * preserve texture cost less.  The gradient term is weighted by
 * avctx->nsse_weight when a MpegEncContext is supplied via v, otherwise
 * by the default 8. */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){  /* gradient needs the row below; skip on the last row */
            for(x=0; x<15; x++){
                /* 2x2 cross-gradient of s1 minus that of s2 */
                score2+= FFABS( s1[x  ] - s1[x  +stride]
                              - s1[x+1] + s1[x+1+stride])
                        -FFABS( s2[x  ] - s2[x  +stride]
                              - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;  /* default weight when no context */
}
01776
/* 8-pixel-wide variant of nsse16_c: plain SSE plus a weighted texture
 * (2x2 gradient) difference term.  See nsse16_c for the rationale. */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){  /* gradient needs the row below; skip on the last row */
            for(x=0; x<7; x++){
                score2+= FFABS( s1[x  ] - s1[x  +stride]
                              - s1[x+1] + s1[x+1+stride])
                        -FFABS( s2[x  ] - s2[x  +stride]
                              - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;  /* default weight when no context */
}
01802
/* Estimate the weighted squared error that would remain after adding
 * basis*scale (rounded down from BASIS_SHIFT to RECON_SHIFT precision)
 * to the residual "rem".  Each term contributes (w*b)^2 >> 4 and the
 * total is returned >> 2.
 * NOTE(review): the assert documents the assumption |b| < 512 after the
 * RECON_SHIFT, which keeps w*b within int range — confirm against the
 * weight table producers. */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        /* scaled basis added to the residual, with round-to-nearest */
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}
01817
/* Add basis*scale to the residual "rem" in place, using the same
 * round-to-nearest BASIS_SHIFT -> RECON_SHIFT conversion as
 * try_8x8basis_c above. */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
01825
01834 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01835 {
01836 int i;
01837 DCTELEM temp[64];
01838
01839 if(last<=0) return;
01840
01841
01842 for(i=0; i<=last; i++){
01843 const int j= scantable[i];
01844 temp[j]= block[j];
01845 block[j]=0;
01846 }
01847
01848 for(i=0; i<=last; i++){
01849 const int j= scantable[i];
01850 const int perm_j= permutation[j];
01851 block[perm_j]= temp[j];
01852 }
01853 }
01854
/* Comparison function that always reports a perfect (zero) score;
 * selected for FF_CMP_ZERO in ff_set_cmp() below. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
01858
/* Fill cmp[0..5] with the comparison functions selected by "type"
 * (an FF_CMP_* id; only the low byte is significant).  Index i picks
 * the block-size variant from the corresponding DSPContext table.
 * Unknown ids leave the slots zeroed (from the memset) and log an error. */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
01918
/* dst[i] += src[i] (mod 256) for i in [0, w).
 * Uses a SWAR trick to add sizeof(long) bytes per iteration: add the low
 * 7 bits of each byte, then patch the MSBs with XOR so no carry crosses
 * byte lanes.  The remaining tail bytes are handled one at a time.
 *
 * Fix: the loop bound is computed as "w - (long)sizeof(long)".  The
 * original "w - sizeof(long)" promoted w to an unsigned type, so for
 * w < sizeof(long) the bound wrapped to a huge value and the word loop
 * read and wrote out of bounds. */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i;
    /* per-word SWAR masks: 0x7f7f... and 0x8080... repeated */
    const unsigned long m7f = ~0UL / 255 * 0x7f;
    const unsigned long m80 = ~0UL / 255 * 0x80;

    for (i = 0; i <= w - (long)sizeof(long); i += sizeof(long)) {
        long a = *(long*)(src + i);
        long b = *(long*)(dst + i);
        /* per-byte (a+b) & 0xFF without inter-byte carries */
        *(long*)(dst + i) = ((a & m7f) + (b & m7f)) ^ ((a ^ b) & m80);
    }
    /* scalar tail: at most sizeof(long)-1 bytes */
    for (; i < w; i++)
        dst[i] += src[i];
}
01929
/* dst[i] = src1[i] - src2[i] (mod 256) for i in [0, w).
 * On targets without fast unaligned loads, an unaligned src2 falls back
 * to a byte loop unrolled by 8; otherwise a SWAR word loop subtracts
 * sizeof(long) bytes at once (borrow confined to each byte lane).
 *
 * Fix: the word-loop bound uses "w - (long)sizeof(long)"; the original
 * unsigned "w - sizeof(long)" wrapped for w < sizeof(long) and caused an
 * out-of-bounds word access. */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
    /* per-word SWAR masks: 0x7f7f... and 0x8080... repeated */
    const unsigned long m7f = ~0UL / 255 * 0x7f;
    const unsigned long m80 = ~0UL / 255 * 0x80;
#if !HAVE_FAST_UNALIGNED
    if ((long)src2 & (sizeof(long) - 1)) {
        /* unaligned source: plain byte loop, unrolled by 8 */
        for (i = 0; i + 7 < w; i += 8) {
            dst[i+0] = src1[i+0] - src2[i+0];
            dst[i+1] = src1[i+1] - src2[i+1];
            dst[i+2] = src1[i+2] - src2[i+2];
            dst[i+3] = src1[i+3] - src2[i+3];
            dst[i+4] = src1[i+4] - src2[i+4];
            dst[i+5] = src1[i+5] - src2[i+5];
            dst[i+6] = src1[i+6] - src2[i+6];
            dst[i+7] = src1[i+7] - src2[i+7];
        }
    } else
#endif
    for (i = 0; i <= w - (long)sizeof(long); i += sizeof(long)) {
        long a = *(long*)(src1 + i);
        long b = *(long*)(src2 + i);
        /* per-byte (a-b) & 0xFF without inter-byte borrows */
        *(long*)(dst + i) = ((a | m80) - (b & m7f)) ^ ((a ^ b ^ m80) & m80);
    }
    /* scalar tail */
    for (; i < w; i++)
        dst[i] = src1[i] - src2[i];
}
01954
/* HuffYUV median-predictor decode: reconstruct dst from the residuals in
 * "diff", predicting each pixel as the median of the left pixel, the top
 * pixel (src1) and the gradient left+top-topleft.  The left/topleft state
 * is carried in and out via the pointer arguments so rows can be chained. */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t cur     = *left;      /* running left neighbour     */
    uint8_t topleft = *left_top;  /* running top-left neighbour */

    for (i = 0; i < w; i++) {
        const uint8_t top = src1[i];
        /* predictor + residual; the uint8_t store wraps mod 256 */
        cur = mid_pred(cur, top, (cur + top - topleft) & 0xFF) + diff[i];
        topleft = top;
        dst[i]  = cur;
    }

    *left     = cur;
    *left_top = topleft;
}
01971
/* HuffYUV median-predictor encode: the inverse of
 * add_hfyu_median_prediction_c.  Writes residuals dst = src2 - pred,
 * where pred is the median of left, top (src1) and the gradient
 * left+top-topleft, with state carried through *left / *left_top. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t cur     = *left;      /* running left neighbour     */
    uint8_t topleft = *left_top;  /* running top-left neighbour */

    for (i = 0; i < w; i++) {
        const uint8_t top = src1[i];
        const int pred = mid_pred(cur, top, (cur + top - topleft) & 0xFF);
        topleft = top;
        cur     = src2[i];
        dst[i]  = cur - pred;     /* uint8_t store wraps mod 256 */
    }

    *left     = cur;
    *left_top = topleft;
}
01989
/* Left (A1) prediction decode: running sum of the residuals in src,
 * truncated to 8 bits on store.  The int accumulator "acc" is carried
 * across calls via the return value. */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i = 0;

    /* main loop unrolled by two while at least two pixels remain */
    while (i + 1 < w) {
        acc += src[i];
        dst[i] = acc;
        i++;
        acc += src[i];
        dst[i] = acc;
        i++;
    }

    /* at most one trailing pixel */
    while (i < w) {
        acc += src[i];
        dst[i] = acc;
        i++;
    }

    return acc;
}
02008
/* Byte offsets of the colour channels inside a packed 32-bit BGRA pixel,
 * chosen for the host endianness. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/* Left (A1) prediction decode over packed 32-bit BGRA pixels: each of the
 * four channels is a running sum of its residual stream, independently of
 * the others.  Channel accumulators are carried in/out through the pointer
 * arguments so consecutive calls can continue across row boundaries. */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int r,g,b,a;
    r= *red;
    g= *green;
    b= *blue;
    a= *alpha;

    for(i=0; i<w; i++){
        b+= src[4*i+B];
        g+= src[4*i+G];
        r+= src[4*i+R];
        a+= src[4*i+A];

        /* uint8_t stores truncate to 8 bits; accumulators keep full int precision */
        dst[4*i+B]= b;
        dst[4*i+G]= g;
        dst[4*i+R]= r;
        dst[4*i+A]= a;
    }

    *red= r;
    *green= g;
    *blue= b;
    *alpha= a;
}
#undef B
#undef G
#undef R
#undef A
02049
/* Butterfly helpers for the 8x8 Hadamard transforms below. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* |x+y| + |x-y|: final butterfly stage folded into the absolute sum. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))

/* SATD: 8x8 Hadamard transform of the difference (src - dst), returning
 * the sum of absolute transform coefficients.  The first loop does the
 * horizontal 8-point transform per row; the second does the vertical
 * transform per column, accumulating through BUTTERFLYA. */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* horizontal pass: stage 1 on the pixel differences */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        /* stage 2 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        /* stage 3 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical pass: stages 1 and 2 ... */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        /* ...final stage merged with the absolute-value accumulation */
        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
02109
/* Intra SATD: 8x8 Hadamard transform of the source block itself (no
 * reference), summing absolute coefficients.  The DC term
 * |temp[8*0] + temp[8*4]| added inside the loop is subtracted again at
 * the end so the score ignores the block's mean level. */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* horizontal 8-point Hadamard per row */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical pass, last stage merged with the absolute sum */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* remove the DC contribution added by the i==0 iteration above */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
02157
/* DCT-domain SAD: forward-transform the 8x8 pixel difference and return
 * the sum of the absolute transform coefficients. */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
02168
#if CONFIG_GPL
/* One 8-point integer transform in the H.264 high-profile style, built
 * from SRC()/DST() accessor macros so the same body serves both the row
 * and the column pass below. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/* SAD in the H.264 8x8 transform domain: row transform in place, then a
 * column pass whose DST() directly accumulates absolute values. */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
02221
/* Returns the largest absolute DCT coefficient of the 8x8 pixel
 * difference between src1 and src2. */
static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int sum=0, i;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}
02237
/* Quantization-error energy: DCT the pixel difference, quantize and
 * dequantize it, inverse-transform, and return the squared error against
 * the unquantized transform saved in "bak". */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;  /* second half holds the reference copy */
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;  /* force inter quantization path */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp);

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
02260
/* Rate-distortion score of an 8x8 block: quantize the DCT of the
 * difference, count the bits its VLC coding would need, then dequantize,
 * inverse-transform and measure the reconstruction SSE.  Returns
 * distortion + (bits*qscale^2*109 + 64) >> 7.
 * NOTE(review): 109/128 is presumed to be a tuned lambda constant —
 * confirm against the encoder's rate-control code. */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* work on local aligned copies so the idct_add below does not touch
     * the caller's buffers */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;  /* DC coefficient is coded separately below */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits for all but the last coefficient */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;                 /* bias into the table's index range */
                if((level&(~127)) == 0){   /* level fits the VLC table */
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;     /* escape-coded */
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);                /* last coefficient must be nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    /* reconstruct and measure distortion */
    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
02336
/* Bit-cost metric: quantize the DCT of the 8x8 difference and return the
 * number of bits its run/level VLC coding would take (no distortion
 * term; compare rd8x8_c above, which shares this counting loop). */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;  /* DC coefficient is coded separately below */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;                 /* bias into the table's index range */
                if((level&(~127)) == 0){   /* level fits the VLC table */
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;     /* escape-coded */
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);                /* last coefficient must be nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
02395
/* Intra vertical SAD: sum of |pixel - pixel_below| inside a single block,
 * a cheap measure of vertical activity.  The macro generates the 8- and
 * 16-pixel-wide variants instantiated below. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])         \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);        \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
02413
/* Vertical SAD of the difference signal between two 16-wide blocks:
 * sums |row-to-row change of (s1 - s2)|, i.e. how differently the two
 * blocks vary vertically.  Rows 0..h-1 are consumed pairwise. */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += FFABS(s1[col] - s2[col] - s1[col + stride] + s2[col + stride]);
        s1 += stride;
        s2 += stride;
    }

    return total;
}
02428
/* Squared value, shared by the vsse metrics below. */
#define SQ(a) ((a)*(a))

/* Intra vertical SSE: sum of squared differences between vertically
 * adjacent pixels inside one block.  The macro generates the 8- and
 * 16-pixel-wide variants instantiated below. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])               \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);              \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
02447
/* Vertical SSE of the difference signal between two 16-wide blocks:
 * squared row-to-row change of (s1 - s2), accumulated over all rows. */
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += SQ(s1[col] - s2[col] - s1[col + stride] + s2[col + stride]);
        s1 += stride;
        s2 += stride;
    }

    return total;
}
02462
/* Sum of squared differences between an int8 vector and an int16 vector
 * of the same length. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int sum = 0;
    int k;
    for (k = 0; k < size; k++) {
        const int d = pix1[k] - pix2[k];
        sum += d * d;
    }
    return sum;
}
02471
/* Instantiate 16x16 versions of the 8x8 metrics above: each wrapper runs
 * the 8x8 function on the four quadrants and sums the results. */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02482
/* Elementwise float product: dst[k] = src0[k] * src1[k]. */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    int k;
    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[k];
}
02488
/* Elementwise product with src1 read back-to-front:
 * dst[k] = src0[k] * src1[len-1-k]. */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int k;
    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[len - 1 - k];
}
02495
/* Fused multiply-add over vectors: dst[k] = src0[k]*src1[k] + src2[k]. */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int k;
    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[k] + src2[k];
}
02501
/* Overlap-add windowing (MDCT-style): consumes len samples from src0 and
 * src1 plus a 2*len-tap window, producing 2*len outputs.  For each k in
 * [0,len) the pair (dst[k], dst[2*len-1-k]) is a rotation of
 * (src0[k], src1[len-1-k]) by the window taps (win[k], win[2*len-1-k]). */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int k;
    for (k = 0; k < len; k++) {
        const int   m  = 2 * len - 1 - k;   /* mirrored output/window index */
        const float s0 = src0[k];
        const float s1 = src1[len - 1 - k];
        dst[k] = s0 * win[m] - s1 * win[k];
        dst[m] = s0 * win[k] + s1 * win[m];
    }
}
02518
/* Scale a float vector: dst[k] = src[k] * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k;
    for (k = 0; k < len; k++)
        dst[k] = src[k] * mul;
}
02526
/* Multiply-accumulate with a scalar: dst[k] += src[k] * mul. */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k;
    for (k = 0; k < len; k++)
        dst[k] += src[k] * mul;
}
02534
/* In-place butterfly over two non-aliasing vectors:
 * (v1[k], v2[k]) <- (v1[k]+v2[k], v1[k]-v2[k]). */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;
    for (k = 0; k < len; k++) {
        const float sum  = v1[k] + v2[k];
        const float diff = v1[k] - v2[k];
        v1[k] = sum;
        v2[k] = diff;
    }
}
02545
/* Butterfly with interleaved output: for each k, writes src0[k]+src1[k]
 * and src0[k]-src1[k] to consecutive dst slots (dst holds 2*len floats). */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int k;
    for (k = 0; k < len; k++) {
        const float a = src0[k];
        const float b = src1[k];
        dst[2 * k]     = a + b;
        dst[2 * k + 1] = a - b;
    }
}
02557
/* Plain dot product of two float vectors. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int k;

    for (k = 0; k < len; k++)
        acc += v1[k] * v2[k];

    return acc;
}
02568
/* Bit-pattern clip helper for vector_clipf_c_opposite_sign: operates on
 * raw IEEE-754 float bits.  Returns mini when a compares above it as an
 * unsigned integer, maxi when the sign-flipped value exceeds maxisign,
 * and a itself otherwise. */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    return a;
}
02577
/* Float clip for the case min < 0 < max, done on the raw IEEE-754 bit
 * patterns via clipf_c_one (negative floats order inversely as integers,
 * which the sign-flip there accounts for).  Unrolled by 8.
 * NOTE(review): the loop requires len to be a multiple of 8 — presumed
 * API contract, confirm at the callers. */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);  /* max with the sign bit flipped */
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for(i=0; i<len; i+=8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/* Clip each float in src to [min, max], unrolled by 8.  When the range
 * straddles zero the bit-trick variant above is used; otherwise av_clipf
 * is applied directly.
 * NOTE(review): len is presumed to be a multiple of 8 (both paths are
 * unrolled by 8) — confirm the DSP API contract. */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i < len; i+=8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
02613
/* Dot product of two int16 vectors, each partial product arithmetically
 * shifted right by "shift" before accumulation. */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int acc = 0;
    int k;

    for (k = 0; k < order; k++)
        acc += (v1[k] * v2[k]) >> shift;

    return acc;
}
02623
/* Returns dot(v1, v2) computed on the values of v1 BEFORE the update,
 * while simultaneously doing v1[k] += mul * v3[k] in place. */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int acc = 0;
    int k;

    for (k = 0; k < order; k++) {
        acc   += v1[k] * v2[k];   /* uses the pre-update value */
        v1[k] += mul * v3[k];
    }
    return acc;
}
02633
/* Apply a symmetric int16 window with round-to-nearest Q15 scaling:
 * samples i and len-1-i share window tap window[i].
 * NOTE(review): MUL16 is presumed to be a plain 16x16->32 multiply
 * (see mathops.h) — confirm.  len is presumed even so the len2 pairs
 * cover every sample — confirm at the callers. */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w = window[i];
        output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
02646
/* Clip each int32 sample into [min, max], unrolled by 8.
 * NOTE(review): the do/while structure requires len > 0, and since len
 * is unsigned and decremented by 8, a len that is not a multiple of 8
 * underflows and overruns the buffers — presumed API contract that
 * callers pass a positive multiple of 8; confirm. */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
02662
/* Fixed-point cosine weights for the WMV2 8-point inverse DCT
 * (shared with wmv2_idct_col below). */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565

/* One row of the WMV2 8x8 inverse DCT, in place, with >>8 scaling and
 * round-to-nearest on the outputs. */
static void wmv2_idct_row(short * b)
{
    int t1, t2;
    int a0, a1, a2, a3, a4, a5, a6, a7;

    /* odd-coefficient pairs */
    a1 = W1 * b[1] + W7 * b[7];
    a7 = W7 * b[1] - W1 * b[7];
    a5 = W5 * b[5] + W3 * b[3];
    a3 = W3 * b[5] - W5 * b[3];
    /* even-coefficient pairs */
    a2 = W2 * b[2] + W6 * b[6];
    a6 = W6 * b[2] - W2 * b[6];
    a0 = W0 * b[0] + W0 * b[4];
    a4 = W0 * b[0] - W0 * b[4];

    /* sqrt(2)/2 rotation terms (181/256 ~= 0.7071) */
    t1 = (181 * (a1 - a5 + a7 - a3) + 128) >> 8;
    t2 = (181 * (a1 - a5 - a7 + a3) + 128) >> 8;

    b[0] = (a0 + a2 + a1 + a5 + (1 << 7)) >> 8;
    b[1] = (a4 + a6 + t1      + (1 << 7)) >> 8;
    b[2] = (a4 - a6 + t2      + (1 << 7)) >> 8;
    b[3] = (a0 - a2 + a7 + a3 + (1 << 7)) >> 8;
    b[4] = (a0 - a2 - a7 - a3 + (1 << 7)) >> 8;
    b[5] = (a4 - a6 - t2      + (1 << 7)) >> 8;
    b[6] = (a4 + a6 - t1      + (1 << 7)) >> 8;
    b[7] = (a0 + a2 - a1 - a5 + (1 << 7)) >> 8;
}
/* One column (stride 8) of the WMV2 8x8 inverse DCT, in place.  Inputs
 * are pre-scaled >>3 with rounding; outputs use >>14 with rounding,
 * completing the two-pass normalization started in wmv2_idct_row. */
static void wmv2_idct_col(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    /* odd-coefficient pairs, rounded down to working precision */
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    /* even-coefficient pairs */
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;

    /* sqrt(2)/2 rotation terms (181/256 ~= 0.7071) */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
/* Full WMV2 8x8 inverse DCT: one 1-D pass over all 8 rows, then one
 * over all 8 columns, both in place. */
void ff_wmv2_idct_c(short * block){
    int k;

    for (k = 0; k < 64; k += 8)
        wmv2_idct_row(block + k);
    for (k = 0; k < 8; k++)
        wmv2_idct_col(block + k);
}
02735
02736
/* WMV2 IDCT followed by a clamped store of the result into dest. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
/* WMV2 IDCT followed by a clamped add onto the pixels already in dest. */
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
/* JPEG reference integer IDCT (j_rev_dct) + clamped store into dest. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
/* JPEG reference integer IDCT + clamped add onto dest. */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
02757
/* 4x4 reduced-resolution variants (lowres==1): j_rev_dct4 + clamped
 * store / add of the 4x4 result. */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}
02768
/* 2x2 reduced-resolution variants (lowres==2): j_rev_dct2 + clamped
 * store / add of the 2x2 result. */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
02779
/* 1x1 reduced-resolution variants (lowres==3): only the DC coefficient
 * survives, scaled (+4)>>3 and clamped through ff_cropTbl. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;  /* center of the clip table */

    dest[0] = cm[(block[0] + 4)>>3];
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;  /* center of the clip table */

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
02792
/* Do-nothing function matching the (mem, stride, h) pointer signature.
 * NOTE(review): appears to serve as a placeholder where a callable
 * function pointer is required (e.g. a prefetch slot) — confirm at the
 * assignment sites. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02794
02795
/* One-time initialization of the global lookup tables:
 * - ff_cropTbl: clamp-to-[0,255] table with MAX_NEG_CROP guard bands
 *   (zeros below, 255s above the identity middle section)
 * - ff_squareTbl: (i-256)^2 for i in [0,512)
 * - inv_zigzag_direct16: inverse of ff_zigzag_direct, stored 1-based */
av_cold void dsputil_static_init(void)
{
    int i;

    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i] = 0;
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
    }

    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);
    }

    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}
02812
/* Verify that the compiler actually honours 16-byte stack alignment for
 * LOCAL_ALIGNED_16 variables.  Returns 0 when aligned, -1 otherwise;
 * on SIMD-capable builds a one-time error is logged (guarded by the
 * static did_fail flag) since misalignment would crash SSE/AltiVec code. */
int ff_check_alignment(void){
    static int did_fail=0;
    LOCAL_ALIGNED_16(int, aligned, [4]);

    if((intptr_t)aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
02832
02833 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
02834 {
02835 int i;
02836
02837 ff_check_alignment();
02838
02839 #if CONFIG_ENCODERS
/* NOTE(review): this chunk is the tail of the DSP-context initializer; the
 * function head (signature, earlier init code, declaration of `i`) is above
 * this excerpt. Each line also carries an embedded 5-digit listing number
 * from the source extraction — it is preserved verbatim here. */

/* --- forward DCT selection (encoder-only, closed by #endif below) ---
 * 10-bit content gets the 10-bit integer "islow" fDCT unconditionally;
 * otherwise the choice follows avctx->dct_algo (fast int / FAAN / islow). */
02840 if (avctx->bits_per_raw_sample == 10) {
02841 c->fdct = ff_jpeg_fdct_islow_10;
02842 c->fdct248 = ff_fdct248_islow_10;
02843 } else {
02844 if(avctx->dct_algo==FF_DCT_FASTINT) {
02845 c->fdct = fdct_ifast;
02846 c->fdct248 = fdct_ifast248;
02847 }
02848 else if(avctx->dct_algo==FF_DCT_FAAN) {
02849 c->fdct = ff_faandct;
02850 c->fdct248 = ff_faandct248;
02851 }
02852 else {
02853 c->fdct = ff_jpeg_fdct_islow_8;
02854 c->fdct248 = ff_fdct248_islow_8;
02855 }
02856 }
02857 #endif //CONFIG_ENCODERS
02858
/* --- inverse DCT selection ---
 * lowres 1/2/3 force reduced-size reference IDCTs (4x4 / 2x2 / 1x1).
 * At full resolution, 10-bit input pins the simple_idct 10-bit variants;
 * otherwise avctx->idct_algo picks among the 8-bit implementations, with
 * simple_idct_8 as the fallback. Only the INT (jref) IDCT needs the
 * LIBMPEG2 coefficient permutation; all others use FF_NO_IDCT_PERM. */
02859 if(avctx->lowres==1){
02860 c->idct_put= ff_jref_idct4_put;
02861 c->idct_add= ff_jref_idct4_add;
02862 c->idct = j_rev_dct4;
02863 c->idct_permutation_type= FF_NO_IDCT_PERM;
02864 }else if(avctx->lowres==2){
02865 c->idct_put= ff_jref_idct2_put;
02866 c->idct_add= ff_jref_idct2_add;
02867 c->idct = j_rev_dct2;
02868 c->idct_permutation_type= FF_NO_IDCT_PERM;
02869 }else if(avctx->lowres==3){
02870 c->idct_put= ff_jref_idct1_put;
02871 c->idct_add= ff_jref_idct1_add;
02872 c->idct = j_rev_dct1;
02873 c->idct_permutation_type= FF_NO_IDCT_PERM;
02874 }else{
02875 if (avctx->bits_per_raw_sample == 10) {
02876 c->idct_put = ff_simple_idct_put_10;
02877 c->idct_add = ff_simple_idct_add_10;
02878 c->idct = ff_simple_idct_10;
02879 c->idct_permutation_type = FF_NO_IDCT_PERM;
02880 } else {
02881 if(avctx->idct_algo==FF_IDCT_INT){
02882 c->idct_put= ff_jref_idct_put;
02883 c->idct_add= ff_jref_idct_add;
02884 c->idct = j_rev_dct;
02885 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
02886 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
02887 avctx->idct_algo==FF_IDCT_VP3){
02888 c->idct_put= ff_vp3_idct_put_c;
02889 c->idct_add= ff_vp3_idct_add_c;
02890 c->idct = ff_vp3_idct_c;
02891 c->idct_permutation_type= FF_NO_IDCT_PERM;
02892 }else if(avctx->idct_algo==FF_IDCT_WMV2){
02893 c->idct_put= ff_wmv2_idct_put_c;
02894 c->idct_add= ff_wmv2_idct_add_c;
02895 c->idct = ff_wmv2_idct_c;
02896 c->idct_permutation_type= FF_NO_IDCT_PERM;
02897 }else if(avctx->idct_algo==FF_IDCT_FAAN){
02898 c->idct_put= ff_faanidct_put;
02899 c->idct_add= ff_faanidct_add;
02900 c->idct = ff_faanidct;
02901 c->idct_permutation_type= FF_NO_IDCT_PERM;
/* EA TGQ sets only idct_put — the decoder presumably never uses idct/idct_add;
 * TODO confirm against the eatgq decoder. */
02902 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
02903 c->idct_put= ff_ea_idct_put_c;
02904 c->idct_permutation_type= FF_NO_IDCT_PERM;
02905 }else{
02906 c->idct_put = ff_simple_idct_put_8;
02907 c->idct_add = ff_simple_idct_add_8;
02908 c->idct = ff_simple_idct_8;
02909 c->idct_permutation_type= FF_NO_IDCT_PERM;
02910 }
02911 }
02912 }
02913
/* --- generic C implementations of the basic pixel/block helpers --- */
02914 c->diff_pixels = diff_pixels_c;
02915 c->put_pixels_clamped = ff_put_pixels_clamped_c;
02916 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
02917 c->add_pixels_clamped = ff_add_pixels_clamped_c;
02918 c->sum_abs_dctelem = sum_abs_dctelem_c;
02919 c->gmc1 = gmc1_c;
02920 c->gmc = ff_gmc_c;
02921 c->pix_sum = pix_sum_c;
02922 c->pix_norm1 = pix_norm1_c;
02923
02924 c->fill_block_tab[0] = fill_block16_c;
02925 c->fill_block_tab[1] = fill_block8_c;
02926
02927
/* SAD tables: [0] = 16x16, [1] = 8x8; second index = h/v/hv half-pel. */
02928 c->pix_abs[0][0] = pix_abs16_c;
02929 c->pix_abs[0][1] = pix_abs16_x2_c;
02930 c->pix_abs[0][2] = pix_abs16_y2_c;
02931 c->pix_abs[0][3] = pix_abs16_xy2_c;
02932 c->pix_abs[1][0] = pix_abs8_c;
02933 c->pix_abs[1][1] = pix_abs8_x2_c;
02934 c->pix_abs[1][2] = pix_abs8_y2_c;
02935 c->pix_abs[1][3] = pix_abs8_xy2_c;
02936
/* Third-pel (tpel) MC tables; only the 9 valid mcXY positions are filled
 * (indices 3, 7 and 11-15 are left as-is). */
02937 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
02938 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
02939 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
02940 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
02941 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
02942 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
02943 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
02944 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
02945 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
02946
02947 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
02948 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
02949 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
02950 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
02951 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
02952 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
02953 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
02954 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
02955 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
02956
/* Fill all 16 quarter-pel mcXY slots of one _pixels_tab row from the
 * matching PFX##NUM##_mcXY_c functions. */
02957 #define dspfunc(PFX, IDX, NUM) \
02958 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
02959 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
02960 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
02961 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
02962 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
02963 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
02964 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
02965 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
02966 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
02967 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
02968 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
02969 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
02970 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
02971 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
02972 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
02973 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
02974
02975 dspfunc(put_qpel, 0, 16);
02976 dspfunc(put_no_rnd_qpel, 0, 16);
02977
02978 dspfunc(avg_qpel, 0, 16);
02979
02980
02981 dspfunc(put_qpel, 1, 8);
02982 dspfunc(put_no_rnd_qpel, 1, 8);
02983
02984 dspfunc(avg_qpel, 1, 8);
02985
02986
02987 #undef dspfunc
02988
/* Codec-specific sub-initializers, compiled in only when the decoders are. */
02989 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
02990 ff_mlp_init(c, avctx);
02991 #endif
02992 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
02993 ff_intrax8dsp_init(c,avctx);
02994 #endif
02995
/* WMV2/MSMPEG4 "mspel" MC table (8 positions; slot 0 is a plain copy). */
02996 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
02997 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
02998 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
02999 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
03000 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
03001 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
03002 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
03003 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
03004
/* --- comparison functions for motion estimation / RD decisions ---
 * SET_CMP_FUNC fills slot [0] (16-wide) and [1] (8x8) of a cmp table. */
03005 #define SET_CMP_FUNC(name) \
03006 c->name[0]= name ## 16_c;\
03007 c->name[1]= name ## 8x8_c;
03008
03009 SET_CMP_FUNC(hadamard8_diff)
03010 c->hadamard8_diff[4]= hadamard8_intra16_c;
03011 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
03012 SET_CMP_FUNC(dct_sad)
03013 SET_CMP_FUNC(dct_max)
03014 #if CONFIG_GPL
03015 SET_CMP_FUNC(dct264_sad)
03016 #endif
03017 c->sad[0]= pix_abs16_c;
03018 c->sad[1]= pix_abs8_c;
03019 c->sse[0]= sse16_c;
03020 c->sse[1]= sse8_c;
03021 c->sse[2]= sse4_c;
03022 SET_CMP_FUNC(quant_psnr)
03023 SET_CMP_FUNC(rd)
03024 SET_CMP_FUNC(bit)
03025 c->vsad[0]= vsad16_c;
03026 c->vsad[4]= vsad_intra16_c;
03027 c->vsad[5]= vsad_intra8_c;
03028 c->vsse[0]= vsse16_c;
03029 c->vsse[4]= vsse_intra16_c;
03030 c->vsse[5]= vsse_intra8_c;
03031 c->nsse[0]= nsse16_c;
03032 c->nsse[1]= nsse8_c;
03033 #if CONFIG_DWT
03034 ff_dsputil_init_dwt(c);
03035 #endif
03036
03037 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
03038
/* HuffYUV-style byte prediction and byte-swap helpers. */
03039 c->add_bytes= add_bytes_c;
03040 c->diff_bytes= diff_bytes_c;
03041 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
03042 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03043 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
03044 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
03045 c->bswap_buf= bswap_buf;
03046 c->bswap16_buf = bswap16_buf;
03047
/* Loop filters — runtime `if (CONFIG_*)` relies on the compiler eliding
 * the dead branch when the decoder is not configured in. */
03048 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
03049 c->h263_h_loop_filter= h263_h_loop_filter_c;
03050 c->h263_v_loop_filter= h263_v_loop_filter_c;
03051 }
03052
03053 if (CONFIG_VP3_DECODER) {
03054 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
03055 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
03056 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
03057 }
03058
03059 c->h261_loop_filter= h261_loop_filter_c;
03060
03061 c->try_8x8basis= try_8x8basis_c;
03062 c->add_8x8basis= add_8x8basis_c;
03063
/* --- audio/float vector helpers --- */
03064 #if CONFIG_VORBIS_DECODER
03065 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
03066 #endif
03067 #if CONFIG_AC3_DECODER
03068 c->ac3_downmix = ff_ac3_downmix_c;
03069 #endif
03070 c->vector_fmul = vector_fmul_c;
03071 c->vector_fmul_reverse = vector_fmul_reverse_c;
03072 c->vector_fmul_add = vector_fmul_add_c;
03073 c->vector_fmul_window = vector_fmul_window_c;
03074 c->vector_clipf = vector_clipf_c;
03075 c->scalarproduct_int16 = scalarproduct_int16_c;
03076 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
03077 c->apply_window_int16 = apply_window_int16_c;
03078 c->vector_clip_int32 = vector_clip_int32_c;
03079 c->scalarproduct_float = scalarproduct_float_c;
03080 c->butterflies_float = butterflies_float_c;
03081 c->butterflies_float_interleave = butterflies_float_interleave_c;
03082 c->vector_fmul_scalar = vector_fmul_scalar_c;
03083 c->vector_fmac_scalar = vector_fmac_scalar_c;
03084
/* shrink[i] halves the plane i times; [0] is a plain copy. */
03085 c->shrink[0]= av_image_copy_plane;
03086 c->shrink[1]= ff_shrink22;
03087 c->shrink[2]= ff_shrink44;
03088 c->shrink[3]= ff_shrink88;
03089
/* Prefetch defaults to a no-op; arch-specific init below may override it. */
03090 c->prefetch= just_return;
03091
/* Cleared here so the fallback loop at the end can detect slots that no
 * arch-specific init filled in. */
03092 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
03093 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03094
/* --- bit-depth-templated functions ---
 * FUNC/FUNCC paste a _<depth> / _<depth>_c suffix onto a base name, so the
 * macros below can select the 8/9/10-bit template instantiations from
 * dsputil_template.c (included three times near the top of the file). */
03095 #undef FUNC
03096 #undef FUNCC
03097 #define FUNC(f, depth) f ## _ ## depth
03098 #define FUNCC(f, depth) f ## _ ## depth ## _c
03099
/* Half-pel table: slots 0-3 = full / x-half / y-half / xy-half. */
03100 #define dspfunc1(PFX, IDX, NUM, depth)\
03101 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
03102 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
03103 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
03104 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
03105
/* Quarter-pel table: all 16 mcXY positions, depth-templated. */
03106 #define dspfunc2(PFX, IDX, NUM, depth)\
03107 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
03108 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
03109 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
03110 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
03111 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
03112 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
03113 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
03114 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
03115 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
03116 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
03117 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
03118 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
03119 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
03120 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
03121 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
03122 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
03123
03124
/* Installs every depth-dependent pointer for one (depth, dct-width) pair.
 * `dct` selects the DCT-coefficient width suffix (_16 or _32). */
03125 #define BIT_DEPTH_FUNCS(depth, dct)\
03126 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
03127 c->draw_edges = FUNCC(draw_edges , depth);\
03128 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
03129 c->clear_block = FUNCC(clear_block ## dct , depth);\
03130 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
03131 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
03132 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
03133 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
03134 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
03135 \
03136 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
03137 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
03138 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
03139 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
03140 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
03141 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
03142 \
03143 dspfunc1(put , 0, 16, depth);\
03144 dspfunc1(put , 1, 8, depth);\
03145 dspfunc1(put , 2, 4, depth);\
03146 dspfunc1(put , 3, 2, depth);\
03147 dspfunc1(put_no_rnd, 0, 16, depth);\
03148 dspfunc1(put_no_rnd, 1, 8, depth);\
03149 dspfunc1(avg , 0, 16, depth);\
03150 dspfunc1(avg , 1, 8, depth);\
03151 dspfunc1(avg , 2, 4, depth);\
03152 dspfunc1(avg , 3, 2, depth);\
03153 dspfunc1(avg_no_rnd, 0, 16, depth);\
03154 dspfunc1(avg_no_rnd, 1, 8, depth);\
03155 \
03156 dspfunc2(put_h264_qpel, 0, 16, depth);\
03157 dspfunc2(put_h264_qpel, 1, 8, depth);\
03158 dspfunc2(put_h264_qpel, 2, 4, depth);\
03159 dspfunc2(put_h264_qpel, 3, 2, depth);\
03160 dspfunc2(avg_h264_qpel, 0, 16, depth);\
03161 dspfunc2(avg_h264_qpel, 1, 8, depth);\
03162 dspfunc2(avg_h264_qpel, 2, 4, depth);
03163
/* Pick the template instantiation matching the stream bit depth; unknown
 * depths log a debug message and deliberately fall through to 8-bit. */
03164 switch (avctx->bits_per_raw_sample) {
03165 case 9:
03166 if (c->dct_bits == 32) {
03167 BIT_DEPTH_FUNCS(9, _32);
03168 } else {
03169 BIT_DEPTH_FUNCS(9, _16);
03170 }
03171 break;
03172 case 10:
03173 if (c->dct_bits == 32) {
03174 BIT_DEPTH_FUNCS(10, _32);
03175 } else {
03176 BIT_DEPTH_FUNCS(10, _16);
03177 }
03178 break;
03179 default:
03180 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
/* fall through: 8-bit functions are the default for any other depth */
03181 case 8:
03182 BIT_DEPTH_FUNCS(8, _16);
03183 break;
03184 }
03185
03186
/* --- arch-specific overrides (each may replace any pointer set above) --- */
03187 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
03188 if (ARCH_ARM) dsputil_init_arm (c, avctx);
03189 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
03190 if (HAVE_VIS) dsputil_init_vis (c, avctx);
03191 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
03192 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
03193 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
03194 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
03195 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
03196
/* Any 2-tap qpel slot left NULL (memset above, not filled by arch init)
 * falls back to the corresponding h264 qpel function. */
03197 for(i=0; i<64; i++){
03198 if(!c->put_2tap_qpel_pixels_tab[0][i])
03199 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
03200 if(!c->avg_2tap_qpel_pixels_tab[0][i])
03201 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
03202 }
03203
/* Build the final coefficient permutation table for the chosen IDCT. */
03204 ff_init_scantable_permutation(c->idct_permutation,
03205 c->idct_permutation_type);
03206 }