00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "avcodec.h"
00031 #include "dsputil.h"
00032 #include "simple_idct.h"
00033 #include "faandct.h"
00034 #include "faanidct.h"
00035 #include "mathops.h"
00036 #include "mpegvideo.h"
00037 #include "config.h"
00038 #include "lpc.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041 #include "png.h"
00042
/* Clipping lookup table: indexed as ff_cropTbl[MAX_NEG_CROP + v] to clamp a
 * signed intermediate v into the 0..255 pixel range without branching.
 * Defined zeroed here; presumably filled at runtime by an init routine
 * elsewhere in this file — TODO confirm. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Square table: indexed as (ff_squareTbl + 256)[x] for x in -256..255 by the
 * SSE/variance routines below; also zeroed here and filled at init time. */
uint32_t ff_squareTbl[512] = {0, };


/* Byte-parallel word constants: every byte of the machine word is 0x7f/0x80.
 * Used for SIMD-within-a-register arithmetic on plain unsigned longs. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00049
/* Classic 8x8 zigzag scan order (as in JPEG/MPEG): entry i is the raster
 * index (row*8 + column) of the i-th coefficient in scan order. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00060
00061
00062
/* Alternate zigzag scan that pairs rows vertically (note each step visits
 * raster indices 8 apart) — appears intended for interlaced/field-coded
 * 2-4-8 DCT blocks; confirm against the codecs that select it. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};

/* Inverse zigzag as 16-bit entries, 16-byte aligned for SIMD use; defined
 * uninitialized here — presumably filled at init time elsewhere (TODO confirm). */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
00076
/* Alternate horizontal scan order: favors runs along rows first.
 * NOTE(review): matches the MPEG-4 alternate-horizontal scan table —
 * confirm against the spec before relying on that name. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

/* Alternate vertical scan order: favors runs down columns first (used for
 * interlaced content; same caveat as above regarding the exact spec table). */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00098
00099
00100
/* Reciprocal table for division by small constants: ff_inverse[i] is
 * ceil(2^32 / i) (entry 0 unused, entry 1 saturated to 2^32-1), so a
 * division x/i can be done as a 32x32->64 multiply plus shift.
 * Presumably consumed by a FASTDIV-style macro elsewhere — TODO confirm. */
const uint32_t ff_inverse[257]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
  16777216
};
00136
00137
/* Coefficient permutation applied to scan tables when the simple MMX IDCT
 * is in use (values are destination indices within the 8x8 block).
 * NOTE(review): the exact layout is dictated by the MMX IDCT's internal
 * ordering — verify against the matching IDCT implementation. */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Row permutation used by the SSE2 IDCT path (interleaves row halves). */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00150
00151 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00152 int i;
00153 int end;
00154
00155 st->scantable= src_scantable;
00156
00157 for(i=0; i<64; i++){
00158 int j;
00159 j = src_scantable[i];
00160 st->permutated[i] = permutation[j];
00161 #if ARCH_PPC
00162 st->inverse[j] = i;
00163 #endif
00164 }
00165
00166 end=-1;
00167 for(i=0; i<64; i++){
00168 int j;
00169 j = st->permutated[i];
00170 if(j>end) end=j;
00171 st->raster_end[i]= end;
00172 }
00173 }
00174
00175 static int pix_sum_c(uint8_t * pix, int line_size)
00176 {
00177 int s, i, j;
00178
00179 s = 0;
00180 for (i = 0; i < 16; i++) {
00181 for (j = 0; j < 16; j += 8) {
00182 s += pix[0];
00183 s += pix[1];
00184 s += pix[2];
00185 s += pix[3];
00186 s += pix[4];
00187 s += pix[5];
00188 s += pix[6];
00189 s += pix[7];
00190 pix += 8;
00191 }
00192 pix += line_size - 16;
00193 }
00194 return s;
00195 }
00196
00197 static int pix_norm1_c(uint8_t * pix, int line_size)
00198 {
00199 int s, i, j;
00200 uint32_t *sq = ff_squareTbl + 256;
00201
00202 s = 0;
00203 for (i = 0; i < 16; i++) {
00204 for (j = 0; j < 16; j += 8) {
00205 #if 0
00206 s += sq[pix[0]];
00207 s += sq[pix[1]];
00208 s += sq[pix[2]];
00209 s += sq[pix[3]];
00210 s += sq[pix[4]];
00211 s += sq[pix[5]];
00212 s += sq[pix[6]];
00213 s += sq[pix[7]];
00214 #else
00215 #if LONG_MAX > 2147483647
00216 register uint64_t x=*(uint64_t*)pix;
00217 s += sq[x&0xff];
00218 s += sq[(x>>8)&0xff];
00219 s += sq[(x>>16)&0xff];
00220 s += sq[(x>>24)&0xff];
00221 s += sq[(x>>32)&0xff];
00222 s += sq[(x>>40)&0xff];
00223 s += sq[(x>>48)&0xff];
00224 s += sq[(x>>56)&0xff];
00225 #else
00226 register uint32_t x=*(uint32_t*)pix;
00227 s += sq[x&0xff];
00228 s += sq[(x>>8)&0xff];
00229 s += sq[(x>>16)&0xff];
00230 s += sq[(x>>24)&0xff];
00231 x=*(uint32_t*)(pix+4);
00232 s += sq[x&0xff];
00233 s += sq[(x>>8)&0xff];
00234 s += sq[(x>>16)&0xff];
00235 s += sq[(x>>24)&0xff];
00236 #endif
00237 #endif
00238 pix += 8;
00239 }
00240 pix += line_size - 16;
00241 }
00242 return s;
00243 }
00244
00245 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
00246 int i;
00247
00248 for(i=0; i+8<=w; i+=8){
00249 dst[i+0]= bswap_32(src[i+0]);
00250 dst[i+1]= bswap_32(src[i+1]);
00251 dst[i+2]= bswap_32(src[i+2]);
00252 dst[i+3]= bswap_32(src[i+3]);
00253 dst[i+4]= bswap_32(src[i+4]);
00254 dst[i+5]= bswap_32(src[i+5]);
00255 dst[i+6]= bswap_32(src[i+6]);
00256 dst[i+7]= bswap_32(src[i+7]);
00257 }
00258 for(;i<w; i++){
00259 dst[i+0]= bswap_32(src[i+0]);
00260 }
00261 }
00262
00263 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00264 {
00265 int s, i;
00266 uint32_t *sq = ff_squareTbl + 256;
00267
00268 s = 0;
00269 for (i = 0; i < h; i++) {
00270 s += sq[pix1[0] - pix2[0]];
00271 s += sq[pix1[1] - pix2[1]];
00272 s += sq[pix1[2] - pix2[2]];
00273 s += sq[pix1[3] - pix2[3]];
00274 pix1 += line_size;
00275 pix2 += line_size;
00276 }
00277 return s;
00278 }
00279
00280 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00281 {
00282 int s, i;
00283 uint32_t *sq = ff_squareTbl + 256;
00284
00285 s = 0;
00286 for (i = 0; i < h; i++) {
00287 s += sq[pix1[0] - pix2[0]];
00288 s += sq[pix1[1] - pix2[1]];
00289 s += sq[pix1[2] - pix2[2]];
00290 s += sq[pix1[3] - pix2[3]];
00291 s += sq[pix1[4] - pix2[4]];
00292 s += sq[pix1[5] - pix2[5]];
00293 s += sq[pix1[6] - pix2[6]];
00294 s += sq[pix1[7] - pix2[7]];
00295 pix1 += line_size;
00296 pix2 += line_size;
00297 }
00298 return s;
00299 }
00300
00301 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00302 {
00303 int s, i;
00304 uint32_t *sq = ff_squareTbl + 256;
00305
00306 s = 0;
00307 for (i = 0; i < h; i++) {
00308 s += sq[pix1[ 0] - pix2[ 0]];
00309 s += sq[pix1[ 1] - pix2[ 1]];
00310 s += sq[pix1[ 2] - pix2[ 2]];
00311 s += sq[pix1[ 3] - pix2[ 3]];
00312 s += sq[pix1[ 4] - pix2[ 4]];
00313 s += sq[pix1[ 5] - pix2[ 5]];
00314 s += sq[pix1[ 6] - pix2[ 6]];
00315 s += sq[pix1[ 7] - pix2[ 7]];
00316 s += sq[pix1[ 8] - pix2[ 8]];
00317 s += sq[pix1[ 9] - pix2[ 9]];
00318 s += sq[pix1[10] - pix2[10]];
00319 s += sq[pix1[11] - pix2[11]];
00320 s += sq[pix1[12] - pix2[12]];
00321 s += sq[pix1[13] - pix2[13]];
00322 s += sq[pix1[14] - pix2[14]];
00323 s += sq[pix1[15] - pix2[15]];
00324
00325 pix1 += line_size;
00326 pix2 += line_size;
00327 }
00328 return s;
00329 }
00330
00331
00332
00333 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
00334 {
00335 uint8_t *ptr, *last_line;
00336 int i;
00337
00338 last_line = buf + (height - 1) * wrap;
00339 for(i=0;i<w;i++) {
00340
00341 memcpy(buf - (i + 1) * wrap, buf, width);
00342 memcpy(last_line + (i + 1) * wrap, last_line, width);
00343 }
00344
00345 ptr = buf;
00346 for(i=0;i<height;i++) {
00347 memset(ptr - w, ptr[0], w);
00348 memset(ptr + width, ptr[width-1], w);
00349 ptr += wrap;
00350 }
00351
00352 for(i=0;i<w;i++) {
00353 memset(buf - (i + 1) * wrap - w, buf[0], w);
00354 memset(buf - (i + 1) * wrap + width, buf[width-1], w);
00355 memset(last_line + (i + 1) * wrap - w, last_line[0], w);
00356 memset(last_line + (i + 1) * wrap + width, last_line[width-1], w);
00357 }
00358 }
00359
/**
 * Motion compensation with emulated edges: copy a block_w x block_h block
 * from src into buf, where the requested rectangle may extend outside the
 * valid w x h picture area; missing pixels are produced by replicating the
 * nearest border pixel of the copied region.
 *
 * @param buf       destination (block_w x block_h, stride linesize)
 * @param src       source picture pointer at the block position (stride linesize)
 * @param linesize  stride of both buf and src, in bytes
 * @param block_w   block width
 * @param block_h   block height
 * @param src_x     x of the block within the source picture
 * @param src_y     y of the block within the source picture
 * @param w         width of the valid source area
 * @param h         height of the valid source area
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* Clamp the source position so the block overlaps at least one real
     * row/column; the pointer moves together with the coordinate so that
     * src + src_y*linesize + src_x stays invariant. */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    /* Portion of the block covered by real picture data. */
    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);

    /* copy the existing (in-picture) part */
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    /* top band: replicate the first copied row */
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    /* bottom band: replicate the last copied row */
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    /* left and right bands (including corners, since the top/bottom bands
     * above only span start_x..end_x): replicate the edge columns */
    for(y=0; y<block_h; y++){
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
00430
00431 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00432 {
00433 int i;
00434
00435
00436 for(i=0;i<8;i++) {
00437 block[0] = pixels[0];
00438 block[1] = pixels[1];
00439 block[2] = pixels[2];
00440 block[3] = pixels[3];
00441 block[4] = pixels[4];
00442 block[5] = pixels[5];
00443 block[6] = pixels[6];
00444 block[7] = pixels[7];
00445 pixels += line_size;
00446 block += 8;
00447 }
00448 }
00449
00450 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00451 const uint8_t *s2, int stride){
00452 int i;
00453
00454
00455 for(i=0;i<8;i++) {
00456 block[0] = s1[0] - s2[0];
00457 block[1] = s1[1] - s2[1];
00458 block[2] = s1[2] - s2[2];
00459 block[3] = s1[3] - s2[3];
00460 block[4] = s1[4] - s2[4];
00461 block[5] = s1[5] - s2[5];
00462 block[6] = s1[6] - s2[6];
00463 block[7] = s1[7] - s2[7];
00464 s1 += stride;
00465 s2 += stride;
00466 block += 8;
00467 }
00468 }
00469
00470
00471 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00472 int line_size)
00473 {
00474 int i;
00475 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00476
00477
00478 for(i=0;i<8;i++) {
00479 pixels[0] = cm[block[0]];
00480 pixels[1] = cm[block[1]];
00481 pixels[2] = cm[block[2]];
00482 pixels[3] = cm[block[3]];
00483 pixels[4] = cm[block[4]];
00484 pixels[5] = cm[block[5]];
00485 pixels[6] = cm[block[6]];
00486 pixels[7] = cm[block[7]];
00487
00488 pixels += line_size;
00489 block += 8;
00490 }
00491 }
00492
00493 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00494 int line_size)
00495 {
00496 int i;
00497 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00498
00499
00500 for(i=0;i<4;i++) {
00501 pixels[0] = cm[block[0]];
00502 pixels[1] = cm[block[1]];
00503 pixels[2] = cm[block[2]];
00504 pixels[3] = cm[block[3]];
00505
00506 pixels += line_size;
00507 block += 8;
00508 }
00509 }
00510
00511 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00512 int line_size)
00513 {
00514 int i;
00515 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00516
00517
00518 for(i=0;i<2;i++) {
00519 pixels[0] = cm[block[0]];
00520 pixels[1] = cm[block[1]];
00521
00522 pixels += line_size;
00523 block += 8;
00524 }
00525 }
00526
00527 static void put_signed_pixels_clamped_c(const DCTELEM *block,
00528 uint8_t *restrict pixels,
00529 int line_size)
00530 {
00531 int i, j;
00532
00533 for (i = 0; i < 8; i++) {
00534 for (j = 0; j < 8; j++) {
00535 if (*block < -128)
00536 *pixels = 0;
00537 else if (*block > 127)
00538 *pixels = 255;
00539 else
00540 *pixels = (uint8_t)(*block + 128);
00541 block++;
00542 pixels++;
00543 }
00544 pixels += (line_size - 8);
00545 }
00546 }
00547
00548 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00549 int line_size)
00550 {
00551 int i;
00552
00553
00554 for(i=0;i<8;i++) {
00555 pixels[0] = block[0];
00556 pixels[1] = block[1];
00557 pixels[2] = block[2];
00558 pixels[3] = block[3];
00559 pixels[4] = block[4];
00560 pixels[5] = block[5];
00561 pixels[6] = block[6];
00562 pixels[7] = block[7];
00563
00564 pixels += line_size;
00565 block += 8;
00566 }
00567 }
00568
00569 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00570 int line_size)
00571 {
00572 int i;
00573 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00574
00575
00576 for(i=0;i<8;i++) {
00577 pixels[0] = cm[pixels[0] + block[0]];
00578 pixels[1] = cm[pixels[1] + block[1]];
00579 pixels[2] = cm[pixels[2] + block[2]];
00580 pixels[3] = cm[pixels[3] + block[3]];
00581 pixels[4] = cm[pixels[4] + block[4]];
00582 pixels[5] = cm[pixels[5] + block[5]];
00583 pixels[6] = cm[pixels[6] + block[6]];
00584 pixels[7] = cm[pixels[7] + block[7]];
00585 pixels += line_size;
00586 block += 8;
00587 }
00588 }
00589
00590 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00591 int line_size)
00592 {
00593 int i;
00594 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00595
00596
00597 for(i=0;i<4;i++) {
00598 pixels[0] = cm[pixels[0] + block[0]];
00599 pixels[1] = cm[pixels[1] + block[1]];
00600 pixels[2] = cm[pixels[2] + block[2]];
00601 pixels[3] = cm[pixels[3] + block[3]];
00602 pixels += line_size;
00603 block += 8;
00604 }
00605 }
00606
00607 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00608 int line_size)
00609 {
00610 int i;
00611 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00612
00613
00614 for(i=0;i<2;i++) {
00615 pixels[0] = cm[pixels[0] + block[0]];
00616 pixels[1] = cm[pixels[1] + block[1]];
00617 pixels += line_size;
00618 block += 8;
00619 }
00620 }
00621
00622 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00623 {
00624 int i;
00625 for(i=0;i<8;i++) {
00626 pixels[0] += block[0];
00627 pixels[1] += block[1];
00628 pixels[2] += block[2];
00629 pixels[3] += block[3];
00630 pixels[4] += block[4];
00631 pixels[5] += block[5];
00632 pixels[6] += block[6];
00633 pixels[7] += block[7];
00634 pixels += line_size;
00635 block += 8;
00636 }
00637 }
00638
00639 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00640 {
00641 int i;
00642 for(i=0;i<4;i++) {
00643 pixels[0] += block[0];
00644 pixels[1] += block[1];
00645 pixels[2] += block[2];
00646 pixels[3] += block[3];
00647 pixels += line_size;
00648 block += 4;
00649 }
00650 }
00651
00652 static int sum_abs_dctelem_c(DCTELEM *block)
00653 {
00654 int sum=0, i;
00655 for(i=0; i<64; i++)
00656 sum+= FFABS(block[i]);
00657 return sum;
00658 }
00659
00660 static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
00661 {
00662 int i;
00663
00664 for (i = 0; i < h; i++) {
00665 memset(block, value, 16);
00666 block += line_size;
00667 }
00668 }
00669
00670 static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
00671 {
00672 int i;
00673
00674 for (i = 0; i < h; i++) {
00675 memset(block, value, 8);
00676 block += line_size;
00677 }
00678 }
00679
00680 static void scale_block_c(const uint8_t src[64], uint8_t *dst, int linesize)
00681 {
00682 int i, j;
00683 uint16_t *dst1 = (uint16_t *) dst;
00684 uint16_t *dst2 = (uint16_t *)(dst + linesize);
00685
00686 for (j = 0; j < 8; j++) {
00687 for (i = 0; i < 8; i++) {
00688 dst1[i] = dst2[i] = src[i] * 0x0101;
00689 }
00690 src += 8;
00691 dst1 += linesize;
00692 dst2 += linesize;
00693 }
00694 }
00695
00696 #if 0
00697
00698 #define PIXOP2(OPNAME, OP) \
00699 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00700 {\
00701 int i;\
00702 for(i=0; i<h; i++){\
00703 OP(*((uint64_t*)block), AV_RN64(pixels));\
00704 pixels+=line_size;\
00705 block +=line_size;\
00706 }\
00707 }\
00708 \
00709 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00710 {\
00711 int i;\
00712 for(i=0; i<h; i++){\
00713 const uint64_t a= AV_RN64(pixels );\
00714 const uint64_t b= AV_RN64(pixels+1);\
00715 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00716 pixels+=line_size;\
00717 block +=line_size;\
00718 }\
00719 }\
00720 \
00721 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00722 {\
00723 int i;\
00724 for(i=0; i<h; i++){\
00725 const uint64_t a= AV_RN64(pixels );\
00726 const uint64_t b= AV_RN64(pixels+1);\
00727 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00728 pixels+=line_size;\
00729 block +=line_size;\
00730 }\
00731 }\
00732 \
00733 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00734 {\
00735 int i;\
00736 for(i=0; i<h; i++){\
00737 const uint64_t a= AV_RN64(pixels );\
00738 const uint64_t b= AV_RN64(pixels+line_size);\
00739 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00740 pixels+=line_size;\
00741 block +=line_size;\
00742 }\
00743 }\
00744 \
00745 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00746 {\
00747 int i;\
00748 for(i=0; i<h; i++){\
00749 const uint64_t a= AV_RN64(pixels );\
00750 const uint64_t b= AV_RN64(pixels+line_size);\
00751 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00752 pixels+=line_size;\
00753 block +=line_size;\
00754 }\
00755 }\
00756 \
00757 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00758 {\
00759 int i;\
00760 const uint64_t a= AV_RN64(pixels );\
00761 const uint64_t b= AV_RN64(pixels+1);\
00762 uint64_t l0= (a&0x0303030303030303ULL)\
00763 + (b&0x0303030303030303ULL)\
00764 + 0x0202020202020202ULL;\
00765 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00766 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00767 uint64_t l1,h1;\
00768 \
00769 pixels+=line_size;\
00770 for(i=0; i<h; i+=2){\
00771 uint64_t a= AV_RN64(pixels );\
00772 uint64_t b= AV_RN64(pixels+1);\
00773 l1= (a&0x0303030303030303ULL)\
00774 + (b&0x0303030303030303ULL);\
00775 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00776 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00777 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00778 pixels+=line_size;\
00779 block +=line_size;\
00780 a= AV_RN64(pixels );\
00781 b= AV_RN64(pixels+1);\
00782 l0= (a&0x0303030303030303ULL)\
00783 + (b&0x0303030303030303ULL)\
00784 + 0x0202020202020202ULL;\
00785 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00786 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00787 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00788 pixels+=line_size;\
00789 block +=line_size;\
00790 }\
00791 }\
00792 \
00793 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00794 {\
00795 int i;\
00796 const uint64_t a= AV_RN64(pixels );\
00797 const uint64_t b= AV_RN64(pixels+1);\
00798 uint64_t l0= (a&0x0303030303030303ULL)\
00799 + (b&0x0303030303030303ULL)\
00800 + 0x0101010101010101ULL;\
00801 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00802 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00803 uint64_t l1,h1;\
00804 \
00805 pixels+=line_size;\
00806 for(i=0; i<h; i+=2){\
00807 uint64_t a= AV_RN64(pixels );\
00808 uint64_t b= AV_RN64(pixels+1);\
00809 l1= (a&0x0303030303030303ULL)\
00810 + (b&0x0303030303030303ULL);\
00811 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00812 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00813 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00814 pixels+=line_size;\
00815 block +=line_size;\
00816 a= AV_RN64(pixels );\
00817 b= AV_RN64(pixels+1);\
00818 l0= (a&0x0303030303030303ULL)\
00819 + (b&0x0303030303030303ULL)\
00820 + 0x0101010101010101ULL;\
00821 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00822 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00823 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00824 pixels+=line_size;\
00825 block +=line_size;\
00826 }\
00827 }\
00828 \
00829 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
00830 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
00831 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
00832 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
00833 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
00834 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
00835 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
00836
00837 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
00838 #else // 64 bit variant
00839
00840 #define PIXOP2(OPNAME, OP) \
00841 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00842 int i;\
00843 for(i=0; i<h; i++){\
00844 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
00845 pixels+=line_size;\
00846 block +=line_size;\
00847 }\
00848 }\
00849 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00850 int i;\
00851 for(i=0; i<h; i++){\
00852 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00853 pixels+=line_size;\
00854 block +=line_size;\
00855 }\
00856 }\
00857 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00858 int i;\
00859 for(i=0; i<h; i++){\
00860 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00861 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
00862 pixels+=line_size;\
00863 block +=line_size;\
00864 }\
00865 }\
00866 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00867 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
00868 }\
00869 \
00870 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00871 int src_stride1, int src_stride2, int h){\
00872 int i;\
00873 for(i=0; i<h; i++){\
00874 uint32_t a,b;\
00875 a= AV_RN32(&src1[i*src_stride1 ]);\
00876 b= AV_RN32(&src2[i*src_stride2 ]);\
00877 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
00878 a= AV_RN32(&src1[i*src_stride1+4]);\
00879 b= AV_RN32(&src2[i*src_stride2+4]);\
00880 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
00881 }\
00882 }\
00883 \
00884 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00885 int src_stride1, int src_stride2, int h){\
00886 int i;\
00887 for(i=0; i<h; i++){\
00888 uint32_t a,b;\
00889 a= AV_RN32(&src1[i*src_stride1 ]);\
00890 b= AV_RN32(&src2[i*src_stride2 ]);\
00891 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00892 a= AV_RN32(&src1[i*src_stride1+4]);\
00893 b= AV_RN32(&src2[i*src_stride2+4]);\
00894 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
00895 }\
00896 }\
00897 \
00898 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00899 int src_stride1, int src_stride2, int h){\
00900 int i;\
00901 for(i=0; i<h; i++){\
00902 uint32_t a,b;\
00903 a= AV_RN32(&src1[i*src_stride1 ]);\
00904 b= AV_RN32(&src2[i*src_stride2 ]);\
00905 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00906 }\
00907 }\
00908 \
00909 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00910 int src_stride1, int src_stride2, int h){\
00911 int i;\
00912 for(i=0; i<h; i++){\
00913 uint32_t a,b;\
00914 a= AV_RN16(&src1[i*src_stride1 ]);\
00915 b= AV_RN16(&src2[i*src_stride2 ]);\
00916 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00917 }\
00918 }\
00919 \
00920 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00921 int src_stride1, int src_stride2, int h){\
00922 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00923 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00924 }\
00925 \
00926 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00927 int src_stride1, int src_stride2, int h){\
00928 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00929 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00930 }\
00931 \
00932 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00933 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00934 }\
00935 \
00936 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00937 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00938 }\
00939 \
00940 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00941 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00942 }\
00943 \
00944 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00945 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00946 }\
00947 \
00948 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00949 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00950 int i;\
00951 for(i=0; i<h; i++){\
00952 uint32_t a, b, c, d, l0, l1, h0, h1;\
00953 a= AV_RN32(&src1[i*src_stride1]);\
00954 b= AV_RN32(&src2[i*src_stride2]);\
00955 c= AV_RN32(&src3[i*src_stride3]);\
00956 d= AV_RN32(&src4[i*src_stride4]);\
00957 l0= (a&0x03030303UL)\
00958 + (b&0x03030303UL)\
00959 + 0x02020202UL;\
00960 h0= ((a&0xFCFCFCFCUL)>>2)\
00961 + ((b&0xFCFCFCFCUL)>>2);\
00962 l1= (c&0x03030303UL)\
00963 + (d&0x03030303UL);\
00964 h1= ((c&0xFCFCFCFCUL)>>2)\
00965 + ((d&0xFCFCFCFCUL)>>2);\
00966 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00967 a= AV_RN32(&src1[i*src_stride1+4]);\
00968 b= AV_RN32(&src2[i*src_stride2+4]);\
00969 c= AV_RN32(&src3[i*src_stride3+4]);\
00970 d= AV_RN32(&src4[i*src_stride4+4]);\
00971 l0= (a&0x03030303UL)\
00972 + (b&0x03030303UL)\
00973 + 0x02020202UL;\
00974 h0= ((a&0xFCFCFCFCUL)>>2)\
00975 + ((b&0xFCFCFCFCUL)>>2);\
00976 l1= (c&0x03030303UL)\
00977 + (d&0x03030303UL);\
00978 h1= ((c&0xFCFCFCFCUL)>>2)\
00979 + ((d&0xFCFCFCFCUL)>>2);\
00980 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00981 }\
00982 }\
00983 \
00984 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00985 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00986 }\
00987 \
00988 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00989 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00990 }\
00991 \
00992 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00993 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00994 }\
00995 \
00996 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00997 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00998 }\
00999 \
01000 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
01001 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01002 int i;\
01003 for(i=0; i<h; i++){\
01004 uint32_t a, b, c, d, l0, l1, h0, h1;\
01005 a= AV_RN32(&src1[i*src_stride1]);\
01006 b= AV_RN32(&src2[i*src_stride2]);\
01007 c= AV_RN32(&src3[i*src_stride3]);\
01008 d= AV_RN32(&src4[i*src_stride4]);\
01009 l0= (a&0x03030303UL)\
01010 + (b&0x03030303UL)\
01011 + 0x01010101UL;\
01012 h0= ((a&0xFCFCFCFCUL)>>2)\
01013 + ((b&0xFCFCFCFCUL)>>2);\
01014 l1= (c&0x03030303UL)\
01015 + (d&0x03030303UL);\
01016 h1= ((c&0xFCFCFCFCUL)>>2)\
01017 + ((d&0xFCFCFCFCUL)>>2);\
01018 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01019 a= AV_RN32(&src1[i*src_stride1+4]);\
01020 b= AV_RN32(&src2[i*src_stride2+4]);\
01021 c= AV_RN32(&src3[i*src_stride3+4]);\
01022 d= AV_RN32(&src4[i*src_stride4+4]);\
01023 l0= (a&0x03030303UL)\
01024 + (b&0x03030303UL)\
01025 + 0x01010101UL;\
01026 h0= ((a&0xFCFCFCFCUL)>>2)\
01027 + ((b&0xFCFCFCFCUL)>>2);\
01028 l1= (c&0x03030303UL)\
01029 + (d&0x03030303UL);\
01030 h1= ((c&0xFCFCFCFCUL)>>2)\
01031 + ((d&0xFCFCFCFCUL)>>2);\
01032 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01033 }\
01034 }\
01035 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
01036 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01037 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01038 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01039 }\
01040 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
01041 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01042 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01043 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01044 }\
01045 \
01046 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01047 {\
01048 int i, a0, b0, a1, b1;\
01049 a0= pixels[0];\
01050 b0= pixels[1] + 2;\
01051 a0 += b0;\
01052 b0 += pixels[2];\
01053 \
01054 pixels+=line_size;\
01055 for(i=0; i<h; i+=2){\
01056 a1= pixels[0];\
01057 b1= pixels[1];\
01058 a1 += b1;\
01059 b1 += pixels[2];\
01060 \
01061 block[0]= (a1+a0)>>2; \
01062 block[1]= (b1+b0)>>2;\
01063 \
01064 pixels+=line_size;\
01065 block +=line_size;\
01066 \
01067 a0= pixels[0];\
01068 b0= pixels[1] + 2;\
01069 a0 += b0;\
01070 b0 += pixels[2];\
01071 \
01072 block[0]= (a1+a0)>>2;\
01073 block[1]= (b1+b0)>>2;\
01074 pixels+=line_size;\
01075 block +=line_size;\
01076 }\
01077 }\
01078 \
01079 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01080 {\
01081 int i;\
01082 const uint32_t a= AV_RN32(pixels );\
01083 const uint32_t b= AV_RN32(pixels+1);\
01084 uint32_t l0= (a&0x03030303UL)\
01085 + (b&0x03030303UL)\
01086 + 0x02020202UL;\
01087 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01088 + ((b&0xFCFCFCFCUL)>>2);\
01089 uint32_t l1,h1;\
01090 \
01091 pixels+=line_size;\
01092 for(i=0; i<h; i+=2){\
01093 uint32_t a= AV_RN32(pixels );\
01094 uint32_t b= AV_RN32(pixels+1);\
01095 l1= (a&0x03030303UL)\
01096 + (b&0x03030303UL);\
01097 h1= ((a&0xFCFCFCFCUL)>>2)\
01098 + ((b&0xFCFCFCFCUL)>>2);\
01099 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01100 pixels+=line_size;\
01101 block +=line_size;\
01102 a= AV_RN32(pixels );\
01103 b= AV_RN32(pixels+1);\
01104 l0= (a&0x03030303UL)\
01105 + (b&0x03030303UL)\
01106 + 0x02020202UL;\
01107 h0= ((a&0xFCFCFCFCUL)>>2)\
01108 + ((b&0xFCFCFCFCUL)>>2);\
01109 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01110 pixels+=line_size;\
01111 block +=line_size;\
01112 }\
01113 }\
01114 \
01115 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01116 {\
01117 int j;\
01118 for(j=0; j<2; j++){\
01119 int i;\
01120 const uint32_t a= AV_RN32(pixels );\
01121 const uint32_t b= AV_RN32(pixels+1);\
01122 uint32_t l0= (a&0x03030303UL)\
01123 + (b&0x03030303UL)\
01124 + 0x02020202UL;\
01125 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01126 + ((b&0xFCFCFCFCUL)>>2);\
01127 uint32_t l1,h1;\
01128 \
01129 pixels+=line_size;\
01130 for(i=0; i<h; i+=2){\
01131 uint32_t a= AV_RN32(pixels );\
01132 uint32_t b= AV_RN32(pixels+1);\
01133 l1= (a&0x03030303UL)\
01134 + (b&0x03030303UL);\
01135 h1= ((a&0xFCFCFCFCUL)>>2)\
01136 + ((b&0xFCFCFCFCUL)>>2);\
01137 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01138 pixels+=line_size;\
01139 block +=line_size;\
01140 a= AV_RN32(pixels );\
01141 b= AV_RN32(pixels+1);\
01142 l0= (a&0x03030303UL)\
01143 + (b&0x03030303UL)\
01144 + 0x02020202UL;\
01145 h0= ((a&0xFCFCFCFCUL)>>2)\
01146 + ((b&0xFCFCFCFCUL)>>2);\
01147 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01148 pixels+=line_size;\
01149 block +=line_size;\
01150 }\
01151 pixels+=4-line_size*(h+1);\
01152 block +=4-line_size*h;\
01153 }\
01154 }\
01155 \
01156 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01157 {\
01158 int j;\
01159 for(j=0; j<2; j++){\
01160 int i;\
01161 const uint32_t a= AV_RN32(pixels );\
01162 const uint32_t b= AV_RN32(pixels+1);\
01163 uint32_t l0= (a&0x03030303UL)\
01164 + (b&0x03030303UL)\
01165 + 0x01010101UL;\
01166 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01167 + ((b&0xFCFCFCFCUL)>>2);\
01168 uint32_t l1,h1;\
01169 \
01170 pixels+=line_size;\
01171 for(i=0; i<h; i+=2){\
01172 uint32_t a= AV_RN32(pixels );\
01173 uint32_t b= AV_RN32(pixels+1);\
01174 l1= (a&0x03030303UL)\
01175 + (b&0x03030303UL);\
01176 h1= ((a&0xFCFCFCFCUL)>>2)\
01177 + ((b&0xFCFCFCFCUL)>>2);\
01178 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01179 pixels+=line_size;\
01180 block +=line_size;\
01181 a= AV_RN32(pixels );\
01182 b= AV_RN32(pixels+1);\
01183 l0= (a&0x03030303UL)\
01184 + (b&0x03030303UL)\
01185 + 0x01010101UL;\
01186 h0= ((a&0xFCFCFCFCUL)>>2)\
01187 + ((b&0xFCFCFCFCUL)>>2);\
01188 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01189 pixels+=line_size;\
01190 block +=line_size;\
01191 }\
01192 pixels+=4-line_size*(h+1);\
01193 block +=4-line_size*h;\
01194 }\
01195 }\
01196 \
01197 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
01198 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
01199 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
01200 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
01201 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
01202 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
01203 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
01204 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
01205
#define op_avg(a, b) a = rnd_avg32(a, b)
#endif  /* NOTE(review): closes a conditional opened above this chunk — verify pairing */
#define op_put(a, b) a = b

/* Instantiate the full family of pixel copy/average primitives for both
 * store operators (averaging and plain put). */
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put

/* Rounding 2-way and 4-way scalar averages used by the chroma MC code below. */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
01217
/* Bind the generic three-stride 16-wide no-rounding averager to the common
 * case where dst and both sources share a single stride. */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
01221
/* Bind the generic three-stride 8-wide no-rounding averager to the common
 * case where dst and both sources share a single stride. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
01225
/**
 * 1-warp-point global motion compensation (MPEG-4 GMC) for an
 * 8-pixel-wide block: bilinear interpolation of each output sample from
 * a 2x2 source neighbourhood with 1/16-pel weights.
 *
 * @param x16,y16  fractional position in 1/16 pel (0..16)
 * @param rounder  value added before the final >>8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            /* weights A+B+C+D == 256, hence the >>8 normalisation */
            dst[col] = (A * src[col]          + B * src[col + 1] +
                        C * src[stride + col] + D * src[stride + col + 1] +
                        rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
01248
/**
 * Global motion compensation with a full affine transform.  For each of
 * the 8 output columns per row the source position is tracked in fixed
 * point (vx, vy advance by dxx/dyx per column, ox/oy by dxy/dyy per row),
 * split into an integer pixel position plus a sub-pel fraction, and the
 * output sample is produced by bilinear interpolation.  Positions outside
 * the picture are clamped to the nearest edge sample, degrading the
 * interpolation to 1-D or a plain copy at the borders.
 *
 * @param shift  fractional precision of the motion vectors; s = 1<<shift
 * @param r      rounding constant added before the final >>(shift*2)
 * @param width  picture width in pixels
 * @param height picture height in pixels
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;   /* number of fractional positions per pixel */

    /* switch to inclusive maximum coordinates for the range tests below */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* NOTE(review): vx/vy look like 16.16-style fixed point; after
             * >>16 the low 'shift' bits are the sub-pel fraction — confirm
             * against the callers' vector encoding. */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside the picture: 2-D bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, interpolate in x only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index  ]*(s-frac_x)
                                          + src[index+1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, interpolate in y only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index       ]*(s-frac_y)
                                          + src[index+stride]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* both outside: plain clamped copy, no interpolation */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
01306
/* Full-pel thirdpel copy: dispatch on block width to the plain copy of
 * matching size; unsupported widths are silently ignored. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
01315
/* Thirdpel MC, horizontal offset 1/3: out = round((2*a + b)/3),
 * computed in fixed point as (683*(2*a + b + 1)) >> 11. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683*(2*src[x] + src[x+1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
01326
/* Thirdpel MC, horizontal offset 2/3: out = round((a + 2*b)/3),
 * computed in fixed point as (683*(a + 2*b + 1)) >> 11. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683*(src[x] + 2*src[x+1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
01337
/* Thirdpel MC, vertical offset 1/3: out = round((2*top + bottom)/3),
 * computed in fixed point as (683*(2*top + bottom + 1)) >> 11. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683*(2*src[x] + src[x+stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
01348
/* Thirdpel MC at (1/3, 1/3): weighted 2x2 bilinear, weights 4/3/3/2 over
 * a 12-denominator, computed as (2731*(...+6)) >> 15. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731*(4*src[x] + 3*src[x+1] +
                            3*src[x+stride] + 2*src[x+stride+1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
01359
/* Thirdpel MC at (1/3, 2/3): weighted 2x2 bilinear, weights 3/2/4/3,
 * computed as (2731*(...+6)) >> 15. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731*(3*src[x] + 2*src[x+1] +
                            4*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
01370
/* Thirdpel MC, vertical offset 2/3: out = round((top + 2*bottom)/3),
 * computed in fixed point as (683*(top + 2*bottom + 1)) >> 11. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683*(src[x] + 2*src[x+stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
01381
/* Thirdpel MC at (2/3, 1/3): weighted 2x2 bilinear, weights 3/4/2/3,
 * computed as (2731*(...+6)) >> 15. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731*(3*src[x] + 4*src[x+1] +
                            2*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
01392
/* Thirdpel MC at (2/3, 2/3): weighted 2x2 bilinear, weights 2/3/3/4,
 * computed as (2731*(...+6)) >> 15. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731*(2*src[x] + 3*src[x+1] +
                            3*src[x+stride] + 4*src[x+stride+1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
01403
/* Full-pel thirdpel average: dispatch on block width to the plain
 * averager of matching size; unsupported widths are silently ignored. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
01412
/* Thirdpel MC at horizontal 1/3 with rounding average against the
 * existing destination: dst = (dst + filtered + 1) >> 1. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683*(2*src[x] + src[x+1] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
01423
/* Thirdpel MC at horizontal 2/3 with rounding average against the
 * existing destination: dst = (dst + filtered + 1) >> 1. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683*(src[x] + 2*src[x+1] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
01434
/* Thirdpel MC at vertical 1/3 with rounding average against the
 * existing destination: dst = (dst + filtered + 1) >> 1. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683*(2*src[x] + src[x+stride] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
01445
/* Thirdpel MC at (1/3, 1/3) with rounding average against the existing
 * destination; same 4/3/3/2 weighting as the put variant. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731*(4*src[x] + 3*src[x+1] +
                                       3*src[x+stride] + 2*src[x+stride+1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
01456
/* Thirdpel MC at (1/3, 2/3) with rounding average against the existing
 * destination; same 3/2/4/3 weighting as the put variant. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731*(3*src[x] + 2*src[x+1] +
                                       4*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
01467
/* Thirdpel MC at vertical 2/3 with rounding average against the
 * existing destination: dst = (dst + filtered + 1) >> 1. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683*(src[x] + 2*src[x+stride] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
01478
/* Thirdpel MC at (2/3, 1/3) with rounding average against the existing
 * destination; same 3/4/2/3 weighting as the put variant. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731*(3*src[x] + 4*src[x+1] +
                                       2*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
01489
/* Thirdpel MC at (2/3, 2/3) with rounding average against the existing
 * destination; same 2/3/3/4 weighting as the put variant. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731*(2*src[x] + 3*src[x+1] +
                                       3*src[x+stride] + 4*src[x+stride+1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
#if 0 /* dead code: disabled TPEL_WIDTH wrappers; the bodies are not even
       * valid C (note the stray 'void' before each call) — kept only as
       * a historical reference */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
01521
/**
 * Generator for the H.264 chroma motion-compensation C functions for
 * block widths 2, 4 and 8.  x and y are the eighth-pel fractional
 * offsets (0..7); the four bilinear weights A..D always sum to 64.
 * OP is a put or avg store macro that performs the final rounding.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        /* generic 2-D bilinear case */\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        /* x or y is 0: only two taps remain, 1-D filter along one axis */\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        /* generic 2-D bilinear case */\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        /* x or y is 0: only two taps remain, 1-D filter along one axis */\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        /* generic 2-D bilinear case */\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        /* x or y is 0: only two taps remain, 1-D filter along one axis */\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
01622
/* The A..D weights above sum to 64, so (v + 32) >> 6 is a rounded
 * 6-bit normalisation; op_avg additionally averages with dst. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
01630
/**
 * VC-1 8-wide chroma MC, "no rounding" variant: same eighth-pel
 * bilinear filter as the H.264 chroma MC but with a bias of
 * 32 - 4 = 28 before the >>6 normalisation.
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){
    const int A = (8 - x) * (8 - y);
    const int B = (    x) * (8 - y);
    const int C = (8 - x) * (    y);
    const int D = (    x) * (    y);
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (A * src[col]          + B * src[col + 1] +
                        C * src[stride + col] + D * src[stride + col + 1] +
                        32 - 4) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
01654
/**
 * VC-1 8-wide chroma MC, "no rounding" variant with rounding average
 * against the existing destination.  The avg2() helper is expanded
 * inline: avg2(a,b) == (a + b + 1) >> 1.
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){
    const int A = (8 - x) * (8 - y);
    const int B = (    x) * (8 - y);
    const int C = (8 - x) * (    y);
    const int D = (    x) * (    y);
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            int t = (A * src[col]          + B * src[col + 1] +
                     C * src[stride + col] + D * src[stride + col + 1] +
                     32 - 4) >> 6;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
01678
01679 #define QPEL_MC(r, OPNAME, RND, OP) \
01680 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01681 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01682 int i;\
01683 for(i=0; i<h; i++)\
01684 {\
01685 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
01686 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
01687 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
01688 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
01689 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
01690 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
01691 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
01692 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
01693 dst+=dstStride;\
01694 src+=srcStride;\
01695 }\
01696 }\
01697 \
01698 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01699 const int w=8;\
01700 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01701 int i;\
01702 for(i=0; i<w; i++)\
01703 {\
01704 const int src0= src[0*srcStride];\
01705 const int src1= src[1*srcStride];\
01706 const int src2= src[2*srcStride];\
01707 const int src3= src[3*srcStride];\
01708 const int src4= src[4*srcStride];\
01709 const int src5= src[5*srcStride];\
01710 const int src6= src[6*srcStride];\
01711 const int src7= src[7*srcStride];\
01712 const int src8= src[8*srcStride];\
01713 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
01714 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
01715 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
01716 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
01717 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
01718 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
01719 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
01720 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
01721 dst++;\
01722 src++;\
01723 }\
01724 }\
01725 \
01726 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01727 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01728 int i;\
01729 \
01730 for(i=0; i<h; i++)\
01731 {\
01732 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
01733 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
01734 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
01735 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
01736 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
01737 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
01738 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
01739 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
01740 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
01741 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
01742 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
01743 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
01744 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
01745 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
01746 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
01747 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
01748 dst+=dstStride;\
01749 src+=srcStride;\
01750 }\
01751 }\
01752 \
01753 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01754 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01755 int i;\
01756 const int w=16;\
01757 for(i=0; i<w; i++)\
01758 {\
01759 const int src0= src[0*srcStride];\
01760 const int src1= src[1*srcStride];\
01761 const int src2= src[2*srcStride];\
01762 const int src3= src[3*srcStride];\
01763 const int src4= src[4*srcStride];\
01764 const int src5= src[5*srcStride];\
01765 const int src6= src[6*srcStride];\
01766 const int src7= src[7*srcStride];\
01767 const int src8= src[8*srcStride];\
01768 const int src9= src[9*srcStride];\
01769 const int src10= src[10*srcStride];\
01770 const int src11= src[11*srcStride];\
01771 const int src12= src[12*srcStride];\
01772 const int src13= src[13*srcStride];\
01773 const int src14= src[14*srcStride];\
01774 const int src15= src[15*srcStride];\
01775 const int src16= src[16*srcStride];\
01776 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
01777 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
01778 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
01779 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
01780 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
01781 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
01782 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
01783 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
01784 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
01785 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
01786 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
01787 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
01788 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
01789 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
01790 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
01791 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
01792 dst++;\
01793 src++;\
01794 }\
01795 }\
01796 \
01797 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
01798 OPNAME ## pixels8_c(dst, src, stride, 8);\
01799 }\
01800 \
01801 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01802 uint8_t half[64];\
01803 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01804 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
01805 }\
01806 \
01807 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01808 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
01809 }\
01810 \
01811 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01812 uint8_t half[64];\
01813 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01814 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
01815 }\
01816 \
01817 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01818 uint8_t full[16*9];\
01819 uint8_t half[64];\
01820 copy_block9(full, src, 16, stride, 9);\
01821 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01822 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
01823 }\
01824 \
01825 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01826 uint8_t full[16*9];\
01827 copy_block9(full, src, 16, stride, 9);\
01828 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
01829 }\
01830 \
01831 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01832 uint8_t full[16*9];\
01833 uint8_t half[64];\
01834 copy_block9(full, src, 16, stride, 9);\
01835 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01836 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
01837 }\
01838 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01839 uint8_t full[16*9];\
01840 uint8_t halfH[72];\
01841 uint8_t halfV[64];\
01842 uint8_t halfHV[64];\
01843 copy_block9(full, src, 16, stride, 9);\
01844 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01845 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01846 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01847 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01848 }\
01849 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01850 uint8_t full[16*9];\
01851 uint8_t halfH[72];\
01852 uint8_t halfHV[64];\
01853 copy_block9(full, src, 16, stride, 9);\
01854 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01855 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01856 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01857 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01858 }\
01859 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01860 uint8_t full[16*9];\
01861 uint8_t halfH[72];\
01862 uint8_t halfV[64];\
01863 uint8_t halfHV[64];\
01864 copy_block9(full, src, 16, stride, 9);\
01865 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01866 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01867 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01868 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01869 }\
01870 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01871 uint8_t full[16*9];\
01872 uint8_t halfH[72];\
01873 uint8_t halfHV[64];\
01874 copy_block9(full, src, 16, stride, 9);\
01875 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01876 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01877 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01878 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01879 }\
01880 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01881 uint8_t full[16*9];\
01882 uint8_t halfH[72];\
01883 uint8_t halfV[64];\
01884 uint8_t halfHV[64];\
01885 copy_block9(full, src, 16, stride, 9);\
01886 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01887 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01888 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01889 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01890 }\
01891 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01892 uint8_t full[16*9];\
01893 uint8_t halfH[72];\
01894 uint8_t halfHV[64];\
01895 copy_block9(full, src, 16, stride, 9);\
01896 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01897 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01898 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01899 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01900 }\
01901 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01902 uint8_t full[16*9];\
01903 uint8_t halfH[72];\
01904 uint8_t halfV[64];\
01905 uint8_t halfHV[64];\
01906 copy_block9(full, src, 16, stride, 9);\
01907 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01908 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01909 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01910 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01911 }\
01912 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01913 uint8_t full[16*9];\
01914 uint8_t halfH[72];\
01915 uint8_t halfHV[64];\
01916 copy_block9(full, src, 16, stride, 9);\
01917 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01918 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01919 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01920 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01921 }\
01922 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01923 uint8_t halfH[72];\
01924 uint8_t halfHV[64];\
01925 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01926 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01927 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01928 }\
01929 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01930 uint8_t halfH[72];\
01931 uint8_t halfHV[64];\
01932 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01933 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01934 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01935 }\
01936 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01937 uint8_t full[16*9];\
01938 uint8_t halfH[72];\
01939 uint8_t halfV[64];\
01940 uint8_t halfHV[64];\
01941 copy_block9(full, src, 16, stride, 9);\
01942 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01943 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01944 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01945 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01946 }\
01947 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01948 uint8_t full[16*9];\
01949 uint8_t halfH[72];\
01950 copy_block9(full, src, 16, stride, 9);\
01951 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01952 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01953 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01954 }\
01955 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01956 uint8_t full[16*9];\
01957 uint8_t halfH[72];\
01958 uint8_t halfV[64];\
01959 uint8_t halfHV[64];\
01960 copy_block9(full, src, 16, stride, 9);\
01961 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01962 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01963 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01964 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01965 }\
01966 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01967 uint8_t full[16*9];\
01968 uint8_t halfH[72];\
01969 copy_block9(full, src, 16, stride, 9);\
01970 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01971 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01972 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01973 }\
01974 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01975 uint8_t halfH[72];\
01976 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01977 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01978 }\
01979 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
01980 OPNAME ## pixels16_c(dst, src, stride, 16);\
01981 }\
01982 \
01983 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01984 uint8_t half[256];\
01985 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01986 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
01987 }\
01988 \
01989 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01990 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01991 }\
01992 \
01993 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01994 uint8_t half[256];\
01995 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01996 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
01997 }\
01998 \
01999 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
02000 uint8_t full[24*17];\
02001 uint8_t half[256];\
02002 copy_block17(full, src, 24, stride, 17);\
02003 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
02004 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
02005 }\
02006 \
02007 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
02008 uint8_t full[24*17];\
02009 copy_block17(full, src, 24, stride, 17);\
02010 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
02011 }\
02012 \
02013 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
02014 uint8_t full[24*17];\
02015 uint8_t half[256];\
02016 copy_block17(full, src, 24, stride, 17);\
02017 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
02018 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
02019 }\
02020 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
02021 uint8_t full[24*17];\
02022 uint8_t halfH[272];\
02023 uint8_t halfV[256];\
02024 uint8_t halfHV[256];\
02025 copy_block17(full, src, 24, stride, 17);\
02026 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02027 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
02028 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02029 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02030 }\
02031 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
02032 uint8_t full[24*17];\
02033 uint8_t halfH[272];\
02034 uint8_t halfHV[256];\
02035 copy_block17(full, src, 24, stride, 17);\
02036 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02037 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02038 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02039 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
02040 }\
02041 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
02042 uint8_t full[24*17];\
02043 uint8_t halfH[272];\
02044 uint8_t halfV[256];\
02045 uint8_t halfHV[256];\
02046 copy_block17(full, src, 24, stride, 17);\
02047 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02048 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02049 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02050 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02051 }\
02052 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
02053 uint8_t full[24*17];\
02054 uint8_t halfH[272];\
02055 uint8_t halfHV[256];\
02056 copy_block17(full, src, 24, stride, 17);\
02057 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02058 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02059 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02060 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
02061 }\
02062 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
02063 uint8_t full[24*17];\
02064 uint8_t halfH[272];\
02065 uint8_t halfV[256];\
02066 uint8_t halfHV[256];\
02067 copy_block17(full, src, 24, stride, 17);\
02068 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02069 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
02070 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02071 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02072 }\
02073 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
02074 uint8_t full[24*17];\
02075 uint8_t halfH[272];\
02076 uint8_t halfHV[256];\
02077 copy_block17(full, src, 24, stride, 17);\
02078 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02079 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02080 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02081 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02082 }\
02083 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
02084 uint8_t full[24*17];\
02085 uint8_t halfH[272];\
02086 uint8_t halfV[256];\
02087 uint8_t halfHV[256];\
02088 copy_block17(full, src, 24, stride, 17);\
02089 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
02090 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02091 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02092 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02093 }\
02094 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
02095 uint8_t full[24*17];\
02096 uint8_t halfH[272];\
02097 uint8_t halfHV[256];\
02098 copy_block17(full, src, 24, stride, 17);\
02099 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02100 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02101 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02102 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02103 }\
02104 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
02105 uint8_t halfH[272];\
02106 uint8_t halfHV[256];\
02107 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02108 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02109 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
02110 }\
02111 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
02112 uint8_t halfH[272];\
02113 uint8_t halfHV[256];\
02114 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02115 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02116 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02117 }\
02118 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
02119 uint8_t full[24*17];\
02120 uint8_t halfH[272];\
02121 uint8_t halfV[256];\
02122 uint8_t halfHV[256];\
02123 copy_block17(full, src, 24, stride, 17);\
02124 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02125 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
02126 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02127 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02128 }\
02129 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
02130 uint8_t full[24*17];\
02131 uint8_t halfH[272];\
02132 copy_block17(full, src, 24, stride, 17);\
02133 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02134 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02135 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02136 }\
02137 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
02138 uint8_t full[24*17];\
02139 uint8_t halfH[272];\
02140 uint8_t halfV[256];\
02141 uint8_t halfHV[256];\
02142 copy_block17(full, src, 24, stride, 17);\
02143 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02144 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02145 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02146 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02147 }\
02148 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
02149 uint8_t full[24*17];\
02150 uint8_t halfH[272];\
02151 copy_block17(full, src, 24, stride, 17);\
02152 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02153 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02154 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02155 }\
02156 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02157 uint8_t halfH[272];\
02158 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02159 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02160 }
02161
/* Store/average operators plugged into QPEL_MC through its OP parameter.
 * 'b' is the raw 6-tap filter sum (taps total 32, see the *20/*6(-5)/*3(1)
 * terms in the lowpass kernels), so cm[((b) + 16)>>5] rounds to nearest and
 * clamps to 0..255 via the crop table ('cm' in each kernel points
 * MAX_NEG_CROP bytes into ff_cropTbl, so under/overshooting sums index
 * safely).  The _no_rnd variants bias by 15 instead of 16, i.e. round the
 * halfway case down — the "no rounding" prediction mode.
 * op_avg additionally averages the clipped result with the existing dst
 * pixel, rounding up (+1 before >>1). */
02162 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
02163 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
02164 #define op_put(a, b) a = cm[((b) + 16)>>5]
02165 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
02166
/* Instantiate the quarter-pel MC function families: rounding put,
 * no-rounding put, and rounding avg.  (No avg_no_rnd family is generated.) */
02167 QPEL_MC(0, put_ , _ , op_put)
02168 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
02169 QPEL_MC(0, avg_ , _ , op_avg)
02170
/* The operator macros are only consumed by the instantiations above. */
02171 #undef op_avg
02172 #undef op_avg_no_rnd
02173 #undef op_put
02174 #undef op_put_no_rnd
02175
02176 #if 1
/* H264_LOWPASS(OPNAME, OP, OP2) expands to the C reference implementations
 * of the H.264 6-tap (1,-5,20,20,-5,1) half-pel interpolation filters for
 * block widths/heights 2, 4, 8 and 16:
 *
 *   OPNAME##h264_qpelN_h_lowpass  - horizontal pass; each row reads
 *                                   src[-2 .. N+2].
 *   OPNAME##h264_qpelN_v_lowpass  - vertical pass; each column reads rows
 *                                   src[-2*srcStride .. (N+2)*srcStride].
 *   OPNAME##h264_qpelN_hv_lowpass - horizontal pass into the caller-supplied
 *                                   int16_t tmp[] (N+5 rows of unclipped
 *                                   16-bit sums), then a vertical pass over
 *                                   tmp.  OP2 is used for that second pass
 *                                   because its input is the widened
 *                                   intermediate, not a pixel value, and so
 *                                   needs a different normalization.
 *
 * OP/OP2 are statement macros of the form OP(dstpixel, filtersum) that
 * normalize, clip (via the 'cm' crop-table pointer declared in each kernel)
 * and either store or average into dst.  The qpel16 variants are composed
 * of four qpel8 calls covering the four 8x8 quadrants.
 * NOTE(review): callers must guarantee the 2-pixel/2-row context around the
 * block is readable — not checkable from this file. */
02177 #define H264_LOWPASS(OPNAME, OP, OP2) \
02178 static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02179 const int h=2;\
02180 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02181 int i;\
02182 for(i=0; i<h; i++)\
02183 {\
02184 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
02185 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
02186 dst+=dstStride;\
02187 src+=srcStride;\
02188 }\
02189 }\
02190 \
02191 static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02192 const int w=2;\
02193 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02194 int i;\
02195 for(i=0; i<w; i++)\
02196 {\
02197 const int srcB= src[-2*srcStride];\
02198 const int srcA= src[-1*srcStride];\
02199 const int src0= src[0 *srcStride];\
02200 const int src1= src[1 *srcStride];\
02201 const int src2= src[2 *srcStride];\
02202 const int src3= src[3 *srcStride];\
02203 const int src4= src[4 *srcStride];\
02204 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
02205 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
02206 dst++;\
02207 src++;\
02208 }\
02209 }\
02210 \
02211 static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02212 const int h=2;\
02213 const int w=2;\
02214 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02215 int i;\
02216 src -= 2*srcStride; /* back up 2 rows: vertical pass needs that context */\
02217 for(i=0; i<h+5; i++)\
02218 {\
02219 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
02220 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
02221 tmp+=tmpStride;\
02222 src+=srcStride;\
02223 }\
02224 tmp -= tmpStride*(h+5-2); /* rewind so tmp[0] is the first output row (rows -2,-1 are context) */\
02225 for(i=0; i<w; i++)\
02226 {\
02227 const int tmpB= tmp[-2*tmpStride];\
02228 const int tmpA= tmp[-1*tmpStride];\
02229 const int tmp0= tmp[0 *tmpStride];\
02230 const int tmp1= tmp[1 *tmpStride];\
02231 const int tmp2= tmp[2 *tmpStride];\
02232 const int tmp3= tmp[3 *tmpStride];\
02233 const int tmp4= tmp[4 *tmpStride];\
02234 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
02235 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
02236 dst++;\
02237 tmp++;\
02238 }\
02239 }\
02240 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02241 const int h=4;\
02242 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02243 int i;\
02244 for(i=0; i<h; i++)\
02245 {\
02246 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
02247 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
02248 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
02249 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
02250 dst+=dstStride;\
02251 src+=srcStride;\
02252 }\
02253 }\
02254 \
02255 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02256 const int w=4;\
02257 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02258 int i;\
02259 for(i=0; i<w; i++)\
02260 {\
02261 const int srcB= src[-2*srcStride];\
02262 const int srcA= src[-1*srcStride];\
02263 const int src0= src[0 *srcStride];\
02264 const int src1= src[1 *srcStride];\
02265 const int src2= src[2 *srcStride];\
02266 const int src3= src[3 *srcStride];\
02267 const int src4= src[4 *srcStride];\
02268 const int src5= src[5 *srcStride];\
02269 const int src6= src[6 *srcStride];\
02270 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
02271 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
02272 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
02273 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
02274 dst++;\
02275 src++;\
02276 }\
02277 }\
02278 \
02279 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02280 const int h=4;\
02281 const int w=4;\
02282 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02283 int i;\
02284 src -= 2*srcStride; /* back up 2 rows: vertical pass needs that context */\
02285 for(i=0; i<h+5; i++)\
02286 {\
02287 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
02288 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
02289 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
02290 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
02291 tmp+=tmpStride;\
02292 src+=srcStride;\
02293 }\
02294 tmp -= tmpStride*(h+5-2); /* rewind so tmp[0] is the first output row */\
02295 for(i=0; i<w; i++)\
02296 {\
02297 const int tmpB= tmp[-2*tmpStride];\
02298 const int tmpA= tmp[-1*tmpStride];\
02299 const int tmp0= tmp[0 *tmpStride];\
02300 const int tmp1= tmp[1 *tmpStride];\
02301 const int tmp2= tmp[2 *tmpStride];\
02302 const int tmp3= tmp[3 *tmpStride];\
02303 const int tmp4= tmp[4 *tmpStride];\
02304 const int tmp5= tmp[5 *tmpStride];\
02305 const int tmp6= tmp[6 *tmpStride];\
02306 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
02307 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
02308 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
02309 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
02310 dst++;\
02311 tmp++;\
02312 }\
02313 }\
02314 \
02315 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02316 const int h=8;\
02317 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02318 int i;\
02319 for(i=0; i<h; i++)\
02320 {\
02321 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
02322 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
02323 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
02324 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
02325 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
02326 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
02327 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
02328 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
02329 dst+=dstStride;\
02330 src+=srcStride;\
02331 }\
02332 }\
02333 \
02334 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02335 const int w=8;\
02336 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02337 int i;\
02338 for(i=0; i<w; i++)\
02339 {\
02340 const int srcB= src[-2*srcStride];\
02341 const int srcA= src[-1*srcStride];\
02342 const int src0= src[0 *srcStride];\
02343 const int src1= src[1 *srcStride];\
02344 const int src2= src[2 *srcStride];\
02345 const int src3= src[3 *srcStride];\
02346 const int src4= src[4 *srcStride];\
02347 const int src5= src[5 *srcStride];\
02348 const int src6= src[6 *srcStride];\
02349 const int src7= src[7 *srcStride];\
02350 const int src8= src[8 *srcStride];\
02351 const int src9= src[9 *srcStride];\
02352 const int src10=src[10*srcStride];\
02353 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
02354 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
02355 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
02356 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
02357 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
02358 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
02359 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
02360 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
02361 dst++;\
02362 src++;\
02363 }\
02364 }\
02365 \
02366 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02367 const int h=8;\
02368 const int w=8;\
02369 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02370 int i;\
02371 src -= 2*srcStride; /* back up 2 rows: vertical pass needs that context */\
02372 for(i=0; i<h+5; i++)\
02373 {\
02374 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
02375 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
02376 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
02377 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
02378 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
02379 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
02380 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
02381 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
02382 tmp+=tmpStride;\
02383 src+=srcStride;\
02384 }\
02385 tmp -= tmpStride*(h+5-2); /* rewind so tmp[0] is the first output row */\
02386 for(i=0; i<w; i++)\
02387 {\
02388 const int tmpB= tmp[-2*tmpStride];\
02389 const int tmpA= tmp[-1*tmpStride];\
02390 const int tmp0= tmp[0 *tmpStride];\
02391 const int tmp1= tmp[1 *tmpStride];\
02392 const int tmp2= tmp[2 *tmpStride];\
02393 const int tmp3= tmp[3 *tmpStride];\
02394 const int tmp4= tmp[4 *tmpStride];\
02395 const int tmp5= tmp[5 *tmpStride];\
02396 const int tmp6= tmp[6 *tmpStride];\
02397 const int tmp7= tmp[7 *tmpStride];\
02398 const int tmp8= tmp[8 *tmpStride];\
02399 const int tmp9= tmp[9 *tmpStride];\
02400 const int tmp10=tmp[10*tmpStride];\
02401 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
02402 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
02403 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
02404 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
02405 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
02406 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
02407 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
02408 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
02409 dst++;\
02410 tmp++;\
02411 }\
02412 }\
02413 \
/* 16-wide variants: four 8x8 quadrant calls (top pair, then bottom pair). */\
02414 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02415 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
02416 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
02417 src += 8*srcStride;\
02418 dst += 8*dstStride;\
02419 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
02420 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
02421 }\
02422 \
02423 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02424 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
02425 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
02426 src += 8*srcStride;\
02427 dst += 8*dstStride;\
02428 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
02429 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
02430 }\
02431 \
02432 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02433 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
02434 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
02435 src += 8*srcStride;\
02436 dst += 8*dstStride;\
02437 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
02438 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
02439 }\
02440
02441 #define H264_MC(OPNAME, SIZE) \
02442 static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
02443 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
02444 }\
02445 \
02446 static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
02447 uint8_t half[SIZE*SIZE];\
02448 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
02449 OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
02450 }\
02451 \
02452 static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
02453 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
02454 }\
02455 \
02456 static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
02457 uint8_t half[SIZE*SIZE];\
02458 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
02459 OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
02460 }\
02461 \
02462 static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
02463 uint8_t full[SIZE*(SIZE+5)];\
02464 uint8_t * const full_mid= full + SIZE*2;\
02465 uint8_t half[SIZE*SIZE];\
02466 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02467 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
02468 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
02469 }\
02470 \
02471 static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
02472 uint8_t full[SIZE*(SIZE+5)];\
02473 uint8_t * const full_mid= full + SIZE*2;\
02474 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02475 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
02476 }\
02477 \
02478 static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
02479 uint8_t full[SIZE*(SIZE+5)];\
02480 uint8_t * const full_mid= full + SIZE*2;\
02481 uint8_t half[SIZE*SIZE];\
02482 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02483 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
02484 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
02485 }\
02486 \
02487 static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
02488 uint8_t full[SIZE*(SIZE+5)];\
02489 uint8_t * const full_mid= full + SIZE*2;\
02490 uint8_t halfH[SIZE*SIZE];\
02491 uint8_t halfV[SIZE*SIZE];\
02492 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
02493 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02494 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02495 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02496 }\
02497 \
02498 static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
02499 uint8_t full[SIZE*(SIZE+5)];\
02500 uint8_t * const full_mid= full + SIZE*2;\
02501 uint8_t halfH[SIZE*SIZE];\
02502 uint8_t halfV[SIZE*SIZE];\
02503 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
02504 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
02505 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02506 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02507 }\
02508 \
02509 static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
02510 uint8_t full[SIZE*(SIZE+5)];\
02511 uint8_t * const full_mid= full + SIZE*2;\
02512 uint8_t halfH[SIZE*SIZE];\
02513 uint8_t halfV[SIZE*SIZE];\
02514 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
02515 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02516 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02517 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02518 }\
02519 \
02520 static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
02521 uint8_t full[SIZE*(SIZE+5)];\
02522 uint8_t * const full_mid= full + SIZE*2;\
02523 uint8_t halfH[SIZE*SIZE];\
02524 uint8_t halfV[SIZE*SIZE];\
02525 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
02526 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
02527 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02528 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02529 }\
02530 \
02531 static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02532 int16_t tmp[SIZE*(SIZE+5)];\
02533 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
02534 }\
02535 \
02536 static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
02537 int16_t tmp[SIZE*(SIZE+5)];\
02538 uint8_t halfH[SIZE*SIZE];\
02539 uint8_t halfHV[SIZE*SIZE];\
02540 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
02541 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02542 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
02543 }\
02544 \
02545 static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
02546 int16_t tmp[SIZE*(SIZE+5)];\
02547 uint8_t halfH[SIZE*SIZE];\
02548 uint8_t halfHV[SIZE*SIZE];\
02549 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
02550 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02551 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
02552 }\
02553 \
02554 static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
02555 uint8_t full[SIZE*(SIZE+5)];\
02556 uint8_t * const full_mid= full + SIZE*2;\
02557 int16_t tmp[SIZE*(SIZE+5)];\
02558 uint8_t halfV[SIZE*SIZE];\
02559 uint8_t halfHV[SIZE*SIZE];\
02560 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02561 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02562 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02563 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
02564 }\
02565 \
02566 static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
02567 uint8_t full[SIZE*(SIZE+5)];\
02568 uint8_t * const full_mid= full + SIZE*2;\
02569 int16_t tmp[SIZE*(SIZE+5)];\
02570 uint8_t halfV[SIZE*SIZE];\
02571 uint8_t halfHV[SIZE*SIZE];\
02572 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
02573 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02574 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02575 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
02576 }\
02577
02578 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
02579
02580 #define op_put(a, b) a = cm[((b) + 16)>>5]
02581 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
02582 #define op2_put(a, b) a = cm[((b) + 512)>>10]
02583
02584 H264_LOWPASS(put_ , op_put, op2_put)
02585 H264_LOWPASS(avg_ , op_avg, op2_avg)
02586 H264_MC(put_, 2)
02587 H264_MC(put_, 4)
02588 H264_MC(put_, 8)
02589 H264_MC(put_, 16)
02590 H264_MC(avg_, 4)
02591 H264_MC(avg_, 8)
02592 H264_MC(avg_, 16)
02593
02594 #undef op_avg
02595 #undef op_put
02596 #undef op2_avg
02597 #undef op2_put
02598 #endif
02599
02600 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
02601 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02602 int i;
02603
02604 for(i=0; i<h; i++){
02605 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
02606 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
02607 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
02608 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
02609 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
02610 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
02611 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
02612 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
02613 dst+=dstStride;
02614 src+=srcStride;
02615 }
02616 }
02617
02618 #if CONFIG_CAVS_DECODER
02619
/* CAVS qpel position (0,0): plain full-pel 8x8 block copy. */
void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
/* CAVS qpel position (0,0): 8x8 copy averaged with the existing dst pixels. */
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
/* CAVS qpel position (0,0): plain full-pel 16x16 block copy. */
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
/* CAVS qpel position (0,0): 16x16 copy averaged with the existing dst pixels. */
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
02632 #endif
02633
02634 #if CONFIG_VC1_DECODER
02635
/* VC-1 mspel position (0,0): full-pel 8x8 copy; rnd is unused here. */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
    put_pixels8_c(dst, src, stride, 8);
}
/* VC-1 mspel position (0,0): averaging 8x8 copy; rnd is unused here. */
void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
    avg_pixels8_c(dst, src, stride, 8);
}
02642 #endif
02643
02644 #if CONFIG_RV40_DECODER
/* RV40 qpel position (3,3): implemented as the plain 2x2 bilinear average. */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_c(dst, src, stride, 16);
}
/* RV40 qpel position (3,3), averaging variant: 2x2 bilinear, 16x16. */
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_c(dst, src, stride, 16);
}
/* RV40 qpel position (3,3): 2x2 bilinear average, 8x8. */
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_c(dst, src, stride, 8);
}
/* RV40 qpel position (3,3), averaging variant: 2x2 bilinear, 8x8. */
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
02657 #endif
02658
02659 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
02660 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02661 int i;
02662
02663 for(i=0; i<w; i++){
02664 const int src_1= src[ -srcStride];
02665 const int src0 = src[0 ];
02666 const int src1 = src[ srcStride];
02667 const int src2 = src[2*srcStride];
02668 const int src3 = src[3*srcStride];
02669 const int src4 = src[4*srcStride];
02670 const int src5 = src[5*srcStride];
02671 const int src6 = src[6*srcStride];
02672 const int src7 = src[7*srcStride];
02673 const int src8 = src[8*srcStride];
02674 const int src9 = src[9*srcStride];
02675 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
02676 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
02677 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
02678 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
02679 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
02680 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
02681 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
02682 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
02683 src++;
02684 dst++;
02685 }
02686 }
02687
/* WMV2 mspel position (0,0): full-pel 8x8 copy. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}
02691
/* WMV2 mspel (1,0): average of the unfiltered source and the
 * horizontally lowpass-filtered block. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}
02697
/* WMV2 mspel (2,0): horizontally lowpass-filtered block only. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
02701
/* WMV2 mspel (3,0): average of the source shifted one pixel right and the
 * horizontally lowpass-filtered block. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}
02707
/* WMV2 mspel (0,2): vertically lowpass-filtered block only. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
02711
/* WMV2 mspel (1,2): average of the vertically filtered block and the
 * horizontally+vertically filtered block. halfH holds 11 filtered rows
 * (8 outputs + border), starting one row above src so the vertical pass
 * has context; halfH+8 skips that extra top row. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* WMV2 mspel (3,2): like mc12 but the vertical-only term is taken one
 * pixel to the right (src+1). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* WMV2 mspel (2,2): horizontal lowpass (with one border row above),
 * then vertical lowpass of the filtered rows. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
02735
/**
 * H.263 deblocking across a horizontal block edge (filters vertically).
 * src points at the first row below the edge; the two rows above and the
 * two rows below it (src-2*stride .. src+stride) are modified in place.
 * Filter strength is looked up from the quantizer.
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* d1 ramps up with d until +-strength, then back down to 0,
             * so large discontinuities (real edges) are left untouched */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* clip to 0..255: bit 8 set means out of range; ~(p>>31) is
             * 0 for negative values and ~0 (truncates to 255) otherwise */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker correction for the outer pixel pair */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
02772
/**
 * H.263 deblocking across a vertical block edge (filters horizontally).
 * src points at the first column right of the edge; the two columns on
 * each side (offsets -2..+1) are modified in place for 8 rows.
 * Same filter as h263_v_loop_filter_c, transposed.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* d1 ramps up with d until +-strength, then back down to 0 */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless clip to 0..255 (see h263_v_loop_filter_c) */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
02809
/**
 * H.261 in-place loop filter on an 8x8 block: separable [1 2 1]/4 smoothing,
 * vertical pass into a temporary, horizontal pass back into src.
 * Border rows/columns are passed through (only scaled/rounded, not smoothed).
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int tmp[64];
    int row, col;

    /* vertical [1 2 1] pass into tmp; top and bottom rows are just scaled
     * by 4 so the later >>2 / >>4 normalization leaves them unchanged */
    for (col = 0; col < 8; col++) {
        tmp[col]         = 4 * src[col];
        tmp[col + 7 * 8] = 4 * src[col + 7 * stride];
    }
    for (row = 1; row < 7; row++) {
        for (col = 0; col < 8; col++) {
            const int s = row * stride + col;
            tmp[row * 8 + col] = src[s - stride] + 2 * src[s] + src[s + stride];
        }
    }

    /* horizontal [1 2 1] pass back into src, with rounding */
    for (row = 0; row < 8; row++) {
        src[    row * stride] = (tmp[    row * 8] + 2) >> 2;
        src[7 + row * stride] = (tmp[7 + row * 8] + 2) >> 2;
        for (col = 1; col < 7; col++) {
            const int t = row * 8 + col;
            src[row * stride + col] = (tmp[t - 1] + 2 * tmp[t] + tmp[t + 1] + 8) >> 4;
        }
    }
}
02836
/**
 * Sum of absolute differences (SAD) between two 16-pixel-wide blocks,
 * h rows tall. v (the context) is unused in the C version.
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
02864
/**
 * SAD of pix1 against pix2 interpolated to the horizontal half-pel position:
 * reference sample j is avg2(pix2[j], pix2[j+1]), so one extra column of
 * pix2 (index 16) is read per row.
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix2[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
02892
/**
 * SAD of pix1 against pix2 interpolated to the vertical half-pel position:
 * reference sample j is avg2(pix2[j], row-below[j]), so one extra row of
 * pix2 is read.
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - avg2(pix2[j], below[j]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
02922
/**
 * SAD of pix1 against pix2 interpolated to the diagonal half-pel position:
 * reference sample j is the rounded average of the 2x2 neighborhood
 * (pix2[j], pix2[j+1], row-below[j], row-below[j+1]); one extra row and
 * one extra column of pix2 are read.
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - avg4(pix2[j], pix2[j + 1], below[j], below[j + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
02952
/**
 * Sum of absolute differences (SAD) between two 8-pixel-wide blocks,
 * h rows tall. v (the context) is unused in the C version.
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
02972
/**
 * 8-wide SAD against the horizontal half-pel interpolation of pix2:
 * reference sample j is avg2(pix2[j], pix2[j+1]).
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix2[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
02992
/**
 * 8-wide SAD against the vertical half-pel interpolation of pix2:
 * reference sample j is avg2(pix2[j], row-below[j]).
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - avg2(pix2[j], below[j]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
03014
/**
 * 8-wide SAD against the diagonal half-pel interpolation of pix2:
 * reference sample j averages the 2x2 neighborhood via avg4().
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - avg4(pix2[j], pix2[j + 1], below[j], below[j + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
03036
03037 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
03038 MpegEncContext *c = v;
03039 int score1=0;
03040 int score2=0;
03041 int x,y;
03042
03043 for(y=0; y<h; y++){
03044 for(x=0; x<16; x++){
03045 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
03046 }
03047 if(y+1<h){
03048 for(x=0; x<15; x++){
03049 score2+= FFABS( s1[x ] - s1[x +stride]
03050 - s1[x+1] + s1[x+1+stride])
03051 -FFABS( s2[x ] - s2[x +stride]
03052 - s2[x+1] + s2[x+1+stride]);
03053 }
03054 }
03055 s1+= stride;
03056 s2+= stride;
03057 }
03058
03059 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
03060 else return score1 + FFABS(score2)*8;
03061 }
03062
03063 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
03064 MpegEncContext *c = v;
03065 int score1=0;
03066 int score2=0;
03067 int x,y;
03068
03069 for(y=0; y<h; y++){
03070 for(x=0; x<8; x++){
03071 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
03072 }
03073 if(y+1<h){
03074 for(x=0; x<7; x++){
03075 score2+= FFABS( s1[x ] - s1[x +stride]
03076 - s1[x+1] + s1[x+1+stride])
03077 -FFABS( s2[x ] - s2[x +stride]
03078 - s2[x+1] + s2[x+1+stride]);
03079 }
03080 }
03081 s1+= stride;
03082 s2+= stride;
03083 }
03084
03085 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
03086 else return score1 + FFABS(score2)*8;
03087 }
03088
/**
 * Estimate the weighted squared error that results from adding
 * basis[]*scale to the residual rem[]. The basis term is rescaled from
 * BASIS_SHIFT to RECON_SHIFT precision with rounding before use.
 * Returns the accumulated (weight*value)^2 sum, downshifted.
 */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        /* rounded rescale of the basis contribution, then add the residual */
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}
03103
/**
 * Add basis[]*scale into the residual rem[] in place, with the same
 * rounded BASIS_SHIFT -> RECON_SHIFT rescale as try_8x8basis_c.
 */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
03111
03120 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
03121 {
03122 int i;
03123 DCTELEM temp[64];
03124
03125 if(last<=0) return;
03126
03127
03128 for(i=0; i<=last; i++){
03129 const int j= scantable[i];
03130 temp[j]= block[j];
03131 block[j]=0;
03132 }
03133
03134 for(i=0; i<=last; i++){
03135 const int j= scantable[i];
03136 const int perm_j= permutation[j];
03137 block[perm_j]= temp[j];
03138 }
03139 }
03140
/* Comparison function for FF_CMP_ZERO: every candidate scores 0,
 * i.e. motion estimation degenerates to "first candidate wins". */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
03144
03145 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
03146 int i;
03147
03148 memset(cmp, 0, sizeof(void*)*6);
03149
03150 for(i=0; i<6; i++){
03151 switch(type&0xFF){
03152 case FF_CMP_SAD:
03153 cmp[i]= c->sad[i];
03154 break;
03155 case FF_CMP_SATD:
03156 cmp[i]= c->hadamard8_diff[i];
03157 break;
03158 case FF_CMP_SSE:
03159 cmp[i]= c->sse[i];
03160 break;
03161 case FF_CMP_DCT:
03162 cmp[i]= c->dct_sad[i];
03163 break;
03164 case FF_CMP_DCT264:
03165 cmp[i]= c->dct264_sad[i];
03166 break;
03167 case FF_CMP_DCTMAX:
03168 cmp[i]= c->dct_max[i];
03169 break;
03170 case FF_CMP_PSNR:
03171 cmp[i]= c->quant_psnr[i];
03172 break;
03173 case FF_CMP_BIT:
03174 cmp[i]= c->bit[i];
03175 break;
03176 case FF_CMP_RD:
03177 cmp[i]= c->rd[i];
03178 break;
03179 case FF_CMP_VSAD:
03180 cmp[i]= c->vsad[i];
03181 break;
03182 case FF_CMP_VSSE:
03183 cmp[i]= c->vsse[i];
03184 break;
03185 case FF_CMP_ZERO:
03186 cmp[i]= zero_cmp;
03187 break;
03188 case FF_CMP_NSSE:
03189 cmp[i]= c->nsse[i];
03190 break;
03191 #if CONFIG_DWT
03192 case FF_CMP_W53:
03193 cmp[i]= c->w53[i];
03194 break;
03195 case FF_CMP_W97:
03196 cmp[i]= c->w97[i];
03197 break;
03198 #endif
03199 default:
03200 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
03201 }
03202 }
03203 }
03204
/* Zero one 64-coefficient (8x8) DCT block. */
static void clear_block_c(DCTELEM *block)
{
    memset(block, 0, sizeof(DCTELEM)*64);
}
03209
/* Zero six consecutive 64-coefficient DCT blocks (one macroblock's worth). */
static void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
}
03217
/**
 * dst[i] += src[i] for w bytes, one machine word at a time (SWAR):
 * add the low 7 bits of every byte, then XOR in the carry-free top bit,
 * so byte sums wrap independently without cross-byte carries.
 * NOTE(review): word loads assume dst/src alignment is acceptable here —
 * same assumption as the original code.
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    /* scalar tail for the remaining w % sizeof(long) bytes */
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}
03228
/**
 * dst[i] = src1[i] + src2[i] for w bytes, using the same word-at-a-time
 * carry-free byte addition as add_bytes_c, writing to a third buffer.
 */
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    /* scalar tail */
    for(; i<w; i++)
        dst[i] = src1[i]+src2[i];
}
03239
/**
 * dst[i] = src1[i] - src2[i] for w bytes, word-at-a-time where possible.
 * The SWAR form computes per-byte subtraction without borrow propagation
 * between bytes. On targets without fast unaligned loads, a byte-wise
 * unrolled loop is used when src2 is misaligned.
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
#if !HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        /* misaligned src2: plain byte loop, unrolled by 8 */
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
        }
    }else
#endif
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
    }
    /* scalar tail */
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}
03264
/**
 * HuffYUV median-prediction decode:
 * dst[i] = median(left, top, left + top - topleft) + diff[i], all mod 256.
 * src1 is the row above (top); *left / *left_top carry running state
 * across calls and are updated on return.
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t left_val = *left;
    uint8_t topleft  = *left_top;

    for (i = 0; i < w; i++) {
        const int pred = mid_pred(left_val, src1[i], (left_val + src1[i] - topleft) & 0xFF);
        topleft  = src1[i];
        left_val = pred + diff[i];   /* uint8_t truncation == mod-256 wrap */
        dst[i]   = left_val;
    }

    *left     = left_val;
    *left_top = topleft;
}
03281
/**
 * HuffYUV median-prediction encode: the inverse of
 * add_hfyu_median_prediction_c. dst[i] = src2[i] - median prediction,
 * with src1 the row above and src2 the current row.
 * *left / *left_top carry running state across calls.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t left_val = *left;
    uint8_t topleft  = *left_top;

    for (i = 0; i < w; i++) {
        const int pred = mid_pred(left_val, src1[i], (left_val + src1[i] - topleft) & 0xFF);
        topleft  = src1[i];
        left_val = src2[i];
        dst[i]   = left_val - pred;  /* uint8_t truncation == mod-256 wrap */
    }

    *left     = left_val;
    *left_top = topleft;
}
03299
/**
 * HuffYUV left-prediction decode: running prefix sum of src into dst,
 * seeded with acc. Stores wrap mod 256 (uint8_t), but the full integer
 * accumulator is returned so the caller can continue on the next run.
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    for (i = 0; i < w; i++) {
        acc += src[i];
        dst[i] = acc;
    }

    return acc;
}
03318
/* Byte offsets of the B, G, R, A channels inside a packed 32-bit pixel,
 * endian-dependent so the channel order matches memory layout. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * HuffYUV left-prediction decode over packed 32-bit BGRA pixels: each
 * channel keeps its own running sum (mod 256 on store). The four state
 * pointers are both input seed and output state.
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int sum_r = *red, sum_g = *green, sum_b = *blue, sum_a = *alpha;

    for (i = 0; i < w; i++) {
        const uint8_t *sp = src + 4 * i;
        uint8_t       *dp = dst + 4 * i;

        sum_b += sp[B]; dp[B] = sum_b;
        sum_g += sp[G]; dp[G] = sum_g;
        sum_r += sp[R]; dp[R] = sum_r;
        sum_a += sp[A]; dp[A] = sum_a;
    }

    *red   = sum_r;
    *green = sum_g;
    *blue  = sum_b;
    *alpha = sum_a;
}
#undef B
#undef G
#undef R
#undef A
03359
/* 2-point butterfly writing sum and difference to separate outputs. */
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

/* In-place 2-point butterfly: x <- x+y, y <- x-y. */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* |x+y| + |x-y|: final butterfly stage folded into the absolute sum. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
03374
/**
 * SATD of the 8x8 residual src - dst: 2-D 8-point Hadamard transform of
 * the pixel differences, then the sum of absolute transform coefficients.
 * h must be 8.
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    /* horizontal pass: 8-point Hadamard on each row of differences */
    for(i=0; i<8; i++){

        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical pass down each column; last stage folded into BUTTERFLYA */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
#if 0
static int maxi=0;
if(sum>maxi){
    maxi=sum;
    printf("MAX:%d\n", maxi);
}
#endif
    return sum;
}
03426
/**
 * Intra SATD of an 8x8 block: 2-D Hadamard transform of the pixels
 * themselves (dummy is ignored), sum of absolute coefficients, minus the
 * DC-like term |temp[0]+temp[32]| so the block mean does not contribute.
 * h must be 8.
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    /* horizontal pass on raw pixels */
    for(i=0; i<8; i++){

        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical pass; last stage folded into BUTTERFLYA */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* subtract the i==0 sum term added above, removing the mean's share */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
03474
/**
 * DCT-domain SAD: forward-transform the 8x8 residual src1 - src2 and
 * return the sum of absolute coefficients. h must be 8.
 */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
03485
03486 #if CONFIG_GPL
/* 1-D 8-point integer DCT (H.264 8x8 style) used by dct264_sad8x8_c.
 * Reads through SRC(i) and writes through DST(i, value), so the caller
 * redefines those macros to select row or column access. */
#define DCT8_1D {\
    /* even part: sums of mirrored sample pairs */\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    /* odd part: differences of mirrored sample pairs */\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}
03513
/**
 * SAD in the H.264-style 8x8 DCT domain: transform the residual rows,
 * then accumulate absolute values while transforming the columns
 * (the column pass's DST() is redefined to sum instead of store).
 */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

/* row pass: in-place transform of each row */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

/* column pass: coefficients are not stored, just |v| accumulated */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
03537 #endif
03538
03539 static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03540 MpegEncContext * const s= (MpegEncContext *)c;
03541 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
03542 int sum=0, i;
03543
03544 assert(h==8);
03545
03546 s->dsp.diff_pixels(temp, src1, src2, stride);
03547 s->dsp.fdct(temp);
03548
03549 for(i=0; i<64; i++)
03550 sum= FFMAX(sum, FFABS(temp[i]));
03551
03552 return sum;
03553 }
03554
/**
 * Estimate the squared quantization error for one 8x8 block:
 * the difference block is quantized, dequantized and inverse
 * transformed, then compared (SSE in the transform domain) against a
 * saved copy of the unprocessed coefficients.
 */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;  /* pristine copy of the residual */
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0; /* force the inter quantizer path below */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    /* keep the original residual for comparison */
    memcpy(bak, temp, 64*sizeof(DCTELEM));

    /* simulate the encode/decode round trip */
    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct(temp);

    /* sum of squared differences against the untouched residual */
    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
03577
/**
 * Rate-distortion comparison metric for one 8x8 block.
 * The residual is quantized, its VLC cost counted (rate), then
 * dequantized, inverse transformed and compared against the source
 * (distortion).  Returns distortion + lambda*rate, with the rate term
 * weighted by qscale^2*109/128.
 */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* work on local aligned copies so the IDCT can add into lsrc2 */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;  /* DC coded separately below */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; /* table biased by 256 */
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits for all coefficients before the last */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64; /* bias so levels -64..63 index the table */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length; /* out-of-table level: escape code */
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64); /* the last coefficient must be nonzero */

        /* the final coefficient uses the "last" VLC table */
        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    /* reconstruct: dequantize and add back onto the prediction */
    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    /* lambda*rate with lambda ~ qscale^2 * 109/128 */
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
03653
/**
 * Rate-only comparison metric for one 8x8 block: quantize the residual
 * and return the number of VLC bits needed to code it (no distortion
 * term; see rd8x8_c for the combined version).
 */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;  /* DC coded separately below */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; /* table biased by 256 */
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits for all coefficients before the last */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64; /* bias so levels -64..63 index the table */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length; /* out-of-table level: escape code */
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64); /* the last coefficient must be nonzero */

        /* the final coefficient uses the "last" VLC table */
        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
03712
/*
 * Vertical intra SAD: sum of |s[x,y] - s[x,y+1]| over the block, i.e.
 * the absolute difference between each row and the row below it.
 * Measures vertical high-frequency content of a single plane; the
 * second source operand is unused.  Instantiated for widths 8 and 16.
 */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
 \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
 \
    return score; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
03730
/**
 * Vertical SAD of the residual between two planes: for each pair of
 * adjacent rows, accumulate the absolute change of (s1 - s2) from one
 * row to the next.
 */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int x, y;
    int total = 0;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            const int d = s1[x] - s2[x] - s1[x+stride] + s2[x+stride];
            total += FFABS(d);
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
03745
/* Squared difference, used by the vsse metrics below. */
#define SQ(a) ((a)*(a))
/*
 * Vertical intra SSE: like VSAD_INTRA but accumulates squared
 * row-to-row differences, weighting strong vertical gradients more
 * heavily.  The second source operand is unused.  Instantiated for
 * widths 8 and 16.
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
 \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
 \
    return score; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
03764
/**
 * Vertical SSE of the residual between two planes: for each pair of
 * adjacent rows, accumulate the squared change of (s1 - s2) from one
 * row to the next.
 */
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int x, y;
    int total = 0;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            const int d = s1[x] - s2[x] - s1[x+stride] + s2[x+stride];
            total += SQ(d);
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
03779
/**
 * Sum of squared differences between an int8 vector and an int16
 * vector of the given length.
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int i, acc = 0;

    for (i = 0; i < size; i++) {
        const int d = pix1[i] - pix2[i];
        acc += d * d;
    }
    return acc;
}
03788
/*
 * Build 16x16 variants of the 8x8 comparison functions above via the
 * WRAPPER8_16_SQ macro (defined earlier in this file, not shown here);
 * presumably each 16x16 score combines the four constituent 8x8
 * scores -- confirm against the macro definition.
 */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
03799
/** In-place elementwise multiply: dst[i] *= src[i] for i in [0,len). */
static void vector_fmul_c(float *dst, const float *src, int len){
    int k;

    for (k = 0; k < len; k++)
        dst[k] = dst[k] * src[k];
}
03805
/** dst[i] = src0[i] * src1[len-1-i]: multiply src0 by src1 reversed. */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int k;

    /* src1 is walked backwards while src0 moves forwards */
    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[len - 1 - k];
}
03812
/** Fused multiply-add over vectors: dst[i] = src0[i]*src1[i] + src2[i]. */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int k;

    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[k] + src2[k];
}
03818
/**
 * Overlap-add windowing used for MDCT output:
 *   dst[k]          = src0[k]*win[2*len-1-k] - src1[len-1-k]*win[k]          + add_bias
 *   dst[2*len-1-k]  = src0[k]*win[k]         + src1[len-1-k]*win[2*len-1-k]  + add_bias
 * dst and win hold 2*len floats; src0 and src1 hold len floats each.
 */
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
    int k;

    for (k = 0; k < len; k++) {
        const int m = 2 * len - 1 - k;   /* mirrored index in the second half */
        const float a  = src0[k];
        const float b  = src1[len - 1 - k];
        const float w0 = win[k];
        const float w1 = win[m];

        dst[k] = a * w1 - b * w0 + add_bias;
        dst[m] = a * w0 + b * w1 + add_bias;
    }
}
03833
/** Scale a vector by a scalar: dst[i] = src[i] * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k;

    for (k = 0; k < len; k++)
        dst[k] = mul * src[k];
}
03841
/**
 * Multiply src by a sequence of 2-element sub-vectors and a scalar:
 * pair i of dst is src pair i times sv[i][0..1] times mul.
 */
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int i, j;

    for (i = 0, j = 0; i < len; i += 2, j++) {
        const float *v = sv[j];
        dst[i    ] = src[i    ] * v[0] * mul;
        dst[i + 1] = src[i + 1] * v[1] * mul;
    }
}
03851
/**
 * Multiply src by a sequence of 4-element sub-vectors and a scalar:
 * quad i of dst is src quad i times sv[i][0..3] times mul.
 */
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int i, j;

    for (i = 0, j = 0; i < len; i += 4, j++) {
        const float *v = sv[j];
        dst[i    ] = src[i    ] * v[0] * mul;
        dst[i + 1] = src[i + 1] * v[1] * mul;
        dst[i + 2] = src[i + 2] * v[2] * mul;
        dst[i + 3] = src[i + 3] * v[3] * mul;
    }
}
03863
/** Expand 2-element sub-vectors into dst, scaled by mul. */
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
                               int len)
{
    int i, j;

    for (i = 0, j = 0; i < len; i += 2, j++) {
        dst[i    ] = sv[j][0] * mul;
        dst[i + 1] = sv[j][1] * mul;
    }
}
03873
/** Expand 4-element sub-vectors into dst, scaled by mul. */
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
                               int len)
{
    int i, j;

    for (i = 0, j = 0; i < len; i += 4, j++) {
        const float *v = sv[j];
        dst[i    ] = v[0] * mul;
        dst[i + 1] = v[1] * mul;
        dst[i + 2] = v[2] * mul;
        dst[i + 3] = v[3] * mul;
    }
}
03885
/** Elementwise butterfly: (v1, v2) <- (v1 + v2, v1 - v2). */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const float a = v1[k];
        const float b = v2[k];
        v1[k] = a + b;
        v2[k] = a - b;
    }
}
03896
/** Dot product of two float vectors, accumulated in order. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    int k;
    float acc = 0.0f;

    for (k = 0; k < len; k++)
        acc += v1[k] * v2[k];

    return acc;
}
03907
/** Convert int samples to float while scaling: dst[i] = src[i] * mul. */
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
    int k;

    for (k = 0; k < len; k++)
        dst[k] = mul * src[k];
}
03913
/**
 * Clip one float, given as its raw IEEE-754 bit pattern, for the
 * opposite-sign case (min < 0 < max, guaranteed by the caller
 * vector_clipf_c).  For negative floats the unsigned bit pattern grows
 * with magnitude and always exceeds any positive pattern, so
 * a > mini means "value below min".  Flipping the sign bit makes the
 * remaining (positive) patterns ordered the same way for the max test.
 *
 * Fix: the sign-bit mask is now 1U<<31; the previous 1<<31 left-shifted
 * into the sign bit of a signed int, which is undefined behavior in C.
 *
 * @param a        bit pattern of the input float
 * @param mini     bit pattern of the (negative) minimum
 * @param maxi     bit pattern of the (positive) maximum
 * @param maxisign maxi with the sign bit flipped, precomputed by the caller
 * @return bit pattern of the clipped value
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                        uint32_t maxi, uint32_t maxisign)
{

    if(a > mini) return mini;                     /* negative and below min */
    else if((a^(1U<<31)) > maxisign) return maxi; /* positive and above max */
    else return a;
}
03922
/**
 * Clip every float in src to [*min, *max] for the opposite-sign case
 * (*min < 0 < *max), comparing raw IEEE-754 bit patterns via
 * clipf_c_one().  len must be a multiple of 8 (the loop is unrolled
 * by 8).  The float->uint32 reinterpretation uses pointer casts,
 * matching the existing style of this file.
 *
 * Fix: the sign-bit mask is now 1U<<31; the previous 1<<31 left-shifted
 * into the sign bit of a signed int, which is undefined behavior in C.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for(i=0; i<len; i+=8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/**
 * Clip every float in src to [min, max].  When min and max straddle
 * zero the bit-pattern fast path is used; otherwise a plain av_clipf
 * loop, unrolled by 8 (len must be a multiple of 8).
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (i = 0; i < len; i += 8) {
        int k;
        /* same elements, same order as the hand-unrolled original */
        for (k = 0; k < 8; k++)
            dst[i + k] = av_clipf(src[i + k], min, max);
    }
}
03958
/*
 * Convert one float sample to a signed 16-bit value by reading its
 * IEEE-754 bit pattern directly (pointer-cast type pun, as elsewhere
 * in this file).  NOTE(review): this assumes the caller has already
 * scaled/biased the sample so that the low 16 mantissa bits around
 * 0x43c08000 hold the result -- confirm against the float_to_int16*
 * callers before reusing.
 */
static av_always_inline int float_to_int16_one(const float *src){
    int_fast32_t tmp = *(const int32_t*)src;
    if(tmp & 0xf0000){
        /* out of range: the subtraction's sign selects all-ones or zero,
         * so after "- 0x8000" the result saturates to +/-32767/-32768.
         * (>> of a negative value is arithmetic on supported targets.) */
        tmp = (0x43c0ffff - tmp)>>31;
    }
    return tmp - 0x8000;
}
03969
/** Convert a buffer of floats to int16 via float_to_int16_one(). */
void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
    int k;

    for (k = 0; k < len; k++)
        dst[k] = float_to_int16_one(src + k);
}
03975
/**
 * Convert planar float channels to interleaved int16.
 * Stereo keeps its dedicated pairwise loop; any other channel count
 * uses the generic strided store.
 */
void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
    int i, j, c;

    if (channels == 2) {
        /* common stereo case: write L/R pairs directly */
        for (i = 0; i < len; i++) {
            dst[2 * i    ] = float_to_int16_one(src[0] + i);
            dst[2 * i + 1] = float_to_int16_one(src[1] + i);
        }
    } else {
        for (c = 0; c < channels; c++)
            for (i = 0, j = c; i < len; i++, j += channels)
                dst[j] = float_to_int16_one(src[c] + i);
    }
}
03989
/**
 * Dot product of two int16 vectors, with each product shifted right
 * by 'shift' before accumulation.
 */
static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
{
    int k;
    int acc = 0;

    for (k = 0; k < order; k++)
        acc += (v1[k] * v2[k]) >> shift;

    return acc;
}
03999
/**
 * Return the dot product of v1 and v2 while simultaneously updating
 * v1 in place: v1[i] += mul * v3[i].  Each v1 element is read for the
 * product before it is updated.
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
{
    int k;
    int acc = 0;

    for (k = 0; k < order; k++) {
        acc   += v1[k] * v2[k];
        v1[k] += mul * v3[k];
    }
    return acc;
}
04009
/*
 * Fixed-point cosine constants for the WMV2 IDCT below; the values
 * match round(2048*sqrt(2)*cos(i*pi/16)) for i=1..7 with W0=W4=2048
 * (11-bit scale) -- see wmv2_idct_row/col for the shift compensation.
 */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565
04018
/*
 * One row pass of the WMV2 inverse DCT, operating on 8 consecutive
 * shorts.  Classic even/odd butterfly decomposition: a0/a4 come from
 * the even inputs, a1..a7 from the odd ones; 181/256 approximates
 * 1/sqrt(2) for the two cross terms.  Each output is rounded and
 * scaled down by 8 bits.
 */
static void wmv2_idct_row(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    /* odd-part and even-part butterflies */
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];

    /* 181 = round(256/sqrt(2)) */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    /* recombine with rounding, >>8 to drop the W scale */
    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
/*
 * One column pass of the WMV2 inverse DCT (stride 8 between elements).
 * Same butterfly structure as wmv2_idct_row, but each product is
 * pre-rounded and shifted by 3 and the final outputs use >>14,
 * completing the overall transform scaling.
 */
static void wmv2_idct_col(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    /* odd-part and even-part butterflies, >>3 to keep headroom */
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;

    /* 181 = round(256/sqrt(2)) */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    /* recombine with rounding, >>14 finishes the scaling */
    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
/** Full 8x8 WMV2 inverse DCT: all row passes, then all column passes. */
void ff_wmv2_idct_c(short * block){
    int i;

    for (i = 0; i < 8; i++)
        wmv2_idct_row(block + 8 * i);
    for (i = 0; i < 8; i++)
        wmv2_idct_col(block + i);
}
04082
04083
/* WMV2 IDCT wrappers: inverse transform, then store / accumulate the
 * clamped result into the destination picture. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    put_pixels_clamped_c(block, dest, line_size);
}
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    add_pixels_clamped_c(block, dest, line_size);
}
/* Reference (jrevdct) IDCT wrappers, full 8x8 resolution. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}

/* 4x4 reduced-resolution variants, used for lowres==1 decoding. */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

/* 2x2 reduced-resolution variants, used for lowres==2 decoding. */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}

/* 1x1 variants (lowres==3): only the DC term survives; the stored
 * pixel is the rounded, clamped (dc+4)>>3. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* clamp to 0..255 */

    dest[0] = cm[(block[0] + 4)>>3];
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* clamp to 0..255 */

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
04139
04140 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
04141
04142
04143 av_cold void dsputil_static_init(void)
04144 {
04145 int i;
04146
04147 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
04148 for(i=0;i<MAX_NEG_CROP;i++) {
04149 ff_cropTbl[i] = 0;
04150 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
04151 }
04152
04153 for(i=0;i<512;i++) {
04154 ff_squareTbl[i] = (i - 256) * (i - 256);
04155 }
04156
04157 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
04158 }
04159
/*
 * Verify that the compiler honours 16-byte alignment of stack
 * variables, which the SIMD code paths rely on.  Logs a one-time
 * error (on MMX/AltiVec builds) and returns -1 if the stack is
 * misaligned, 0 otherwise.
 */
int ff_check_alignment(void){
    static int did_fail=0; /* warn only once per process */
    DECLARE_ALIGNED(16, int, aligned);

    if((intptr_t)&aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
04179
/*
 * Fill a DSPContext with the portable C implementations, then let the
 * per-architecture init functions override entries with optimized
 * versions.  Selection of fDCT/IDCT honours avctx->dct_algo,
 * avctx->idct_algo and avctx->lowres.
 */
av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    int i;

    ff_check_alignment();

#if CONFIG_ENCODERS
    /* forward DCT selection (encoders only) */
    if(avctx->dct_algo==FF_DCT_FASTINT) {
        c->fdct    = fdct_ifast;
        c->fdct248 = fdct_ifast248;
    }
    else if(avctx->dct_algo==FF_DCT_FAAN) {
        c->fdct    = ff_faandct;
        c->fdct248 = ff_faandct248;
    }
    else {
        c->fdct    = ff_jpeg_fdct_islow; //slow/accurate/default
        c->fdct248 = ff_fdct248_islow;
    }
#endif //CONFIG_ENCODERS

    /* IDCT selection: reduced-resolution variants for lowres decoding,
     * otherwise pick by avctx->idct_algo */
    if(avctx->lowres==1){
        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
            c->idct_put= ff_jref_idct4_put;
            c->idct_add= ff_jref_idct4_add;
        }else{
            c->idct_put= ff_h264_lowres_idct_put_c;
            c->idct_add= ff_h264_lowres_idct_add_c;
        }
        c->idct    = j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct    = j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct    = j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= ff_jref_idct_put;
            c->idct_add= ff_jref_idct_add;
            c->idct    = j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
                avctx->idct_algo==FF_IDCT_VP3){
            c->idct_put= ff_vp3_idct_put_c;
            c->idct_add= ff_vp3_idct_add_c;
            c->idct    = ff_vp3_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_WMV2){
            c->idct_put= ff_wmv2_idct_put_c;
            c->idct_add= ff_wmv2_idct_add_c;
            c->idct    = ff_wmv2_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_FAAN){
            c->idct_put= ff_faanidct_put;
            c->idct_add= ff_faanidct_add;
            c->idct    = ff_faanidct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
            c->idct_put= ff_ea_idct_put_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
            c->idct     = ff_bink_idct_c;
            c->idct_add = ff_bink_idct_add_c;
            c->idct_put = ff_bink_idct_put_c;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put= ff_simple_idct_put;
            c->idct_add= ff_simple_idct_add;
            c->idct    = ff_simple_idct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
    }

    /* basic pixel block operations */
    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->put_pixels_nonclamped = put_pixels_nonclamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->add_pixels8 = add_pixels8_c;
    c->add_pixels4 = add_pixels4_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->clear_block = clear_block_c;
    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    c->fill_block_tab[0] = fill_block16_c;
    c->fill_block_tab[1] = fill_block8_c;
    c->scale_block = scale_block_c;

    /* SAD with half-pel interpolation variants ([0]=16x16, [1]=8x8) */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

/* half-pel put/avg tables: [idx][0..3] = plain, x2, y2, xy2 */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

    dspfunc(put, 0, 16);
    dspfunc(put_no_rnd, 0, 16);
    dspfunc(put, 1, 8);
    dspfunc(put_no_rnd, 1, 8);
    dspfunc(put, 2, 4);
    dspfunc(put, 3, 2);

    dspfunc(avg, 0, 16);
    dspfunc(avg_no_rnd, 0, 16);
    dspfunc(avg, 1, 8);
    dspfunc(avg_no_rnd, 1, 8);
    dspfunc(avg, 2, 4);
    dspfunc(avg, 3, 2);
#undef dspfunc

    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;

    /* third-pel motion compensation (SVQ3); index = mc##xy */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

/* quarter-pel tables: all 16 fractional positions mc00..mc33 */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(put_h264_qpel, 3, 2);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
    /* chroma MC for H.264 and VC-1 */
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
    c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
    c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;

    c->draw_edges = draw_edges_c;

    /* optional per-codec DSP extensions */
#if CONFIG_CAVS_DECODER
    ff_cavsdsp_init(c,avctx);
#endif

#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
    ff_mlp_init(c, avctx);
#endif
#if CONFIG_VC1_DECODER
    ff_vc1dsp_init(c,avctx);
#endif
#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
    ff_intrax8dsp_init(c,avctx);
#endif
#if CONFIG_RV30_DECODER
    ff_rv30dsp_init(c,avctx);
#endif
#if CONFIG_RV40_DECODER
    ff_rv40dsp_init(c,avctx);
    c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
    c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
    c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
    c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
#endif

    /* MSMPEG4/WMV1 special motion compensation */
    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

/* comparison metrics: [0] = 16x16 variant, [1] = 8x8 variant */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    c->hadamard8_diff[5]= hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsad[5]= vsad_intra8_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->vsse[5]= vsse_intra8_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
#if CONFIG_DWT
    ff_dsputil_init_dwt(c);
#endif

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;

    /* lossless / HuffYUV helpers */
    c->add_bytes= add_bytes_c;
    c->add_bytes_l2= add_bytes_l2_c;
    c->diff_bytes= diff_bytes_c;
    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
    c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
    c->bswap_buf= bswap_buf;
#if CONFIG_PNG_DECODER
    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
#endif

    /* loop filters */
    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;
    }

    if (CONFIG_VP3_DECODER) {
        c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
        c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
        c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
    }
    if (CONFIG_VP6_DECODER) {
        c->vp6_filter_diag4= ff_vp6_filter_diag4_c;
    }

    c->h261_loop_filter= h261_loop_filter_c;

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

    /* audio DSP */
#if CONFIG_VORBIS_DECODER
    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
#endif
#if CONFIG_AC3_DECODER
    c->ac3_downmix = ff_ac3_downmix_c;
#endif
#if CONFIG_LPC
    c->lpc_compute_autocorr = ff_lpc_compute_autocorr;
#endif
    c->vector_fmul = vector_fmul_c;
    c->vector_fmul_reverse = vector_fmul_reverse_c;
    c->vector_fmul_add = vector_fmul_add_c;
    c->vector_fmul_window = ff_vector_fmul_window_c;
    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
    c->vector_clipf = vector_clipf_c;
    c->float_to_int16 = ff_float_to_int16_c;
    c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
    c->scalarproduct_float = scalarproduct_float_c;
    c->butterflies_float = butterflies_float_c;
    c->vector_fmul_scalar = vector_fmul_scalar_c;

    c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
    c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;

    c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
    c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;

    /* image shrinking (shrink[n] halves each dimension n times) */
    c->shrink[0]= ff_img_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->prefetch= just_return;

    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));

    /* architecture-specific overrides */
    if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
    if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
    if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
    if (HAVE_VIS)        dsputil_init_vis   (c, avctx);
    if (ARCH_ALPHA)      dsputil_init_alpha (c, avctx);
    if (ARCH_PPC)        dsputil_init_ppc   (c, avctx);
    if (HAVE_MMI)        dsputil_init_mmi   (c, avctx);
    if (ARCH_SH4)        dsputil_init_sh4   (c, avctx);
    if (ARCH_BFIN)       dsputil_init_bfin  (c, avctx);

    /* fall back to the h264 qpel functions for any 2tap slot the
     * arch-specific code did not fill */
    for(i=0; i<64; i++){
        if(!c->put_2tap_qpel_pixels_tab[0][i])
            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
        if(!c->avg_2tap_qpel_pixels_tab[0][i])
            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
    }

    /* build the coefficient permutation matching the chosen IDCT */
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3); /* transpose rows/cols */
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
04566