00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00028
00029
00030
00031
00032 #include "avcodec.h"
00033 #include "dsputil.h"
00034 #include "mathops.h"
00035 #include "simple_idct.h"
00036
/*
 * Fixed-point weights of the 8-point IDCT:
 *     Wi = cos(i*M_PI/16) * sqrt(2) * (1 << N) + 0.5   (truncated to int)
 * The disabled table uses N = 11, the active one N = 14.
 * ROW_SHIFT / COL_SHIFT are the right shifts applied after the row pass
 * and the column pass respectively.
 */
#if 0
#define W1        2841  // i = 1, N = 11
#define W2        2676  // i = 2, N = 11
#define W3        2408  // i = 3, N = 11
#define W4        2048  // i = 4, N = 11
#define W5        1609  // i = 5, N = 11
#define W6        1108  // i = 6, N = 11
#define W7         565  // i = 7, N = 11
#define ROW_SHIFT    8
#define COL_SHIFT   17
#else
#define W1       22725  // cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W2       21407  // cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W3       19266  // cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W4       16383  // cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 yields 16384; one less is used here
#define W5       12873  // cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W6        8867  // cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W7        4520  // cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define ROW_SHIFT   11
#define COL_SHIFT   20  // 6
#endif
00058
00059 static inline void idctRowCondDC (DCTELEM * row)
00060 {
00061 int a0, a1, a2, a3, b0, b1, b2, b3;
00062 #if HAVE_FAST_64BIT
00063 uint64_t temp;
00064 #else
00065 uint32_t temp;
00066 #endif
00067
00068 #if HAVE_FAST_64BIT
00069 #if HAVE_BIGENDIAN
00070 #define ROW0_MASK 0xffff000000000000LL
00071 #else
00072 #define ROW0_MASK 0xffffLL
00073 #endif
00074 if(sizeof(DCTELEM)==2){
00075 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
00076 ((uint64_t *)row)[1]) == 0) {
00077 temp = (row[0] << 3) & 0xffff;
00078 temp += temp << 16;
00079 temp += temp << 32;
00080 ((uint64_t *)row)[0] = temp;
00081 ((uint64_t *)row)[1] = temp;
00082 return;
00083 }
00084 }else{
00085 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
00086 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
00087 return;
00088 }
00089 }
00090 #else
00091 if(sizeof(DCTELEM)==2){
00092 if (!(((uint32_t*)row)[1] |
00093 ((uint32_t*)row)[2] |
00094 ((uint32_t*)row)[3] |
00095 row[1])) {
00096 temp = (row[0] << 3) & 0xffff;
00097 temp += temp << 16;
00098 ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
00099 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
00100 return;
00101 }
00102 }else{
00103 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
00104 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
00105 return;
00106 }
00107 }
00108 #endif
00109
00110 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
00111 a1 = a0;
00112 a2 = a0;
00113 a3 = a0;
00114
00115
00116 a0 += W2 * row[2];
00117 a1 += W6 * row[2];
00118 a2 -= W6 * row[2];
00119 a3 -= W2 * row[2];
00120
00121 b0 = MUL16(W1, row[1]);
00122 MAC16(b0, W3, row[3]);
00123 b1 = MUL16(W3, row[1]);
00124 MAC16(b1, -W7, row[3]);
00125 b2 = MUL16(W5, row[1]);
00126 MAC16(b2, -W1, row[3]);
00127 b3 = MUL16(W7, row[1]);
00128 MAC16(b3, -W5, row[3]);
00129
00130 #if HAVE_FAST_64BIT
00131 temp = ((uint64_t*)row)[1];
00132 #else
00133 temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
00134 #endif
00135 if (temp != 0) {
00136 a0 += W4*row[4] + W6*row[6];
00137 a1 += - W4*row[4] - W2*row[6];
00138 a2 += - W4*row[4] + W2*row[6];
00139 a3 += W4*row[4] - W6*row[6];
00140
00141 MAC16(b0, W5, row[5]);
00142 MAC16(b0, W7, row[7]);
00143
00144 MAC16(b1, -W1, row[5]);
00145 MAC16(b1, -W5, row[7]);
00146
00147 MAC16(b2, W7, row[5]);
00148 MAC16(b2, W3, row[7]);
00149
00150 MAC16(b3, W3, row[5]);
00151 MAC16(b3, -W1, row[7]);
00152 }
00153
00154 row[0] = (a0 + b0) >> ROW_SHIFT;
00155 row[7] = (a0 - b0) >> ROW_SHIFT;
00156 row[1] = (a1 + b1) >> ROW_SHIFT;
00157 row[6] = (a1 - b1) >> ROW_SHIFT;
00158 row[2] = (a2 + b2) >> ROW_SHIFT;
00159 row[5] = (a2 - b2) >> ROW_SHIFT;
00160 row[3] = (a3 + b3) >> ROW_SHIFT;
00161 row[4] = (a3 - b3) >> ROW_SHIFT;
00162 }
00163
00164 static inline void idctSparseColPut (uint8_t *dest, int line_size,
00165 DCTELEM * col)
00166 {
00167 int a0, a1, a2, a3, b0, b1, b2, b3;
00168 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00169
00170
00171 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
00172 a1 = a0;
00173 a2 = a0;
00174 a3 = a0;
00175
00176 a0 += + W2*col[8*2];
00177 a1 += + W6*col[8*2];
00178 a2 += - W6*col[8*2];
00179 a3 += - W2*col[8*2];
00180
00181 b0 = MUL16(W1, col[8*1]);
00182 b1 = MUL16(W3, col[8*1]);
00183 b2 = MUL16(W5, col[8*1]);
00184 b3 = MUL16(W7, col[8*1]);
00185
00186 MAC16(b0, + W3, col[8*3]);
00187 MAC16(b1, - W7, col[8*3]);
00188 MAC16(b2, - W1, col[8*3]);
00189 MAC16(b3, - W5, col[8*3]);
00190
00191 if(col[8*4]){
00192 a0 += + W4*col[8*4];
00193 a1 += - W4*col[8*4];
00194 a2 += - W4*col[8*4];
00195 a3 += + W4*col[8*4];
00196 }
00197
00198 if (col[8*5]) {
00199 MAC16(b0, + W5, col[8*5]);
00200 MAC16(b1, - W1, col[8*5]);
00201 MAC16(b2, + W7, col[8*5]);
00202 MAC16(b3, + W3, col[8*5]);
00203 }
00204
00205 if(col[8*6]){
00206 a0 += + W6*col[8*6];
00207 a1 += - W2*col[8*6];
00208 a2 += + W2*col[8*6];
00209 a3 += - W6*col[8*6];
00210 }
00211
00212 if (col[8*7]) {
00213 MAC16(b0, + W7, col[8*7]);
00214 MAC16(b1, - W5, col[8*7]);
00215 MAC16(b2, + W3, col[8*7]);
00216 MAC16(b3, - W1, col[8*7]);
00217 }
00218
00219 dest[0] = cm[(a0 + b0) >> COL_SHIFT];
00220 dest += line_size;
00221 dest[0] = cm[(a1 + b1) >> COL_SHIFT];
00222 dest += line_size;
00223 dest[0] = cm[(a2 + b2) >> COL_SHIFT];
00224 dest += line_size;
00225 dest[0] = cm[(a3 + b3) >> COL_SHIFT];
00226 dest += line_size;
00227 dest[0] = cm[(a3 - b3) >> COL_SHIFT];
00228 dest += line_size;
00229 dest[0] = cm[(a2 - b2) >> COL_SHIFT];
00230 dest += line_size;
00231 dest[0] = cm[(a1 - b1) >> COL_SHIFT];
00232 dest += line_size;
00233 dest[0] = cm[(a0 - b0) >> COL_SHIFT];
00234 }
00235
00236 static inline void idctSparseColAdd (uint8_t *dest, int line_size,
00237 DCTELEM * col)
00238 {
00239 int a0, a1, a2, a3, b0, b1, b2, b3;
00240 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00241
00242
00243 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
00244 a1 = a0;
00245 a2 = a0;
00246 a3 = a0;
00247
00248 a0 += + W2*col[8*2];
00249 a1 += + W6*col[8*2];
00250 a2 += - W6*col[8*2];
00251 a3 += - W2*col[8*2];
00252
00253 b0 = MUL16(W1, col[8*1]);
00254 b1 = MUL16(W3, col[8*1]);
00255 b2 = MUL16(W5, col[8*1]);
00256 b3 = MUL16(W7, col[8*1]);
00257
00258 MAC16(b0, + W3, col[8*3]);
00259 MAC16(b1, - W7, col[8*3]);
00260 MAC16(b2, - W1, col[8*3]);
00261 MAC16(b3, - W5, col[8*3]);
00262
00263 if(col[8*4]){
00264 a0 += + W4*col[8*4];
00265 a1 += - W4*col[8*4];
00266 a2 += - W4*col[8*4];
00267 a3 += + W4*col[8*4];
00268 }
00269
00270 if (col[8*5]) {
00271 MAC16(b0, + W5, col[8*5]);
00272 MAC16(b1, - W1, col[8*5]);
00273 MAC16(b2, + W7, col[8*5]);
00274 MAC16(b3, + W3, col[8*5]);
00275 }
00276
00277 if(col[8*6]){
00278 a0 += + W6*col[8*6];
00279 a1 += - W2*col[8*6];
00280 a2 += + W2*col[8*6];
00281 a3 += - W6*col[8*6];
00282 }
00283
00284 if (col[8*7]) {
00285 MAC16(b0, + W7, col[8*7]);
00286 MAC16(b1, - W5, col[8*7]);
00287 MAC16(b2, + W3, col[8*7]);
00288 MAC16(b3, - W1, col[8*7]);
00289 }
00290
00291 dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
00292 dest += line_size;
00293 dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)];
00294 dest += line_size;
00295 dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)];
00296 dest += line_size;
00297 dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)];
00298 dest += line_size;
00299 dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)];
00300 dest += line_size;
00301 dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)];
00302 dest += line_size;
00303 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];
00304 dest += line_size;
00305 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
00306 }
00307
00308 static inline void idctSparseCol (DCTELEM * col)
00309 {
00310 int a0, a1, a2, a3, b0, b1, b2, b3;
00311
00312
00313 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
00314 a1 = a0;
00315 a2 = a0;
00316 a3 = a0;
00317
00318 a0 += + W2*col[8*2];
00319 a1 += + W6*col[8*2];
00320 a2 += - W6*col[8*2];
00321 a3 += - W2*col[8*2];
00322
00323 b0 = MUL16(W1, col[8*1]);
00324 b1 = MUL16(W3, col[8*1]);
00325 b2 = MUL16(W5, col[8*1]);
00326 b3 = MUL16(W7, col[8*1]);
00327
00328 MAC16(b0, + W3, col[8*3]);
00329 MAC16(b1, - W7, col[8*3]);
00330 MAC16(b2, - W1, col[8*3]);
00331 MAC16(b3, - W5, col[8*3]);
00332
00333 if(col[8*4]){
00334 a0 += + W4*col[8*4];
00335 a1 += - W4*col[8*4];
00336 a2 += - W4*col[8*4];
00337 a3 += + W4*col[8*4];
00338 }
00339
00340 if (col[8*5]) {
00341 MAC16(b0, + W5, col[8*5]);
00342 MAC16(b1, - W1, col[8*5]);
00343 MAC16(b2, + W7, col[8*5]);
00344 MAC16(b3, + W3, col[8*5]);
00345 }
00346
00347 if(col[8*6]){
00348 a0 += + W6*col[8*6];
00349 a1 += - W2*col[8*6];
00350 a2 += + W2*col[8*6];
00351 a3 += - W6*col[8*6];
00352 }
00353
00354 if (col[8*7]) {
00355 MAC16(b0, + W7, col[8*7]);
00356 MAC16(b1, - W5, col[8*7]);
00357 MAC16(b2, + W3, col[8*7]);
00358 MAC16(b3, - W1, col[8*7]);
00359 }
00360
00361 col[0 ] = ((a0 + b0) >> COL_SHIFT);
00362 col[8 ] = ((a1 + b1) >> COL_SHIFT);
00363 col[16] = ((a2 + b2) >> COL_SHIFT);
00364 col[24] = ((a3 + b3) >> COL_SHIFT);
00365 col[32] = ((a3 - b3) >> COL_SHIFT);
00366 col[40] = ((a2 - b2) >> COL_SHIFT);
00367 col[48] = ((a1 - b1) >> COL_SHIFT);
00368 col[56] = ((a0 - b0) >> COL_SHIFT);
00369 }
00370
00371 void ff_simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
00372 {
00373 int i;
00374 for(i=0; i<8; i++)
00375 idctRowCondDC(block + i*8);
00376
00377 for(i=0; i<8; i++)
00378 idctSparseColPut(dest + i, line_size, block + i);
00379 }
00380
00381 void ff_simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
00382 {
00383 int i;
00384 for(i=0; i<8; i++)
00385 idctRowCondDC(block + i*8);
00386
00387 for(i=0; i<8; i++)
00388 idctSparseColAdd(dest + i, line_size, block + i);
00389 }
00390
00391 void ff_simple_idct(DCTELEM *block)
00392 {
00393 int i;
00394 for(i=0; i<8; i++)
00395 idctRowCondDC(block + i*8);
00396
00397 for(i=0; i<8; i++)
00398 idctSparseCol(block + i);
00399 }
00400
00401
00402
00403 #define CN_SHIFT 12
00404 #define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
00405 #define C1 C_FIX(0.6532814824)
00406 #define C2 C_FIX(0.2705980501)
00407
00408
00409
00410 #define C_SHIFT (4+1+12)
00411
00412 static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col)
00413 {
00414 int c0, c1, c2, c3, a0, a1, a2, a3;
00415 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00416
00417 a0 = col[8*0];
00418 a1 = col[8*2];
00419 a2 = col[8*4];
00420 a3 = col[8*6];
00421 c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
00422 c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
00423 c1 = a1 * C1 + a3 * C2;
00424 c3 = a1 * C2 - a3 * C1;
00425 dest[0] = cm[(c0 + c1) >> C_SHIFT];
00426 dest += line_size;
00427 dest[0] = cm[(c2 + c3) >> C_SHIFT];
00428 dest += line_size;
00429 dest[0] = cm[(c2 - c3) >> C_SHIFT];
00430 dest += line_size;
00431 dest[0] = cm[(c0 - c1) >> C_SHIFT];
00432 }
00433
00434 #define BF(k) \
00435 {\
00436 int a0, a1;\
00437 a0 = ptr[k];\
00438 a1 = ptr[8 + k];\
00439 ptr[k] = a0 + a1;\
00440 ptr[8 + k] = a0 - a1;\
00441 }
00442
00443
00444
00445
00446
00447
00448
00449 void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
00450 {
00451 int i;
00452 DCTELEM *ptr;
00453
00454
00455 ptr = block;
00456 for(i=0;i<4;i++) {
00457 BF(0);
00458 BF(1);
00459 BF(2);
00460 BF(3);
00461 BF(4);
00462 BF(5);
00463 BF(6);
00464 BF(7);
00465 ptr += 2 * 8;
00466 }
00467
00468
00469 for(i=0; i<8; i++) {
00470 idctRowCondDC(block + i*8);
00471 }
00472
00473
00474 for(i=0;i<8;i++) {
00475 idct4col_put(dest + i, 2 * line_size, block + i);
00476 idct4col_put(dest + line_size + i, 2 * line_size, block + 8 + i);
00477 }
00478 }
00479
00480
00481 #undef CN_SHIFT
00482 #undef C_SHIFT
00483 #undef C_FIX
00484 #undef C1
00485 #undef C2
00486 #define CN_SHIFT 12
00487 #define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
00488 #define C1 C_FIX(0.6532814824)
00489 #define C2 C_FIX(0.2705980501)
00490 #define C3 C_FIX(0.5)
00491 #define C_SHIFT (4+1+12)
00492 static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col)
00493 {
00494 int c0, c1, c2, c3, a0, a1, a2, a3;
00495 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00496
00497 a0 = col[8*0];
00498 a1 = col[8*1];
00499 a2 = col[8*2];
00500 a3 = col[8*3];
00501 c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1));
00502 c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
00503 c1 = a1 * C1 + a3 * C2;
00504 c3 = a1 * C2 - a3 * C1;
00505 dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)];
00506 dest += line_size;
00507 dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)];
00508 dest += line_size;
00509 dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)];
00510 dest += line_size;
00511 dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];
00512 }
00513
00514 #define RN_SHIFT 15
00515 #define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
00516 #define R1 R_FIX(0.6532814824)
00517 #define R2 R_FIX(0.2705980501)
00518 #define R3 R_FIX(0.5)
00519 #define R_SHIFT 11
00520 static inline void idct4row(DCTELEM *row)
00521 {
00522 int c0, c1, c2, c3, a0, a1, a2, a3;
00523
00524
00525 a0 = row[0];
00526 a1 = row[1];
00527 a2 = row[2];
00528 a3 = row[3];
00529 c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1));
00530 c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));
00531 c1 = a1 * R1 + a3 * R2;
00532 c3 = a1 * R2 - a3 * R1;
00533 row[0]= (c0 + c1) >> R_SHIFT;
00534 row[1]= (c2 + c3) >> R_SHIFT;
00535 row[2]= (c2 - c3) >> R_SHIFT;
00536 row[3]= (c0 - c1) >> R_SHIFT;
00537 }
00538
00539 void ff_simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block)
00540 {
00541 int i;
00542
00543
00544 for(i=0; i<4; i++) {
00545 idctRowCondDC(block + i*8);
00546 }
00547
00548
00549 for(i=0;i<8;i++) {
00550 idct4col_add(dest + i, line_size, block + i);
00551 }
00552 }
00553
00554 void ff_simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block)
00555 {
00556 int i;
00557
00558
00559 for(i=0; i<8; i++) {
00560 idct4row(block + i*8);
00561 }
00562
00563
00564 for(i=0; i<4; i++){
00565 idctSparseColAdd(dest + i, line_size, block + i);
00566 }
00567 }
00568
00569 void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block)
00570 {
00571 int i;
00572
00573
00574 for(i=0; i<4; i++) {
00575 idct4row(block + i*8);
00576 }
00577
00578
00579 for(i=0; i<4; i++){
00580 idct4col_add(dest + i, line_size, block + i);
00581 }
00582 }