00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #include "mathops.h"
00029
00030 #include "bit_depth_template.c"
00031
00032 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
00033 pixel *src = (pixel*)_src;
00034 int stride = _stride>>(sizeof(pixel)-1);
00035 const pixel4 a= AV_RN4PA(src-stride);
00036
00037 AV_WN4PA(src+0*stride, a);
00038 AV_WN4PA(src+1*stride, a);
00039 AV_WN4PA(src+2*stride, a);
00040 AV_WN4PA(src+3*stride, a);
00041 }
00042
00043 static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, int _stride){
00044 pixel *src = (pixel*)_src;
00045 int stride = _stride>>(sizeof(pixel)-1);
00046 AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
00047 AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride]));
00048 AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride]));
00049 AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));
00050 }
00051
00052 static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
00053 pixel *src = (pixel*)_src;
00054 int stride = _stride>>(sizeof(pixel)-1);
00055 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
00056 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
00057 const pixel4 a = PIXEL_SPLAT_X4(dc);
00058
00059 AV_WN4PA(src+0*stride, a);
00060 AV_WN4PA(src+1*stride, a);
00061 AV_WN4PA(src+2*stride, a);
00062 AV_WN4PA(src+3*stride, a);
00063 }
00064
00065 static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
00066 pixel *src = (pixel*)_src;
00067 int stride = _stride>>(sizeof(pixel)-1);
00068 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
00069 const pixel4 a = PIXEL_SPLAT_X4(dc);
00070
00071 AV_WN4PA(src+0*stride, a);
00072 AV_WN4PA(src+1*stride, a);
00073 AV_WN4PA(src+2*stride, a);
00074 AV_WN4PA(src+3*stride, a);
00075 }
00076
00077 static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
00078 pixel *src = (pixel*)_src;
00079 int stride = _stride>>(sizeof(pixel)-1);
00080 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
00081 const pixel4 a = PIXEL_SPLAT_X4(dc);
00082
00083 AV_WN4PA(src+0*stride, a);
00084 AV_WN4PA(src+1*stride, a);
00085 AV_WN4PA(src+2*stride, a);
00086 AV_WN4PA(src+3*stride, a);
00087 }
00088
00089 static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
00090 pixel *src = (pixel*)_src;
00091 int stride = _stride>>(sizeof(pixel)-1);
00092 const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
00093
00094 AV_WN4PA(src+0*stride, a);
00095 AV_WN4PA(src+1*stride, a);
00096 AV_WN4PA(src+2*stride, a);
00097 AV_WN4PA(src+3*stride, a);
00098 }
00099
00100 static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
00101 pixel *src = (pixel*)_src;
00102 int stride = _stride>>(sizeof(pixel)-1);
00103 const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
00104
00105 AV_WN4PA(src+0*stride, a);
00106 AV_WN4PA(src+1*stride, a);
00107 AV_WN4PA(src+2*stride, a);
00108 AV_WN4PA(src+3*stride, a);
00109 }
00110
00111 static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
00112 pixel *src = (pixel*)_src;
00113 int stride = _stride>>(sizeof(pixel)-1);
00114 const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
00115
00116 AV_WN4PA(src+0*stride, a);
00117 AV_WN4PA(src+1*stride, a);
00118 AV_WN4PA(src+2*stride, a);
00119 AV_WN4PA(src+3*stride, a);
00120 }
00121
00122
/*
 * Neighbour-loading helpers for the 4x4 predictors.  Each macro declares
 * four read-only locals (tagged av_unused so a predictor may ignore some)
 * and expects `src` and `stride` -- or `topright` for the first one -- to
 * be in scope at the point of expansion.
 */

/* t4..t7: the four pixels pointed to by `topright`. */
#define LOAD_TOP_RIGHT_EDGE \
    const unsigned av_unused t4 = topright[0]; \
    const unsigned av_unused t5 = topright[1]; \
    const unsigned av_unused t6 = topright[2]; \
    const unsigned av_unused t7 = topright[3];

/* l4..l7: the left-column pixels of rows 4..7 (below the 4x4 block). */
#define LOAD_DOWN_LEFT_EDGE \
    const unsigned av_unused l4 = src[4*stride - 1]; \
    const unsigned av_unused l5 = src[5*stride - 1]; \
    const unsigned av_unused l6 = src[6*stride - 1]; \
    const unsigned av_unused l7 = src[7*stride - 1];

/* l0..l3: the left-column pixels of rows 0..3. */
#define LOAD_LEFT_EDGE \
    const unsigned av_unused l0 = src[0*stride - 1]; \
    const unsigned av_unused l1 = src[1*stride - 1]; \
    const unsigned av_unused l2 = src[2*stride - 1]; \
    const unsigned av_unused l3 = src[3*stride - 1];

/* t0..t3: the four pixels of the row directly above the block. */
#define LOAD_TOP_EDGE \
    const unsigned av_unused t0 = src[0 - stride]; \
    const unsigned av_unused t1 = src[1 - stride]; \
    const unsigned av_unused t2 = src[2 - stride]; \
    const unsigned av_unused t3 = src[3 - stride];
00146
00147 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, int _stride){
00148 pixel *src = (pixel*)_src;
00149 int stride = _stride>>(sizeof(pixel)-1);
00150 const int lt= src[-1-1*stride];
00151 LOAD_TOP_EDGE
00152 LOAD_LEFT_EDGE
00153
00154 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
00155 src[0+2*stride]=
00156 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
00157 src[0+1*stride]=
00158 src[1+2*stride]=
00159 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
00160 src[0+0*stride]=
00161 src[1+1*stride]=
00162 src[2+2*stride]=
00163 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
00164 src[1+0*stride]=
00165 src[2+1*stride]=
00166 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
00167 src[2+0*stride]=
00168 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
00169 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
00170 }
00171
00172 static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
00173 pixel *src = (pixel*)_src;
00174 const pixel *topright = (const pixel*)_topright;
00175 int stride = _stride>>(sizeof(pixel)-1);
00176 LOAD_TOP_EDGE
00177 LOAD_TOP_RIGHT_EDGE
00178
00179
00180 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
00181 src[1+0*stride]=
00182 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
00183 src[2+0*stride]=
00184 src[1+1*stride]=
00185 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
00186 src[3+0*stride]=
00187 src[2+1*stride]=
00188 src[1+2*stride]=
00189 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
00190 src[3+1*stride]=
00191 src[2+2*stride]=
00192 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
00193 src[3+2*stride]=
00194 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
00195 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
00196 }
00197
00198 static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright, int _stride){
00199 pixel *src = (pixel*)_src;
00200 int stride = _stride>>(sizeof(pixel)-1);
00201 const int lt= src[-1-1*stride];
00202 LOAD_TOP_EDGE
00203 LOAD_LEFT_EDGE
00204
00205 src[0+0*stride]=
00206 src[1+2*stride]=(lt + t0 + 1)>>1;
00207 src[1+0*stride]=
00208 src[2+2*stride]=(t0 + t1 + 1)>>1;
00209 src[2+0*stride]=
00210 src[3+2*stride]=(t1 + t2 + 1)>>1;
00211 src[3+0*stride]=(t2 + t3 + 1)>>1;
00212 src[0+1*stride]=
00213 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
00214 src[1+1*stride]=
00215 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
00216 src[2+1*stride]=
00217 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
00218 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
00219 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
00220 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
00221 }
00222
00223 static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
00224 pixel *src = (pixel*)_src;
00225 const pixel *topright = (const pixel*)_topright;
00226 int stride = _stride>>(sizeof(pixel)-1);
00227 LOAD_TOP_EDGE
00228 LOAD_TOP_RIGHT_EDGE
00229
00230 src[0+0*stride]=(t0 + t1 + 1)>>1;
00231 src[1+0*stride]=
00232 src[0+2*stride]=(t1 + t2 + 1)>>1;
00233 src[2+0*stride]=
00234 src[1+2*stride]=(t2 + t3 + 1)>>1;
00235 src[3+0*stride]=
00236 src[2+2*stride]=(t3 + t4+ 1)>>1;
00237 src[3+2*stride]=(t4 + t5+ 1)>>1;
00238 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
00239 src[1+1*stride]=
00240 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
00241 src[2+1*stride]=
00242 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
00243 src[3+1*stride]=
00244 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
00245 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
00246 }
00247
00248 static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, int _stride){
00249 pixel *src = (pixel*)_src;
00250 int stride = _stride>>(sizeof(pixel)-1);
00251 LOAD_LEFT_EDGE
00252
00253 src[0+0*stride]=(l0 + l1 + 1)>>1;
00254 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
00255 src[2+0*stride]=
00256 src[0+1*stride]=(l1 + l2 + 1)>>1;
00257 src[3+0*stride]=
00258 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
00259 src[2+1*stride]=
00260 src[0+2*stride]=(l2 + l3 + 1)>>1;
00261 src[3+1*stride]=
00262 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
00263 src[3+2*stride]=
00264 src[1+3*stride]=
00265 src[0+3*stride]=
00266 src[2+2*stride]=
00267 src[2+3*stride]=
00268 src[3+3*stride]=l3;
00269 }
00270
00271 static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *topright, int _stride){
00272 pixel *src = (pixel*)_src;
00273 int stride = _stride>>(sizeof(pixel)-1);
00274 const int lt= src[-1-1*stride];
00275 LOAD_TOP_EDGE
00276 LOAD_LEFT_EDGE
00277
00278 src[0+0*stride]=
00279 src[2+1*stride]=(lt + l0 + 1)>>1;
00280 src[1+0*stride]=
00281 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
00282 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
00283 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
00284 src[0+1*stride]=
00285 src[2+2*stride]=(l0 + l1 + 1)>>1;
00286 src[1+1*stride]=
00287 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
00288 src[0+2*stride]=
00289 src[2+3*stride]=(l1 + l2+ 1)>>1;
00290 src[1+2*stride]=
00291 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
00292 src[0+3*stride]=(l2 + l3 + 1)>>1;
00293 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
00294 }
00295
00296 static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){
00297 int i;
00298 pixel *src = (pixel*)_src;
00299 int stride = _stride>>(sizeof(pixel)-1);
00300 const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
00301 const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
00302 const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
00303 const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);
00304
00305 for(i=0; i<16; i++){
00306 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
00307 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
00308 AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
00309 AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
00310 }
00311 }
00312
00313 static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){
00314 int i;
00315 pixel *src = (pixel*)_src;
00316 stride >>= sizeof(pixel)-1;
00317
00318 for(i=0; i<16; i++){
00319 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
00320
00321 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
00322 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
00323 AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
00324 AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
00325 }
00326 }
00327
/*
 * Fill a 16x16 block with the packed value `v` (four pixels per store).
 *
 * Expects `i` (int), `src` (pixel pointer) and `stride` (pitch in pixels)
 * to be in scope; `src` is advanced by 16 rows as a side effect.
 *
 * The body is wrapped in do { } while (0) so that the macro followed by a
 * semicolon forms exactly one statement (safe inside an unbraced if/else),
 * and the argument is parenthesized at each use.
 */
#define PREDICT_16x16_DC(v)\
    do {\
        for(i=0; i<16; i++){\
            AV_WN4PA(src+ 0, (v));\
            AV_WN4PA(src+ 4, (v));\
            AV_WN4PA(src+ 8, (v));\
            AV_WN4PA(src+12, (v));\
            src += stride;\
        }\
    } while (0)
00336
00337 static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){
00338 int i, dc=0;
00339 pixel *src = (pixel*)_src;
00340 pixel4 dcsplat;
00341 stride >>= sizeof(pixel)-1;
00342
00343 for(i=0;i<16; i++){
00344 dc+= src[-1+i*stride];
00345 }
00346
00347 for(i=0;i<16; i++){
00348 dc+= src[i-stride];
00349 }
00350
00351 dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
00352 PREDICT_16x16_DC(dcsplat);
00353 }
00354
00355 static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){
00356 int i, dc=0;
00357 pixel *src = (pixel*)_src;
00358 pixel4 dcsplat;
00359 stride >>= sizeof(pixel)-1;
00360
00361 for(i=0;i<16; i++){
00362 dc+= src[-1+i*stride];
00363 }
00364
00365 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
00366 PREDICT_16x16_DC(dcsplat);
00367 }
00368
00369 static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){
00370 int i, dc=0;
00371 pixel *src = (pixel*)_src;
00372 pixel4 dcsplat;
00373 stride >>= sizeof(pixel)-1;
00374
00375 for(i=0;i<16; i++){
00376 dc+= src[i-stride];
00377 }
00378
00379 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
00380 PREDICT_16x16_DC(dcsplat);
00381 }
00382
00383 #define PRED16x16_X(n, v) \
00384 static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, int stride){\
00385 int i;\
00386 pixel *src = (pixel*)_src;\
00387 stride >>= sizeof(pixel)-1;\
00388 PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
00389 }
00390
00391 PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
00392 PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
00393 PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
00394
00395 static inline void FUNCC(pred16x16_plane_compat)(uint8_t *p_src, int p_stride, const int svq3, const int rv40){
00396 int i, j, k;
00397 int a;
00398 INIT_CLIP
00399 pixel *src = (pixel*)p_src;
00400 int stride = p_stride>>(sizeof(pixel)-1);
00401 const pixel * const src0 = src +7-stride;
00402 const pixel * src1 = src +8*stride-1;
00403 const pixel * src2 = src1-2*stride;
00404 int H = src0[1] - src0[-1];
00405 int V = src1[0] - src2[ 0];
00406 for(k=2; k<=8; ++k) {
00407 src1 += stride; src2 -= stride;
00408 H += k*(src0[k] - src0[-k]);
00409 V += k*(src1[0] - src2[ 0]);
00410 }
00411 if(svq3){
00412 H = ( 5*(H/4) ) / 16;
00413 V = ( 5*(V/4) ) / 16;
00414
00415
00416 i = H; H = V; V = i;
00417 }else if(rv40){
00418 H = ( H + (H>>2) ) >> 4;
00419 V = ( V + (V>>2) ) >> 4;
00420 }else{
00421 H = ( 5*H+32 ) >> 6;
00422 V = ( 5*V+32 ) >> 6;
00423 }
00424
00425 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
00426 for(j=16; j>0; --j) {
00427 int b = a;
00428 a += V;
00429 for(i=-16; i<0; i+=4) {
00430 src[16+i] = CLIP((b ) >> 5);
00431 src[17+i] = CLIP((b+ H) >> 5);
00432 src[18+i] = CLIP((b+2*H) >> 5);
00433 src[19+i] = CLIP((b+3*H) >> 5);
00434 b += 4*H;
00435 }
00436 src += stride;
00437 }
00438 }
00439
00440 static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){
00441 FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
00442 }
00443
00444 static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
00445 int i;
00446 pixel *src = (pixel*)_src;
00447 int stride = _stride>>(sizeof(pixel)-1);
00448 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
00449 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
00450
00451 for(i=0; i<8; i++){
00452 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
00453 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
00454 }
00455 }
00456
00457 static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){
00458 int i;
00459 pixel *src = (pixel*)_src;
00460 int stride = _stride>>(sizeof(pixel)-1);
00461 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
00462 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
00463
00464 for(i=0; i<16; i++){
00465 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
00466 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
00467 }
00468 }
00469
00470 static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
00471 int i;
00472 pixel *src = (pixel*)_src;
00473 stride >>= sizeof(pixel)-1;
00474
00475 for(i=0; i<8; i++){
00476 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
00477 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
00478 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
00479 }
00480 }
00481
00482 static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){
00483 int i;
00484 pixel *src = (pixel*)_src;
00485 stride >>= sizeof(pixel)-1;
00486 for(i=0; i<16; i++){
00487 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
00488 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
00489 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
00490 }
00491 }
00492
00493 #define PRED8x8_X(n, v)\
00494 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\
00495 int i;\
00496 const pixel4 a = PIXEL_SPLAT_X4(v);\
00497 pixel *src = (pixel*)_src;\
00498 stride >>= sizeof(pixel)-1;\
00499 for(i=0; i<8; i++){\
00500 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
00501 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
00502 }\
00503 }
00504
00505 PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
00506 PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
00507 PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
00508
00509 static void FUNCC(pred8x16_128_dc)(uint8_t *_src, int stride){
00510 FUNCC(pred8x8_128_dc)(_src, stride);
00511 FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
00512 }
00513
00514 static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
00515 int i;
00516 int dc0, dc2;
00517 pixel4 dc0splat, dc2splat;
00518 pixel *src = (pixel*)_src;
00519 stride >>= sizeof(pixel)-1;
00520
00521 dc0=dc2=0;
00522 for(i=0;i<4; i++){
00523 dc0+= src[-1+i*stride];
00524 dc2+= src[-1+(i+4)*stride];
00525 }
00526 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
00527 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
00528
00529 for(i=0; i<4; i++){
00530 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
00531 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
00532 }
00533 for(i=4; i<8; i++){
00534 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
00535 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
00536 }
00537 }
00538
00539 static void FUNCC(pred8x16_left_dc)(uint8_t *_src, int stride){
00540 FUNCC(pred8x8_left_dc)(_src, stride);
00541 FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
00542 }
00543
00544 static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
00545 int i;
00546 int dc0, dc1;
00547 pixel4 dc0splat, dc1splat;
00548 pixel *src = (pixel*)_src;
00549 stride >>= sizeof(pixel)-1;
00550
00551 dc0=dc1=0;
00552 for(i=0;i<4; i++){
00553 dc0+= src[i-stride];
00554 dc1+= src[4+i-stride];
00555 }
00556 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
00557 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
00558
00559 for(i=0; i<4; i++){
00560 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
00561 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
00562 }
00563 for(i=4; i<8; i++){
00564 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
00565 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
00566 }
00567 }
00568
00569 static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){
00570 int i;
00571 int dc0, dc1;
00572 pixel4 dc0splat, dc1splat;
00573 pixel *src = (pixel*)_src;
00574 stride >>= sizeof(pixel)-1;
00575
00576 dc0=dc1=0;
00577 for(i=0;i<4; i++){
00578 dc0+= src[i-stride];
00579 dc1+= src[4+i-stride];
00580 }
00581 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
00582 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
00583
00584 for(i=0; i<16; i++){
00585 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
00586 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
00587 }
00588 }
00589
00590 static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
00591 int i;
00592 int dc0, dc1, dc2;
00593 pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
00594 pixel *src = (pixel*)_src;
00595 stride >>= sizeof(pixel)-1;
00596
00597 dc0=dc1=dc2=0;
00598 for(i=0;i<4; i++){
00599 dc0+= src[-1+i*stride] + src[i-stride];
00600 dc1+= src[4+i-stride];
00601 dc2+= src[-1+(i+4)*stride];
00602 }
00603 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
00604 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
00605 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
00606 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
00607
00608 for(i=0; i<4; i++){
00609 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
00610 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
00611 }
00612 for(i=4; i<8; i++){
00613 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
00614 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
00615 }
00616 }
00617
00618 static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){
00619 int i;
00620 int dc0, dc1, dc2, dc3, dc4;
00621 pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
00622 pixel *src = (pixel*)_src;
00623 stride >>= sizeof(pixel)-1;
00624
00625 dc0=dc1=dc2=dc3=dc4=0;
00626 for(i=0;i<4; i++){
00627 dc0+= src[-1+i*stride] + src[i-stride];
00628 dc1+= src[4+i-stride];
00629 dc2+= src[-1+(i+4)*stride];
00630 dc3+= src[-1+(i+8)*stride];
00631 dc4+= src[-1+(i+12)*stride];
00632 }
00633 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
00634 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
00635 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
00636 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
00637 dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
00638 dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
00639 dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
00640 dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);
00641
00642 for(i=0; i<4; i++){
00643 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
00644 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
00645 }
00646 for(i=4; i<8; i++){
00647 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
00648 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
00649 }
00650 for(i=8; i<12; i++){
00651 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
00652 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
00653 }
00654 for(i=12; i<16; i++){
00655 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
00656 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
00657 }
00658 }
00659
00660
00661 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
00662 FUNCC(pred8x8_top_dc)(src, stride);
00663 FUNCC(pred4x4_dc)(src, NULL, stride);
00664 }
00665
00666 static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, int stride){
00667 FUNCC(pred8x16_top_dc)(src, stride);
00668 FUNCC(pred4x4_dc)(src, NULL, stride);
00669 }
00670
00671 static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
00672 FUNCC(pred8x8_dc)(src, stride);
00673 FUNCC(pred4x4_top_dc)(src, NULL, stride);
00674 }
00675
00676 static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, int stride){
00677 FUNCC(pred8x16_dc)(src, stride);
00678 FUNCC(pred4x4_top_dc)(src, NULL, stride);
00679 }
00680
00681 static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
00682 FUNCC(pred8x8_left_dc)(src, stride);
00683 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
00684 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
00685 }
00686
00687 static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, int stride){
00688 FUNCC(pred8x16_left_dc)(src, stride);
00689 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
00690 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
00691 }
00692
00693 static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
00694 FUNCC(pred8x8_left_dc)(src, stride);
00695 FUNCC(pred4x4_128_dc)(src , NULL, stride);
00696 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
00697 }
00698
00699 static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, int stride){
00700 FUNCC(pred8x16_left_dc)(src, stride);
00701 FUNCC(pred4x4_128_dc)(src , NULL, stride);
00702 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
00703 }
00704
00705 static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
00706 int j, k;
00707 int a;
00708 INIT_CLIP
00709 pixel *src = (pixel*)_src;
00710 int stride = _stride>>(sizeof(pixel)-1);
00711 const pixel * const src0 = src +3-stride;
00712 const pixel * src1 = src +4*stride-1;
00713 const pixel * src2 = src1-2*stride;
00714 int H = src0[1] - src0[-1];
00715 int V = src1[0] - src2[ 0];
00716 for(k=2; k<=4; ++k) {
00717 src1 += stride; src2 -= stride;
00718 H += k*(src0[k] - src0[-k]);
00719 V += k*(src1[0] - src2[ 0]);
00720 }
00721 H = ( 17*H+16 ) >> 5;
00722 V = ( 17*V+16 ) >> 5;
00723
00724 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
00725 for(j=8; j>0; --j) {
00726 int b = a;
00727 a += V;
00728 src[0] = CLIP((b ) >> 5);
00729 src[1] = CLIP((b+ H) >> 5);
00730 src[2] = CLIP((b+2*H) >> 5);
00731 src[3] = CLIP((b+3*H) >> 5);
00732 src[4] = CLIP((b+4*H) >> 5);
00733 src[5] = CLIP((b+5*H) >> 5);
00734 src[6] = CLIP((b+6*H) >> 5);
00735 src[7] = CLIP((b+7*H) >> 5);
00736 src += stride;
00737 }
00738 }
00739
00740 static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){
00741 int j, k;
00742 int a;
00743 INIT_CLIP
00744 pixel *src = (pixel*)_src;
00745 int stride = _stride>>(sizeof(pixel)-1);
00746 const pixel * const src0 = src +3-stride;
00747 const pixel * src1 = src +8*stride-1;
00748 const pixel * src2 = src1-2*stride;
00749 int H = src0[1] - src0[-1];
00750 int V = src1[0] - src2[ 0];
00751
00752 for (k = 2; k <= 4; ++k) {
00753 src1 += stride; src2 -= stride;
00754 H += k*(src0[k] - src0[-k]);
00755 V += k*(src1[0] - src2[ 0]);
00756 }
00757 for (; k <= 8; ++k) {
00758 src1 += stride; src2 -= stride;
00759 V += k*(src1[0] - src2[0]);
00760 }
00761
00762 H = (17*H+16) >> 5;
00763 V = (5*V+32) >> 6;
00764
00765 a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
00766 for(j=16; j>0; --j) {
00767 int b = a;
00768 a += V;
00769 src[0] = CLIP((b ) >> 5);
00770 src[1] = CLIP((b+ H) >> 5);
00771 src[2] = CLIP((b+2*H) >> 5);
00772 src[3] = CLIP((b+3*H) >> 5);
00773 src[4] = CLIP((b+4*H) >> 5);
00774 src[5] = CLIP((b+5*H) >> 5);
00775 src[6] = CLIP((b+6*H) >> 5);
00776 src[7] = CLIP((b+7*H) >> 5);
00777 src += stride;
00778 }
00779 }
00780
/* Pixel at column x, row y relative to the block origin (`src`, `stride`
 * must be in scope). */
#define SRC(x,y) src[(y)*stride + (x)]

/* l<y>: (1,2,1)-filtered left neighbour of row y. */
#define PL(y) \
    const int l##y = (SRC(-1,y-1) + SRC(-1,y+1) + 2*SRC(-1,y) + 2) >> 2;

/*
 * Declares l0..l7, the filtered left column.  l0 falls back to SRC(-1,0)
 * in place of the corner when `has_topleft` is zero; l7 has no neighbour
 * below and weights SRC(-1,7) three times.  No trailing semicolon: the
 * user supplies it.
 */
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                    + SRC(-1,1) + 2*SRC(-1,0) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 av_unused = (3*SRC(-1,7) + SRC(-1,6) + 2) >> 2
00789
/* t<x>: (1,2,1)-filtered top neighbour of column x. */
#define PT(x) \
    const int t##x = (SRC(x-1,-1) + SRC(x+1,-1) + 2*SRC(x,-1) + 2) >> 2;

/*
 * Declares t0..t7, the filtered top row.  t0 falls back to SRC(0,-1) in
 * place of the corner when `has_topleft` is zero, and t7 falls back to
 * SRC(7,-1) in place of SRC(8,-1) when `has_topright` is zero.  No
 * trailing semicolon: the user supplies it.
 */
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                    + SRC(1,-1) + 2*SRC(0,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                              + SRC(6,-1) + 2*SRC(7,-1) + 2) >> 2
00798
/* Assigns t<x>: (1,2,1)-filtered top neighbour of column x (x >= 8). */
#define PTR(x) \
    t##x = (SRC(x-1,-1) + SRC(x+1,-1) + 2*SRC(x,-1) + 2) >> 2;

/*
 * Declares t8..t15, the filtered top-right row.  When `has_topright` is
 * zero every one of them is set to the unfiltered SRC(7,-1) instead; t15
 * has no neighbour to its right and weights SRC(15,-1) three times.
 */
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (3*SRC(15,-1) + SRC(14,-1) + 2) >> 2; \
    } else \
        t8 = t9 = t10 = t11 = t12 = t13 = t14 = t15 = SRC(7,-1);
00807
/*
 * Declares lt: the top-left corner pixel, (1,2,1)-filtered with the pixel
 * below it and the pixel to its right.  No trailing semicolon: the user
 * supplies it.
 */
#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (2*SRC(-1,-1) + SRC(-1,0) + SRC(0,-1) + 2) >> 2
00810
/*
 * Fill an 8x8 block with the packed value `v` (four pixels per store).
 *
 * Expects `src` (pixel pointer) and `stride` (pitch in pixels) to be in
 * scope; `src` is advanced by 8 rows as a side effect.
 *
 * The body is wrapped in do { } while (0) so that the macro followed by a
 * semicolon forms exactly one statement (safe inside an unbraced if/else)
 * and the row counter stays private to the macro instead of being declared
 * in the caller's scope.  The argument is parenthesized at each use.
 */
#define PREDICT_8x8_DC(v) \
    do { \
        int y; \
        for( y = 0; y < 8; y++ ) { \
            AV_WN4PA(((pixel4*)src)+0, (v)); \
            AV_WN4PA(((pixel4*)src)+1, (v)); \
            src += stride; \
        } \
    } while (0)
00818
00819 static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00820 {
00821 pixel *src = (pixel*)_src;
00822 int stride = _stride>>(sizeof(pixel)-1);
00823
00824 PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
00825 }
00826 static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00827 {
00828 pixel *src = (pixel*)_src;
00829 int stride = _stride>>(sizeof(pixel)-1);
00830
00831 PREDICT_8x8_LOAD_LEFT;
00832 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
00833 PREDICT_8x8_DC(dc);
00834 }
00835 static void FUNCC(pred8x8l_top_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
00836 {
00837 pixel *src = (pixel*)p_src;
00838 int stride = p_stride>>(sizeof(pixel)-1);
00839
00840 PREDICT_8x8_LOAD_TOP;
00841 const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
00842 PREDICT_8x8_DC(dc);
00843 }
00844 static void FUNCC(pred8x8l_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
00845 {
00846 pixel *src = (pixel*)p_src;
00847 int stride = p_stride>>(sizeof(pixel)-1);
00848
00849 PREDICT_8x8_LOAD_LEFT;
00850 PREDICT_8x8_LOAD_TOP;
00851 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
00852 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
00853 PREDICT_8x8_DC(dc);
00854 }
00855 static void FUNCC(pred8x8l_horizontal)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
00856 {
00857 pixel *src = (pixel*)p_src;
00858 int stride = p_stride>>(sizeof(pixel)-1);
00859 pixel4 a;
00860
00861 PREDICT_8x8_LOAD_LEFT;
00862 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
00863 AV_WN4PA(src+y*stride, a); \
00864 AV_WN4PA(src+y*stride+4, a);
00865 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
00866 #undef ROW
00867 }
00868 static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00869 {
00870 int y;
00871 pixel *src = (pixel*)_src;
00872 int stride = _stride>>(sizeof(pixel)-1);
00873 pixel4 a, b;
00874
00875 PREDICT_8x8_LOAD_TOP;
00876 src[0] = t0;
00877 src[1] = t1;
00878 src[2] = t2;
00879 src[3] = t3;
00880 src[4] = t4;
00881 src[5] = t5;
00882 src[6] = t6;
00883 src[7] = t7;
00884 a = AV_RN4PA(((pixel4*)src)+0);
00885 b = AV_RN4PA(((pixel4*)src)+1);
00886 for( y = 1; y < 8; y++ ) {
00887 AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
00888 AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
00889 }
00890 }
00891 static void FUNCC(pred8x8l_down_left)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
00892 {
00893 pixel *src = (pixel*)p_src;
00894 int stride = p_stride>>(sizeof(pixel)-1);
00895 PREDICT_8x8_LOAD_TOP;
00896 PREDICT_8x8_LOAD_TOPRIGHT;
00897 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
00898 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
00899 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
00900 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
00901 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
00902 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
00903 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
00904 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
00905 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
00906 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
00907 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
00908 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
00909 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
00910 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
00911 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
00912 }
00913 static void FUNCC(pred8x8l_down_right)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
00914 {
00915 pixel *src = (pixel*)p_src;
00916 int stride = p_stride>>(sizeof(pixel)-1);
00917 PREDICT_8x8_LOAD_TOP;
00918 PREDICT_8x8_LOAD_LEFT;
00919 PREDICT_8x8_LOAD_TOPLEFT;
00920 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
00921 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
00922 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
00923 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
00924 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
00925 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
00926 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
00927 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
00928 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
00929 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
00930 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
00931 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
00932 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
00933 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
00934 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
00935 }
/**
 * 8x8 luma intra prediction, "vertical right" variant.
 *
 * Writes all 64 SRC(a,b) positions (a, b in 0..7) from neighbour samples
 * that the PREDICT_8x8_LOAD_TOP / _LEFT / _TOPLEFT macros bring into scope.
 * Of those, this function reads t0..t7, l0..l6 and lt.
 * NOTE(review): SRC and the LOAD macros are defined earlier in the file;
 * presumably t* = top row, l* = left column, lt = top-left corner and
 * SRC(a,b) addresses the block through src/stride -- confirm there.
 *
 * Two rounded filters are used:
 *   - 2-tap average:  (a + b + 1) >> 1
 *   - 3-tap 1-2-1:    (a + 2*b + c + 2) >> 2
 * A chained assignment shares one filtered value between positions whose
 * SRC arguments differ by (+1, +2).
 *
 * @param p_src        destination block, reinterpreted as pixel*
 * @param has_topleft  not referenced directly here; presumably consumed by
 *                     the LOAD macros -- TODO confirm
 * @param has_topright not referenced directly here; presumably consumed by
 *                     the LOAD macros -- TODO confirm
 * @param p_stride     line stride; shifted right by sizeof(pixel)-1 before
 *                     use (presumably bytes -> pixels)
 */
00936 static void FUNCC(pred8x8l_vertical_right)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
00937 {
00938 pixel *src = (pixel*)p_src;
00939 int stride = p_stride>>(sizeof(pixel)-1);
00940 PREDICT_8x8_LOAD_TOP;
00941 PREDICT_8x8_LOAD_LEFT;
00942 PREDICT_8x8_LOAD_TOPLEFT;
/* Values derived from the l* samples only (3-tap). */
00943 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
00944 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
00945 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
00946 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
/* Values that mix l*, lt and t0. */
00947 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
00948 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
00949 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
/* From here on, 2-tap and 3-tap values over lt/t* alternate line by line. */
00950 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
00951 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
00952 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
00953 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
00954 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
00955 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
00956 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
00957 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
00958 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
00959 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
00960 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
00961 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
00962 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
00963 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
00964 SRC(7,0)= (t6 + t7 + 1) >> 1;
00965 }
/**
 * 8x8 luma intra prediction, "horizontal down" variant.
 *
 * Writes all 64 SRC(a,b) positions (a, b in 0..7) from neighbour samples
 * that the PREDICT_8x8_LOAD_TOP / _LEFT / _TOPLEFT macros bring into scope.
 * Of those, this function reads l0..l7, lt and t0..t6.
 * NOTE(review): SRC and the LOAD macros are defined earlier in the file;
 * presumably t* = top row, l* = left column, lt = top-left corner and
 * SRC(a,b) addresses the block through src/stride -- confirm there.
 *
 * Two rounded filters are used:
 *   - 2-tap average:  (a + b + 1) >> 1
 *   - 3-tap 1-2-1:    (a + 2*b + c + 2) >> 2
 * A chained assignment shares one filtered value between positions whose
 * SRC arguments differ by (+2, +1).
 *
 * @param p_src        destination block, reinterpreted as pixel*
 * @param has_topleft  not referenced directly here; presumably consumed by
 *                     the LOAD macros -- TODO confirm
 * @param has_topright not referenced directly here; presumably consumed by
 *                     the LOAD macros -- TODO confirm
 * @param p_stride     line stride; shifted right by sizeof(pixel)-1 before
 *                     use (presumably bytes -> pixels)
 */
00966 static void FUNCC(pred8x8l_horizontal_down)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
00967 {
00968 pixel *src = (pixel*)p_src;
00969 int stride = p_stride>>(sizeof(pixel)-1);
00970 PREDICT_8x8_LOAD_TOP;
00971 PREDICT_8x8_LOAD_LEFT;
00972 PREDICT_8x8_LOAD_TOPLEFT;
/* 2-tap and 3-tap values over the l* samples alternate line by line,
 * working from l7 towards l0. */
00973 SRC(0,7)= (l6 + l7 + 1) >> 1;
00974 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
00975 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
00976 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
00977 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
00978 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
00979 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
00980 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
00981 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
00982 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
00983 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
00984 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
00985 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
/* Values that involve the lt sample. */
00986 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
00987 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
00988 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
00989 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
/* Remaining positions: 3-tap values over the t* samples only. */
00990 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
00991 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
00992 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
00993 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
00994 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
00995 }
/**
 * 8x8 luma intra prediction, "vertical left" variant.
 *
 * Writes all 64 SRC(a,b) positions (a, b in 0..7) from the samples that
 * PREDICT_8x8_LOAD_TOP and PREDICT_8x8_LOAD_TOPRIGHT bring into scope.
 * Of those, this function reads t0..t12; no l* or lt sample is used.
 * NOTE(review): SRC and the LOAD macros are defined earlier in the file;
 * presumably t0..t7 is the row above the block, t8.. its continuation to
 * the right, and SRC(a,b) addresses the block through src/stride -- confirm
 * there.
 *
 * 2-tap averages ((a + b + 1) >> 1) and 3-tap 1-2-1 values
 * ((a + 2*b + c + 2) >> 2) alternate line by line, each pair moving one
 * sample further along t*. A chained assignment shares one value between
 * positions whose SRC arguments differ by (+1, -2).
 *
 * @param p_src        destination block, reinterpreted as pixel*
 * @param has_topleft  not referenced directly here; presumably consumed by
 *                     the LOAD macros -- TODO confirm
 * @param has_topright not referenced directly here; presumably consumed by
 *                     the LOAD macros -- TODO confirm
 * @param p_stride     line stride; shifted right by sizeof(pixel)-1 before
 *                     use (presumably bytes -> pixels)
 */
00996 static void FUNCC(pred8x8l_vertical_left)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
00997 {
00998 pixel *src = (pixel*)p_src;
00999 int stride = p_stride>>(sizeof(pixel)-1);
01000 PREDICT_8x8_LOAD_TOP;
01001 PREDICT_8x8_LOAD_TOPRIGHT;
01002 SRC(0,0)= (t0 + t1 + 1) >> 1;
01003 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
01004 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
01005 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
01006 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
01007 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
01008 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
01009 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
01010 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
01011 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
01012 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
01013 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
01014 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
/* From here on the filters reach t8 and beyond. */
01015 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
01016 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
01017 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
01018 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
01019 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
01020 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
01021 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
01022 SRC(7,6)= (t10 + t11 + 1) >> 1;
01023 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
01024 }
/**
 * 8x8 luma intra prediction, "horizontal up" variant.
 *
 * Writes all 64 SRC(a,b) positions (a, b in 0..7) using only the l0..l7
 * samples that PREDICT_8x8_LOAD_LEFT brings into scope; no t* or lt sample
 * is used.
 * NOTE(review): SRC and the LOAD macro are defined earlier in the file;
 * presumably l* is the column left of the block and SRC(a,b) addresses the
 * block through src/stride -- confirm there.
 *
 * 2-tap averages ((a + b + 1) >> 1) and 3-tap 1-2-1 values
 * ((a + 2*b + c + 2) >> 2) alternate line by line while samples remain.
 * A chained assignment shares one value between positions whose SRC
 * arguments differ by (+2, -1).
 *
 * @param p_src        destination block, reinterpreted as pixel*
 * @param has_topleft  not referenced directly here; presumably consumed by
 *                     the LOAD macro -- TODO confirm
 * @param has_topright not referenced directly here; presumably consumed by
 *                     the LOAD macro -- TODO confirm
 * @param p_stride     line stride; shifted right by sizeof(pixel)-1 before
 *                     use (presumably bytes -> pixels)
 */
01025 static void FUNCC(pred8x8l_horizontal_up)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
01026 {
01027 pixel *src = (pixel*)p_src;
01028 int stride = p_stride>>(sizeof(pixel)-1);
01029 PREDICT_8x8_LOAD_LEFT;
01030 SRC(0,0)= (l0 + l1 + 1) >> 1;
01031 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
01032 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
01033 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
01034 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
01035 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
01036 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
01037 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
01038 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
01039 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
01040 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
01041 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
01042 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
/* No sample after l7: the 3-tap filter counts l7 twice for its missing tap. */
01043 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
/* All remaining positions are filled with l7 unfiltered. */
01044 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
01045 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
01046 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
01047 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
01048 }
01049 #undef PREDICT_8x8_LOAD_LEFT
01050 #undef PREDICT_8x8_LOAD_TOP
01051 #undef PREDICT_8x8_LOAD_TOPLEFT
01052 #undef PREDICT_8x8_LOAD_TOPRIGHT
01053 #undef PREDICT_8x8_DC
01054 #undef PTR
01055 #undef PT
01056 #undef PL
01057 #undef SRC
01058
01059 static void FUNCC(pred4x4_vertical_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
01060 int i;
01061 pixel *pix = (pixel*)p_pix;
01062 const dctcoef *block = (const dctcoef*)p_block;
01063 stride >>= sizeof(pixel)-1;
01064 pix -= stride;
01065 for(i=0; i<4; i++){
01066 pixel v = pix[0];
01067 pix[1*stride]= v += block[0];
01068 pix[2*stride]= v += block[4];
01069 pix[3*stride]= v += block[8];
01070 pix[4*stride]= v + block[12];
01071 pix++;
01072 block++;
01073 }
01074 }
01075
01076 static void FUNCC(pred4x4_horizontal_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
01077 int i;
01078 pixel *pix = (pixel*)p_pix;
01079 const dctcoef *block = (const dctcoef*)p_block;
01080 stride >>= sizeof(pixel)-1;
01081 for(i=0; i<4; i++){
01082 pixel v = pix[-1];
01083 pix[0]= v += block[0];
01084 pix[1]= v += block[1];
01085 pix[2]= v += block[2];
01086 pix[3]= v + block[3];
01087 pix+= stride;
01088 block+= 4;
01089 }
01090 }
01091
01092 static void FUNCC(pred8x8l_vertical_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
01093 int i;
01094 pixel *pix = (pixel*)p_pix;
01095 const dctcoef *block = (const dctcoef*)p_block;
01096 stride >>= sizeof(pixel)-1;
01097 pix -= stride;
01098 for(i=0; i<8; i++){
01099 pixel v = pix[0];
01100 pix[1*stride]= v += block[0];
01101 pix[2*stride]= v += block[8];
01102 pix[3*stride]= v += block[16];
01103 pix[4*stride]= v += block[24];
01104 pix[5*stride]= v += block[32];
01105 pix[6*stride]= v += block[40];
01106 pix[7*stride]= v += block[48];
01107 pix[8*stride]= v + block[56];
01108 pix++;
01109 block++;
01110 }
01111 }
01112
01113 static void FUNCC(pred8x8l_horizontal_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
01114 int i;
01115 pixel *pix = (pixel*)p_pix;
01116 const dctcoef *block = (const dctcoef*)p_block;
01117 stride >>= sizeof(pixel)-1;
01118 for(i=0; i<8; i++){
01119 pixel v = pix[-1];
01120 pix[0]= v += block[0];
01121 pix[1]= v += block[1];
01122 pix[2]= v += block[2];
01123 pix[3]= v += block[3];
01124 pix[4]= v += block[4];
01125 pix[5]= v += block[5];
01126 pix[6]= v += block[6];
01127 pix[7]= v + block[7];
01128 pix+= stride;
01129 block+= 8;
01130 }
01131 }
01132
01133 static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01134 int i;
01135 for(i=0; i<16; i++)
01136 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
01137 }
01138
01139 static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01140 int i;
01141 for(i=0; i<16; i++)
01142 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
01143 }
01144
01145 static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01146 int i;
01147 for(i=0; i<4; i++)
01148 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
01149 }
01150
01151 static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01152 int i;
01153 for(i=0; i<4; i++)
01154 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
01155 for(i=4; i<8; i++)
01156 FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
01157 }
01158
01159 static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01160 int i;
01161 for(i=0; i<4; i++)
01162 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
01163 }
01164
01165 static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01166 int i;
01167 for(i=0; i<4; i++)
01168 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
01169 for(i=4; i<8; i++)
01170 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
01171 }