45 #define pixeltmp int16_t
55 #define pixeltmp int32_t
65 #define pixeltmp int16_t
/* SWAR (SIMD-within-a-register) byte constants: ~0UL/255 evaluates to
 * 0x0101...01, i.e. a 1 in every byte lane of an unsigned long, so
 * multiplying replicates the byte value across the whole word:
 * pb_7f = 0x7f in every byte, pb_80 = 0x80 in every byte. */
71 #define pb_7f (~0UL/255 * 0x7f)
72 #define pb_80 (~0UL/255 * 0x80)
75 0, 1, 8, 16, 9, 2, 3, 10,
76 17, 24, 32, 25, 18, 11, 4, 5,
77 12, 19, 26, 33, 40, 48, 41, 34,
78 27, 20, 13, 6, 7, 14, 21, 28,
79 35, 42, 49, 56, 57, 50, 43, 36,
80 29, 22, 15, 23, 30, 37, 44, 51,
81 58, 59, 52, 45, 38, 31, 39, 46,
82 53, 60, 61, 54, 47, 55, 62, 63
88 0, 8, 1, 9, 16, 24, 2, 10,
89 17, 25, 32, 40, 48, 56, 33, 41,
90 18, 26, 3, 11, 4, 12, 19, 27,
91 34, 42, 49, 57, 50, 58, 35, 43,
92 20, 28, 5, 13, 6, 14, 21, 29,
93 36, 44, 51, 59, 52, 60, 37, 45,
94 22, 30, 7, 15, 23, 31, 38, 46,
95 53, 61, 54, 62, 39, 47, 55, 63,
102 0, 1, 2, 3, 8, 9, 16, 17,
103 10, 11, 4, 5, 6, 7, 15, 14,
104 13, 12, 19, 18, 24, 25, 32, 33,
105 26, 27, 20, 21, 22, 23, 28, 29,
106 30, 31, 34, 35, 40, 41, 48, 49,
107 42, 43, 36, 37, 38, 39, 44, 45,
108 46, 47, 50, 51, 56, 57, 58, 59,
109 52, 53, 54, 55, 60, 61, 62, 63,
113 0, 8, 16, 24, 1, 9, 2, 10,
114 17, 25, 32, 40, 48, 56, 57, 49,
115 41, 33, 26, 18, 3, 11, 4, 12,
116 19, 27, 34, 42, 50, 58, 35, 43,
117 51, 59, 20, 28, 5, 13, 6, 14,
118 21, 29, 36, 44, 52, 60, 37, 45,
119 53, 61, 22, 30, 7, 15, 23, 31,
120 38, 46, 54, 62, 39, 47, 55, 63,
125 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
126 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
127 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
128 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
129 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
130 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
131 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
132 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
145 j = src_scantable[i];
159 int idct_permutation_type)
163 switch(idct_permutation_type){
166 idct_permutation[i]= i;
170 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
178 idct_permutation[i]= ((i&7)<<3) | (i>>3);
182 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
198 for (i = 0; i < 16; i++) {
199 for (j = 0; j < 16; j += 8) {
210 pix += line_size - 16;
221 for (i = 0; i < 16; i++) {
222 for (j = 0; j < 16; j += 8) {
234 register uint64_t x=*(uint64_t*)pix;
236 s += sq[(x>>8)&0xff];
237 s += sq[(x>>16)&0xff];
238 s += sq[(x>>24)&0xff];
239 s += sq[(x>>32)&0xff];
240 s += sq[(x>>40)&0xff];
241 s += sq[(x>>48)&0xff];
242 s += sq[(x>>56)&0xff];
244 register uint32_t x=*(uint32_t*)pix;
246 s += sq[(x>>8)&0xff];
247 s += sq[(x>>16)&0xff];
248 s += sq[(x>>24)&0xff];
249 x=*(uint32_t*)(pix+4);
251 s += sq[(x>>8)&0xff];
252 s += sq[(x>>16)&0xff];
253 s += sq[(x>>24)&0xff];
258 pix += line_size - 16;
266 for(i=0; i+8<=w; i+=8){
293 for (i = 0; i < h; i++) {
294 s += sq[pix1[0] - pix2[0]];
295 s += sq[pix1[1] - pix2[1]];
296 s += sq[pix1[2] - pix2[2]];
297 s += sq[pix1[3] - pix2[3]];
310 for (i = 0; i < h; i++) {
311 s += sq[pix1[0] - pix2[0]];
312 s += sq[pix1[1] - pix2[1]];
313 s += sq[pix1[2] - pix2[2]];
314 s += sq[pix1[3] - pix2[3]];
315 s += sq[pix1[4] - pix2[4]];
316 s += sq[pix1[5] - pix2[5]];
317 s += sq[pix1[6] - pix2[6]];
318 s += sq[pix1[7] - pix2[7]];
331 for (i = 0; i < h; i++) {
332 s += sq[pix1[ 0] - pix2[ 0]];
333 s += sq[pix1[ 1] - pix2[ 1]];
334 s += sq[pix1[ 2] - pix2[ 2]];
335 s += sq[pix1[ 3] - pix2[ 3]];
336 s += sq[pix1[ 4] - pix2[ 4]];
337 s += sq[pix1[ 5] - pix2[ 5]];
338 s += sq[pix1[ 6] - pix2[ 6]];
339 s += sq[pix1[ 7] - pix2[ 7]];
340 s += sq[pix1[ 8] - pix2[ 8]];
341 s += sq[pix1[ 9] - pix2[ 9]];
342 s += sq[pix1[10] - pix2[10]];
343 s += sq[pix1[11] - pix2[11]];
344 s += sq[pix1[12] - pix2[12]];
345 s += sq[pix1[13] - pix2[13]];
346 s += sq[pix1[14] - pix2[14]];
347 s += sq[pix1[15] - pix2[15]];
361 block[0] = s1[0] - s2[0];
362 block[1] = s1[1] - s2[1];
363 block[2] = s1[2] - s2[2];
364 block[3] = s1[3] - s2[3];
365 block[4] = s1[4] - s2[4];
366 block[5] = s1[5] - s2[5];
367 block[6] = s1[6] - s2[6];
368 block[7] = s1[7] - s2[7];
383 pixels[0] = av_clip_uint8(block[0]);
384 pixels[1] = av_clip_uint8(block[1]);
385 pixels[2] = av_clip_uint8(block[2]);
386 pixels[3] = av_clip_uint8(block[3]);
387 pixels[4] = av_clip_uint8(block[4]);
388 pixels[5] = av_clip_uint8(block[5]);
389 pixels[6] = av_clip_uint8(block[6]);
390 pixels[7] = av_clip_uint8(block[7]);
404 pixels[0] = av_clip_uint8(block[0]);
405 pixels[1] = av_clip_uint8(block[1]);
406 pixels[2] = av_clip_uint8(block[2]);
407 pixels[3] = av_clip_uint8(block[3]);
421 pixels[0] = av_clip_uint8(block[0]);
422 pixels[1] = av_clip_uint8(block[1]);
435 for (i = 0; i < 8; i++) {
436 for (j = 0; j < 8; j++) {
439 else if (*block > 127)
442 *pixels = (
uint8_t)(*block + 128);
446 pixels += (line_size - 8);
457 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
458 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
459 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
460 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
461 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
462 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
463 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
464 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
477 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
478 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
479 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
480 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
493 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
494 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
504 sum+=
FFABS(block[i]);
512 for (i = 0; i < h; i++) {
513 memset(block, value, 16);
522 for (i = 0; i < h; i++) {
523 memset(block, value, 8);
/* Rounded averages of pixel values, used by the half-pel interpolators:
 * avg2 = (a+b+1)/2, avg4 = (a+b+c+d+2)/4 (round half up).
 *
 * Fix: parenthesize every macro argument (CERT PRE01-C).  The original
 * expansion `a+b+1` binds wrongly when an argument is an expression with
 * an operator of lower precedence than `+` (e.g. avg2(x|y, z) expanded
 * to ((x | y+z+1)>>1)).  Parenthesizing is behavior-preserving for all
 * well-formed existing uses. */
#define avg2(a, b)       (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
533 const int A=(16-x16)*(16-y16);
534 const int B=( x16)*(16-y16);
535 const int C=(16-x16)*( y16);
536 const int D=( x16)*( y16);
541 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] +
rounder)>>8;
542 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] +
rounder)>>8;
543 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] +
rounder)>>8;
544 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] +
rounder)>>8;
545 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] +
rounder)>>8;
546 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] +
rounder)>>8;
547 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] +
rounder)>>8;
548 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] +
rounder)>>8;
558 const int s= 1<<
shift;
569 int src_x, src_y, frac_x, frac_y,
index;
578 if((
unsigned)src_x <
width){
579 if((
unsigned)src_y <
height){
580 index= src_x + src_y*
stride;
581 dst[y*stride + x]= ( ( src[
index ]*(s-frac_x)
582 + src[index +1]* frac_x )*(s-frac_y)
583 + ( src[index+stride ]*(s-frac_x)
584 + src[index+stride+1]* frac_x )* frac_y
587 index= src_x + av_clip(src_y, 0, height)*
stride;
588 dst[y*stride + x]= ( ( src[
index ]*(s-frac_x)
589 + src[index +1]* frac_x )*s
593 if((
unsigned)src_y <
height){
594 index= av_clip(src_x, 0, width) + src_y*
stride;
595 dst[y*stride + x]= ( ( src[
index ]*(s-frac_y)
596 + src[index+stride ]* frac_y )*s
599 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*
stride;
600 dst[y*stride + x]= src[
index ];
614 case 2: put_pixels2_8_c (dst, src, stride, height);
break;
615 case 4: put_pixels4_8_c (dst, src, stride, height);
break;
616 case 8: put_pixels8_8_c (dst, src, stride, height);
break;
617 case 16:put_pixels16_8_c(dst, src, stride, height);
break;
623 for (i=0; i <
height; i++) {
624 for (j=0; j <
width; j++) {
625 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
634 for (i=0; i <
height; i++) {
635 for (j=0; j <
width; j++) {
636 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
645 for (i=0; i <
height; i++) {
646 for (j=0; j <
width; j++) {
647 dst[j] = (683*(2*src[j] + src[j+
stride] + 1)) >> 11;
656 for (i=0; i <
height; i++) {
657 for (j=0; j <
width; j++) {
658 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+
stride] + 2*src[j+stride+1] + 6)) >> 15;
667 for (i=0; i <
height; i++) {
668 for (j=0; j <
width; j++) {
669 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+
stride] + 3*src[j+stride+1] + 6)) >> 15;
678 for (i=0; i <
height; i++) {
679 for (j=0; j <
width; j++) {
680 dst[j] = (683*(src[j] + 2*src[j+
stride] + 1)) >> 11;
689 for (i=0; i <
height; i++) {
690 for (j=0; j <
width; j++) {
691 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+
stride] + 3*src[j+stride+1] + 6)) >> 15;
700 for (i=0; i <
height; i++) {
701 for (j=0; j <
width; j++) {
702 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+
stride] + 4*src[j+stride+1] + 6)) >> 15;
711 case 2: avg_pixels2_8_c (dst, src, stride, height);
break;
712 case 4: avg_pixels4_8_c (dst, src, stride, height);
break;
713 case 8: avg_pixels8_8_c (dst, src, stride, height);
break;
714 case 16:avg_pixels16_8_c(dst, src, stride, height);
break;
720 for (i=0; i <
height; i++) {
721 for (j=0; j <
width; j++) {
722 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
731 for (i=0; i <
height; i++) {
732 for (j=0; j <
width; j++) {
733 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
742 for (i=0; i <
height; i++) {
743 for (j=0; j <
width; j++) {
744 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+
stride] + 1)) >> 11) + 1) >> 1;
753 for (i=0; i <
height; i++) {
754 for (j=0; j <
width; j++) {
755 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+
stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
764 for (i=0; i <
height; i++) {
765 for (j=0; j <
width; j++) {
766 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+
stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
775 for (i=0; i <
height; i++) {
776 for (j=0; j <
width; j++) {
777 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+
stride] + 1)) >> 11) + 1) >> 1;
786 for (i=0; i <
height; i++) {
787 for (j=0; j <
width; j++) {
788 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+
stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
797 for (i=0; i <
height; i++) {
798 for (j=0; j <
width; j++) {
799 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+
stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
806 #define QPEL_MC(r, OPNAME, RND, OP) \
807 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
808 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
812 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
813 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
814 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
815 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
816 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
817 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
818 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
819 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
825 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
827 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
831 const int src0= src[0*srcStride];\
832 const int src1= src[1*srcStride];\
833 const int src2= src[2*srcStride];\
834 const int src3= src[3*srcStride];\
835 const int src4= src[4*srcStride];\
836 const int src5= src[5*srcStride];\
837 const int src6= src[6*srcStride];\
838 const int src7= src[7*srcStride];\
839 const int src8= src[8*srcStride];\
840 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
841 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
842 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
843 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
844 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
845 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
846 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
847 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
853 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
854 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
859 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
860 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
861 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
862 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
863 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
864 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
865 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
866 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
867 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
868 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
869 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
870 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
871 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
872 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
873 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
874 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
880 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
881 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
886 const int src0= src[0*srcStride];\
887 const int src1= src[1*srcStride];\
888 const int src2= src[2*srcStride];\
889 const int src3= src[3*srcStride];\
890 const int src4= src[4*srcStride];\
891 const int src5= src[5*srcStride];\
892 const int src6= src[6*srcStride];\
893 const int src7= src[7*srcStride];\
894 const int src8= src[8*srcStride];\
895 const int src9= src[9*srcStride];\
896 const int src10= src[10*srcStride];\
897 const int src11= src[11*srcStride];\
898 const int src12= src[12*srcStride];\
899 const int src13= src[13*srcStride];\
900 const int src14= src[14*srcStride];\
901 const int src15= src[15*srcStride];\
902 const int src16= src[16*srcStride];\
903 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
904 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
905 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
906 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
907 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
908 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
909 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
910 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
911 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
912 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
913 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
914 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
915 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
916 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
917 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
918 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
924 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
926 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
927 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
930 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
931 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
934 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
936 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
937 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
940 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
943 copy_block9(full, src, 16, stride, 9);\
944 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
945 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
948 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
950 copy_block9(full, src, 16, stride, 9);\
951 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
954 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
957 copy_block9(full, src, 16, stride, 9);\
958 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
959 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
961 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
966 copy_block9(full, src, 16, stride, 9);\
967 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
968 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
969 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
970 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
972 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
976 copy_block9(full, src, 16, stride, 9);\
977 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
978 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
979 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
980 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
982 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
987 copy_block9(full, src, 16, stride, 9);\
988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
989 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
991 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
993 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
997 copy_block9(full, src, 16, stride, 9);\
998 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
999 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1000 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1001 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1003 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1004 uint8_t full[16*9];\
1007 uint8_t halfHV[64];\
1008 copy_block9(full, src, 16, stride, 9);\
1009 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1010 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1011 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1012 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1014 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1015 uint8_t full[16*9];\
1017 uint8_t halfHV[64];\
1018 copy_block9(full, src, 16, stride, 9);\
1019 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1020 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1021 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1022 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1024 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1025 uint8_t full[16*9];\
1028 uint8_t halfHV[64];\
1029 copy_block9(full, src, 16, stride, 9);\
1030 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1031 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1032 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1033 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1035 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1036 uint8_t full[16*9];\
1038 uint8_t halfHV[64];\
1039 copy_block9(full, src, 16, stride, 9);\
1040 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1041 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1042 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1043 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1045 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1047 uint8_t halfHV[64];\
1048 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1049 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1050 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1052 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1054 uint8_t halfHV[64];\
1055 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1056 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1057 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1059 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1060 uint8_t full[16*9];\
1063 uint8_t halfHV[64];\
1064 copy_block9(full, src, 16, stride, 9);\
1065 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1066 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1067 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1068 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1070 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1071 uint8_t full[16*9];\
1073 copy_block9(full, src, 16, stride, 9);\
1074 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1075 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1076 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1078 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1079 uint8_t full[16*9];\
1082 uint8_t halfHV[64];\
1083 copy_block9(full, src, 16, stride, 9);\
1084 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1085 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1086 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1087 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1089 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1090 uint8_t full[16*9];\
1092 copy_block9(full, src, 16, stride, 9);\
1093 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1094 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1095 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1097 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1099 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1100 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1103 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1105 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1106 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1109 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1110 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1113 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1115 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1116 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1119 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1120 uint8_t full[24*17];\
1122 copy_block17(full, src, 24, stride, 17);\
1123 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1124 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1127 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1128 uint8_t full[24*17];\
1129 copy_block17(full, src, 24, stride, 17);\
1130 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1133 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1134 uint8_t full[24*17];\
1136 copy_block17(full, src, 24, stride, 17);\
1137 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1138 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1140 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1141 uint8_t full[24*17];\
1142 uint8_t halfH[272];\
1143 uint8_t halfV[256];\
1144 uint8_t halfHV[256];\
1145 copy_block17(full, src, 24, stride, 17);\
1146 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1147 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1148 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1149 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1151 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1152 uint8_t full[24*17];\
1153 uint8_t halfH[272];\
1154 uint8_t halfHV[256];\
1155 copy_block17(full, src, 24, stride, 17);\
1156 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1157 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1158 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1159 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1161 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1162 uint8_t full[24*17];\
1163 uint8_t halfH[272];\
1164 uint8_t halfV[256];\
1165 uint8_t halfHV[256];\
1166 copy_block17(full, src, 24, stride, 17);\
1167 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1168 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1169 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1170 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1172 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1173 uint8_t full[24*17];\
1174 uint8_t halfH[272];\
1175 uint8_t halfHV[256];\
1176 copy_block17(full, src, 24, stride, 17);\
1177 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1178 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1179 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1180 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1182 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1183 uint8_t full[24*17];\
1184 uint8_t halfH[272];\
1185 uint8_t halfV[256];\
1186 uint8_t halfHV[256];\
1187 copy_block17(full, src, 24, stride, 17);\
1188 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1189 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1190 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1191 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1193 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1194 uint8_t full[24*17];\
1195 uint8_t halfH[272];\
1196 uint8_t halfHV[256];\
1197 copy_block17(full, src, 24, stride, 17);\
1198 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1199 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1200 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1201 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1203 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1204 uint8_t full[24*17];\
1205 uint8_t halfH[272];\
1206 uint8_t halfV[256];\
1207 uint8_t halfHV[256];\
1208 copy_block17(full, src, 24, stride, 17);\
1209 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1210 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1211 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1212 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1214 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1215 uint8_t full[24*17];\
1216 uint8_t halfH[272];\
1217 uint8_t halfHV[256];\
1218 copy_block17(full, src, 24, stride, 17);\
1219 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1220 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1221 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1222 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1224 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1225 uint8_t halfH[272];\
1226 uint8_t halfHV[256];\
1227 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1228 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1229 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1231 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1232 uint8_t halfH[272];\
1233 uint8_t halfHV[256];\
1234 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1235 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1236 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1238 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1239 uint8_t full[24*17];\
1240 uint8_t halfH[272];\
1241 uint8_t halfV[256];\
1242 uint8_t halfHV[256];\
1243 copy_block17(full, src, 24, stride, 17);\
1244 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1245 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1246 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1247 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1249 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1250 uint8_t full[24*17];\
1251 uint8_t halfH[272];\
1252 copy_block17(full, src, 24, stride, 17);\
1253 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1254 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1255 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1257 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1258 uint8_t full[24*17];\
1259 uint8_t halfH[272];\
1260 uint8_t halfV[256];\
1261 uint8_t halfHV[256];\
1262 copy_block17(full, src, 24, stride, 17);\
1263 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1264 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1265 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1266 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1268 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1269 uint8_t full[24*17];\
1270 uint8_t halfH[272];\
1271 copy_block17(full, src, 24, stride, 17);\
1272 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1273 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1274 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1276 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1277 uint8_t halfH[272];\
1278 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1279 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* Store macros plugged into QPEL_MC as its OP argument.  'b' is the raw
 * 8-tap filter sum, whose coefficients (20,-6,3,-1 applied to mirrored
 * pairs) total 32, so ((b)+16)>>5 rounds to nearest and cm[] (ff_cropTbl
 * biased by MAX_NEG_CROP, set up in the lowpass functions) clamps the
 * result to 0..255.  The _no_rnd variants add 15 instead of 16 so ties
 * round down, and op_avg_no_rnd likewise drops the +1 when averaging
 * with the existing destination pixel (MPEG "no rounding" mode). */
1282 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1283 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1284 #define op_put(a, b) a = cm[((b) + 16)>>5]
1285 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1289 QPEL_MC(0, avg_ , _ ,
op_avg)
1292 #undef op_avg_no_rnd
1294 #undef op_put_no_rnd
/*
 * The mc00 (zero motion) qpel cases are plain block copies/averages with
 * no interpolation, so alias them to the shared pixel copy/average
 * helpers instead of generating dedicated functions.  Since no rounding
 * happens in a pure copy, the no_rnd variants alias the very same
 * functions as the rounded ones.
 */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
/* was ff_put_pixels16x16_8_c: inconsistent with put_qpel16_mc00_c above —
 * both mc00 slots must point at the same public 16x16 copy wrapper */
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1308 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1309 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1310 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1311 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1312 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1313 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1314 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1315 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1321 #if CONFIG_RV40_DECODER
1323 put_pixels16_xy2_8_c(dst, src, stride, 16);
1326 avg_pixels16_xy2_8_c(dst, src, stride, 16);
1329 put_pixels8_xy2_8_c(dst, src, stride, 8);
1332 avg_pixels8_xy2_8_c(dst, src, stride, 8);
1336 #if CONFIG_DIRAC_DECODER
1337 #define DIRAC_MC(OPNAME)\
1338 void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1340 OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
1342 void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1344 OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
1346 void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1348 OPNAME ## _pixels16_8_c(dst , src[0] , stride, h);\
1349 OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
1351 void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1353 OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
1355 void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1357 OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
1359 void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1361 OPNAME ## _pixels16_l2_8(dst , src[0] , src[1] , stride, stride, stride, h);\
1362 OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
1364 void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1366 OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
1368 void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1370 OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
1372 void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1374 OPNAME ## _pixels16_l4_8(dst , src[0] , src[1] , src[2] , src[3] , stride, stride, stride, stride, stride, h);\
1375 OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
1386 const int src_1= src[ -srcStride];
1387 const int src0 = src[0 ];
1388 const int src1 = src[ srcStride];
1389 const int src2 = src[2*srcStride];
1390 const int src3 = src[3*srcStride];
1391 const int src4 = src[4*srcStride];
1392 const int src5 = src[5*srcStride];
1393 const int src6 = src[6*srcStride];
1394 const int src7 = src[7*srcStride];
1395 const int src8 = src[8*srcStride];
1396 const int src9 = src[9*srcStride];
1397 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1398 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1399 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1400 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1401 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1402 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1403 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1404 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1413 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
1423 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
1437 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1446 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1455 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1465 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1467 if (d<-2*strength) d1= 0;
1468 else if(d<- strength) d1=-2*strength - d;
1469 else if(d< strength) d1= d;
1470 else if(d< 2*strength) d1= 2*strength - d;
1475 if(p1&256) p1= ~(p1>>31);
1476 if(p2&256) p2= ~(p2>>31);
1483 d2= av_clip((p0-p3)/4, -ad1, ad1);
1485 src[x-2*
stride] = p0 - d2;
1486 src[x+
stride] = p3 + d2;
1492 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1498 int p0= src[y*stride-2];
1499 int p1= src[y*stride-1];
1500 int p2= src[y*stride+0];
1501 int p3= src[y*stride+1];
1502 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1504 if (d<-2*strength) d1= 0;
1505 else if(d<- strength) d1=-2*strength - d;
1506 else if(d< strength) d1= d;
1507 else if(d< 2*strength) d1= 2*strength - d;
1512 if(p1&256) p1= ~(p1>>31);
1513 if(p2&256) p2= ~(p2>>31);
1515 src[y*stride-1] = p1;
1516 src[y*stride+0] = p2;
1520 d2= av_clip((p0-p3)/4, -ad1, ad1);
1522 src[y*stride-2] = p0 - d2;
1523 src[y*stride+1] = p3 + d2;
1533 temp[x ] = 4*src[x ];
1534 temp[x + 7*8] = 4*src[x + 7*
stride];
1538 xy = y * stride + x;
1540 temp[yz] = src[xy -
stride] + 2*src[xy] + src[xy +
stride];
1545 src[ y*
stride] = (temp[ y*8] + 2)>>2;
1546 src[7+y*
stride] = (temp[7+y*8] + 2)>>2;
1548 xy = y * stride + x;
1550 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
1561 s += abs(pix1[0] - pix2[0]);
1562 s += abs(pix1[1] - pix2[1]);
1563 s += abs(pix1[2] - pix2[2]);
1564 s += abs(pix1[3] - pix2[3]);
1565 s += abs(pix1[4] - pix2[4]);
1566 s += abs(pix1[5] - pix2[5]);
1567 s += abs(pix1[6] - pix2[6]);
1568 s += abs(pix1[7] - pix2[7]);
1569 s += abs(pix1[8] - pix2[8]);
1570 s += abs(pix1[9] - pix2[9]);
1571 s += abs(pix1[10] - pix2[10]);
1572 s += abs(pix1[11] - pix2[11]);
1573 s += abs(pix1[12] - pix2[12]);
1574 s += abs(pix1[13] - pix2[13]);
1575 s += abs(pix1[14] - pix2[14]);
1576 s += abs(pix1[15] - pix2[15]);
1589 s += abs(pix1[0] -
avg2(pix2[0], pix2[1]));
1590 s += abs(pix1[1] -
avg2(pix2[1], pix2[2]));
1591 s += abs(pix1[2] -
avg2(pix2[2], pix2[3]));
1592 s += abs(pix1[3] -
avg2(pix2[3], pix2[4]));
1593 s += abs(pix1[4] -
avg2(pix2[4], pix2[5]));
1594 s += abs(pix1[5] -
avg2(pix2[5], pix2[6]));
1595 s += abs(pix1[6] -
avg2(pix2[6], pix2[7]));
1596 s += abs(pix1[7] -
avg2(pix2[7], pix2[8]));
1597 s += abs(pix1[8] -
avg2(pix2[8], pix2[9]));
1598 s += abs(pix1[9] -
avg2(pix2[9], pix2[10]));
1599 s += abs(pix1[10] -
avg2(pix2[10], pix2[11]));
1600 s += abs(pix1[11] -
avg2(pix2[11], pix2[12]));
1601 s += abs(pix1[12] -
avg2(pix2[12], pix2[13]));
1602 s += abs(pix1[13] -
avg2(pix2[13], pix2[14]));
1603 s += abs(pix1[14] -
avg2(pix2[14], pix2[15]));
1604 s += abs(pix1[15] -
avg2(pix2[15], pix2[16]));
1614 uint8_t *pix3 = pix2 + line_size;
1618 s += abs(pix1[0] -
avg2(pix2[0], pix3[0]));
1619 s += abs(pix1[1] -
avg2(pix2[1], pix3[1]));
1620 s += abs(pix1[2] -
avg2(pix2[2], pix3[2]));
1621 s += abs(pix1[3] -
avg2(pix2[3], pix3[3]));
1622 s += abs(pix1[4] -
avg2(pix2[4], pix3[4]));
1623 s += abs(pix1[5] -
avg2(pix2[5], pix3[5]));
1624 s += abs(pix1[6] -
avg2(pix2[6], pix3[6]));
1625 s += abs(pix1[7] -
avg2(pix2[7], pix3[7]));
1626 s += abs(pix1[8] -
avg2(pix2[8], pix3[8]));
1627 s += abs(pix1[9] -
avg2(pix2[9], pix3[9]));
1628 s += abs(pix1[10] -
avg2(pix2[10], pix3[10]));
1629 s += abs(pix1[11] -
avg2(pix2[11], pix3[11]));
1630 s += abs(pix1[12] -
avg2(pix2[12], pix3[12]));
1631 s += abs(pix1[13] -
avg2(pix2[13], pix3[13]));
1632 s += abs(pix1[14] -
avg2(pix2[14], pix3[14]));
1633 s += abs(pix1[15] -
avg2(pix2[15], pix3[15]));
1644 uint8_t *pix3 = pix2 + line_size;
1648 s += abs(pix1[0] -
avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1649 s += abs(pix1[1] -
avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1650 s += abs(pix1[2] -
avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1651 s += abs(pix1[3] -
avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1652 s += abs(pix1[4] -
avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1653 s += abs(pix1[5] -
avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1654 s += abs(pix1[6] -
avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1655 s += abs(pix1[7] -
avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1656 s += abs(pix1[8] -
avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1657 s += abs(pix1[9] -
avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1658 s += abs(pix1[10] -
avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1659 s += abs(pix1[11] -
avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1660 s += abs(pix1[12] -
avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1661 s += abs(pix1[13] -
avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1662 s += abs(pix1[14] -
avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1663 s += abs(pix1[15] -
avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
1677 s += abs(pix1[0] - pix2[0]);
1678 s += abs(pix1[1] - pix2[1]);
1679 s += abs(pix1[2] - pix2[2]);
1680 s += abs(pix1[3] - pix2[3]);
1681 s += abs(pix1[4] - pix2[4]);
1682 s += abs(pix1[5] - pix2[5]);
1683 s += abs(pix1[6] - pix2[6]);
1684 s += abs(pix1[7] - pix2[7]);
1697 s += abs(pix1[0] -
avg2(pix2[0], pix2[1]));
1698 s += abs(pix1[1] -
avg2(pix2[1], pix2[2]));
1699 s += abs(pix1[2] -
avg2(pix2[2], pix2[3]));
1700 s += abs(pix1[3] -
avg2(pix2[3], pix2[4]));
1701 s += abs(pix1[4] -
avg2(pix2[4], pix2[5]));
1702 s += abs(pix1[5] -
avg2(pix2[5], pix2[6]));
1703 s += abs(pix1[6] -
avg2(pix2[6], pix2[7]));
1704 s += abs(pix1[7] -
avg2(pix2[7], pix2[8]));
1714 uint8_t *pix3 = pix2 + line_size;
1718 s += abs(pix1[0] -
avg2(pix2[0], pix3[0]));
1719 s += abs(pix1[1] -
avg2(pix2[1], pix3[1]));
1720 s += abs(pix1[2] -
avg2(pix2[2], pix3[2]));
1721 s += abs(pix1[3] -
avg2(pix2[3], pix3[3]));
1722 s += abs(pix1[4] -
avg2(pix2[4], pix3[4]));
1723 s += abs(pix1[5] -
avg2(pix2[5], pix3[5]));
1724 s += abs(pix1[6] -
avg2(pix2[6], pix3[6]));
1725 s += abs(pix1[7] -
avg2(pix2[7], pix3[7]));
1736 uint8_t *pix3 = pix2 + line_size;
1740 s += abs(pix1[0] -
avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1741 s += abs(pix1[1] -
avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1742 s += abs(pix1[2] -
avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1743 s += abs(pix1[3] -
avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1744 s += abs(pix1[4] -
avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1745 s += abs(pix1[5] -
avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1746 s += abs(pix1[6] -
avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1747 s += abs(pix1[7] -
avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1762 for(x=0; x<16; x++){
1763 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1766 for(x=0; x<15; x++){
1767 score2+=
FFABS( s1[x ] - s1[x +stride]
1768 - s1[x+1] + s1[x+1+stride])
1769 -
FFABS( s2[x ] - s2[x +stride]
1770 - s2[x+1] + s2[x+1+stride]);
1778 else return score1 +
FFABS(score2)*8;
1789 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1793 score2+=
FFABS( s1[x ] - s1[x +stride]
1794 - s1[x+1] + s1[x+1+stride])
1795 -
FFABS( s2[x ] - s2[x +stride]
1796 - s2[x+1] + s2[x+1+stride]);
1804 else return score1 +
FFABS(score2)*8;
1811 for(i=0; i<8*8; i++){
1817 sum += (w*
b)*(w*b)>>4;
1825 for(i=0; i<8*8; i++){
1846 for(i=0; i<=last; i++){
1847 const int j= scantable[i];
1852 for(i=0; i<=last; i++){
1853 const int j= scantable[i];
1854 const int perm_j= permutation[j];
1855 block[perm_j]= temp[j];
1866 memset(cmp, 0,
sizeof(
void*)*6);
1925 for(i=0; i<=w-
sizeof(long); i+=
sizeof(long)){
1926 long a = *(
long*)(src+i);
1927 long b = *(
long*)(dst+i);
1931 dst[i+0] += src[i+0];
1936 #if !HAVE_FAST_UNALIGNED
1937 if((
long)src2 & (
sizeof(
long)-1)){
1938 for(i=0; i+7<w; i+=8){
1939 dst[i+0] = src1[i+0]-src2[i+0];
1940 dst[i+1] = src1[i+1]-src2[i+1];
1941 dst[i+2] = src1[i+2]-src2[i+2];
1942 dst[i+3] = src1[i+3]-src2[i+3];
1943 dst[i+4] = src1[i+4]-src2[i+4];
1944 dst[i+5] = src1[i+5]-src2[i+5];
1945 dst[i+6] = src1[i+6]-src2[i+6];
1946 dst[i+7] = src1[i+7]-src2[i+7];
1950 for(i=0; i<=w-
sizeof(long); i+=
sizeof(long)){
1951 long a = *(
long*)(src1+i);
1952 long b = *(
long*)(src2+i);
1956 dst[i+0] = src1[i+0]-src2[i+0];
1967 l=
mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
1984 const int pred=
mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
1997 for(i=0; i<w-1; i++){
2054 #define BUTTERFLY2(o1,o2,i1,i2) \
2058 #define BUTTERFLY1(x,y) \
2067 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
2078 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
2079 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
2080 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
2081 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
2123 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2124 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2125 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2126 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2157 sum -=
FFABS(temp[8*0] + temp[8*4]);
2175 const int s07 = SRC(0) + SRC(7);\
2176 const int s16 = SRC(1) + SRC(6);\
2177 const int s25 = SRC(2) + SRC(5);\
2178 const int s34 = SRC(3) + SRC(4);\
2179 const int a0 = s07 + s34;\
2180 const int a1 = s16 + s25;\
2181 const int a2 = s07 - s34;\
2182 const int a3 = s16 - s25;\
2183 const int d07 = SRC(0) - SRC(7);\
2184 const int d16 = SRC(1) - SRC(6);\
2185 const int d25 = SRC(2) - SRC(5);\
2186 const int d34 = SRC(3) - SRC(4);\
2187 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2188 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2189 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2190 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2192 DST(1, a4 + (a7>>2)) ;\
2193 DST(2, a2 + (a3>>1)) ;\
2194 DST(3, a5 + (a6>>2)) ;\
2196 DST(5, a6 - (a5>>2)) ;\
2197 DST(6, (a2>>1) - a3 ) ;\
2198 DST(7, (a4>>2) - a7 ) ;\
2209 #define SRC(x) dct[i][x]
2210 #define DST(x,v) dct[i][x]= v
2211 for( i = 0; i < 8; i++ )
2216 #define
SRC(x) dct[x][i]
2217 #define DST(x,v) sum += FFABS(v)
2218 for( i = 0; i < 8; i++ )
2260 sum+= (
temp[i]-bak[i])*(
temp[i]-bak[i]);
2300 for(i=start_i; i<last; i++){
2301 int j= scantable[i];
2306 if((level&(~127)) == 0){
2316 level=
temp[i] + 64;
2320 if((level&(~127)) == 0){
2336 distortion= s->
dsp.
sse[1](
NULL, lsrc2, lsrc1, 8, 8);
2338 return distortion + ((bits*s->
qscale*s->
qscale*109 + 64)>>7);
2371 for(i=start_i; i<last; i++){
2372 int j= scantable[i];
2377 if((level&(~127)) == 0){
2387 level=
temp[i] + 64;
2391 if((level&(~127)) == 0){
2400 #define VSAD_INTRA(size) \
2401 static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2405 for(y=1; y<h; y++){ \
2406 for(x=0; x<size; x+=4){ \
2407 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2408 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2423 for(x=0; x<16; x++){
2424 score+=
FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2433 #define SQ(a) ((a)*(a))
2434 #define VSSE_INTRA(size) \
2435 static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2439 for(y=1; y<h; y++){ \
2440 for(x=0; x<size; x+=4){ \
2441 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2442 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2457 for(x=0; x<16; x++){
2458 score+=
SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2471 for(i=0; i<
size; i++)
2472 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
2490 for(i=0; i<
len; i++)
2491 dst[i] = src0[i] * src1[-i];
2496 for(i=0; i<
len; i++)
2497 dst[i] = src0[i] * src1[i] + src2[i];
2501 const float *src1,
const float *win,
int len)
2507 for(i=-len, j=len-1; i<0; i++, j--) {
2512 dst[i] = s0*wj - s1*wi;
2513 dst[j] = s0*wi + s1*wj;
2521 for (i = 0; i <
len; i++) {
2522 float t = v1[i] - v2[i];
2529 const float *src1,
int len)
2532 for (i = 0; i <
len; i++) {
2535 dst[2*i ] = f1 + f2;
2536 dst[2*i + 1] = f1 - f2;
2545 for (i = 0; i <
len; i++)
2552 uint32_t maxi, uint32_t maxisign)
2555 if(a > mini)
return mini;
2556 else if((a^(1
U<<31)) > maxisign)
return maxi;
2562 uint32_t mini = *(uint32_t*)min;
2563 uint32_t maxi = *(uint32_t*)max;
2564 uint32_t maxisign = maxi ^ (1
U<<31);
2565 uint32_t *dsti = (uint32_t*)dst;
2566 const uint32_t *srci = (
const uint32_t*)src;
2567 for(i=0; i<
len; i+=8) {
2568 dsti[i + 0] =
clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2569 dsti[i + 1] =
clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2570 dsti[i + 2] =
clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2571 dsti[i + 3] =
clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2572 dsti[i + 4] =
clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2573 dsti[i + 5] =
clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2574 dsti[i + 6] =
clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2575 dsti[i + 7] =
clipf_c_one(srci[i + 7], mini, maxi, maxisign);
2580 if(min < 0 && max > 0) {
2583 for(i=0; i <
len; i+=8) {
2584 dst[i ] = av_clipf(src[i ], min, max);
2585 dst[i + 1] = av_clipf(src[i + 1], min, max);
2586 dst[i + 2] = av_clipf(src[i + 2], min, max);
2587 dst[i + 3] = av_clipf(src[i + 3], min, max);
2588 dst[i + 4] = av_clipf(src[i + 4], min, max);
2589 dst[i + 5] = av_clipf(src[i + 5], min, max);
2590 dst[i + 6] = av_clipf(src[i + 6], min, max);
2591 dst[i + 7] = av_clipf(src[i + 7], min, max);
2601 res += *v1++ * *v2++;
2611 *v1++ += mul * *v3++;
2617 const int16_t *window,
unsigned int len)
2620 int len2 = len >> 1;
2622 for (i = 0; i < len2; i++) {
2623 int16_t w = window[i];
2624 output[i] = (
MUL16(input[i], w) + (1 << 14)) >> 15;
2625 output[len-i-1] = (
MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
2633 *dst++ = av_clip(*src++, min, max);
2634 *dst++ = av_clip(*src++, min, max);
2635 *dst++ = av_clip(*src++, min, max);
2636 *dst++ = av_clip(*src++, min, max);
2637 *dst++ = av_clip(*src++, min, max);
2638 *dst++ = av_clip(*src++, min, max);
2639 *dst++ = av_clip(*src++, min, max);
2640 *dst++ = av_clip(*src++, min, max);
2659 a1 =
W1*b[1]+
W7*b[7];
2660 a7 =
W7*b[1]-
W1*b[7];
2661 a5 =
W5*b[5]+
W3*b[3];
2662 a3 =
W3*b[5]-
W5*b[3];
2663 a2 =
W2*b[2]+
W6*b[6];
2664 a6 =
W6*b[2]-
W2*b[6];
2665 a0 =
W0*b[0]+
W0*b[4];
2666 a4 =
W0*b[0]-
W0*b[4];
2668 s1 = (181*(a1-a5+a7-
a3)+128)>>8;
2669 s2 = (181*(a1-a5-a7+
a3)+128)>>8;
2671 b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
2672 b[1] = (a4+a6 +s1 + (1<<7))>>8;
2673 b[2] = (a4-a6 +s2 + (1<<7))>>8;
2674 b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
2675 b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
2676 b[5] = (a4-a6 -s2 + (1<<7))>>8;
2677 b[6] = (a4+a6 -s1 + (1<<7))>>8;
2678 b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
2685 a1 = (
W1*b[8*1]+
W7*b[8*7] + 4)>>3;
2686 a7 = (
W7*b[8*1]-
W1*b[8*7] + 4)>>3;
2687 a5 = (
W5*b[8*5]+
W3*b[8*3] + 4)>>3;
2688 a3 = (
W3*b[8*5]-
W5*b[8*3] + 4)>>3;
2689 a2 = (
W2*b[8*2]+
W6*b[8*6] + 4)>>3;
2690 a6 = (
W6*b[8*2]-
W2*b[8*6] + 4)>>3;
2691 a0 = (
W0*b[8*0]+
W0*b[8*4] )>>3;
2692 a4 = (
W0*b[8*0]-
W0*b[8*4] )>>3;
2694 s1 = (181*(a1-a5+a7-
a3)+128)>>8;
2695 s2 = (181*(a1-a5-a7+
a3)+128)>>8;
2697 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2698 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
2699 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
2700 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2702 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2703 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
2704 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
2705 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
2764 dest[0] = av_clip_uint8((block[0] + 4)>>3);
2768 dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
2782 for(i=0;i<512;i++) {
2790 static int did_fail=0;
2793 if((intptr_t)aligned & 15){
2795 #if HAVE_MMX || HAVE_ALTIVEC
2797 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2798 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2799 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2800 "Do not report crashes to FFmpeg developers.\n");
2833 #endif //CONFIG_ENCODERS
2840 }
else if(avctx->
lowres==2){
2845 }
else if(avctx->
lowres==3){
/*
 * Fill one row of a motion-compensation function table: slot [IDX] of
 * c->PFX##_pixels_tab receives the 16 quarter-pel variants _mc00.._mc33
 * of the size-NUM functions.  The two digits of the mcXY suffix encode
 * the horizontal (X) and vertical (Y) quarter-pel fraction.
 */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2943 dspfunc(put_no_rnd_qpel, 0, 16);
2949 dspfunc(put_no_rnd_qpel, 1, 8);
/* Install the 16x16 ([0]) and 8x8 ([1]) variants of a block-comparison
 * function pair into the context, by token-pasting the size suffix. */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;
2986 c->
vsad[4]= vsad_intra16_c;
2987 c->
vsad[5]= vsad_intra8_c;
2989 c->
vsse[4]= vsse_intra16_c;
2990 c->
vsse[5]= vsse_intra8_c;
3008 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
3018 #if CONFIG_VORBIS_DECODER
/* Token-paste helpers selecting the bit-depth-templated version of a
 * dsputil function: FUNC(f, 8) -> f_8, FUNCC(f, 8) -> f_8_c. */
#define FUNC(f, depth) f ## _ ## depth
#define FUNCC(f, depth) f ## _ ## depth ## _c
/*
 * Fill row [IDX] of c->PFX##_pixels_tab with the four half-pel variants
 * (full-pel copy, x2 = horizontal half-pel, y2 = vertical half-pel,
 * xy2 = diagonal half-pel) of the size-NUM, depth-templated functions.
 */
#define dspfunc1(PFX, IDX, NUM, depth)\
    c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM        , depth);\
    c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
    c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
    c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
/*
 * Fill row [IDX] of c->PFX##_pixels_tab with all 16 quarter-pel variants
 * _mc00.._mc33 of the size-NUM, depth-templated functions (horizontal
 * fraction in the first digit, vertical in the second).
 */
#define dspfunc2(PFX, IDX, NUM, depth)\
    c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
    c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
    c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
    c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
    c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
    c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
    c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
    c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
    c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
    c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
    c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
    c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
    c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
    c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
    c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
    c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
3071 #define BIT_DEPTH_FUNCS(depth, dct)\
3072 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
3073 c->draw_edges = FUNCC(draw_edges , depth);\
3074 c->clear_block = FUNCC(clear_block ## dct , depth);\
3075 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
3076 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
3077 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
3078 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
3079 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
3081 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
3082 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
3083 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
3084 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
3085 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
3086 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
3088 dspfunc1(put , 0, 16, depth);\
3089 dspfunc1(put , 1, 8, depth);\
3090 dspfunc1(put , 2, 4, depth);\
3091 dspfunc1(put , 3, 2, depth);\
3092 dspfunc1(put_no_rnd, 0, 16, depth);\
3093 dspfunc1(put_no_rnd, 1, 8, depth);\
3094 dspfunc1(avg , 0, 16, depth);\
3095 dspfunc1(avg , 1, 8, depth);\
3096 dspfunc1(avg , 2, 4, depth);\
3097 dspfunc1(avg , 3, 2, depth);\
3098 dspfunc1(avg_no_rnd, 0, 16, depth);\
3099 dspfunc1(avg_no_rnd, 1, 8, depth);\
3101 dspfunc2(put_h264_qpel, 0, 16, depth);\
3102 dspfunc2(put_h264_qpel, 1, 8, depth);\
3103 dspfunc2(put_h264_qpel, 2, 4, depth);\
3104 dspfunc2(put_h264_qpel, 3, 2, depth);\
3105 dspfunc2(avg_h264_qpel, 0, 16, depth);\
3106 dspfunc2(avg_h264_qpel, 1, 8, depth);\
3107 dspfunc2(avg_h264_qpel, 2, 4, depth);
3155 for (i = 0; i < 4; i++) {
3156 for (j = 0; j < 16; j++) {