45 for (i = 0; i < h; i++) {
46 s += sq[pix1[0] - pix2[0]];
47 s += sq[pix1[1] - pix2[1]];
48 s += sq[pix1[2] - pix2[2]];
49 s += sq[pix1[3] - pix2[3]];
62 for (i = 0; i < h; i++) {
63 s += sq[pix1[0] - pix2[0]];
64 s += sq[pix1[1] - pix2[1]];
65 s += sq[pix1[2] - pix2[2]];
66 s += sq[pix1[3] - pix2[3]];
67 s += sq[pix1[4] - pix2[4]];
68 s += sq[pix1[5] - pix2[5]];
69 s += sq[pix1[6] - pix2[6]];
70 s += sq[pix1[7] - pix2[7]];
83 for (i = 0; i < h; i++) {
84 s += sq[pix1[0] - pix2[0]];
85 s += sq[pix1[1] - pix2[1]];
86 s += sq[pix1[2] - pix2[2]];
87 s += sq[pix1[3] - pix2[3]];
88 s += sq[pix1[4] - pix2[4]];
89 s += sq[pix1[5] - pix2[5]];
90 s += sq[pix1[6] - pix2[6]];
91 s += sq[pix1[7] - pix2[7]];
92 s += sq[pix1[8] - pix2[8]];
93 s += sq[pix1[9] - pix2[9]];
94 s += sq[pix1[10] - pix2[10]];
95 s += sq[pix1[11] - pix2[11]];
96 s += sq[pix1[12] - pix2[12]];
97 s += sq[pix1[13] - pix2[13]];
98 s += sq[pix1[14] - pix2[14]];
99 s += sq[pix1[15] - pix2[15]];
111 for (i = 0; i < 64; i++)
112 sum +=
FFABS(block[i]);
/*
 * Rounding integer averages used by the half-pel comparison loops.
 *
 * avg2(a, b)       -> (a + b + 1) >> 1       (average of two, rounded up on ties)
 * avg4(a, b, c, d) -> (a + b + c + d + 2) >> 2 (average of four, rounded to nearest)
 *
 * Every argument is parenthesized in the expansion so that operands which
 * contain lower-precedence operators (|, &, ?:, <<, ...) are summed as a
 * whole instead of being re-associated with the surrounding '+'.
 * Each argument appears exactly once in the expansion.
 */
#define avg2(a, b) (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
120 int line_size,
int h)
124 for (i = 0; i < h; i++) {
125 s += abs(pix1[0] - pix2[0]);
126 s += abs(pix1[1] - pix2[1]);
127 s += abs(pix1[2] - pix2[2]);
128 s += abs(pix1[3] - pix2[3]);
129 s += abs(pix1[4] - pix2[4]);
130 s += abs(pix1[5] - pix2[5]);
131 s += abs(pix1[6] - pix2[6]);
132 s += abs(pix1[7] - pix2[7]);
133 s += abs(pix1[8] - pix2[8]);
134 s += abs(pix1[9] - pix2[9]);
135 s += abs(pix1[10] - pix2[10]);
136 s += abs(pix1[11] - pix2[11]);
137 s += abs(pix1[12] - pix2[12]);
138 s += abs(pix1[13] - pix2[13]);
139 s += abs(pix1[14] - pix2[14]);
140 s += abs(pix1[15] - pix2[15]);
148 int line_size,
int h)
152 for (i = 0; i < h; i++) {
153 s += abs(pix1[0] -
avg2(pix2[0], pix2[1]));
154 s += abs(pix1[1] -
avg2(pix2[1], pix2[2]));
155 s += abs(pix1[2] -
avg2(pix2[2], pix2[3]));
156 s += abs(pix1[3] -
avg2(pix2[3], pix2[4]));
157 s += abs(pix1[4] -
avg2(pix2[4], pix2[5]));
158 s += abs(pix1[5] -
avg2(pix2[5], pix2[6]));
159 s += abs(pix1[6] -
avg2(pix2[6], pix2[7]));
160 s += abs(pix1[7] -
avg2(pix2[7], pix2[8]));
161 s += abs(pix1[8] -
avg2(pix2[8], pix2[9]));
162 s += abs(pix1[9] -
avg2(pix2[9], pix2[10]));
163 s += abs(pix1[10] -
avg2(pix2[10], pix2[11]));
164 s += abs(pix1[11] -
avg2(pix2[11], pix2[12]));
165 s += abs(pix1[12] -
avg2(pix2[12], pix2[13]));
166 s += abs(pix1[13] -
avg2(pix2[13], pix2[14]));
167 s += abs(pix1[14] -
avg2(pix2[14], pix2[15]));
168 s += abs(pix1[15] -
avg2(pix2[15], pix2[16]));
176 int line_size,
int h)
179 uint8_t *pix3 = pix2 + line_size;
181 for (i = 0; i < h; i++) {
182 s += abs(pix1[0] -
avg2(pix2[0], pix3[0]));
183 s += abs(pix1[1] -
avg2(pix2[1], pix3[1]));
184 s += abs(pix1[2] -
avg2(pix2[2], pix3[2]));
185 s += abs(pix1[3] -
avg2(pix2[3], pix3[3]));
186 s += abs(pix1[4] -
avg2(pix2[4], pix3[4]));
187 s += abs(pix1[5] -
avg2(pix2[5], pix3[5]));
188 s += abs(pix1[6] -
avg2(pix2[6], pix3[6]));
189 s += abs(pix1[7] -
avg2(pix2[7], pix3[7]));
190 s += abs(pix1[8] -
avg2(pix2[8], pix3[8]));
191 s += abs(pix1[9] -
avg2(pix2[9], pix3[9]));
192 s += abs(pix1[10] -
avg2(pix2[10], pix3[10]));
193 s += abs(pix1[11] -
avg2(pix2[11], pix3[11]));
194 s += abs(pix1[12] -
avg2(pix2[12], pix3[12]));
195 s += abs(pix1[13] -
avg2(pix2[13], pix3[13]));
196 s += abs(pix1[14] -
avg2(pix2[14], pix3[14]));
197 s += abs(pix1[15] -
avg2(pix2[15], pix3[15]));
206 int line_size,
int h)
209 uint8_t *pix3 = pix2 + line_size;
211 for (i = 0; i < h; i++) {
212 s += abs(pix1[0] -
avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
213 s += abs(pix1[1] -
avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
214 s += abs(pix1[2] -
avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
215 s += abs(pix1[3] -
avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
216 s += abs(pix1[4] -
avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
217 s += abs(pix1[5] -
avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
218 s += abs(pix1[6] -
avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
219 s += abs(pix1[7] -
avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
220 s += abs(pix1[8] -
avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
221 s += abs(pix1[9] -
avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
222 s += abs(pix1[10] -
avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
223 s += abs(pix1[11] -
avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
224 s += abs(pix1[12] -
avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
225 s += abs(pix1[13] -
avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
226 s += abs(pix1[14] -
avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
227 s += abs(pix1[15] -
avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
236 int line_size,
int h)
240 for (i = 0; i < h; i++) {
241 s += abs(pix1[0] - pix2[0]);
242 s += abs(pix1[1] - pix2[1]);
243 s += abs(pix1[2] - pix2[2]);
244 s += abs(pix1[3] - pix2[3]);
245 s += abs(pix1[4] - pix2[4]);
246 s += abs(pix1[5] - pix2[5]);
247 s += abs(pix1[6] - pix2[6]);
248 s += abs(pix1[7] - pix2[7]);
256 int line_size,
int h)
260 for (i = 0; i < h; i++) {
261 s += abs(pix1[0] -
avg2(pix2[0], pix2[1]));
262 s += abs(pix1[1] -
avg2(pix2[1], pix2[2]));
263 s += abs(pix1[2] -
avg2(pix2[2], pix2[3]));
264 s += abs(pix1[3] -
avg2(pix2[3], pix2[4]));
265 s += abs(pix1[4] -
avg2(pix2[4], pix2[5]));
266 s += abs(pix1[5] -
avg2(pix2[5], pix2[6]));
267 s += abs(pix1[6] -
avg2(pix2[6], pix2[7]));
268 s += abs(pix1[7] -
avg2(pix2[7], pix2[8]));
276 int line_size,
int h)
279 uint8_t *pix3 = pix2 + line_size;
281 for (i = 0; i < h; i++) {
282 s += abs(pix1[0] -
avg2(pix2[0], pix3[0]));
283 s += abs(pix1[1] -
avg2(pix2[1], pix3[1]));
284 s += abs(pix1[2] -
avg2(pix2[2], pix3[2]));
285 s += abs(pix1[3] -
avg2(pix2[3], pix3[3]));
286 s += abs(pix1[4] -
avg2(pix2[4], pix3[4]));
287 s += abs(pix1[5] -
avg2(pix2[5], pix3[5]));
288 s += abs(pix1[6] -
avg2(pix2[6], pix3[6]));
289 s += abs(pix1[7] -
avg2(pix2[7], pix3[7]));
298 int line_size,
int h)
301 uint8_t *pix3 = pix2 + line_size;
303 for (i = 0; i < h; i++) {
304 s += abs(pix1[0] -
avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
305 s += abs(pix1[1] -
avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
306 s += abs(pix1[2] -
avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
307 s += abs(pix1[3] -
avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
308 s += abs(pix1[4] -
avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
309 s += abs(pix1[5] -
avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
310 s += abs(pix1[6] -
avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
311 s += abs(pix1[7] -
avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
321 int score1 = 0, score2 = 0, x,
y;
323 for (y = 0; y < h; y++) {
324 for (x = 0; x < 16; x++)
325 score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
327 for (x = 0; x < 15; x++)
328 score2 +=
FFABS(s1[x] - s1[x + stride] -
329 s1[x + 1] + s1[x + stride + 1]) -
330 FFABS(s2[x] - s2[x + stride] -
331 s2[x + 1] + s2[x + stride + 1]);
340 return score1 +
FFABS(score2) * 8;
345 int score1 = 0, score2 = 0, x,
y;
347 for (y = 0; y < h; y++) {
348 for (x = 0; x < 8; x++)
349 score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
351 for (x = 0; x < 7; x++)
352 score2 +=
FFABS(s1[x] - s1[x + stride] -
353 s1[x + 1] + s1[x + stride + 1]) -
354 FFABS(s2[x] - s2[x + stride] -
355 s2[x + 1] + s2[x + stride + 1]);
364 return score1 +
FFABS(score2) * 8;
377 memset(cmp, 0,
sizeof(
void *) * 6);
379 for (i = 0; i < 6; i++) {
380 switch (type & 0xFF) {
430 "internal error in cmp function selection\n");
435 #define BUTTERFLY2(o1, o2, i1, i2) \
439 #define BUTTERFLY1(x, y) \
/*
 * BUTTERFLYA(x, y): expands to FFABS(x + y) + FFABS(x - y).
 * Both arguments are parenthesized, but each one appears twice in the
 * expansion, so an argument with side effects would be evaluated twice.
 * NOTE(review): FFABS is defined outside this excerpt; presumably it is an
 * absolute-value macro -- confirm against its definition.
 */
448 #define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))
453 int i,
temp[64], sum = 0;
457 for (i = 0; i < 8; i++) {
460 src[stride * i + 0] - dst[stride * i + 0],
461 src[stride * i + 1] - dst[stride * i + 1]);
463 src[stride * i + 2] - dst[stride * i + 2],
464 src[stride * i + 3] - dst[stride * i + 3]);
466 src[stride * i + 4] - dst[stride * i + 4],
467 src[stride * i + 5] - dst[stride * i + 5]);
469 src[stride * i + 6] - dst[stride * i + 6],
470 src[stride * i + 7] - dst[stride * i + 7]);
483 for (i = 0; i < 8; i++) {
494 sum +=
BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) +
495 BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) +
496 BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) +
505 int i,
temp[64], sum = 0;
509 for (i = 0; i < 8; i++) {
512 src[stride * i + 0], src[stride * i + 1]);
514 src[stride * i + 2], src[stride * i + 3]);
516 src[stride * i + 4], src[stride * i + 5]);
518 src[stride * i + 6], src[stride * i + 7]);
531 for (i = 0; i < 8; i++) {
544 +
BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i])
545 +
BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i])
546 +
BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
549 sum -=
FFABS(temp[8 * 0] + temp[8 * 4]);
569 const int s07 = SRC(0) + SRC(7); \
570 const int s16 = SRC(1) + SRC(6); \
571 const int s25 = SRC(2) + SRC(5); \
572 const int s34 = SRC(3) + SRC(4); \
573 const int a0 = s07 + s34; \
574 const int a1 = s16 + s25; \
575 const int a2 = s07 - s34; \
576 const int a3 = s16 - s25; \
577 const int d07 = SRC(0) - SRC(7); \
578 const int d16 = SRC(1) - SRC(6); \
579 const int d25 = SRC(2) - SRC(5); \
580 const int d34 = SRC(3) - SRC(4); \
581 const int a4 = d16 + d25 + (d07 + (d07 >> 1)); \
582 const int a5 = d07 - d34 - (d25 + (d25 >> 1)); \
583 const int a6 = d07 + d34 - (d16 + (d16 >> 1)); \
584 const int a7 = d16 - d25 + (d34 + (d34 >> 1)); \
586 DST(1, a4 + (a7 >> 2)); \
587 DST(2, a2 + (a3 >> 1)); \
588 DST(3, a5 + (a6 >> 2)); \
590 DST(5, a6 - (a5 >> 2)); \
591 DST(6, (a2 >> 1) - a3); \
592 DST(7, (a4 >> 2) - a7); \
603 #define SRC(x) dct[i][x]
604 #define DST(x, v) dct[i][x] = v
605 for (i = 0; i < 8; i++)
610 #define
SRC(x) dct[x][i]
611 #define DST(x, v) sum += FFABS(v)
612 for (i = 0; i < 8; i++)
621 uint8_t *src2,
int stride,
int h)
631 for (i = 0; i < 64; i++)
638 uint8_t *src2,
int stride,
int h)
641 int16_t *
const bak =
temp + 64;
649 memcpy(bak,
temp, 64 *
sizeof(int16_t));
656 for (i = 0; i < 64; i++)
657 sum += (
temp[i] - bak[i]) * (
temp[i] - bak[i]);
697 if (last >= start_i) {
699 for (i = start_i; i < last; i++) {
700 int j = scantable[i];
705 if ((level & (~127)) == 0)
715 level =
temp[i] + 64;
719 if ((level & (~127)) == 0) {
734 distortion = s->
dsp.
sse[1](NULL, lsrc2, lsrc1, 8, 8);
736 return distortion + ((bits * s->
qscale * s->
qscale * 109 + 64) >> 7);
769 if (last >= start_i) {
771 for (i = start_i; i < last; i++) {
772 int j = scantable[i];
777 if ((level & (~127)) == 0)
787 level =
temp[i] + 64;
791 if ((level & (~127)) == 0)
800 #define VSAD_INTRA(size) \
801 static int vsad_intra ## size ## _c(MpegEncContext *c, \
802 uint8_t *s, uint8_t *dummy, \
805 int score = 0, x, y; \
807 for (y = 1; y < h; y++) { \
808 for (x = 0; x < size; x += 4) { \
809 score += FFABS(s[x] - s[x + stride]) + \
810 FFABS(s[x + 1] - s[x + stride + 1]) + \
811 FFABS(s[x + 2] - s[x + 2 + stride]) + \
812 FFABS(s[x + 3] - s[x + 3 + stride]); \
823 static int vsad ## size ## _c(MpegEncContext *c, \
824 uint8_t *s1, uint8_t *s2, \
827 int score = 0, x, y; \
829 for (y = 1; y < h; y++) { \
830 for (x = 0; x < size; x++) \
831 score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \
/*
 * SQ(a): square of the argument, expanded as ((a) * (a)).
 * The argument is textually duplicated, so it is evaluated twice; pass only
 * expressions without side effects.
 */
841 #define SQ(a) ((a) * (a))
842 #define VSSE_INTRA(size) \
843 static int vsse_intra ## size ## _c(MpegEncContext *c, \
844 uint8_t *s, uint8_t *dummy, \
847 int score = 0, x, y; \
849 for (y = 1; y < h; y++) { \
850 for (x = 0; x < size; x += 4) { \
851 score += SQ(s[x] - s[x + stride]) + \
852 SQ(s[x + 1] - s[x + stride + 1]) + \
853 SQ(s[x + 2] - s[x + stride + 2]) + \
854 SQ(s[x + 3] - s[x + stride + 3]); \
865 static int vsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, \
868 int score = 0, x, y; \
870 for (y = 1; y < h; y++) { \
871 for (x = 0; x < size; x++) \
872 score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \
882 #define WRAPPER8_16_SQ(name8, name16) \
883 static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \
888 score += name8(s, dst, src, stride, 8); \
889 score += name8(s, dst + 8, src + 8, stride, 8); \
893 score += name8(s, dst, src, stride, 8); \
894 score += name8(s, dst + 8, src + 8, stride, 8); \
915 for (i = 0; i < 512; i++)
921 static int did_fail = 0;
924 if ((intptr_t)aligned & 15) {
926 #if HAVE_MMX || HAVE_ALTIVEC
928 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
929 "and may be very slow or crash. This is not a bug in libavcodec,\n"
930 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
931 "Do not report crashes to FFmpeg developers.\n");
/*
 * SET_CMP_FUNC(name): fills the first two slots of the c->name[] table by
 * token pasting: slot 0 gets name16_c and slot 1 gets name8x8_c.
 * The expansion consists of two complete statements that already end in
 * ';' and it refers to a variable called 'c' that must be in scope at the
 * point of use. Because it is not wrapped in do { } while (0), it must not
 * be used as the unbraced body of an if/else or loop.
 */
956 #define SET_CMP_FUNC(name) \
957 c->name[0] = name ## 16_c; \
958 c->name[1] = name ## 8x8_c;
976 c->
vsad[0] = vsad16_c;
977 c->
vsad[1] = vsad8_c;
978 c->
vsad[4] = vsad_intra16_c;
979 c->
vsad[5] = vsad_intra8_c;
980 c->
vsse[0] = vsse16_c;
981 c->
vsse[1] = vsse8_c;
982 c->
vsse[4] = vsse_intra16_c;
983 c->
vsse[5] = vsse_intra8_c;
986 #if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER