35 const ptrdiff_t dst_pitch)
38 int32_t p0, p1, p2, p3, tmp0, tmp1, tmp2;
39 int32_t b0_1, b0_2, b1_1, b1_2, b1_3, b2_1, b2_2, b2_3, b2_4, b2_5, b2_6;
40 int32_t b3_1, b3_2, b3_3, b3_4, b3_5, b3_6, b3_7, b3_8, b3_9;
41 ptrdiff_t pitch, back_pitch;
42 const short *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
43 const int num_bands = 4;
57 for (y = 0; y < plane->
height; y += 2) {
68 b1_1 = b1_ptr[back_pitch];
70 b1_3 = b1_1 - b1_2*6 + b1_ptr[pitch];
81 b3_2 = b3_ptr[back_pitch];
85 b3_8 = b3_2 - b3_5*6 + b3_ptr[pitch];
89 for (x = 0, indx = 0; x < plane->
width; x+=2, indx++) {
90 if (x+2 >= plane->
width) {
110 p0 = p1 = p2 = p3 = 0;
116 b0_1 = b0_ptr[indx+1];
117 b0_2 = b0_ptr[pitch+indx+1];
122 p2 = (tmp0 + tmp2) * 8;
123 p3 = (tmp1 + tmp2 + b0_2) * 4;
130 b1_2 = b1_ptr[indx+1];
131 b1_1 = b1_ptr[back_pitch+indx+1];
133 tmp2 = tmp1 - tmp0*6 + b1_3;
134 b1_3 = b1_1 - b1_2*6 + b1_ptr[pitch+indx+1];
136 p0 += (tmp0 + tmp1) * 8;
137 p1 += (tmp0 + tmp1 + b1_1 + b1_2) * 4;
139 p3 += (tmp2 + b1_3) * 2;
144 b2_3 = b2_ptr[indx+1];
145 b2_6 = b2_ptr[pitch+indx+1];
148 tmp1 = b2_1 - b2_2*6 + b2_3;
152 p2 += (tmp0 + b2_4 + b2_5) * 4;
153 p3 += (tmp1 + b2_4 - b2_5*6 + b2_6) * 2;
158 b3_6 = b3_ptr[indx+1];
159 b3_3 = b3_ptr[back_pitch+indx+1];
165 b3_9 = b3_3 - b3_6*6 + b3_ptr[pitch+indx+1];
167 p0 += (tmp0 + tmp1) * 4;
168 p1 += (tmp0 - tmp1*6 + tmp2) * 2;
169 p2 += (b3_7 + b3_8) * 2;
170 p3 += b3_7 - b3_8*6 + b3_9;
180 dst += dst_pitch << 1;
192 const ptrdiff_t dst_pitch)
194 int x, y, indx,
b0,
b1,
b2,
b3, p0, p1, p2, p3;
195 const short *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
207 for (y = 0; y < plane->
height; y += 2) {
208 for (x = 0, indx = 0; x < plane->
width; x += 2, indx++) {
228 dst += dst_pitch << 1;
238 #define IVI_HAAR_BFLY(s1, s2, o1, o2, t) \
239 t = ((s1) - (s2)) >> 1;\
240 o1 = ((s1) + (s2)) >> 1;\
/**
 * Eight-input butterfly network (8-point inverse Haar, going by the name),
 * expanding to a single braced block.
 *
 * Sources: eight expressions. The parameter NAMES are declared in the order
 *          s1, s5, s3, s7, s2, s4, s6, s8, so the k-th positional argument is
 *          not "sk": the first two positional arguments (s1, s5) are the ones
 *          doubled before any butterfly runs.
 * Outputs: d1..d8 must be assignable; dN receives COMPENSATE(tN).
 * Scratch: t0..t8 must be assignable variables supplied by the caller.
 *          t0 is handed to every IVI_HAAR_BFLY as its temporary; t1..t8 hold
 *          the intermediate values and are overwritten in place from one
 *          butterfly to the next, so the statement order matters.
 *
 * COMPENSATE is not defined by this macro; it has to be defined at the
 * point of expansion.
 */
244 #define INV_HAAR8(s1, s5, s3, s7, s2, s4, s6, s8,\
245 d1, d2, d3, d4, d5, d6, d7, d8,\
246 t0, t1, t2, t3, t4, t5, t6, t7, t8) {\
247 t1 = (s1) * 2; t5 = (s5) * 2;\
248 IVI_HAAR_BFLY(t1, t5, t1, t5, t0); IVI_HAAR_BFLY(t1, s3, t1, t3, t0);\
249 IVI_HAAR_BFLY(t5, s7, t5, t7, t0); IVI_HAAR_BFLY(t1, s2, t1, t2, t0);\
250 IVI_HAAR_BFLY(t3, s4, t3, t4, t0); IVI_HAAR_BFLY(t5, s6, t5, t6, t0);\
251 IVI_HAAR_BFLY(t7, s8, t7, t8, t0);\
252 d1 = COMPENSATE(t1);\
253 d2 = COMPENSATE(t2);\
254 d3 = COMPENSATE(t3);\
255 d4 = COMPENSATE(t4);\
256 d5 = COMPENSATE(t5);\
257 d6 = COMPENSATE(t6);\
258 d7 = COMPENSATE(t7);\
259 d8 = COMPENSATE(t8); }
/**
 * Four-input butterfly network (4-point inverse Haar, going by the name),
 * expanding to a single braced block.
 *
 * Sources: s1, s3, s5, s7 (four expressions, in that positional order).
 * Outputs: d1..d4 must be assignable.
 * Scratch: t0..t4 must be assignable variables supplied by the caller.
 *          Here t4 (not t0) is the temporary passed to every IVI_HAAR_BFLY.
 *
 * Order of work: the first butterfly combines s1 and s3 into t0/t1; t0 is
 * then paired with s5 to produce d1 and d2, and t1 is paired with s7 to
 * produce d3 and d4. t2 and t3 are reused between the second and third
 * butterfly, which is why d1/d2 are stored before the third one runs.
 *
 * COMPENSATE is not defined by this macro; it has to be defined at the
 * point of expansion.
 */
262 #define INV_HAAR4(s1, s3, s5, s7, d1, d2, d3, d4, t0, t1, t2, t3, t4) {\
263 IVI_HAAR_BFLY(s1, s3, t0, t1, t4);\
264 IVI_HAAR_BFLY(t0, s5, t2, t3, t4);\
265 d1 = COMPENSATE(t2);\
266 d2 = COMPENSATE(t3);\
267 IVI_HAAR_BFLY(t1, s7, t2, t3, t4);\
268 d3 = COMPENSATE(t2);\
269 d4 = COMPENSATE(t3); }
272 const uint8_t *
flags)
274 int i,
shift, sp1, sp2, sp3, sp4;
278 int t0, t1, t2, t3, t4, t5, t6, t7, t8;
281 #define COMPENSATE(x) (x)
284 for (
i = 0;
i < 8;
i++) {
296 t0, t1, t2, t3, t4, t5, t6, t7, t8);
307 #define COMPENSATE(x) (x)
309 for (
i = 0;
i < 8;
i++) {
312 memset(
out, 0, 8 *
sizeof(
out[0]));
318 t0, t1, t2, t3, t4, t5, t6, t7, t8);
327 const uint8_t *
flags)
330 int t0, t1, t2, t3, t4, t5, t6, t7, t8;
333 #define COMPENSATE(x) (x)
334 for (
i = 0;
i < 8;
i++) {
335 if ( !in[0] && !in[1] && !in[2] && !in[3]
336 && !in[4] && !in[5] && !in[6] && !in[7]) {
337 memset(
out, 0, 8 *
sizeof(
out[0]));
340 in[4], in[5], in[6], in[7],
343 t0, t1, t2, t3, t4, t5, t6, t7, t8);
352 const uint8_t *
flags)
355 int t0, t1, t2, t3, t4, t5, t6, t7, t8;
358 #define COMPENSATE(x) (x)
359 for (
i = 0;
i < 8;
i++) {
361 INV_HAAR8(in[ 0], in[ 8], in[16], in[24],
362 in[32], in[40], in[48], in[56],
363 out[0 * pitch],
out[1 * pitch],
364 out[2 * pitch],
out[3 * pitch],
365 out[4 * pitch],
out[5 * pitch],
366 out[6 * pitch],
out[7 * pitch],
367 t0, t1, t2, t3, t4, t5, t6, t7, t8);
369 out[0 * pitch] =
out[1 * pitch] =
370 out[2 * pitch] =
out[3 * pitch] =
371 out[4 * pitch] =
out[5 * pitch] =
372 out[6 * pitch] =
out[7 * pitch] = 0;
381 const uint8_t *
flags)
387 int t0, t1, t2, t3, t4;
390 #define COMPENSATE(x) (x)
393 for (
i = 0;
i < 4;
i++) {
411 #define COMPENSATE(x) (x)
413 for (
i = 0;
i < 4;
i++) {
415 memset(
out, 0, 4 *
sizeof(
out[0]));
428 const uint8_t *
flags)
431 int t0, t1, t2, t3, t4;
434 #define COMPENSATE(x) (x)
435 for (
i = 0;
i < 4;
i++) {
436 if (!in[0] && !in[1] && !in[2] && !in[3]) {
437 memset(
out, 0, 4 *
sizeof(
out[0]));
450 const uint8_t *
flags)
453 int t0, t1, t2, t3, t4;
456 #define COMPENSATE(x) (x)
457 for (
i = 0;
i < 4;
i++) {
460 out[0 * pitch],
out[1 * pitch],
461 out[2 * pitch],
out[3 * pitch],
464 out[0 * pitch] =
out[1 * pitch] =
465 out[2 * pitch] =
out[3 * pitch] = 0;
479 dc_coeff = (*in + 0) >> 3;
481 for (y = 0; y < blk_size;
out += pitch, y++) {
482 for (x = 0; x < blk_size; x++)
488 #define IVI_SLANT_BFLY(s1, s2, o1, o2, t) \
494 #define IVI_IREFLECT(s1, s2, o1, o2, t) \
495 t = (((s1) + (s2)*2 + 2) >> 2) + (s1);\
496 o2 = (((s1)*2 - (s2) + 2) >> 2) - (s2);\
500 #define IVI_SLANT_PART4(s1, s2, o1, o2, t) \
501 t = (s2) + (((s1)*4 - (s2) + 4) >> 3);\
502 o2 = (s1) + ((-(s1) - (s2)*4 + 4) >> 3);\
/**
 * Eight-input network built from IVI_SLANT_PART4, IVI_SLANT_BFLY and
 * IVI_IREFLECT (8-point inverse slant, going by the name), expanding to a
 * single braced block.
 *
 * Sources: eight expressions. The parameter NAMES are declared in the order
 *          s1, s4, s8, s5, s2, s6, s3, s7, so the k-th positional argument
 *          is not "sk".
 * Outputs: d1..d8 must be assignable; dN receives COMPENSATE(tN).
 * Scratch: t0..t8 must be assignable variables supplied by the caller.
 *          t0 is handed to every helper as its temporary; t1..t8 are
 *          updated in place stage by stage, so the statement order matters.
 *
 * Visible stages: IVI_SLANT_PART4 on (s4, s5) first, then four butterflies
 * that bring the remaining sources into t1..t8, then a stage mixing
 * butterflies with IVI_IREFLECT on (t4, t3) and (t8, t7), then a final
 * stage of four butterflies.
 *
 * COMPENSATE is not defined by this macro; it has to be defined at the
 * point of expansion.
 *
 * NOTE(review): the embedded numbering skips 510 and 513 inside this
 * macro; this comment describes only the lines shown here -- confirm
 * against the full file that nothing but blank continuations is missing.
 */
506 #define IVI_INV_SLANT8(s1, s4, s8, s5, s2, s6, s3, s7,\
507 d1, d2, d3, d4, d5, d6, d7, d8,\
508 t0, t1, t2, t3, t4, t5, t6, t7, t8) {\
509 IVI_SLANT_PART4(s4, s5, t4, t5, t0);\
511 IVI_SLANT_BFLY(s1, t5, t1, t5, t0); IVI_SLANT_BFLY(s2, s6, t2, t6, t0);\
512 IVI_SLANT_BFLY(s7, s3, t7, t3, t0); IVI_SLANT_BFLY(t4, s8, t4, t8, t0);\
514 IVI_SLANT_BFLY(t1, t2, t1, t2, t0); IVI_IREFLECT (t4, t3, t4, t3, t0);\
515 IVI_SLANT_BFLY(t5, t6, t5, t6, t0); IVI_IREFLECT (t8, t7, t8, t7, t0);\
516 IVI_SLANT_BFLY(t1, t4, t1, t4, t0); IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\
517 IVI_SLANT_BFLY(t5, t8, t5, t8, t0); IVI_SLANT_BFLY(t6, t7, t6, t7, t0);\
518 d1 = COMPENSATE(t1);\
519 d2 = COMPENSATE(t2);\
520 d3 = COMPENSATE(t3);\
521 d4 = COMPENSATE(t4);\
522 d5 = COMPENSATE(t5);\
523 d6 = COMPENSATE(t6);\
524 d7 = COMPENSATE(t7);\
525 d8 = COMPENSATE(t8);}
/**
 * Four-input network built from IVI_SLANT_BFLY and IVI_IREFLECT (4-point
 * inverse slant, going by the name), expanding to a single braced block.
 *
 * Sources: four expressions. The parameter NAMES are declared in the order
 *          s1, s4, s2, s3, so the k-th positional argument is not "sk".
 * Outputs: d1..d4 must be assignable; dN receives COMPENSATE(tN).
 * Scratch: t0..t4 must be assignable variables supplied by the caller.
 *          t0 is handed to every helper as its temporary.
 *
 * Visible stages: a butterfly on (s1, s2) into t1/t2 and IVI_IREFLECT on
 * (s4, s3) into t4/t3, followed by in-place butterflies on (t1, t4) and
 * (t2, t3).
 *
 * COMPENSATE is not defined by this macro; it has to be defined at the
 * point of expansion.
 *
 * NOTE(review): the embedded numbering skips 530 inside this macro; this
 * comment describes only the lines shown here -- confirm against the full
 * file that nothing but a blank continuation is missing.
 */
528 #define IVI_INV_SLANT4(s1, s4, s2, s3, d1, d2, d3, d4, t0, t1, t2, t3, t4) {\
529 IVI_SLANT_BFLY(s1, s2, t1, t2, t0); IVI_IREFLECT (s4, s3, t4, t3, t0);\
531 IVI_SLANT_BFLY(t1, t4, t1, t4, t0); IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\
532 d1 = COMPENSATE(t1);\
533 d2 = COMPENSATE(t2);\
534 d3 = COMPENSATE(t3);\
535 d4 = COMPENSATE(t4);}
543 int t0, t1, t2, t3, t4, t5, t6, t7, t8;
545 #define COMPENSATE(x) (x)
548 for (
i = 0;
i < 8;
i++) {
552 t0, t1, t2, t3, t4, t5, t6, t7, t8);
561 #define COMPENSATE(x) (((x) + 1)>>1)
563 for (
i = 0;
i < 8;
i++) {
565 memset(
out, 0, 8*
sizeof(
out[0]));
569 t0, t1, t2, t3, t4, t5, t6, t7, t8);
583 int t0, t1, t2, t3, t4;
585 #define COMPENSATE(x) (x)
588 for (
i = 0;
i < 4;
i++) {
601 #define COMPENSATE(x) (((x) + 1)>>1)
603 for (
i = 0;
i < 4;
i++) {
622 dc_coeff = (*in + 1) >> 1;
624 for (y = 0; y < blk_size;
out += pitch, y++) {
625 for (x = 0; x < blk_size; x++)
633 int t0, t1, t2, t3, t4, t5, t6, t7, t8;
635 #define COMPENSATE(x) (((x) + 1)>>1)
636 for (
i = 0;
i < 8;
i++) {
637 if (!in[0] && !in[1] && !in[2] && !in[3] && !in[4] && !in[5] && !in[6] && !in[7]) {
638 memset(
out, 0, 8*
sizeof(
out[0]));
640 IVI_INV_SLANT8( in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7],
642 t0, t1, t2, t3, t4, t5, t6, t7, t8);
655 dc_coeff = (*in + 1) >> 1;
657 for (x = 0; x < blk_size; x++)
662 for (y = 1; y < blk_size;
out += pitch, y++) {
663 for (x = 0; x < blk_size; x++)
670 int i, row2, row4, row8;
671 int t0, t1, t2, t3, t4, t5, t6, t7, t8;
677 #define COMPENSATE(x) (((x) + 1)>>1)
678 for (
i = 0;
i < 8;
i++) {
680 IVI_INV_SLANT8(in[0], in[8], in[16], in[24], in[32], in[40], in[48], in[56],
682 out[row4 + pitch],
out[row4 + row2],
out[row8 - pitch],
683 t0, t1, t2, t3, t4, t5, t6, t7, t8);
686 out[row4 + pitch] =
out[row4 + row2] =
out[row8 - pitch] = 0;
700 dc_coeff = (*in + 1) >> 1;
702 for (y = 0; y < blk_size;
out += pitch, y++) {
704 for (x = 1; x < blk_size; x++)
712 int t0, t1, t2, t3, t4;
714 #define COMPENSATE(x) (((x) + 1)>>1)
715 for (
i = 0;
i < 4;
i++) {
716 if (!in[0] && !in[1] && !in[2] && !in[3]) {
717 memset(
out, 0, 4*
sizeof(
out[0]));
732 int t0, t1, t2, t3, t4;
736 #define COMPENSATE(x) (((x) + 1)>>1)
737 for (
i = 0;
i < 4;
i++) {
743 out[0] =
out[pitch] =
out[row2] =
out[row2 + pitch] = 0;
753 const uint8_t *
flags)
757 for (y = 0; y < 8;
out += pitch, in += 8, y++)
758 for (x = 0; x < 8; x++)
768 memset(
out + 1, 0, 7*
sizeof(
out[0]));
771 for (y = 1; y < 8;
out += pitch, y++)
772 memset(
out, 0, 8*
sizeof(
out[0]));
775 #define IVI_MC_TEMPLATE(size, suffix, OP) \
776 static void ivi_mc_ ## size ##x## size ## suffix(int16_t *buf, \
778 const int16_t *ref_buf, \
779 ptrdiff_t pitch, int mc_type) \
782 const int16_t *wptr; \
786 for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) { \
787 for (j = 0; j < size; j++) {\
788 OP(buf[j], ref_buf[j]); \
793 for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) \
794 for (j = 0; j < size; j++) \
795 OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \
798 wptr = ref_buf + pitch; \
799 for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \
800 for (j = 0; j < size; j++) \
801 OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \
804 wptr = ref_buf + pitch; \
805 for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \
806 for (j = 0; j < size; j++) \
807 OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \
812 void ff_ivi_mc_ ## size ##x## size ## suffix(int16_t *buf, const int16_t *ref_buf, \
813 ptrdiff_t pitch, int mc_type) \
815 ivi_mc_ ## size ##x## size ## suffix(buf, pitch, ref_buf, pitch, mc_type); \
818 #define IVI_MC_AVG_TEMPLATE(size, suffix, OP) \
819 void ff_ivi_mc_avg_ ## size ##x## size ## suffix(int16_t *buf, \
820 const int16_t *ref_buf, \
821 const int16_t *ref_buf2, \
823 int mc_type, int mc_type2) \
825 int16_t tmp[size * size]; \
828 ivi_mc_ ## size ##x## size ## _no_delta(tmp, size, ref_buf, pitch, mc_type); \
829 ivi_mc_ ## size ##x## size ## _delta(tmp, size, ref_buf2, pitch, mc_type2); \
830 for (i = 0; i < size; i++, buf += pitch) { \
831 for (j = 0; j < size; j++) {\
832 OP(buf[j], tmp[i * size + j] >> 1); \
/**
 * Two-argument "store" operation: expands to a plain assignment of b to a.
 * a must be an lvalue. The expansion is not wrapped in outer parentheses,
 * so it is meant to be used as a statement (as in "OP(dst, value);"), not
 * inside a larger expression.
 * Presumably passed as the OP argument of the motion-compensation
 * templates -- the instantiations are not visible here, confirm.
 */
837 #define OP_PUT(a, b) (a) = (b)
/**
 * Two-argument "accumulate" operation: expands to a compound assignment
 * that adds b onto a. a must be an lvalue. The expansion is not wrapped in
 * outer parentheses, so it is meant to be used as a statement (as in
 * "OP(dst, value);"), not inside a larger expression.
 * Presumably passed as the OP argument of the motion-compensation
 * templates -- the instantiations are not visible here, confirm.
 */
838 #define OP_ADD(a, b) (a) += (b)