/*
 * RV40_LOWPASS(OPNAME, OP) expands to four static functions:
 *   OPNAME##rv40_qpel8_h_lowpass,  OPNAME##rv40_qpel8_v_lowpass,
 *   OPNAME##rv40_qpel16_v_lowpass, OPNAME##rv40_qpel16_h_lowpass.
 *
 * Every output sample is a six-tap weighted sum of consecutive source
 * samples with weights (1, -5, C1, C2, -5, 1), plus the rounding term
 * 1 << (SHIFT - 1), shifted right by SHIFT. The result is not stored
 * directly; it is handed to OP(destination, value).
 *
 * qpel8_h: taps run along a row. The eight outputs dst[0..7] read
 *          src[-2] .. src[10]; the loop runs h times.
 * qpel8_v: taps run down a column. The eight outputs at dstStride pitch
 *          read rows -2 .. 10 at srcStride pitch; the loop runs w times.
 * qpel16_*: four calls to the matching qpel8 function, covering the left
 *          (dst, src) and right (dst+8, src+8) halves. The first pair is
 *          called with a count of 8, the second pair with count - 8.
 *
 * cm points MAX_NEG_CROP entries into ff_crop_tab. It is not referenced
 * by name in these bodies; the op_put/op_avg definitions that index cm[]
 * suggest it is consumed through the OP argument.
 *
 * NOTE(review): the pointer advances between loop iterations, and between
 * the two call pairs of the 16-wide variants, are on lines not shown in
 * this listing.
 */
37 #define RV40_LOWPASS(OPNAME, OP) \
38 static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
39 const int h, const int C1, const int C2, const int SHIFT){\
40 const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
42 for(i = 0; i < h; i++)\
44 OP(dst[0], (src[-2] + src[ 3] - 5*(src[-1]+src[2]) + src[0]*C1 + src[1]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
45 OP(dst[1], (src[-1] + src[ 4] - 5*(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
46 OP(dst[2], (src[ 0] + src[ 5] - 5*(src[ 1]+src[4]) + src[2]*C1 + src[3]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
47 OP(dst[3], (src[ 1] + src[ 6] - 5*(src[ 2]+src[5]) + src[3]*C1 + src[4]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
48 OP(dst[4], (src[ 2] + src[ 7] - 5*(src[ 3]+src[6]) + src[4]*C1 + src[5]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
49 OP(dst[5], (src[ 3] + src[ 8] - 5*(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
50 OP(dst[6], (src[ 4] + src[ 9] - 5*(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
51 OP(dst[7], (src[ 5] + src[10] - 5*(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
57 static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
58 const int w, const int C1, const int C2, const int SHIFT){\
59 const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
61 for(i = 0; i < w; i++)\
63 const int srcB = src[-2*srcStride];\
64 const int srcA = src[-1*srcStride];\
65 const int src0 = src[0 *srcStride];\
66 const int src1 = src[1 *srcStride];\
67 const int src2 = src[2 *srcStride];\
68 const int src3 = src[3 *srcStride];\
69 const int src4 = src[4 *srcStride];\
70 const int src5 = src[5 *srcStride];\
71 const int src6 = src[6 *srcStride];\
72 const int src7 = src[7 *srcStride];\
73 const int src8 = src[8 *srcStride];\
74 const int src9 = src[9 *srcStride];\
75 const int src10 = src[10*srcStride];\
76 OP(dst[0*dstStride], (srcB + src3 - 5*(srcA+src2) + src0*C1 + src1*C2 + (1<<(SHIFT-1))) >> SHIFT);\
77 OP(dst[1*dstStride], (srcA + src4 - 5*(src0+src3) + src1*C1 + src2*C2 + (1<<(SHIFT-1))) >> SHIFT);\
78 OP(dst[2*dstStride], (src0 + src5 - 5*(src1+src4) + src2*C1 + src3*C2 + (1<<(SHIFT-1))) >> SHIFT);\
79 OP(dst[3*dstStride], (src1 + src6 - 5*(src2+src5) + src3*C1 + src4*C2 + (1<<(SHIFT-1))) >> SHIFT);\
80 OP(dst[4*dstStride], (src2 + src7 - 5*(src3+src6) + src4*C1 + src5*C2 + (1<<(SHIFT-1))) >> SHIFT);\
81 OP(dst[5*dstStride], (src3 + src8 - 5*(src4+src7) + src5*C1 + src6*C2 + (1<<(SHIFT-1))) >> SHIFT);\
82 OP(dst[6*dstStride], (src4 + src9 - 5*(src5+src8) + src6*C1 + src7*C2 + (1<<(SHIFT-1))) >> SHIFT);\
83 OP(dst[7*dstStride], (src5 + src10 - 5*(src6+src9) + src7*C1 + src8*C2 + (1<<(SHIFT-1))) >> SHIFT);\
89 static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
90 const int w, const int C1, const int C2, const int SHIFT){\
91 OPNAME ## rv40_qpel8_v_lowpass(dst , src , dstStride, srcStride, 8, C1, C2, SHIFT);\
92 OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
95 OPNAME ## rv40_qpel8_v_lowpass(dst , src , dstStride, srcStride, w-8, C1, C2, SHIFT);\
96 OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, w-8, C1, C2, SHIFT);\
99 static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
100 const int h, const int C1, const int C2, const int SHIFT){\
101 OPNAME ## rv40_qpel8_h_lowpass(dst , src , dstStride, srcStride, 8, C1, C2, SHIFT);\
102 OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
105 OPNAME ## rv40_qpel8_h_lowpass(dst , src , dstStride, srcStride, h-8, C1, C2, SHIFT);\
106 OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, h-8, C1, C2, SHIFT);\
/*
 * RV40_MC(OPNAME, SIZE) expands to the functions
 * OPNAME##rv40_qpel##SIZE##_mcXY_c(dst, src, stride) for the XY positions
 * listed below. Each one is built from the *_h_lowpass / *_v_lowpass
 * helpers, which take a (C1, C2, SHIFT) coefficient triple.
 *
 * In every function shown, the digit selects the triple the same way:
 *     1 -> (52, 20, 6)    2 -> (20, 20, 5)    3 -> (20, 52, 6)
 * with X choosing the horizontal pass and Y the vertical pass.
 *
 * Single-pass positions (filter straight from src into dst):
 *     mc10, mc30 : horizontal only
 *     mc01, mc03 : vertical only
 *
 * Two-pass positions (mc11 mc21 mc31 mc12 mc22 mc32 mc13 mc23):
 *   1. put_rv40_qpel##SIZE##_h_lowpass filters SIZE+5 rows, starting two
 *      rows above the block (src - 2*stride), into the on-stack buffer
 *      full[], which is laid out with SIZE bytes per row. This pass always
 *      uses the put_ variant, whatever OPNAME is.
 *   2. OPNAME##rv40_qpel##SIZE##_v_lowpass filters from full_mid
 *      (= full + 2*SIZE, i.e. two rows into the buffer) with source
 *      stride SIZE into dst.
 *   The five extra rows match the vertical taps, which reach from two
 *   rows above to three rows below each output row.
 */
110 #define RV40_MC(OPNAME, SIZE) \
111 static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
113 OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
116 static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
118 OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
121 static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
123 OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
126 static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
128 uint8_t full[SIZE*(SIZE+5)];\
129 uint8_t * const full_mid = full + SIZE*2;\
130 put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
131 OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
134 static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
136 uint8_t full[SIZE*(SIZE+5)];\
137 uint8_t * const full_mid = full + SIZE*2;\
138 put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
139 OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
142 static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
144 uint8_t full[SIZE*(SIZE+5)];\
145 uint8_t * const full_mid = full + SIZE*2;\
146 put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
147 OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
150 static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
152 uint8_t full[SIZE*(SIZE+5)];\
153 uint8_t * const full_mid = full + SIZE*2;\
154 put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
155 OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
158 static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
160 uint8_t full[SIZE*(SIZE+5)];\
161 uint8_t * const full_mid = full + SIZE*2;\
162 put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
163 OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
166 static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
168 uint8_t full[SIZE*(SIZE+5)];\
169 uint8_t * const full_mid = full + SIZE*2;\
170 put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
171 OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
174 static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
176 OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
179 static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
181 uint8_t full[SIZE*(SIZE+5)];\
182 uint8_t * const full_mid = full + SIZE*2;\
183 put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
184 OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
187 static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
189 uint8_t full[SIZE*(SIZE+5)];\
190 uint8_t * const full_mid = full + SIZE*2;\
191 put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
192 OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
/*
 * Store operators for an OP(a, b) macro argument. Both look b up in the
 * table pointer cm, which must be in scope where the macro is expanded.
 *   op_avg: a becomes the average of the old a and cm[b], rounded up
 *           (+1 before the >> 1).
 *   op_put: a is overwritten with cm[b].
 */
196 #define op_avg(a, b) a = (((a)+cm[b]+1)>>1)
197 #define op_put(a, b) a = cm[b]
/*
 * PIXOP2(OPNAME, OP) expands to OPNAME##_pixels8_xy2_8_c and, through
 * CALL_2X_PIXELS, OPNAME##_pixels16_xy2_8_c (presumably the 8-wide kernel
 * applied twice; the macro's last argument is on a line not shown).
 *
 * The 8-wide kernel works on four bytes at a time, packed in a uint32_t:
 *   - a and b are 32-bit loads at pixels and pixels + 1, i.e. each byte
 *     lane of b is the right-hand neighbour of the same lane of a.
 *   - Each byte is split into its low two bits (mask 0x03030303) and its
 *     high six bits (mask 0xFCFCFCFC, shifted right by 2). The two parts
 *     are summed separately so that no carry crosses a byte boundary.
 *   - l0/h0 hold the sums for one source row and l1/h1 those for the
 *     next. The value passed to OP is
 *         h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0F)
 *     which recombines both parts into one byte per lane.
 *   - The inner loop handles two output rows per iteration, refreshing
 *     (l1, h1) and then (l0, h0), so each source row is loaded once.
 *   - The outer loop runs twice, once per 4-byte column. After a column,
 *     pixels is rewound by h + 1 rows and block by h rows, and both are
 *     advanced by 4 bytes.
 *
 * NOTE(review): the l0 expressions end in a trailing '+' whose final
 * addend is on a line not shown, so the rounding constant cannot be
 * confirmed from this listing.
 */
210 #define PIXOP2(OPNAME, OP) \
211 static inline void OPNAME ## _pixels8_xy2_8_c(uint8_t *block, \
212 const uint8_t *pixels, \
213 ptrdiff_t line_size, \
219 for (j = 0; j < 2; j++) { \
221 const uint32_t a = AV_RN32(pixels); \
222 const uint32_t b = AV_RN32(pixels + 1); \
223 uint32_t l0 = (a & 0x03030303UL) + \
224 (b & 0x03030303UL) + \
226 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
227 ((b & 0xFCFCFCFCUL) >> 2); \
230 pixels += line_size; \
231 for (i = 0; i < h; i += 2) { \
232 uint32_t a = AV_RN32(pixels); \
233 uint32_t b = AV_RN32(pixels + 1); \
234 l1 = (a & 0x03030303UL) + \
235 (b & 0x03030303UL); \
236 h1 = ((a & 0xFCFCFCFCUL) >> 2) + \
237 ((b & 0xFCFCFCFCUL) >> 2); \
238 OP(*((uint32_t *) block), \
239 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
240 pixels += line_size; \
241 block += line_size; \
242 a = AV_RN32(pixels); \
243 b = AV_RN32(pixels + 1); \
244 l0 = (a & 0x03030303UL) + \
245 (b & 0x03030303UL) + \
247 h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
248 ((b & 0xFCFCFCFCUL) >> 2); \
249 OP(*((uint32_t *) block), \
250 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
251 pixels += line_size; \
252 block += line_size; \
254 pixels += 4 - line_size * (h + 1); \
255 block += 4 - line_size * h; \
259 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c, \
260 OPNAME ## _pixels8_xy2_8_c, \
/*
 * Store operators for an OP(a, b) macro argument that works on whole
 * packed words:
 *   op_avg: a = rnd_avg32(a, b) (presumably a per-byte rounded average
 *           of two 32-bit words; rnd_avg32 is defined elsewhere).
 *   op_put: a = b.
 */
263 #define op_avg(a, b) a = rnd_avg32(a, b)
264 #define op_put(a, b) a = b
/*
 * The four statements below are the bodies of four separate wrapper
 * functions whose signatures are on lines not shown. Each forwards
 * (dst, src, stride) to one xy2 kernel. The constant last argument
 * equals the block width (16 or 8); judging by the use of h as the row
 * bound in PIXOP2, it is the number of rows - TODO confirm against the
 * kernel's parameter list.
 */
/* 16-wide "put" kernel, last argument 16. */
272 put_pixels16_xy2_8_c(dst, src, stride, 16);
/* 16-wide "avg" kernel, last argument 16. */
276 avg_pixels16_xy2_8_c(dst, src, stride, 16);
/* 8-wide "put" kernel, last argument 8. */
280 put_pixels8_xy2_8_c(dst, src, stride, 8);
/* 8-wide "avg" kernel, last argument 8. */
284 avg_pixels8_xy2_8_c(dst, src, stride, 8);
/*
 * RV40_CHROMA_MC(OPNAME, OP) expands to OPNAME##rv40_chroma_mc4_c and
 * OPNAME##rv40_chroma_mc8_c. They produce rows of 4 or 8 samples, looping
 * h times, at the fractional offset (x, y) with 0 <= x, y < 8 (checked
 * by av_assert2).
 *
 * Weights:
 *     A = (8-x)*(8-y)   B = x*(8-y)   C = (8-x)*y   D = x*y
 * These always sum to 64. The weighted sum plus
 * bias = rv40_bias[y>>1][x>>1] (table declared outside the lines shown)
 * is handed to OP() unnormalised, so OP is expected to divide by 64; the
 * op_put/op_avg definitions that shift right by 6 do that.
 *
 * Each function has two loops:
 *   - Four-tap form: A, B, C, D applied to the sample, its right
 *     neighbour, the sample one stride below, and that sample's right
 *     neighbour.
 *   - Two-tap form: E = B + C applied to a single neighbour at offset
 *     step, where step is stride when C is non-zero and 1 otherwise.
 *     This equals the four-tap form only when D == 0, because then at
 *     most one of B and C is non-zero.
 *
 * NOTE(review): the condition choosing between the two loops and the
 * per-row pointer advances are on lines not shown in this listing.
 */
294 #define RV40_CHROMA_MC(OPNAME, OP)\
295 static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
296 const int A = (8-x) * (8-y);\
297 const int B = ( x) * (8-y);\
298 const int C = (8-x) * ( y);\
299 const int D = ( x) * ( y);\
301 int bias = rv40_bias[y>>1][x>>1];\
303 av_assert2(x<8 && y<8 && x>=0 && y>=0);\
306 for(i = 0; i < h; i++){\
307 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
308 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
309 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
310 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
315 const int E = B + C;\
316 const int step = C ? stride : 1;\
317 for(i = 0; i < h; i++){\
318 OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
319 OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
320 OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
321 OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
328 static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
329 const int A = (8-x) * (8-y);\
330 const int B = ( x) * (8-y);\
331 const int C = (8-x) * ( y);\
332 const int D = ( x) * ( y);\
334 int bias = rv40_bias[y>>1][x>>1];\
336 av_assert2(x<8 && y<8 && x>=0 && y>=0);\
339 for(i = 0; i < h; i++){\
340 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
341 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
342 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
343 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
344 OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + bias));\
345 OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + bias));\
346 OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + bias));\
347 OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + bias));\
352 const int E = B + C;\
353 const int step = C ? stride : 1;\
354 for(i = 0; i < h; i++){\
355 OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
356 OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
357 OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
358 OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
359 OP(dst[4], (A*src[4] + E*src[step+4] + bias));\
360 OP(dst[5], (A*src[5] + E*src[step+5] + bias));\
361 OP(dst[6], (A*src[6] + E*src[step+6] + bias));\
362 OP(dst[7], (A*src[7] + E*src[step+7] + bias));\
/*
 * Store operators for an OP(a, b) macro argument whose b is scaled by 64:
 *   op_avg: a becomes the average of the old a and (b >> 6), rounded up
 *           (+1 before the >> 1).
 *   op_put: a = b >> 6.
 * No clamping is applied in either form.
 */
369 #define op_avg(a, b) a = (((a)+((b)>>6)+1)>>1)
370 #define op_put(a, b) a = ((b)>>6)
/*
 * RV40_WEIGHT_FUNC(size) expands to two functions that blend src1 and
 * src2 into dst over a size x size block:
 *
 *   rv40_weight_func_rnd_##size:
 *       dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5
 *     Each product is reduced by 9 bits before the two are added.
 *
 *   rv40_weight_func_nornd_##size:
 *       dst[i] = (w2 * src1[i] + w1 * src2[i] + 0x10) >> 5
 *     The products are added at full precision.
 *
 * The weights are crossed: w2 scales src1 and w1 scales src2.
 * 0x10 is half of 1 << 5, so the final shift rounds to nearest.
 *
 * NOTE(review): the per-row advance of dst/src1/src2 by stride is on
 * lines not shown in this listing.
 */
375 #define RV40_WEIGHT_FUNC(size) \
376 static void rv40_weight_func_rnd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
380 for (j = 0; j < size; j++) {\
381 for (i = 0; i < size; i++)\
382 dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5;\
388 static void rv40_weight_func_nornd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
392 for (j = 0; j < size; j++) {\
393 for (i = 0; i < size; i++)\
394 dst[i] = (w2 * src1[i] + w1 * src2[i] + 0x10) >> 5;\
/*
 * Sixteen table entries; the declaration is on a line not shown.
 * Presumably one of the dither tables that the strong loop filter adds
 * as a per-row offset (index dmode + i) before its >> 7 - TODO confirm
 * which table this is. The values average 0x40, i.e. half of 1 << 7.
 */
408 0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
409 0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
/*
 * Sixteen table entries; the declaration is on a line not shown.
 * Presumably the second dither table used by the strong loop filter
 * (rv40_dither_r is indexed with dmode + i there) - TODO confirm.
 * The values average 0x40, i.e. half of 1 << 7.
 */
416 0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
417 0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
/*
 * Clamp a to the symmetric range [-(b), b] by calling av_clip().
 * b is expanded twice, so pass an expression without side effects.
 */
420 #define CLIP_SYMM(a, b) av_clip(a, -(b), b)
/*
 * Row loop of the weak edge filter; the function header and several
 * interior lines are not shown. It visits four positions, stepping src
 * by stride. Samples across the edge sit at multiples of step:
 *     p2 = src[-3*step], p1 = src[-2*step], p0 = src[-1*step],
 *     q0 = src[ 0*step], q1 = src[ 1*step], q2 = src[ 2*step].
 */
438 for (i = 0; i < 4; i++, src += stride) {
/* Neighbour differences are taken before any sample is modified. */
439 int diff_p1p0 = src[-2*step] - src[-1*step];
440 int diff_q1q0 = src[ 1*step] - src[ 0*step];
441 int diff_p1p2 = src[-2*step] - src[-3*step];
442 int diff_q1q2 = src[ 1*step] - src[ 2*step];
/* Step across the edge: q0 - p0. */
444 t = src[0*step] - src[-1*step];
/* Edge-step magnitude scaled by alpha / 128. */
448 u = (alpha *
FFABS(t)) >> 7;
/*
 * The threshold is 2 when both filter_p1 and filter_q1 are set and 3
 * otherwise. The statement guarded by this test is on a line not shown.
 */
449 if (u > 3 - (filter_p1 && filter_q1))
/* With both sides enabled, p1 - q1 is added to the correction term. */
453 if (filter_p1 && filter_q1)
454 t += src[-2*step] - src[1*step];
/*
 * Round, divide by 8 and clamp to +/- lim_p0q0. The correction is added
 * to p0 and subtracted from q0; cm[] maps the results back to a stored
 * sample.
 */
456 diff =
CLIP_SYMM((t + 4) >> 3, lim_p0q0);
457 src[-1*step] = cm[src[-1*step] + diff];
458 src[ 0*step] = cm[src[ 0*step] - diff];
/* Adjust p1 only when enabled and |p1 - p2| is within beta. */
460 if (filter_p1 &&
FFABS(diff_p1p2) <= beta) {
461 t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
462 src[-2*step] = cm[src[-2*step] -
CLIP_SYMM(t, lim_p1)];
/* Mirror-image adjustment of q1, clamped to +/- lim_q1. */
465 if (filter_q1 &&
FFABS(diff_q1q2) <= beta) {
466 t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
467 src[ 1*step] = cm[src[ 1*step] -
CLIP_SYMM(t, lim_q1)];
/*
 * Visible tail of a wrapper: the remaining parameters and the last line
 * of a call that forwards alpha, beta, lim_p0q0, lim_q1 and lim_p1
 * unchanged. The wrapper's name, the callee and the leading arguments
 * are on lines not shown; presumably this fixes step/stride for one edge
 * orientation of the weak filter - TODO confirm.
 */
473 const int filter_p1,
const int filter_q1,
474 const int alpha,
const int beta,
475 const int lim_p0q0,
const int lim_q1,
479 alpha, beta, lim_p0q0, lim_q1, lim_p1);
/*
 * Second wrapper with the same visible parameter tail and the same
 * forwarded argument tail; presumably the other edge orientation -
 * TODO confirm.
 */
483 const int filter_p1,
const int filter_q1,
484 const int alpha,
const int beta,
485 const int lim_p0q0,
const int lim_q1,
489 alpha, beta, lim_p0q0, lim_q1, lim_p1);
/*
 * Row loop of the strong edge filter; the function header and several
 * interior lines are not shown. It visits four positions, stepping src
 * by stride, with p-side samples at negative multiples of step and
 * q-side samples at non-negative multiples (p0 = src[-1*step],
 * q0 = src[0*step]).
 */
502 for(i = 0; i < 4; i++, src += stride){
503 int sflag, p0, q0, p1, q1;
/* Step across the edge: q0 - p0. */
504 int t = src[0*step] - src[-1*step];
/* Edge-step magnitude scaled by alpha / 128. */
509 sflag = (alpha *
FFABS(t)) >> 7;
/*
 * New p0: five-tap sum centred on p0 with weights 25,26,26,26,25
 * (total 128). Its final addend is on a line not shown.
 */
513 p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] +
514 26*src[ 0*step] + 25*src[ 1*step] +
/*
 * New q0: the same weights centred on q0, plus a per-row offset from
 * rv40_dither_r, then divided by 128.
 */
517 q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] +
518 26*src[ 1*step] + 25*src[ 2*step] +
519 rv40_dither_r[dmode + i]) >> 7;
/*
 * Limit the change to +/- lims around the unfiltered sample.
 * NOTE(review): the condition guarding these clamps is not shown.
 */
522 p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
523 q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
/*
 * New p1 and q1 use the freshly computed p0 / q0 in place of the stored
 * samples. The tail of the p1 expression is on a line not shown.
 */
526 p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 +
528 q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] +
529 25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;
/* Same +/- lims limit around the unfiltered p1 / q1 (guard not shown). */
532 p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
533 q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
/*
 * Outermost pair p2 / q2: four-tap sum with weights 25,26,51,26
 * (total 128) and a fixed rounding term of 64, written back in place.
 * NOTE(review): the write-back of p1/p0/q0/q1 and any guard around this
 * pair are not shown; presumably these reads see the updated samples.
 */
542 src[-3*step] = (25*src[-1*step] + 26*src[-2*step] +
543 51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
544 src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] +
545 51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
/*
 * Visible parameter tail of a wrapper whose name and body are on lines
 * not shown. alpha, lims and dmode are the names used by the strong
 * filter's row loop (edge-step scale, clamp radius and dither table base
 * index), so these are presumably forwarded to it - TODO confirm.
 */
551 const int alpha,
const int lims,
552 const int dmode,
const int chroma)
/*
 * Second wrapper with the same visible parameter tail; presumably the
 * other edge orientation - TODO confirm.
 */
558 const int alpha,
const int lims,
559 const int dmode,
const int chroma)
/*
 * Edge-strength decision; the function's name line and several interior
 * lines are not shown. It reads four positions (ptr advances by stride),
 * with samples across the edge at multiples of step, and writes two
 * flags through the p1 and q1 pointers.
 */
565 int step, ptrdiff_t
stride,
570 int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
571 int strong0 = 0, strong1 = 0;
/* Accumulate the signed p1 - p0 and q1 - q0 differences over four rows. */
575 for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
576 sum_p1p0 += ptr[-2*step] - ptr[-1*step];
577 sum_q1q0 += ptr[ 1*step] - ptr[ 0*step];
/*
 * A side's flag is set when its summed difference is below 4 * beta in
 * magnitude.
 */
580 *p1 =
FFABS(sum_p1p0) < (beta << 2);
581 *q1 =
FFABS(sum_q1q0) < (beta << 2);
/*
 * Second pass over the same rows: p1 - p2 and q1 - q2 differences.
 * NOTE(review): statements between the two passes are on lines not
 * shown, so any early exit cannot be seen here.
 */
589 for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
590 sum_p1p2 += ptr[-2*step] - ptr[-3*step];
591 sum_q1q2 += ptr[ 1*step] - ptr[ 2*step];
/*
 * A side is "strong" when its flag is set and its outer summed
 * difference is below beta2 in magnitude.
 */
594 strong0 = *p1 && (
FFABS(sum_p1p2) < beta2);
595 strong1 = *q1 && (
FFABS(sum_q1q2) < beta2);
/* Non-zero only when both sides are strong. */
597 return strong0 && strong1;
/*
 * Visible parameter fragment of a wrapper whose name and body are on
 * lines not shown. beta and beta2 are the threshold names used by the
 * edge-strength decision, so these are presumably forwarded to it -
 * TODO confirm.
 */
601 int beta,
int beta2,
int edge,
/*
 * Second wrapper with the same visible parameter fragment; presumably
 * the other edge orientation - TODO confirm.
 */
608 int beta,
int beta2,
int edge,