26 #define PIXOP2(OPNAME, OP) \
28 static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
31 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
38 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
41 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
48 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
51 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
52 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
53 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \
54 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \
61 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
64 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
65 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
66 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \
67 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \
74 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
77 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
78 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
85 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
88 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
89 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
96 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
99 OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
100 OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
107 static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
110 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
111 OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
118 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
121 OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
122 OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
123 OP(LP(dst+8),no_rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \
124 OP(LP(dst+12),no_rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \
131 static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
134 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
135 OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
136 OP(LP(dst+8),rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \
137 OP(LP(dst+12),rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \
144 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
145 { OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
147 static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
148 { OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
150 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
151 { OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
153 static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
154 { OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
156 static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
158 uint32_t a0,a1,a2,a3; \
159 UNPACK(a0,a1,LPC(src1),LPC(src2)); \
160 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
161 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
162 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
163 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
164 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
173 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
175 uint32_t a0,a1,a2,a3; \
176 UNPACK(a0,a1,LPC(src1),LPC(src2)); \
177 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
178 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
179 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
180 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
181 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
190 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
192 uint32_t a0,a1,a2,a3; \
193 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
194 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
195 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
196 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
197 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
198 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
207 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
209 uint32_t a0,a1,a2,a3; \
210 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
211 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
212 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
213 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
214 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
215 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
224 static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
226 uint32_t a0,a1,a2,a3; \
227 UNPACK(a0,a1,LPC(src1),LPC(src2)); \
228 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
229 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
/* BUG FIX: the src+4 group must be stored to dst+4 — it previously wrote \
   dst+8, leaving dst[4..7] unwritten and storing dst[8..11] twice.       \
   The no_rnd_pixels16_l4_aligned variant already stores dst+4 here.      */ \
230 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
231 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
232 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
233 UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \
234 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
235 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
236 UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \
237 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
238 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
247 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
249 uint32_t a0,a1,a2,a3; \
250 UNPACK(a0,a1,LPC(src1),LPC(src2)); \
251 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
252 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
253 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
254 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
255 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
256 UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \
257 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
258 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
259 UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \
260 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
261 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
270 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
272 uint32_t a0,a1,a2,a3; \
273 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
274 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
275 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
/* BUG FIX: the src+4 group must be stored to dst+4 — it previously wrote \
   dst+8, leaving dst[4..7] unwritten and storing dst[8..11] twice.       \
   The no_rnd_pixels16_l4_aligned0 variant already stores dst+4 here.     */ \
276 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
277 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
278 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
279 UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \
280 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
281 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
282 UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \
283 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
284 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
293 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
295 uint32_t a0,a1,a2,a3; \
296 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
297 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
298 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
299 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
300 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
301 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
302 UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \
303 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
304 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
305 UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \
306 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
307 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
/* Ops plugged into PIXOP2: 'a' is a 4-byte packed-pixel lvalue (accessed
   through LP()), 'b' the packed 4-pixel result.  rnd_avg32() is
   presumably the rounding byte-wise-average helper declared elsewhere in
   libavcodec — confirm against rnd_avg.h. */
317 #define op_avg(a, b) a = rnd_avg32(a,b)
/* Plain store: overwrite the destination word with the packed result. */
318 #define op_put(a, b) a = b
/*
 * avg2/avg4: 2- and 4-way averages with round-to-nearest (+1 before >>1,
 * +2 before >>2).
 *
 * Fix: arguments and the whole expansion are now fully parenthesized.
 * The previous form ((a+b+1)>>1) mis-evaluated when a caller passed an
 * expression with lower-precedence operators (e.g. avg2(x<<1, y) expanded
 * to x << 1+y+1).  Behavior is unchanged for all plain-variable callers.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
331 const int A=(16-x16)*(16-y16);
332 const int B=( x16)*(16-y16);
333 const int C=(16-x16)*( y16);
334 const int D=( x16)*( y16);
340 t0 = *s0++; t2 = *s1++;
341 t1 = *s0++; t3 = *s1++;
342 dst[0]= (A*t0 + B*t1 + C*t2 + D*t3 +
rounder)>>8;
343 t0 = *s0++; t2 = *s1++;
344 dst[1]= (A*t1 + B*t0 + C*t3 + D*t2 +
rounder)>>8;
345 t1 = *s0++; t3 = *s1++;
346 dst[2]= (A*t0 + B*t1 + C*t2 + D*t3 +
rounder)>>8;
347 t0 = *s0++; t2 = *s1++;
348 dst[3]= (A*t1 + B*t0 + C*t3 + D*t2 +
rounder)>>8;
349 t1 = *s0++; t3 = *s1++;
350 dst[4]= (A*t0 + B*t1 + C*t2 + D*t3 +
rounder)>>8;
351 t0 = *s0++; t2 = *s1++;
352 dst[5]= (A*t1 + B*t0 + C*t3 + D*t2 +
rounder)>>8;
353 t1 = *s0++; t3 = *s1++;
354 dst[6]= (A*t0 + B*t1 + C*t2 + D*t3 +
rounder)>>8;
355 t0 = *s0++; t2 = *s1++;
356 dst[7]= (A*t1 + B*t0 + C*t3 + D*t2 +
rounder)>>8;
362 #define H264_CHROMA_MC(OPNAME, OP)\
363 static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
364 const int A=(8-x)*(8-y);\
365 const int B=( x)*(8-y);\
366 const int C=(8-x)*( y);\
367 const int D=( x)*( y);\
369 assert(x<8 && y<8 && x>=0 && y>=0);\
374 uint8_t *s1 = src+stride; \
375 t0 = *s0++; t2 = *s1++; \
376 t1 = *s0++; t3 = *s1++; \
377 OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
378 t0 = *s0++; t2 = *s1++; \
379 OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
385 static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
386 const int A=(8-x)*(8-y);\
387 const int B=( x)*(8-y);\
388 const int C=(8-x)*( y);\
389 const int D=( x)*( y);\
391 assert(x<8 && y<8 && x>=0 && y>=0);\
396 uint8_t *s1 = src+stride; \
397 t0 = *s0++; t2 = *s1++; \
398 t1 = *s0++; t3 = *s1++; \
399 OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
400 t0 = *s0++; t2 = *s1++; \
401 OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
402 t1 = *s0++; t3 = *s1++; \
403 OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
404 t0 = *s0++; t2 = *s1++; \
405 OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
411 static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
412 const int A=(8-x)*(8-y);\
413 const int B=( x)*(8-y);\
414 const int C=(8-x)*( y);\
415 const int D=( x)*( y);\
417 assert(x<8 && y<8 && x>=0 && y>=0);\
422 uint8_t *s1 = src+stride; \
423 t0 = *s0++; t2 = *s1++; \
424 t1 = *s0++; t3 = *s1++; \
425 OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
426 t0 = *s0++; t2 = *s1++; \
427 OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
428 t1 = *s0++; t3 = *s1++; \
429 OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
430 t0 = *s0++; t2 = *s1++; \
431 OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
432 t1 = *s0++; t3 = *s1++; \
433 OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\
434 t0 = *s0++; t2 = *s1++; \
435 OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\
436 t1 = *s0++; t3 = *s1++; \
437 OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\
438 t0 = *s0++; t2 = *s1++; \
439 OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\
/* Ops for H264_CHROMA_MC: 'b' is the bilinear sum A*t0+B*t1+C*t2+D*t3,
   where A+B+C+D = (8-x)*(8-y) + x*(8-y) + (8-x)*y + x*y = 64, so
   ((b)+32)>>6 rescales the weighted sum to pixel range with rounding.
   op_avg then averages the prediction with the existing dst byte,
   rounding up (+1). */
445 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
/* op_put: store the rounded prediction directly. */
446 #define op_put(a, b) a = (((b) + 32)>>6)
453 #define QPEL_MC(r, OPNAME, RND, OP) \
454 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
455 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
458 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
464 OP(dst[0], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
466 OP(dst[1], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
468 OP(dst[2], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
470 OP(dst[3], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
472 OP(dst[4], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
473 OP(dst[5], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
474 OP(dst[6], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
475 OP(dst[7], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
481 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
482 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
485 uint8_t *s = src, *d=dst;\
486 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
487 src0 = *s; s+=srcStride; \
488 src1 = *s; s+=srcStride; \
489 src2 = *s; s+=srcStride; \
490 src3 = *s; s+=srcStride; \
491 src4 = *s; s+=srcStride; \
492 OP(*d, (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));d+=dstStride;\
493 src5 = *s; s+=srcStride; \
494 OP(*d, (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));d+=dstStride;\
495 src6 = *s; s+=srcStride; \
496 OP(*d, (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));d+=dstStride;\
497 src7 = *s; s+=srcStride; \
498 OP(*d, (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));d+=dstStride;\
500 OP(*d, (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));d+=dstStride;\
501 OP(*d, (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));d+=dstStride;\
502 OP(*d, (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));d+=dstStride;\
503 OP(*d, (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
509 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
510 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
513 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
514 int src9,src10,src11,src12,src13,src14,src15,src16;\
520 OP(dst[ 0], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
522 OP(dst[ 1], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
524 OP(dst[ 2], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
526 OP(dst[ 3], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
528 OP(dst[ 4], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
530 OP(dst[ 5], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
532 OP(dst[ 6], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
534 OP(dst[ 7], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
536 OP(dst[ 8], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
538 OP(dst[ 9], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
540 OP(dst[10], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
542 OP(dst[11], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
544 OP(dst[12], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
545 OP(dst[13], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
546 OP(dst[14], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
547 OP(dst[15], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
553 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
554 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
557 uint8_t *s = src, *d=dst;\
558 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
559 int src9,src10,src11,src12,src13,src14,src15,src16;\
560 src0 = *s; s+=srcStride; \
561 src1 = *s; s+=srcStride; \
562 src2 = *s; s+=srcStride; \
563 src3 = *s; s+=srcStride; \
564 src4 = *s; s+=srcStride; \
565 OP(*d, (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));d+=dstStride;\
566 src5 = *s; s+=srcStride; \
567 OP(*d, (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));d+=dstStride;\
568 src6 = *s; s+=srcStride; \
569 OP(*d, (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));d+=dstStride;\
570 src7 = *s; s+=srcStride; \
571 OP(*d, (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));d+=dstStride;\
572 src8 = *s; s+=srcStride; \
573 OP(*d, (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));d+=dstStride;\
574 src9 = *s; s+=srcStride; \
575 OP(*d, (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));d+=dstStride;\
576 src10 = *s; s+=srcStride; \
577 OP(*d, (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));d+=dstStride;\
578 src11 = *s; s+=srcStride; \
579 OP(*d, (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));d+=dstStride;\
580 src12 = *s; s+=srcStride; \
581 OP(*d, (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));d+=dstStride;\
582 src13 = *s; s+=srcStride; \
583 OP(*d, (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));d+=dstStride;\
584 src14 = *s; s+=srcStride; \
585 OP(*d, (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));d+=dstStride;\
586 src15 = *s; s+=srcStride; \
587 OP(*d, (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));d+=dstStride;\
589 OP(*d, (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));d+=dstStride;\
590 OP(*d, (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));d+=dstStride;\
591 OP(*d, (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));d+=dstStride;\
592 OP(*d, (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
598 static void OPNAME ## qpel8_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\
599 OPNAME ## pixels8_c(dst, src, stride, 8);\
602 static void OPNAME ## qpel8_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\
604 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
605 OPNAME ## pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);\
608 static void OPNAME ## qpel8_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\
609 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
612 static void OPNAME ## qpel8_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\
614 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
615 OPNAME ## pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);\
618 static void OPNAME ## qpel8_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
621 copy_block9(full, src, 16, stride, 9);\
622 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
623 OPNAME ## pixels8_l2_aligned(dst, full, half, stride, 16, 8, 8);\
626 static void OPNAME ## qpel8_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
628 copy_block9(full, src, 16, stride, 9);\
629 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
632 static void OPNAME ## qpel8_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
635 copy_block9(full, src, 16, stride, 9);\
636 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
637 OPNAME ## pixels8_l2_aligned(dst, full+16, half, stride, 16, 8, 8);\
639 static void OPNAME ## qpel8_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
643 copy_block9(full, src, 16, stride, 9);\
644 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
645 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
646 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
647 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
649 static void OPNAME ## qpel8_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
653 copy_block9(full, src, 16, stride, 9);\
654 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
655 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
656 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
657 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
659 static void OPNAME ## qpel8_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
663 copy_block9(full, src, 16, stride, 9);\
664 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
665 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
666 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
667 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
669 static void OPNAME ## qpel8_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
673 copy_block9(full, src, 16, stride, 9);\
674 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
675 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
676 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
677 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
679 static void OPNAME ## qpel8_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
682 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
683 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
684 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
686 static void OPNAME ## qpel8_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
689 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
690 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
691 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
693 static void OPNAME ## qpel8_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
696 copy_block9(full, src, 16, stride, 9);\
697 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
698 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
699 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
701 static void OPNAME ## qpel8_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
704 copy_block9(full, src, 16, stride, 9);\
705 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
706 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
707 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
709 static void OPNAME ## qpel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
711 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
712 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
714 static void OPNAME ## qpel16_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\
715 OPNAME ## pixels16_c(dst, src, stride, 16);\
718 static void OPNAME ## qpel16_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\
720 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
721 OPNAME ## pixels16_l2_aligned2(dst, src, half, stride, stride, 16, 16);\
724 static void OPNAME ## qpel16_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\
725 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
728 static void OPNAME ## qpel16_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\
730 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
731 OPNAME ## pixels16_l2_aligned2(dst, src+1, half, stride, stride, 16, 16);\
734 static void OPNAME ## qpel16_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
735 uint8_t full[24*17];\
737 copy_block17(full, src, 24, stride, 17);\
738 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
739 OPNAME ## pixels16_l2_aligned(dst, full, half, stride, 24, 16, 16);\
742 static void OPNAME ## qpel16_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
743 uint8_t full[24*17];\
744 copy_block17(full, src, 24, stride, 17);\
745 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
748 static void OPNAME ## qpel16_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
749 uint8_t full[24*17];\
751 copy_block17(full, src, 24, stride, 17);\
752 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
753 OPNAME ## pixels16_l2_aligned(dst, full+24, half, stride, 24, 16, 16);\
755 static void OPNAME ## qpel16_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
756 uint8_t full[24*17];\
758 uint8_t halfHV[256];\
759 copy_block17(full, src, 24, stride, 17);\
760 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
761 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
762 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
763 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
765 static void OPNAME ## qpel16_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
766 uint8_t full[24*17];\
768 uint8_t halfHV[256];\
769 copy_block17(full, src, 24, stride, 17);\
770 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
771 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
772 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
773 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
775 static void OPNAME ## qpel16_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
776 uint8_t full[24*17];\
778 uint8_t halfHV[256];\
779 copy_block17(full, src, 24, stride, 17);\
780 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
781 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
782 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
783 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
785 static void OPNAME ## qpel16_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
786 uint8_t full[24*17];\
788 uint8_t halfHV[256];\
789 copy_block17(full, src, 24, stride, 17);\
790 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
791 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
792 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
793 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
795 static void OPNAME ## qpel16_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
797 uint8_t halfHV[256];\
798 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
799 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
800 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
802 static void OPNAME ## qpel16_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
804 uint8_t halfHV[256];\
805 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
806 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
807 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
809 static void OPNAME ## qpel16_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
810 uint8_t full[24*17];\
812 copy_block17(full, src, 24, stride, 17);\
813 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
814 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
815 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
817 static void OPNAME ## qpel16_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
818 uint8_t full[24*17];\
820 copy_block17(full, src, 24, stride, 17);\
821 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
822 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
823 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
825 static void OPNAME ## qpel16_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
827 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
828 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* Ops for QPEL_MC: 'b' is the 8-tap filter sum with weights 20,-6,3,-1
   applied to symmetric pairs (total weight 2*(20-6+3-1) = 32), so >>5
   rescales to pixel range.  cm is ff_cropTbl + MAX_NEG_CROP, which
   presumably clamps the result to 0..255 — confirm against ff_cropTbl's
   definition.  "+16" rounds to nearest; the _no_rnd variants use "+15"
   (no-rounding mode used by MPEG-4 b-frame averaging). */
831 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
832 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
833 #define op_put(a, b) a = cm[((b) + 16)>>5]
834 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
838 QPEL_MC(0, avg_ , _ ,
op_avg)
845 #define H264_LOWPASS(OPNAME, OP, OP2) \
846 static inline void OPNAME ## h264_qpel_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
847 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
849 int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
857 OP(dst[0], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
859 OP(dst[1], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
861 OP(dst[2], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
863 OP(dst[3], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
865 int src7,src8,src9,src10; \
867 OP(dst[4], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
869 OP(dst[5], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
871 OP(dst[6], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
873 OP(dst[7], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
875 int src11,src12,src13,src14,src15,src16,src17,src18; \
877 OP(dst[8] , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
879 OP(dst[9] , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
881 OP(dst[10], (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
883 OP(dst[11], (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
885 OP(dst[12], (src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
887 OP(dst[13], (src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
889 OP(dst[14], (src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
891 OP(dst[15], (src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
/* One column of the H.264 6-tap vertical qpel lowpass filter, taps \
 * (1,-5,20,20,-5,1). The window starts two rows above src (srcB,srcA), \
 * and each OP() emits one output pixel while exactly one new source row \
 * is fetched, so every source byte is read once. \
 * NOTE(review): this is part of a larger macro body (every line ends in \
 * a backslash); the #define header, the loop over the w columns and the \
 * closing braces are not visible in this excerpt. */ \
899 static inline void OPNAME ## h264_qpel_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
900 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
902 int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
903 uint8_t *s = src-2*srcStride,*d=dst;\
904 srcB = *s; s+=srcStride;\
905 srcA = *s; s+=srcStride;\
906 src0 = *s; s+=srcStride;\
907 src1 = *s; s+=srcStride;\
908 src2 = *s; s+=srcStride;\
909 src3 = *s; s+=srcStride;\
910 OP(*d, (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));d+=dstStride;\
911 src4 = *s; s+=srcStride;\
912 OP(*d, (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));d+=dstStride;\
913 src5 = *s; s+=srcStride;\
914 OP(*d, (src2+src3)*20 - (src1+src4)*5 + (src0+src5));d+=dstStride;\
915 src6 = *s; s+=srcStride;\
916 OP(*d, (src3+src4)*20 - (src2+src5)*5 + (src1+src6));d+=dstStride;\
/* rows 4..7 — presumably only executed for h >= 8; the guard (original \
 * line 917) is not visible in this excerpt — TODO confirm. */ \
918 int src7,src8,src9,src10; \
919 src7 = *s; s+=srcStride;\
920 OP(*d, (src4+src5)*20 - (src3+src6)*5 + (src2+src7));d+=dstStride;\
921 src8 = *s; s+=srcStride;\
922 OP(*d, (src5+src6)*20 - (src4+src7)*5 + (src3+src8));d+=dstStride;\
923 src9 = *s; s+=srcStride;\
924 OP(*d, (src6+src7)*20 - (src5+src8)*5 + (src4+src9));d+=dstStride;\
925 src10 = *s; s+=srcStride;\
926 OP(*d, (src7+src8)*20 - (src6+src9)*5 + (src5+src10));d+=dstStride;\
/* rows 8..15 — presumably only for h == 16; guard (original line 927) \
 * not visible in this excerpt — TODO confirm. */ \
928 int src11,src12,src13,src14,src15,src16,src17,src18; \
929 src11 = *s; s+=srcStride;\
930 OP(*d , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));d+=dstStride;\
931 src12 = *s; s+=srcStride;\
932 OP(*d , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));d+=dstStride;\
933 src13 = *s; s+=srcStride;\
934 OP(*d, (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));d+=dstStride;\
935 src14 = *s; s+=srcStride;\
936 OP(*d, (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));d+=dstStride;\
937 src15 = *s; s+=srcStride;\
938 OP(*d, (src12+src13)*20 - (src11+src14)*5 + (src10+src15));d+=dstStride;\
939 src16 = *s; s+=srcStride;\
940 OP(*d, (src13+src14)*20 - (src12+src15)*5 + (src11+src16));d+=dstStride;\
941 src17 = *s; s+=srcStride;\
942 OP(*d, (src14+src15)*20 - (src13+src16)*5 + (src12+src17));d+=dstStride;\
943 src18 = *s; s+=srcStride;\
944 OP(*d, (src15+src16)*20 - (src14+src17)*5 + (src13+src18));d+=dstStride;\
/* 2-D (center) H.264 qpel lowpass: first pass applies the 6-tap filter \
 * along srcStride and stores the UNclipped intermediate sums into the \
 * int16_t tmp buffer; second pass filters tmp along tmpStride and writes \
 * through OP2, which (per the op2_* macros below) uses the wider \
 * (x+512)>>10 rounding appropriate for twice-filtered values. \
 * NOTE(review): this is part of a larger macro body; the #define header, \
 * the per-column source loads feeding src0..src18 (original lines \
 * 959-1005 are sampled out), the column loops and the closing braces \
 * are not visible in this excerpt. */ \
952 static inline void OPNAME ## h264_qpel_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride,int w,int h){\
953 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
958 int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
966 tmp[0] = ((src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
968 tmp[1] = ((src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
970 tmp[2] = ((src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
972 tmp[3] = ((src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
974 int src7,src8,src9,src10; \
976 tmp[4] = ((src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
978 tmp[5] = ((src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
980 tmp[6] = ((src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
982 tmp[7] = ((src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
984 int src11,src12,src13,src14,src15,src16,src17,src18; \
986 tmp[8] = ((src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
988 tmp[9] = ((src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
990 tmp[10] = ((src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
992 tmp[11] = ((src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
994 tmp[12] = ((src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
996 tmp[13] = ((src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
998 tmp[14] = ((src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
1000 tmp[15] = ((src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
/* Rewind tmp to the start of the intermediate data (h+5 filtered rows \
 * were produced, the first pass advanced past all but 2 of them). */ \
1006 tmp -= tmpStride*(h+5-2);\
/* Second pass: same sliding 6-tap window, now over the int16 tmp data, \
 * one new tmp value loaded per output pixel. */ \
1009 int tmpB,tmpA,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5,tmp6;\
1010 int16_t *s = tmp-2*tmpStride; \
1012 tmpB = *s; s+=tmpStride;\
1013 tmpA = *s; s+=tmpStride;\
1014 tmp0 = *s; s+=tmpStride;\
1015 tmp1 = *s; s+=tmpStride;\
1016 tmp2 = *s; s+=tmpStride;\
1017 tmp3 = *s; s+=tmpStride;\
1018 OP2(*d, (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));d+=dstStride;\
1019 tmp4 = *s; s+=tmpStride;\
1020 OP2(*d, (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));d+=dstStride;\
1021 tmp5 = *s; s+=tmpStride;\
1022 OP2(*d, (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));d+=dstStride;\
1023 tmp6 = *s; s+=tmpStride;\
1024 OP2(*d, (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));d+=dstStride;\
/* outputs 4..7 — presumably guarded for h >= 8; guard not visible here. */ \
1026 int tmp7,tmp8,tmp9,tmp10; \
1027 tmp7 = *s; s+=tmpStride;\
1028 OP2(*d, (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));d+=dstStride;\
1029 tmp8 = *s; s+=tmpStride;\
1030 OP2(*d, (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));d+=dstStride;\
1031 tmp9 = *s; s+=tmpStride;\
1032 OP2(*d, (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));d+=dstStride;\
1033 tmp10 = *s; s+=tmpStride;\
1034 OP2(*d, (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));d+=dstStride;\
/* outputs 8..15 — presumably h == 16 only; guard not visible here. */ \
1036 int tmp11,tmp12,tmp13,tmp14,tmp15,tmp16,tmp17,tmp18; \
1037 tmp11 = *s; s+=tmpStride;\
1038 OP2(*d , (tmp8 +tmp9 )*20 - (tmp7 +tmp10)*5 + (tmp6 +tmp11));d+=dstStride;\
1039 tmp12 = *s; s+=tmpStride;\
1040 OP2(*d , (tmp9 +tmp10)*20 - (tmp8 +tmp11)*5 + (tmp7 +tmp12));d+=dstStride;\
1041 tmp13 = *s; s+=tmpStride;\
1042 OP2(*d, (tmp10+tmp11)*20 - (tmp9 +tmp12)*5 + (tmp8 +tmp13));d+=dstStride;\
1043 tmp14 = *s; s+=tmpStride;\
1044 OP2(*d, (tmp11+tmp12)*20 - (tmp10+tmp13)*5 + (tmp9 +tmp14));d+=dstStride;\
1045 tmp15 = *s; s+=tmpStride;\
1046 OP2(*d, (tmp12+tmp13)*20 - (tmp11+tmp14)*5 + (tmp10+tmp15));d+=dstStride;\
1047 tmp16 = *s; s+=tmpStride;\
1048 OP2(*d, (tmp13+tmp14)*20 - (tmp12+tmp15)*5 + (tmp11+tmp16));d+=dstStride;\
1049 tmp17 = *s; s+=tmpStride;\
1050 OP2(*d, (tmp14+tmp15)*20 - (tmp13+tmp16)*5 + (tmp12+tmp17));d+=dstStride;\
1051 tmp18 = *s; s+=tmpStride;\
1052 OP2(*d, (tmp15+tmp16)*20 - (tmp14+tmp17)*5 + (tmp13+tmp18));d+=dstStride;\
/* Size-specialized wrappers: bind w == h == 4/8/16 onto the generic \
 * h/v/hv lowpass filters above, giving the fixed-size entry points the \
 * H264_MC macro below expects. \
 * NOTE(review): the closing "}" line of each wrapper (odd-numbered \
 * original lines 1062, 1065, ...) is not visible in this excerpt. */ \
1060 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1061 OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,4,4); \
1063 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1064 OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,8,8); \
1066 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1067 OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,16,16); \
1070 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1071 OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,4,4); \
1073 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1074 OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,8,8); \
1076 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1077 OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,16,16); \
1079 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1080 OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,4,4); \
1082 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1083 OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,8,8); \
1085 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1086 OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,16,16); \
/* H264_MC(OPNAME, SIZE): instantiates the 16 quarter-pel motion
 * compensation entry points for one block size. The _mcXY suffix encodes
 * the sub-pel offset: X = horizontal quarter (0..3), Y = vertical.
 * Half-pel planes come from the *_lowpass filters above; the remaining
 * quarter-pel positions average two planes with the *_l2_aligned*
 * helpers. OPNAME is presumably "put_" or "avg_" (matching the op_avg/
 * op_put macros below) — the instantiations are not visible here.
 * NOTE(review): the closing "}" line of each generated function is not
 * visible in this excerpt. */
1089 #define H264_MC(OPNAME, SIZE) \
/* mc00: integer-pel position — plain block copy (or average into dst). */\
1090 static void OPNAME ## h264_qpel ## SIZE ## _mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\
1091 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
/* mc10/mc30: horizontal quarter-pel — average src (resp. src+1) with the \
 * horizontal half-pel plane. */\
1094 static void OPNAME ## h264_qpel ## SIZE ## _mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\
1095 uint8_t half[SIZE*SIZE];\
1096 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
1097 OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src, half, stride, stride, SIZE, SIZE);\
/* mc20: horizontal half-pel — 6-tap h lowpass straight into dst. */\
1100 static void OPNAME ## h264_qpel ## SIZE ## _mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\
1101 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
1104 static void OPNAME ## h264_qpel ## SIZE ## _mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\
1105 uint8_t half[SIZE*SIZE];\
1106 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
1107 OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src+1, half, stride, stride, SIZE, SIZE);\
/* mc01/mc03: vertical quarter-pel — copy SIZE+5 rows (2 above, 3 below \
 * for the 6-tap window) into "full", v-lowpass into "half", then average \
 * with full_mid (resp. full_mid+SIZE for the lower quarter). */\
1110 static void OPNAME ## h264_qpel ## SIZE ## _mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
1111 uint8_t full[SIZE*(SIZE+5)];\
1112 uint8_t * const full_mid= full + SIZE*2;\
1113 uint8_t half[SIZE*SIZE];\
1114 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1115 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
1116 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
/* mc02: vertical half-pel. */\
1119 static void OPNAME ## h264_qpel ## SIZE ## _mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
1120 uint8_t full[SIZE*(SIZE+5)];\
1121 uint8_t * const full_mid= full + SIZE*2;\
1122 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1123 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
1126 static void OPNAME ## h264_qpel ## SIZE ## _mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
1127 uint8_t full[SIZE*(SIZE+5)];\
1128 uint8_t * const full_mid= full + SIZE*2;\
1129 uint8_t half[SIZE*SIZE];\
1130 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1131 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
1132 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
/* mc11/mc31/mc13/mc33: diagonal quarter-pel — average of the horizontal \
 * half-pel plane (h-lowpass of src or src+stride) and the vertical \
 * half-pel plane (v-lowpass of the column at src or src+1). */\
1135 static void OPNAME ## h264_qpel ## SIZE ## _mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
1136 uint8_t full[SIZE*(SIZE+5)];\
1137 uint8_t * const full_mid= full + SIZE*2;\
1138 uint8_t halfH[SIZE*SIZE];\
1139 uint8_t halfV[SIZE*SIZE];\
1140 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
1141 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1142 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1143 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
1146 static void OPNAME ## h264_qpel ## SIZE ## _mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
1147 uint8_t full[SIZE*(SIZE+5)];\
1148 uint8_t * const full_mid= full + SIZE*2;\
1149 uint8_t halfH[SIZE*SIZE];\
1150 uint8_t halfV[SIZE*SIZE];\
1151 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
1152 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
1153 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1154 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
1157 static void OPNAME ## h264_qpel ## SIZE ## _mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
1158 uint8_t full[SIZE*(SIZE+5)];\
1159 uint8_t * const full_mid= full + SIZE*2;\
1160 uint8_t halfH[SIZE*SIZE];\
1161 uint8_t halfV[SIZE*SIZE];\
1162 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
1163 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1164 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1165 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
1168 static void OPNAME ## h264_qpel ## SIZE ## _mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
1169 uint8_t full[SIZE*(SIZE+5)];\
1170 uint8_t * const full_mid= full + SIZE*2;\
1171 uint8_t halfH[SIZE*SIZE];\
1172 uint8_t halfV[SIZE*SIZE];\
1173 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
1174 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
1175 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1176 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
/* mc22: center half-pel — full 2-D hv lowpass through the int16 tmp. */\
1179 static void OPNAME ## h264_qpel ## SIZE ## _mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
1180 int16_t tmp[SIZE*(SIZE+5)];\
1181 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
/* mc21/mc23: average of the horizontal half-pel plane and the hv plane. */\
1184 static void OPNAME ## h264_qpel ## SIZE ## _mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
1185 int16_t tmp[SIZE*(SIZE+5)];\
1186 uint8_t halfH[SIZE*SIZE];\
1187 uint8_t halfHV[SIZE*SIZE];\
1188 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
1189 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
1190 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
1193 static void OPNAME ## h264_qpel ## SIZE ## _mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
1194 int16_t tmp[SIZE*(SIZE+5)];\
1195 uint8_t halfH[SIZE*SIZE];\
1196 uint8_t halfHV[SIZE*SIZE];\
1197 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
1198 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
1199 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
/* mc12/mc32: average of the vertical half-pel plane and the hv plane. */\
1202 static void OPNAME ## h264_qpel ## SIZE ## _mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
1203 uint8_t full[SIZE*(SIZE+5)];\
1204 uint8_t * const full_mid= full + SIZE*2;\
1205 int16_t tmp[SIZE*(SIZE+5)];\
1206 uint8_t halfV[SIZE*SIZE];\
1207 uint8_t halfHV[SIZE*SIZE];\
1208 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1209 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1210 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
1211 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
1214 static void OPNAME ## h264_qpel ## SIZE ## _mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
1215 uint8_t full[SIZE*(SIZE+5)];\
1216 uint8_t * const full_mid= full + SIZE*2;\
1217 int16_t tmp[SIZE*(SIZE+5)];\
1218 uint8_t halfV[SIZE*SIZE];\
1219 uint8_t halfHV[SIZE*SIZE];\
1220 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
1221 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1222 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
1223 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
/* Final rounding/clipping stages plugged in as the OP/OP2 macro
 * parameters of the lowpass filters above. cm points into ff_cropTbl
 * (see the lowpass bodies), so cm[x] clips x to the 0..255 pixel range.
 *   op_put/op_avg : single-pass sums scaled by 32  -> (b + 16) >> 5
 *   op2_put/op2_avg: two-pass sums scaled by 1024  -> (b + 512) >> 10
 * The *_avg forms then average the clipped result with the pixel already
 * in dst, rounding upward (+1 before >>1). */
1226 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1228 #define op_put(a, b) a = cm[((b) + 16)>>5]
1229 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
1230 #define op2_put(a, b) a = cm[((b) + 512)>>10]
/* One 8-pixel row of a horizontal 4-tap filter, taps (-1,9,9,-1) with
 * round-to-nearest ((sum + 8) >> 4), clipped through cm. Presumably the
 * WMV2 mspel half-pel filter — TODO confirm against the function header.
 * NOTE(review): the enclosing function's signature, the loads of
 * src_1..src9 and any row loop fall outside the lines visible in this
 * excerpt. */
1250 int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
1256 dst[0]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1258 dst[1]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1260 dst[2]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1262 dst[3]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1264 dst[4]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1266 dst[5]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1268 dst[6]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1270 dst[7]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
/* Vertical counterpart of the 4-tap (-1,9,9,-1)/16 filter above: one
 * column, reading down srcStride and writing down dstStride, loading one
 * new source row per output pixel.
 * NOTE(review): the enclosing function's signature is outside the
 * visible lines, and the load of src9 (original line 1299) is missing
 * from this excerpt even though src9 is used on the last output line. */
1280 int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
1282 src_1 = *(s-srcStride);
1283 src0 = *s; s+=srcStride;
1284 src1 = *s; s+=srcStride;
1285 src2 = *s; s+=srcStride;
1286 *d= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; d+=dstStride;
1287 src3 = *s; s+=srcStride;
1288 *d= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; d+=dstStride;
1289 src4 = *s; s+=srcStride;
1290 *d= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; d+=dstStride;
1291 src5 = *s; s+=srcStride;
1292 *d= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; d+=dstStride;
1293 src6 = *s; s+=srcStride;
1294 *d= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; d+=dstStride;
1295 src7 = *s; s+=srcStride;
1296 *d= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; d+=dstStride;
1297 src8 = *s; s+=srcStride;
1298 *d= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; d+=dstStride;
1300 *d= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; d+=dstStride;
1313 put_pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);
1323 put_pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);
1337 put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);
1346 put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);