#define PIXOP2(OPNAME, OP) \
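/* PIXOP2 expands to the OPNAME##_pixels{4,8,16}_l2_*() and *_l4_*()      \
   averaging helpers: _l2 blends two source rows per output, _l4 four.    \
   LP()/LPC() are aligned 32-bit store/load wrappers and AV_RN32() reads  \
   unaligned, so the _aligned* suffix encodes which sources may be        \
   misaligned. */                                                         \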
static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
    OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
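/* "_aligned2": src1 may be misaligned (read via AV_RN32); src2 must stay aligned. */ \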
static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
    OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
    OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
    OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
    OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \
    OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \
static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
    OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
    OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
    OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \
    OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
    OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
    OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
    OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
    OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
    OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
    OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
    OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
    OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
    OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
    OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
    OP(LP(dst+8),no_rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \
    OP(LP(dst+12),no_rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \
static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
    OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
    OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
    OP(LP(dst+8),rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \
    OP(LP(dst+12),rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
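/* The _aligned1 forwarders above just swap src1/src2 so the misaligned-  \
   source path of _aligned2 serves either operand.  The _l4 variants      \
   below average four sources per row: UNPACK() splits each pair of       \
   packed words into a high part (bits 7:2 of each byte, pre-shifted)     \
   and a low part (bits 1:0) so the four-way sum cannot overflow a byte   \
   lane, and rnd_PACK()/no_rnd_PACK() recombine them, in effect a         \
   per-byte (s1+s2+s3+s4+2)>>2 resp. (s1+s2+s3+s4+1)>>2.  "_aligned0"     \
   again means src1 may be misaligned. */                                 \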
static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    uint32_t a0,a1,a2,a3; \
    UNPACK(a0,a1,LPC(src1),LPC(src2)); \
    UNPACK(a2,a3,LPC(src3),LPC(src4)); \
    OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
    UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
    OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    uint32_t a0,a1,a2,a3; \
    UNPACK(a0,a1,LPC(src1),LPC(src2)); \
    UNPACK(a2,a3,LPC(src3),LPC(src4)); \
    OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
    UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
    OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    uint32_t a0,a1,a2,a3; \
    UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
    UNPACK(a2,a3,LPC(src3),LPC(src4)); \
    OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
    UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
    OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    uint32_t a0,a1,a2,a3; \
    UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
    UNPACK(a2,a3,LPC(src3),LPC(src4)); \
    OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
    UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
    OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    uint32_t a0,a1,a2,a3; \
    UNPACK(a0,a1,LPC(src1),LPC(src2)); \
    UNPACK(a2,a3,LPC(src3),LPC(src4)); \
    OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
    UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
    OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \
    UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
    OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \
    UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
    OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    uint32_t a0,a1,a2,a3; \
    UNPACK(a0,a1,LPC(src1),LPC(src2)); \
    UNPACK(a2,a3,LPC(src3),LPC(src4)); \
    OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
    UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
    OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \
    UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
    OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \
    UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
    OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    uint32_t a0,a1,a2,a3; \
    UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
    UNPACK(a2,a3,LPC(src3),LPC(src4)); \
    OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
    UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
    OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \
    UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
    OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \
    UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
    OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4, int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h){\
    uint32_t a0,a1,a2,a3; \
    UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
    UNPACK(a2,a3,LPC(src3),LPC(src4)); \
    OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
    UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
    OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \
    UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
    OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
    UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \
    UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
    OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
#define op_avg(a, b) a = rnd_avg32(a,b)
#define op_put(a, b) a = b
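/* op_avg blends into the existing destination, op_put overwrites it;
   they are the OP arguments handed to PIXOP2. */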
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
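/*
 * avg2()/avg4() are the scalar counterparts of the packed averages used
 * by PIXOP2.  For reference, a minimal sketch of the 32-bit SWAR helpers
 * assumed above (modeled on libavcodec's rnd_avg.h):
 *
 *     static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
 *     {
 *         return (a | b) - (((a ^ b) & ~0x01010101U) >> 1);  // per byte: (a+b+1)>>1
 *     }
 *
 *     static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
 *     {
 *         return (a & b) + (((a ^ b) & ~0x01010101U) >> 1);  // per byte: (a+b)>>1
 *     }
 */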
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
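    /* 2x2 bilinear interpolation at 1/16-pel resolution: the four weights
       sum to 16*16 = 256, so >>8 renormalizes.  t0..t3 ping-pong between
       even and odd output pixels so each source byte is loaded only once. */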
    t0 = *s0++; t2 = *s1++;
    t1 = *s0++; t3 = *s1++;
    dst[0]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;
    t0 = *s0++; t2 = *s1++;
    dst[1]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;
    t1 = *s0++; t3 = *s1++;
    dst[2]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;
    t0 = *s0++; t2 = *s1++;
    dst[3]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;
    t1 = *s0++; t3 = *s1++;
    dst[4]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;
    t0 = *s0++; t2 = *s1++;
    dst[5]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;
    t1 = *s0++; t3 = *s1++;
    dst[6]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;
    t0 = *s0++; t2 = *s1++;
    dst[7]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;
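/* QPEL_MC generates the MPEG-4 quarter-pel motion compensation routines.
   The half-pel lowpass below is the 8-tap filter (-1, 3, -6, 20, 20, -6,
   3, -1)/32; cm (ff_cropTbl biased by MAX_NEG_CROP) clamps each result to
   0..255.  OP is either a plain store or an average with dst. */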
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
    OP(dst[0], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
    OP(dst[1], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
    OP(dst[2], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
    OP(dst[3], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
    OP(dst[4], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
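    /* Taps that would read past src8 are reflected back inside the row \
       (MPEG-4 qpel edge mirroring). */                                 \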
    OP(dst[5], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
    OP(dst[6], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
    OP(dst[7], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    uint8_t *s = src, *d = dst;\
    int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
    src0 = *s; s+=srcStride; \
    src1 = *s; s+=srcStride; \
    src2 = *s; s+=srcStride; \
    src3 = *s; s+=srcStride; \
    src4 = *s; s+=srcStride; \
    OP(*d, (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4)); d+=dstStride;\
    src5 = *s; s+=srcStride; \
    OP(*d, (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5)); d+=dstStride;\
    src6 = *s; s+=srcStride; \
    OP(*d, (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6)); d+=dstStride;\
    src7 = *s; s+=srcStride; \
    OP(*d, (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7)); d+=dstStride;\
    src8 = *s; s+=srcStride; \
    OP(*d, (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8)); d+=dstStride;\
    OP(*d, (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8)); d+=dstStride;\
    OP(*d, (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7)); d+=dstStride;\
    OP(*d, (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
    int src9,src10,src11,src12,src13,src14,src15,src16;\
    OP(dst[ 0], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
    OP(dst[ 1], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
    OP(dst[ 2], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
    OP(dst[ 3], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
    OP(dst[ 4], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
    OP(dst[ 5], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
    OP(dst[ 6], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
    OP(dst[ 7], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
    OP(dst[ 8], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
    OP(dst[ 9], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
    OP(dst[10], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
    OP(dst[11], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
    OP(dst[12], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
    OP(dst[13], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
    OP(dst[14], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
    OP(dst[15], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    uint8_t *s = src, *d = dst;\
    int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
    int src9,src10,src11,src12,src13,src14,src15,src16;\
    src0 = *s; s+=srcStride; \
    src1 = *s; s+=srcStride; \
    src2 = *s; s+=srcStride; \
    src3 = *s; s+=srcStride; \
    src4 = *s; s+=srcStride; \
    OP(*d, (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 )); d+=dstStride;\
    src5 = *s; s+=srcStride; \
    OP(*d, (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 )); d+=dstStride;\
    src6 = *s; s+=srcStride; \
    OP(*d, (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 )); d+=dstStride;\
    src7 = *s; s+=srcStride; \
    OP(*d, (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 )); d+=dstStride;\
    src8 = *s; s+=srcStride; \
    OP(*d, (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 )); d+=dstStride;\
    src9 = *s; s+=srcStride; \
    OP(*d, (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 )); d+=dstStride;\
    src10 = *s; s+=srcStride; \
    OP(*d, (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10)); d+=dstStride;\
    src11 = *s; s+=srcStride; \
    OP(*d, (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11)); d+=dstStride;\
    src12 = *s; s+=srcStride; \
    OP(*d, (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12)); d+=dstStride;\
    src13 = *s; s+=srcStride; \
    OP(*d, (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13)); d+=dstStride;\
    src14 = *s; s+=srcStride; \
    OP(*d, (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14)); d+=dstStride;\
    src15 = *s; s+=srcStride; \
    OP(*d, (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15)); d+=dstStride;\
    src16 = *s; s+=srcStride; \
    OP(*d, (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16)); d+=dstStride;\
    OP(*d, (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16)); d+=dstStride;\
    OP(*d, (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15)); d+=dstStride;\
    OP(*d, (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
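/* The mcXY entry points below select the quarter-pel position (X,Y) in    \
   quarter-sample units: full-pel copies for 0, the lowpass filter for 2,  \
   and an average of the filter output with the nearest integer/half-pel   \
   samples for 1 and 3.  "full" holds a copy of the source block with the  \
   extra rows/columns the filters need. */                                 \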
static void OPNAME ## qpel8_mc00_sh4(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels8_c(dst, src, stride, 8);\
static void OPNAME ## qpel8_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);\
static void OPNAME ## qpel8_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
static void OPNAME ## qpel8_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);\
static void OPNAME ## qpel8_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2_aligned(dst, full, half, stride, 16, 8, 8);\
static void OPNAME ## qpel8_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
static void OPNAME ## qpel8_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2_aligned(dst, full+16, half, stride, 16, 8, 8);\
static void OPNAME ## qpel8_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
static void OPNAME ## qpel8_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
static void OPNAME ## qpel8_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
static void OPNAME ## qpel8_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
static void OPNAME ## qpel8_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
static void OPNAME ## qpel8_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
static void OPNAME ## qpel8_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
static void OPNAME ## qpel8_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
static void OPNAME ## qpel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
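/* 16x16 versions: the same decomposition as the 8x8 cases with doubled \
   buffers ("full" is 24x17, the half-pel planes 16x16). */              \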
static void OPNAME ## qpel16_mc00_sh4(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels16_c(dst, src, stride, 16);\
static void OPNAME ## qpel16_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2_aligned2(dst, src, half, stride, stride, 16, 16);\
static void OPNAME ## qpel16_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
static void OPNAME ## qpel16_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2_aligned2(dst, src+1, half, stride, stride, 16, 16);\
static void OPNAME ## qpel16_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2_aligned(dst, full, half, stride, 24, 16, 16);\
static void OPNAME ## qpel16_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
static void OPNAME ## qpel16_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2_aligned(dst, full+24, half, stride, 24, 16, 16);\
static void OPNAME ## qpel16_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
static void OPNAME ## qpel16_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
static void OPNAME ## qpel16_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
static void OPNAME ## qpel16_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
static void OPNAME ## qpel16_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
static void OPNAME ## qpel16_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
static void OPNAME ## qpel16_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
static void OPNAME ## qpel16_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
static void OPNAME ## qpel16_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
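/* OP plug-ins for QPEL_MC: the lowpass taps sum to 32, so (b + 16) >> 5
   is a rounded divide of the filter sum by 32 and (b + 15) >> 5 the
   no-rounding variant; indexing cm clamps to 0..255 in the same step. */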
QPEL_MC(0, avg_ , _ , op_avg)
    int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
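    /* 4-tap half-pel filter with taps (-1, 9, 9, -1)/16, rounded (+8, >>4)
       and clamped through cm. */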
    dst[0]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
    dst[1]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
    dst[2]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
    dst[3]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
    dst[4]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
    dst[5]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
    dst[6]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
    dst[7]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
    int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
    src_1 = *(s-srcStride);
    src0 = *s; s+=srcStride;
    src1 = *s; s+=srcStride;
    src2 = *s; s+=srcStride;
    *d= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; d+=dstStride;
    src3 = *s; s+=srcStride;
    *d= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; d+=dstStride;
    src4 = *s; s+=srcStride;
    *d= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; d+=dstStride;
    src5 = *s; s+=srcStride;
    *d= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; d+=dstStride;
    src6 = *s; s+=srcStride;
    *d= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; d+=dstStride;
    src7 = *s; s+=srcStride;
    *d= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; d+=dstStride;
    src8 = *s; s+=srcStride;
    *d= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; d+=dstStride;
    src9 = *s; s+=srcStride;
    *d= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; d+=dstStride;
    put_pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);
    put_pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);
    put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);
    put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);