30 ptrdiff_t line_size,
int h)
37 MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
38 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
39 MMI_ULWC1(%[ftmp1], %[pixels], 0x00)
40 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
44 MMI_SWC1(%[ftmp0], %[
block], 0x00)
45 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
46 MMI_SWC1(%[ftmp1], %[
block], 0x00)
47 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
50 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
51 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
55 : [line_size]
"r"((
mips_reg)line_size)
61 ptrdiff_t line_size,
int h)
68 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
69 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
70 MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
71 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
72 MMI_ULDC1(%[ftmp2], %[pixels], 0x00)
73 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
74 MMI_ULDC1(%[ftmp3], %[pixels], 0x00)
75 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
79 MMI_SDC1(%[ftmp0], %[
block], 0x00)
80 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
81 MMI_SDC1(%[ftmp1], %[
block], 0x00)
82 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
83 MMI_SDC1(%[ftmp2], %[
block], 0x00)
84 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
85 MMI_SDC1(%[ftmp3], %[
block], 0x00)
86 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
89 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
90 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
94 : [line_size]
"r"((
mips_reg)line_size)
100 ptrdiff_t line_size,
int h)
107 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
108 MMI_ULDC1(%[ftmp2], %[pixels], 0x08)
109 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
110 MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
111 MMI_ULDC1(%[ftmp3], %[pixels], 0x08)
112 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
113 MMI_ULDC1(%[ftmp4], %[pixels], 0x00)
114 MMI_ULDC1(%[ftmp6], %[pixels], 0x08)
115 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
116 MMI_ULDC1(%[ftmp5], %[pixels], 0x00)
117 MMI_ULDC1(%[ftmp7], %[pixels], 0x08)
118 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
122 MMI_SDC1(%[ftmp0], %[
block], 0x00)
123 MMI_SDC1(%[ftmp2], %[
block], 0x08)
124 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
125 MMI_SDC1(%[ftmp1], %[
block], 0x00)
126 MMI_SDC1(%[ftmp3], %[
block], 0x08)
127 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
128 MMI_SDC1(%[ftmp4], %[
block], 0x00)
129 MMI_SDC1(%[ftmp6], %[
block], 0x08)
130 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
131 MMI_SDC1(%[ftmp5], %[
block], 0x00)
132 MMI_SDC1(%[ftmp7], %[
block], 0x08)
133 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
136 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
137 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
138 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
139 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
143 : [line_size]
"r"((
mips_reg)line_size)
149 ptrdiff_t line_size,
int h)
157 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
158 MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
159 MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
160 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
161 MMI_ULWC1(%[ftmp2], %[
block], 0x00)
162 MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
166 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
167 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
168 MMI_SWC1(%[ftmp0], %[
block], 0x00)
169 MMI_SWC1(%[ftmp1], %[addr1], 0x00)
170 PTR_ADDU "%[pixels], %[addr0], %[line_size] \n\t"
171 PTR_ADDU "%[block], %[addr1], %[line_size] \n\t"
174 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
175 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
177 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
180 : [line_size]
"r"((
mips_reg)line_size)
186 ptrdiff_t line_size,
int h)
194 PTR_ADDU "%[addr2], %[line_size], %[line_size] \n\t"
196 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
197 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
198 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
199 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
200 MMI_ULDC1(%[ftmp2], %[
block], 0x00)
201 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
202 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
203 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
204 MMI_SDC1(%[ftmp0], %[
block], 0x00)
205 MMI_SDXC1(%[ftmp1], %[
block], %[line_size], 0x00)
206 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
207 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
209 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
210 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
211 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
212 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
213 MMI_ULDC1(%[ftmp2], %[
block], 0x00)
214 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
215 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
216 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
217 MMI_SDC1(%[ftmp0], %[
block], 0x00)
218 MMI_SDXC1(%[ftmp1], %[
block], %[line_size], 0x00)
219 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
220 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
224 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
225 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
228 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
229 [addr2]
"=&r"(addr[2]),
232 : [line_size]
"r"((
mips_reg)line_size)
238 ptrdiff_t line_size,
int h)
247 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
248 MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
249 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
250 MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
251 MMI_ULDC1(%[ftmp5], %[pixels], 0x08)
252 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
253 MMI_ULDC1(%[ftmp2], %[
block], 0x00)
254 MMI_ULDC1(%[ftmp6], %[
block], 0x08)
255 PTR_ADDU "%[addr0], %[block], %[line_size] \n\t"
256 MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
257 MMI_ULDC1(%[ftmp7], %[addr0], 0x08)
258 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
259 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
260 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
261 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
262 MMI_SDC1(%[ftmp0], %[
block], 0x00)
263 MMI_SDC1(%[ftmp4], %[
block], 0x08)
264 MMI_SDC1(%[ftmp1], %[addr0], 0x00)
265 MMI_SDC1(%[ftmp5], %[addr0], 0x08)
266 PTR_ADDU "%[block], %[addr0], %[line_size] \n\t"
268 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
269 MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
270 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
271 MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
272 MMI_ULDC1(%[ftmp5], %[pixels], 0x08)
273 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
274 MMI_ULDC1(%[ftmp2], %[
block], 0x00)
275 MMI_ULDC1(%[ftmp6], %[
block], 0x08)
276 PTR_ADDU "%[addr0], %[block], %[line_size] \n\t"
277 MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
278 MMI_ULDC1(%[ftmp7], %[addr0], 0x08)
279 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
280 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
281 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
282 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
283 MMI_SDC1(%[ftmp0], %[
block], 0x00)
284 MMI_SDC1(%[ftmp4], %[
block], 0x08)
285 MMI_SDC1(%[ftmp1], %[addr0], 0x00)
286 MMI_SDC1(%[ftmp5], %[addr0], 0x08)
287 PTR_ADDU "%[block], %[addr0], %[line_size] \n\t"
290 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
291 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
292 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
293 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
295 [addr0]
"=&r"(addr[0]),
298 : [line_size]
"r"((
mips_reg)line_size)
304 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
314 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
315 MMI_ULWC1(%[ftmp0], %[
src1], 0x00)
316 MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
317 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
318 MMI_ULWC1(%[ftmp2], %[src2], 0x00)
319 MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
320 PTR_ADDU "%[src1], %[addr0], %[src_stride1] \n\t"
321 PTR_ADDU "%[src2], %[addr1], %[src_stride2] \n\t"
325 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
326 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
327 MMI_SWC1(%[ftmp0], %[dst], 0x00)
328 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
329 MMI_SWC1(%[ftmp1], %[dst], 0x00)
330 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
333 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
334 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
337 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
339 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
340 : [dst_stride]
"r"((
mips_reg)dst_stride),
341 [src_stride1]
"r"((
mips_reg)src_stride1),
342 [src_stride2]
"r"((
mips_reg)src_stride2)
348 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
357 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
358 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
359 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
362 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
363 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
364 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
365 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
366 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
367 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
368 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
369 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
370 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
371 MMI_SDC1(%[ftmp0], %[dst], 0x00)
372 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
373 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
374 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
376 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
377 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
378 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
379 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
380 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
381 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
382 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
383 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
384 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
385 MMI_SDC1(%[ftmp0], %[dst], 0x00)
386 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
387 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
388 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
392 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
393 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
396 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
397 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
398 [addr4]
"=&r"(addr[4]),
400 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
401 : [dst_stride]
"r"((
mips_reg)dst_stride),
402 [src_stride1]
"r"((
mips_reg)src_stride1),
403 [src_stride2]
"r"((
mips_reg)src_stride2)
409 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
418 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
419 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
420 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
423 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
424 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
425 MMI_ULDC1(%[ftmp4], %[
src1], 0x08)
426 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
427 MMI_ULDC1(%[ftmp5], %[addr0], 0x08)
428 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
429 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
430 MMI_ULDC1(%[ftmp6], %[src2], 0x08)
431 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
432 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
433 MMI_ULDC1(%[ftmp7], %[addr1], 0x08)
434 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
435 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
436 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
437 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
438 MMI_SDC1(%[ftmp0], %[dst], 0x00)
439 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
440 MMI_SDC1(%[ftmp4], %[dst], 0x08)
441 MMI_SDXC1(%[ftmp5], %[dst], %[dst_stride], 0x08)
442 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
443 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
445 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
446 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
447 MMI_ULDC1(%[ftmp4], %[
src1], 0x08)
448 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
449 MMI_ULDC1(%[ftmp5], %[addr0], 0x08)
450 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
451 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
452 MMI_ULDC1(%[ftmp6], %[src2], 0x08)
453 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
454 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
455 MMI_ULDC1(%[ftmp7], %[addr1], 0x08)
456 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
457 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
458 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
459 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
460 MMI_SDC1(%[ftmp0], %[dst], 0x00)
461 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
462 MMI_SDC1(%[ftmp4], %[dst], 0x08)
463 MMI_SDXC1(%[ftmp5], %[dst], %[dst_stride], 0x08)
464 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
465 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
469 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
470 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
471 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
472 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
475 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
476 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
477 [addr4]
"=&r"(addr[4]),
479 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
480 : [dst_stride]
"r"((
mips_reg)dst_stride),
481 [src_stride1]
"r"((
mips_reg)src_stride1),
482 [src_stride2]
"r"((
mips_reg)src_stride2)
488 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
497 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
498 MMI_ULWC1(%[ftmp0], %[
src1], 0x00)
499 MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
500 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
501 MMI_ULWC1(%[ftmp2], %[src2], 0x00)
502 MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
503 PTR_ADDU "%[src1], %[addr0], %[src_stride1] \n\t"
504 PTR_ADDU "%[src2], %[addr1], %[src_stride2] \n\t"
505 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
506 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
507 PTR_ADDU "%[addr2], %[dst], %[dst_stride] \n\t"
508 MMI_ULWC1(%[ftmp4], %[dst], 0x00)
509 MMI_ULWC1(%[ftmp5], %[addr2], 0x00)
511 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
512 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
513 MMI_SWC1(%[ftmp0], %[dst], 0x00)
514 MMI_SWC1(%[ftmp1], %[addr2], 0x00)
515 PTR_ADDU "%[dst], %[addr2], %[dst_stride] \n\t"
518 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
519 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
520 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
522 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
523 [addr2]
"=&r"(addr[2]),
525 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
526 : [dst_stride]
"r"((
mips_reg)dst_stride),
527 [src_stride1]
"r"((
mips_reg)src_stride1),
528 [src_stride2]
"r"((
mips_reg)src_stride2)
534 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
543 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
544 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
545 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
548 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
549 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
550 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
551 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
552 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
553 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
554 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
555 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
556 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
557 PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
558 MMI_ULDC1(%[ftmp4], %[dst], 0x00)
559 MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
560 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
561 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
562 MMI_SDC1(%[ftmp0], %[dst], 0x00)
563 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
564 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
565 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
567 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
568 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
569 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
570 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
571 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
572 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
573 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
574 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
575 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
576 PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
577 MMI_ULDC1(%[ftmp4], %[dst], 0x00)
578 MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
579 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
580 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
581 MMI_SDC1(%[ftmp0], %[dst], 0x00)
582 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
583 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
584 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
588 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
589 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
590 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
593 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
594 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
595 [addr4]
"=&r"(addr[4]), [addr5]
"=&r"(addr[5]),
597 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
598 : [dst_stride]
"r"((
mips_reg)dst_stride),
599 [src_stride1]
"r"((
mips_reg)src_stride1),
600 [src_stride2]
"r"((
mips_reg)src_stride2)
606 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
612 src_stride1, src_stride2,
h);
616 ptrdiff_t line_size,
int h)
623 ptrdiff_t line_size,
int h)
630 ptrdiff_t line_size,
int h)
637 ptrdiff_t line_size,
int h)
644 ptrdiff_t line_size,
int h)
651 ptrdiff_t line_size,
int h)
658 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
667 "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
668 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
669 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
670 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
673 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
674 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
675 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
676 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
677 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
678 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
679 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
680 "pxor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
681 "pxor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
682 "pxor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
683 "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
684 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
685 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
686 "pxor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
687 "pxor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
688 MMI_SDC1(%[ftmp0], %[dst], 0x00)
689 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
690 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
691 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
693 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
694 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
695 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
696 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
697 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
698 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
699 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
700 "pxor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
701 "pxor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
702 "pxor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
703 "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
704 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
705 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
706 "pxor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
707 "pxor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
708 MMI_SDC1(%[ftmp0], %[dst], 0x00)
709 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
710 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
711 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
715 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
716 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
717 [ftmp4]
"=&f"(ftmp[4]),
720 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
721 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
722 [addr4]
"=&r"(addr[4]),
724 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
725 : [dst_stride]
"r"((
mips_reg)dst_stride),
726 [src_stride1]
"r"((
mips_reg)src_stride1),
727 [src_stride2]
"r"((
mips_reg)src_stride2)
733 ptrdiff_t line_size,
int h)
736 line_size, line_size,
h);
740 ptrdiff_t line_size,
int h)
747 ptrdiff_t line_size,
int h)
750 line_size, line_size,
h);
754 ptrdiff_t line_size,
int h)
757 line_size, line_size,
h);
761 ptrdiff_t line_size,
int h)
764 line_size, line_size,
h);
768 ptrdiff_t line_size,
int h)
771 line_size, line_size,
h);
775 ptrdiff_t line_size,
int h)
778 line_size, line_size,
h);
782 ptrdiff_t line_size,
int h)
789 ptrdiff_t line_size,
int h)
792 line_size, line_size, line_size,
h);
796 ptrdiff_t line_size,
int h)
803 ptrdiff_t line_size,
int h)
808 const uint32_t
b =
AV_RN32(pixels + 1);
809 uint32_t l0 = (
a & 0x03030303UL) +
812 uint32_t
h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
813 ((
b & 0xFCFCFCFCUL) >> 2);
817 for (
i = 0;
i <
h;
i += 2) {
820 l1 = (
a & 0x03030303UL) +
822 h1 = ((
a & 0xFCFCFCFCUL) >> 2) +
823 ((
b & 0xFCFCFCFCUL) >> 2);
824 *((uint32_t *)
block) =
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
829 l0 = (
a & 0x03030303UL) +
832 h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
833 ((
b & 0xFCFCFCFCUL) >> 2);
834 *((uint32_t *)
block) =
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
841 ptrdiff_t line_size,
int h)
850 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
851 "dli %[addr0], 0x0f \n\t"
852 "pcmpeqw %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
853 "dmtc1 %[addr0], %[ftmp8] \n\t"
854 "dli %[addr0], 0x01 \n\t"
855 "psrlh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
856 "dmtc1 %[addr0], %[ftmp8] \n\t"
857 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
859 "dli %[addr0], 0x02 \n\t"
860 "dmtc1 %[addr0], %[ftmp9] \n\t"
861 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
862 MMI_ULDC1(%[ftmp4], %[pixels], 0x01)
863 "mov.d %[ftmp1], %[ftmp0] \n\t"
864 "mov.d %[ftmp5], %[ftmp4] \n\t"
865 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
866 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
867 "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
868 "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
869 "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
870 "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
871 "xor %[addr0], %[addr0], %[addr0] \n\t"
872 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
876 PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
877 MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
878 MMI_ULDC1(%[ftmp2], %[addr1], 0x01)
879 "mov.d %[ftmp1], %[ftmp0] \n\t"
880 "mov.d %[ftmp3], %[ftmp2] \n\t"
881 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
882 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
883 "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
884 "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
885 "paddush %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
886 "paddush %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
887 "paddush %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
888 "paddush %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
889 "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
890 "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
891 "psrlh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
892 "psrlh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
893 "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
894 MMI_SDXC1(%[ftmp4], %[
block], %[addr0], 0x00)
895 PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
896 PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
897 MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
898 MMI_ULDC1(%[ftmp4], %[addr1], 0x01)
899 "mov.d %[ftmp3], %[ftmp2] \n\t"
900 "mov.d %[ftmp5], %[ftmp4] \n\t"
901 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
902 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
903 "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
904 "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
905 "paddush %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
906 "paddush %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
907 "paddush %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
908 "paddush %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
909 "paddush %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
910 "paddush %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
911 "psrlh %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
912 "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
913 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
914 MMI_SDXC1(%[ftmp0], %[
block], %[addr0], 0x00)
915 PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
918 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
919 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
920 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
921 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
922 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
925 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
926 [
h]
"+&r"(
h), [pixels]
"+&r"(pixels)
934 for (j = 0; j < 2; j++) {
937 const uint32_t
b =
AV_RN32(pixels + 1);
938 uint32_t l0 = (
a & 0x03030303UL) +
941 uint32_t
h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
942 ((
b & 0xFCFCFCFCUL) >> 2);
946 for (
i = 0;
i <
h;
i += 2) {
949 l1 = (
a & 0x03030303UL) +
951 h1 = ((
a & 0xFCFCFCFCUL) >> 2) +
952 ((
b & 0xFCFCFCFCUL) >> 2);
953 *((uint32_t *)
block) =
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
958 l0 = (
a & 0x03030303UL) +
961 h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
962 ((
b & 0xFCFCFCFCUL) >> 2);
963 *((uint32_t *)
block) =
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
967 pixels += 4 - line_size * (
h + 1);
968 block += 4 - line_size *
h;
974 ptrdiff_t line_size,
int h)
981 ptrdiff_t line_size,
int h)
986 const uint32_t
b =
AV_RN32(pixels + 1);
987 uint32_t l0 = (
a & 0x03030303UL) +
990 uint32_t
h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
991 ((
b & 0xFCFCFCFCUL) >> 2);
995 for (
i = 0;
i <
h;
i += 2) {
998 l1 = (
a & 0x03030303UL) +
1000 h1 = ((
a & 0xFCFCFCFCUL) >> 2) +
1001 ((
b & 0xFCFCFCFCUL) >> 2);
1002 *((uint32_t *)
block) =
rnd_avg32(*((uint32_t *)
block),
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1003 pixels += line_size;
1007 l0 = (
a & 0x03030303UL) +
1008 (
b & 0x03030303UL) +
1010 h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
1011 ((
b & 0xFCFCFCFCUL) >> 2);
1012 *((uint32_t *)
block) =
rnd_avg32(*((uint32_t *)
block),
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1013 pixels += line_size;
1019 ptrdiff_t line_size,
int h)
1024 for (j = 0; j < 2; j++) {
1026 const uint32_t
a =
AV_RN32(pixels);
1027 const uint32_t
b =
AV_RN32(pixels + 1);
1028 uint32_t l0 = (
a & 0x03030303UL) +
1029 (
b & 0x03030303UL) +
1031 uint32_t
h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
1032 ((
b & 0xFCFCFCFCUL) >> 2);
1035 pixels += line_size;
1036 for (
i = 0;
i <
h;
i += 2) {
1039 l1 = (
a & 0x03030303UL) +
1041 h1 = ((
a & 0xFCFCFCFCUL) >> 2) +
1042 ((
b & 0xFCFCFCFCUL) >> 2);
1043 *((uint32_t *)
block) =
rnd_avg32(*((uint32_t *)
block),
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1044 pixels += line_size;
1048 l0 = (
a & 0x03030303UL) +
1049 (
b & 0x03030303UL) +
1051 h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
1052 ((
b & 0xFCFCFCFCUL) >> 2);
1053 *((uint32_t *)
block) =
rnd_avg32(*((uint32_t *)
block),
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1054 pixels += line_size;
1057 pixels += 4 - line_size * (
h + 1);
1058 block += 4 - line_size *
h;
1063 ptrdiff_t line_size,
int h)
1070 ptrdiff_t line_size,
int h)
1075 for (j = 0; j < 2; j++) {
1077 const uint32_t
a =
AV_RN32(pixels);
1078 const uint32_t
b =
AV_RN32(pixels + 1);
1079 uint32_t l0 = (
a & 0x03030303UL) +
1080 (
b & 0x03030303UL) +
1082 uint32_t
h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
1083 ((
b & 0xFCFCFCFCUL) >> 2);
1086 pixels += line_size;
1087 for (
i = 0;
i <
h;
i += 2) {
1090 l1 = (
a & 0x03030303UL) +
1092 h1 = ((
a & 0xFCFCFCFCUL) >> 2) +
1093 ((
b & 0xFCFCFCFCUL) >> 2);
1094 *((uint32_t *)
block) =
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1095 pixels += line_size;
1099 l0 = (
a & 0x03030303UL) +
1100 (
b & 0x03030303UL) +
1102 h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
1103 ((
b & 0xFCFCFCFCUL) >> 2);
1104 *((uint32_t *)
block) =
h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1105 pixels += line_size;
1108 pixels += 4 - line_size * (
h + 1);
1109 block += 4 - line_size *
h;
1114 ptrdiff_t line_size,
int h)