/*
 * VC1_INV_TRANCS_8_TYPE1 - inline-asm fragment computing one pair of output
 * rows of an 8-point transform stage (sum output in o1, difference in o2).
 *
 * Parameters:
 *   o1, o2          destination operands, pasted verbatim into the asm text.
 *   r1, r2, r3, r4  immediates loaded with "li" and then copied into both
 *                   32-bit halves of a 64-bit register via punpcklwd
 *                   (presumably two packed 16-bit coefficients each, given
 *                   that they feed pmaddhw - confirm against the MMI manual).
 *   c0              operand added to the first two accumulators before the
 *                   sum/difference step (callers visible in this file pass a
 *                   rounding constant such as %[ff_pw_4]).
 *
 * Data flow, as written below:
 *   ftmp1 = pmaddhw(ftmp5,  r1) + pmaddhw(ftmp7,  r2) + c0
 *   ftmp2 = pmaddhw(ftmp6,  r1) + pmaddhw(ftmp8,  r2) + c0
 *   ftmp3 = pmaddhw(ftmp9,  r3) + pmaddhw(ftmp11, r4)
 *   ftmp4 = pmaddhw(ftmp10, r3) + pmaddhw(ftmp12, r4)
 *   sums        (ftmp1 + ftmp3, ftmp2 + ftmp4) >> ftmp0  -> packed into o1
 *   differences (ftmp1 - ftmp3, ftmp2 - ftmp4) >> ftmp0  -> packed into o2
 *
 * Inputs expected in registers: ftmp5..ftmp12 (interleaved source data) and
 * ftmp0 (shift count for psraw).
 * Scratch registers overwritten: tmp0, ftmp1..ftmp4, ftmp13, ftmp14.
 * The final punpcklhw/punpckhhw/punpcklhw triple appears to gather the low
 * 16 bits of each 32-bit result into one 64-bit register - TODO confirm.
 *
 * NOTE(review): the leading decimal on every line below looks like
 * line-number residue from an extraction step, not part of the real source.
 */
33 #define VC1_INV_TRANCS_8_TYPE1(o1, o2, r1, r2, r3, r4, c0) \
34 "li %[tmp0], "#r1" \n\t" \
35 "mtc1 %[tmp0], %[ftmp13] \n\t" \
36 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" \
37 "li %[tmp0], "#r2" \n\t" \
38 "mtc1 %[tmp0], %[ftmp14] \n\t" \
39 "punpcklwd %[ftmp14], %[ftmp14], %[ftmp14] \n\t" \
40 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp13] \n\t" \
41 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp14] \n\t" \
42 "paddw %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
43 "pmaddhw %[ftmp2], %[ftmp6], %[ftmp13] \n\t" \
44 "pmaddhw %[ftmp3], %[ftmp8], %[ftmp14] \n\t" \
45 "paddw %[ftmp2], %[ftmp2], %[ftmp3] \n\t" \
47 "li %[tmp0], "#r3" \n\t" \
48 "mtc1 %[tmp0], %[ftmp13] \n\t" \
49 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" \
50 "li %[tmp0], "#r4" \n\t" \
51 "mtc1 %[tmp0], %[ftmp14] \n\t" \
52 "punpcklwd %[ftmp14], %[ftmp14], %[ftmp14] \n\t" \
53 "pmaddhw %[ftmp3], %[ftmp9], %[ftmp13] \n\t" \
54 "pmaddhw %[ftmp4], %[ftmp11], %[ftmp14] \n\t" \
55 "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
56 "pmaddhw %[ftmp4], %[ftmp10], %[ftmp13] \n\t" \
57 "pmaddhw %[ftmp13], %[ftmp12], %[ftmp14] \n\t" \
58 "paddw %[ftmp4], %[ftmp4], %[ftmp13] \n\t" \
60 "paddw %[ftmp1], %[ftmp1], "#c0" \n\t" \
61 "paddw %[ftmp2], %[ftmp2], "#c0" \n\t" \
62 "paddw %[ftmp13], %[ftmp1], %[ftmp3] \n\t" \
63 "psubw %[ftmp14], %[ftmp1], %[ftmp3] \n\t" \
64 "paddw %[ftmp1], %[ftmp2], %[ftmp4] \n\t" \
65 "psubw %[ftmp3], %[ftmp2], %[ftmp4] \n\t" \
66 "psraw %[ftmp13], %[ftmp13], %[ftmp0] \n\t" \
67 "psraw %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \
68 "psraw %[ftmp14], %[ftmp14], %[ftmp0] \n\t" \
69 "psraw %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
70 "punpcklhw %[ftmp2], %[ftmp13], %[ftmp1] \n\t" \
71 "punpckhhw %[ftmp4], %[ftmp13], %[ftmp1] \n\t" \
72 "punpcklhw "#o1", %[ftmp2], %[ftmp4] \n\t" \
73 "punpcklhw %[ftmp2], %[ftmp14], %[ftmp3] \n\t" \
74 "punpckhhw %[ftmp4], %[ftmp14], %[ftmp3] \n\t" \
75 "punpcklhw "#o2", %[ftmp2], %[ftmp4] \n\t"
/*
 * VC1_INV_TRANCS_8_TYPE2 - inline-asm fragment computing one pair of output
 * rows of an 8-point transform stage, with an extra bias c1 on the
 * difference outputs.
 *
 * Parameters:
 *   o1, o2          destination operands, pasted verbatim into the asm text.
 *   r1, r2, r3, r4  immediates loaded with "li" and then copied into both
 *                   32-bit halves of a 64-bit register via punpcklwd
 *                   (presumably two packed 16-bit coefficients each, given
 *                   that they feed pmaddhw - confirm against the MMI manual).
 *   c0              operand added to all four results just before the shift.
 *   c1              operand added only to the two difference results.
 *
 * Data flow, as written below:
 *   ftmp1 = pmaddhw(ftmp5,  r1) + pmaddhw(ftmp7,  r2)
 *   ftmp2 = pmaddhw(ftmp6,  r1) + pmaddhw(ftmp8,  r2)
 *   ftmp3 = pmaddhw(ftmp9,  r3) + pmaddhw(ftmp11, r4)
 *   ftmp4 = pmaddhw(ftmp10, r3) + pmaddhw(ftmp12, r4)
 *   sums        (ftmp1 + ftmp3 + c0,      ftmp2 + ftmp4 + c0)      >> ftmp0
 *               -> packed into o1
 *   differences (ftmp1 - ftmp3 + c1 + c0, ftmp2 - ftmp4 + c1 + c0) >> ftmp0
 *               -> packed into o2
 *
 * Unlike the TYPE1 variant, c0 is applied after the sum/difference step
 * rather than before it, and the differences additionally receive c1.
 *
 * Inputs expected in registers: ftmp5..ftmp12 (interleaved source data) and
 * ftmp0 (shift count for psraw).
 * Scratch registers overwritten: tmp0, ftmp1..ftmp4, ftmp13, ftmp14.
 * The final punpcklhw/punpckhhw/punpcklhw triple appears to gather the low
 * 16 bits of each 32-bit result into one 64-bit register - TODO confirm.
 *
 * NOTE(review): the leading decimal on every line below looks like
 * line-number residue from an extraction step, not part of the real source.
 */
77 #define VC1_INV_TRANCS_8_TYPE2(o1, o2, r1, r2, r3, r4, c0, c1) \
78 "li %[tmp0], "#r1" \n\t" \
79 "mtc1 %[tmp0], %[ftmp13] \n\t" \
80 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" \
81 "li %[tmp0], "#r2" \n\t" \
82 "mtc1 %[tmp0], %[ftmp14] \n\t" \
83 "punpcklwd %[ftmp14], %[ftmp14], %[ftmp14] \n\t" \
84 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp13] \n\t" \
85 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp14] \n\t" \
86 "paddw %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
87 "pmaddhw %[ftmp2], %[ftmp6], %[ftmp13] \n\t" \
88 "pmaddhw %[ftmp3], %[ftmp8], %[ftmp14] \n\t" \
89 "paddw %[ftmp2], %[ftmp2], %[ftmp3] \n\t" \
91 "li %[tmp0], "#r3" \n\t" \
92 "mtc1 %[tmp0], %[ftmp13] \n\t" \
93 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" \
94 "li %[tmp0], "#r4" \n\t" \
95 "mtc1 %[tmp0], %[ftmp14] \n\t" \
96 "punpcklwd %[ftmp14], %[ftmp14], %[ftmp14] \n\t" \
97 "pmaddhw %[ftmp3], %[ftmp9], %[ftmp13] \n\t" \
98 "pmaddhw %[ftmp4], %[ftmp11], %[ftmp14] \n\t" \
99 "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
100 "pmaddhw %[ftmp4], %[ftmp10], %[ftmp13] \n\t" \
101 "pmaddhw %[ftmp13], %[ftmp12], %[ftmp14] \n\t" \
102 "paddw %[ftmp4], %[ftmp4], %[ftmp13] \n\t" \
104 "paddw %[ftmp13], %[ftmp1], %[ftmp3] \n\t" \
105 "psubw %[ftmp14], %[ftmp1], %[ftmp3] \n\t" \
106 "paddw %[ftmp14], %[ftmp14], "#c1" \n\t" \
107 "paddw %[ftmp1], %[ftmp2], %[ftmp4] \n\t" \
108 "psubw %[ftmp3], %[ftmp2], %[ftmp4] \n\t" \
109 "paddw %[ftmp3], %[ftmp3], "#c1" \n\t" \
110 "paddw %[ftmp13], %[ftmp13], "#c0" \n\t" \
111 "paddw %[ftmp14], %[ftmp14], "#c0" \n\t" \
112 "paddw %[ftmp1], %[ftmp1], "#c0" \n\t" \
113 "paddw %[ftmp3], %[ftmp3], "#c0" \n\t" \
114 "psraw %[ftmp13], %[ftmp13], %[ftmp0] \n\t" \
115 "psraw %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \
116 "psraw %[ftmp14], %[ftmp14], %[ftmp0] \n\t" \
117 "psraw %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
118 "punpcklhw %[ftmp2], %[ftmp13], %[ftmp1] \n\t" \
119 "punpckhhw %[ftmp4], %[ftmp13], %[ftmp1] \n\t" \
120 "punpcklhw "#o1", %[ftmp2], %[ftmp4] \n\t" \
121 "punpcklhw %[ftmp2], %[ftmp14], %[ftmp3] \n\t" \
122 "punpckhhw %[ftmp4], %[ftmp14], %[ftmp3] \n\t" \
123 "punpcklhw "#o2", %[ftmp2], %[ftmp4] \n\t"
133 dc = (3 *
dc + 1) >> 1;
134 dc = (3 *
dc + 16) >> 5;
137 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
138 "pshufh %[dc], %[dc], %[ftmp0] \n\t"
139 "li %[count], 0x02 \n\t"
142 MMI_LDC1(%[ftmp1], %[dest], 0x00)
143 PTR_ADDU "%[addr0], %[dest], %[linesize] \n\t"
144 MMI_LDC1(%[ftmp2], %[addr0], 0x00)
145 PTR_ADDU "%[addr0], %[addr0], %[linesize] \n\t"
146 MMI_LDC1(%[ftmp3], %[addr0], 0x00)
147 PTR_ADDU "%[addr0], %[addr0], %[linesize] \n\t"
148 MMI_LDC1(%[ftmp4], %[addr0], 0x00)
150 "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t"
151 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
152 "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t"
153 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
154 "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
155 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
156 "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t"
157 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
159 "paddsh %[ftmp1], %[ftmp1], %[dc] \n\t"
160 "paddsh %[ftmp2], %[ftmp2], %[dc] \n\t"
161 "paddsh %[ftmp3], %[ftmp3], %[dc] \n\t"
162 "paddsh %[ftmp4], %[ftmp4], %[dc] \n\t"
163 "paddsh %[ftmp5], %[ftmp5], %[dc] \n\t"
164 "paddsh %[ftmp6], %[ftmp6], %[dc] \n\t"
165 "paddsh %[ftmp7], %[ftmp7], %[dc] \n\t"
166 "paddsh %[ftmp8], %[ftmp8], %[dc] \n\t"
168 "packushb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
169 "packushb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
170 "packushb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
171 "packushb %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
173 MMI_SDC1(%[ftmp1], %[dest], 0x00)
174 PTR_ADDU "%[addr0], %[dest], %[linesize] \n\t"
175 MMI_SDC1(%[ftmp2], %[addr0], 0x00)
176 PTR_ADDU "%[addr0], %[addr0], %[linesize] \n\t"
177 MMI_SDC1(%[ftmp3], %[addr0], 0x00)
178 PTR_ADDU "%[addr0], %[addr0], %[linesize] \n\t"
179 MMI_SDC1(%[ftmp4], %[addr0], 0x00)
181 "addiu %[count], %[count], -0x01 \n\t"
182 PTR_ADDU "%[dest], %[addr0], %[linesize] \n\t"
183 "bnez %[count], 1b \n\t"
184 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
185 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
186 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
187 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
188 [ftmp8]
"=&f"(ftmp[8]),
189 [addr0]
"=&r"(addr[0]),
190 [count]
"=&r"(count), [dest]
"+&r"(dest)
191 : [linesize]
"r"((
mips_reg)linesize),
197 #if _MIPS_SIM != _ABIO32
201 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
202 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
203 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
209 "li %[tmp0], 0x03 \n\t"
210 "mtc1 %[tmp0], %[ftmp0] \n\t"
213 MMI_LDC1(%[ftmp1], %[
block], 0x00)
214 MMI_LDC1(%[ftmp11], %[
block], 0x10)
215 MMI_LDC1(%[ftmp2], %[
block], 0x20)
216 MMI_LDC1(%[ftmp12], %[
block], 0x30)
217 MMI_LDC1(%[ftmp3], %[
block], 0x40)
218 MMI_LDC1(%[ftmp13], %[
block], 0x50)
219 MMI_LDC1(%[ftmp4], %[
block], 0x60)
220 MMI_LDC1(%[ftmp14], %[
block], 0x70)
221 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
222 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
223 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
224 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t"
226 "punpcklhw %[ftmp9], %[ftmp11], %[ftmp12] \n\t"
227 "punpckhhw %[ftmp10], %[ftmp11], %[ftmp12] \n\t"
228 "punpcklhw %[ftmp11], %[ftmp13], %[ftmp14] \n\t"
229 "punpckhhw %[ftmp12], %[ftmp13], %[ftmp14] \n\t"
233 0x000f0010, 0x00040009, %[
ff_pw_4])
237 0xfffc000f, 0xfff7fff0, %[
ff_pw_4])
241 0xfff00009, 0x000f0004, %[
ff_pw_4])
245 0xfff70004, 0xfff0000f, %[
ff_pw_4])
247 TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18],
248 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
250 TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22],
251 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
253 MMI_SDC1(%[ftmp15], %[
temp], 0x00)
254 MMI_SDC1(%[ftmp19], %[
temp], 0x08)
255 MMI_SDC1(%[ftmp16], %[
temp], 0x10)
256 MMI_SDC1(%[ftmp20], %[
temp], 0x18)
257 MMI_SDC1(%[ftmp17], %[
temp], 0x20)
258 MMI_SDC1(%[ftmp21], %[
temp], 0x28)
259 MMI_SDC1(%[ftmp18], %[
temp], 0x30)
260 MMI_SDC1(%[ftmp22], %[
temp], 0x38)
263 MMI_LDC1(%[ftmp1], %[
block], 0x08)
264 MMI_LDC1(%[ftmp11], %[
block], 0x18)
265 MMI_LDC1(%[ftmp2], %[
block], 0x28)
266 MMI_LDC1(%[ftmp12], %[
block], 0x38)
267 MMI_LDC1(%[ftmp3], %[
block], 0x48)
268 MMI_LDC1(%[ftmp13], %[
block], 0x58)
269 MMI_LDC1(%[ftmp4], %[
block], 0x68)
270 MMI_LDC1(%[ftmp14], %[
block], 0x78)
271 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
272 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
273 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
274 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t"
276 "punpcklhw %[ftmp9], %[ftmp11], %[ftmp12] \n\t"
277 "punpckhhw %[ftmp10], %[ftmp11], %[ftmp12] \n\t"
278 "punpcklhw %[ftmp11], %[ftmp13], %[ftmp14] \n\t"
279 "punpckhhw %[ftmp12], %[ftmp13], %[ftmp14] \n\t"
283 0x000f0010, 0x00040009, %[
ff_pw_4])
287 0xfffc000f, 0xfff7fff0, %[
ff_pw_4])
291 0xfff00009, 0x000f0004, %[
ff_pw_4])
295 0xfff70004, 0xfff0000f, %[
ff_pw_4])
297 TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18],
298 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
300 TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22],
301 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
303 MMI_SDC1(%[ftmp19], %[
temp], 0x48)
304 MMI_SDC1(%[ftmp20], %[
temp], 0x58)
305 MMI_SDC1(%[ftmp21], %[
temp], 0x68)
306 MMI_SDC1(%[ftmp22], %[
temp], 0x78)
310 "li %[tmp0], 0x07 \n\t"
311 "mtc1 %[tmp0], %[ftmp0] \n\t"
314 MMI_LDC1(%[ftmp1], %[
temp], 0x00)
315 MMI_LDC1(%[ftmp11], %[
temp], 0x10)
316 MMI_LDC1(%[ftmp2], %[
temp], 0x20)
317 MMI_LDC1(%[ftmp12], %[
temp], 0x30)
318 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
319 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
320 "punpcklhw %[ftmp7], %[ftmp15], %[ftmp17] \n\t"
321 "punpckhhw %[ftmp8], %[ftmp15], %[ftmp17] \n\t"
323 "punpcklhw %[ftmp9], %[ftmp11], %[ftmp12] \n\t"
324 "punpckhhw %[ftmp10], %[ftmp11], %[ftmp12] \n\t"
325 "punpcklhw %[ftmp11], %[ftmp16], %[ftmp18] \n\t"
326 "punpckhhw %[ftmp12], %[ftmp16], %[ftmp18] \n\t"
344 MMI_SDC1(%[ftmp15], %[
block], 0x00)
345 MMI_SDC1(%[ftmp16], %[
block], 0x10)
346 MMI_SDC1(%[ftmp17], %[
block], 0x20)
347 MMI_SDC1(%[ftmp18], %[
block], 0x30)
348 MMI_SDC1(%[ftmp19], %[
block], 0x40)
349 MMI_SDC1(%[ftmp20], %[
block], 0x50)
350 MMI_SDC1(%[ftmp21], %[
block], 0x60)
351 MMI_SDC1(%[ftmp22], %[
block], 0x70)
354 MMI_LDC1(%[ftmp1], %[
temp], 0x08)
355 MMI_LDC1(%[ftmp11], %[
temp], 0x18)
356 MMI_LDC1(%[ftmp2], %[
temp], 0x28)
357 MMI_LDC1(%[ftmp12], %[
temp], 0x38)
358 MMI_LDC1(%[ftmp3], %[
temp], 0x48)
359 MMI_LDC1(%[ftmp13], %[
temp], 0x58)
360 MMI_LDC1(%[ftmp4], %[
temp], 0x68)
361 MMI_LDC1(%[ftmp14], %[
temp], 0x78)
362 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
363 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
364 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
365 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t"
367 "punpcklhw %[ftmp9], %[ftmp11], %[ftmp12] \n\t"
368 "punpckhhw %[ftmp10], %[ftmp11], %[ftmp12] \n\t"
369 "punpcklhw %[ftmp11], %[ftmp13], %[ftmp14] \n\t"
370 "punpckhhw %[ftmp12], %[ftmp13], %[ftmp14] \n\t"
388 MMI_SDC1(%[ftmp15], %[
block], 0x08)
389 MMI_SDC1(%[ftmp16], %[
block], 0x18)
390 MMI_SDC1(%[ftmp17], %[
block], 0x28)
391 MMI_SDC1(%[ftmp18], %[
block], 0x38)
392 MMI_SDC1(%[ftmp19], %[
block], 0x48)
393 MMI_SDC1(%[ftmp20], %[
block], 0x58)
394 MMI_SDC1(%[ftmp21], %[
block], 0x68)
395 MMI_SDC1(%[ftmp22], %[
block], 0x78)
397 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
398 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
399 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
400 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
401 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
402 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
403 [ftmp12]
"=&f"(ftmp[12]), [ftmp13]
"=&f"(ftmp[13]),
404 [ftmp14]
"=&f"(ftmp[14]), [ftmp15]
"=&f"(ftmp[15]),
405 [ftmp16]
"=&f"(ftmp[16]), [ftmp17]
"=&f"(ftmp[17]),
406 [ftmp18]
"=&f"(ftmp[18]), [ftmp19]
"=&f"(ftmp[19]),
407 [ftmp20]
"=&f"(ftmp[20]), [ftmp21]
"=&f"(ftmp[21]),
408 [ftmp22]
"=&f"(ftmp[22]),
424 dc = ( 3 *
dc + 1) >> 1;
425 dc = (17 *
dc + 64) >> 7;
428 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
429 "pshufh %[dc], %[dc], %[ftmp0] \n\t"
431 MMI_LDC1(%[ftmp1], %[dest0], 0x00)
432 MMI_LDC1(%[ftmp2], %[dest1], 0x00)
433 MMI_LDC1(%[ftmp3], %[dest2], 0x00)
434 MMI_LDC1(%[ftmp4], %[dest3], 0x00)
436 "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t"
437 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
438 "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t"
439 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
440 "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
441 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
442 "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t"
443 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
445 "paddsh %[ftmp1], %[ftmp1], %[dc] \n\t"
446 "paddsh %[ftmp2], %[ftmp2], %[dc] \n\t"
447 "paddsh %[ftmp3], %[ftmp3], %[dc] \n\t"
448 "paddsh %[ftmp4], %[ftmp4], %[dc] \n\t"
449 "paddsh %[ftmp5], %[ftmp5], %[dc] \n\t"
450 "paddsh %[ftmp6], %[ftmp6], %[dc] \n\t"
451 "paddsh %[ftmp7], %[ftmp7], %[dc] \n\t"
452 "paddsh %[ftmp8], %[ftmp8], %[dc] \n\t"
454 "packushb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
455 "packushb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
456 "packushb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
457 "packushb %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
459 MMI_SDC1(%[ftmp1], %[dest0], 0x00)
460 MMI_SDC1(%[ftmp2], %[dest1], 0x00)
461 MMI_SDC1(%[ftmp3], %[dest2], 0x00)
462 MMI_SDC1(%[ftmp4], %[dest3], 0x00)
463 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
464 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
465 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
466 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
467 [ftmp8]
"=&f"(ftmp[8])
468 : [dest0]
"r"(dest+0*linesize), [dest1]
"r"(dest+1*linesize),
469 [dest2]
"r"(dest+2*linesize), [dest3]
"r"(dest+3*linesize),
475 #if _MIPS_SIM != _ABIO32
479 int16_t *dst =
block;
483 DECLARE_ALIGNED(16,
const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
484 DECLARE_ALIGNED(16,
const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
485 int16_t
coeff[64] = {12, 16, 16, 15, 12, 9, 6, 4,
486 12, 15, 6, -4, -12, -16, -16, -9,
487 12, 9, -6, -16, -12, 4, 16, 15,
488 12, 4, -16, -9, 12, 15, -6, -16,
489 12, -4, -16, 9, 12, -15, -6, 16,
490 12, -9, -6, 16, -12, -4, 16, -15,
491 12, -15, 6, 4, -12, 16, -16, 9,
492 12, -16, 16, -15, 12, -9, 6, -4};
496 "li %[tmp0], 0x03 \n\t"
497 "mtc1 %[tmp0], %[ftmp0] \n\t"
500 MMI_LDC1(%[ftmp1], %[
src], 0x00)
501 MMI_LDC1(%[ftmp2], %[
src], 0x08)
504 MMI_LDC1(%[ftmp3], %[
coeff], 0x00)
505 MMI_LDC1(%[ftmp4], %[
coeff], 0x08)
506 MMI_LDC1(%[ftmp5], %[
coeff], 0x10)
507 MMI_LDC1(%[ftmp6], %[
coeff], 0x18)
508 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t"
509 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp4] \n\t"
510 "paddw %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
511 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp5] \n\t"
512 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp6] \n\t"
513 "paddw %[ftmp10], %[ftmp7], %[ftmp8] \n\t"
514 "punpcklwd %[ftmp7], %[ftmp9], %[ftmp10] \n\t"
515 "punpckhwd %[ftmp8], %[ftmp9], %[ftmp10] \n\t"
516 "paddw %[ftmp11], %[ftmp7], %[ftmp8] \n\t"
517 "paddw %[ftmp11], %[ftmp11], %[ff_pw_4] \n\t"
520 MMI_LDC1(%[ftmp3], %[
coeff], 0x20)
521 MMI_LDC1(%[ftmp4], %[
coeff], 0x28)
522 MMI_LDC1(%[ftmp5], %[
coeff], 0x30)
523 MMI_LDC1(%[ftmp6], %[
coeff], 0x38)
524 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t"
525 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp4] \n\t"
526 "paddw %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
527 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp5] \n\t"
528 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp6] \n\t"
529 "paddw %[ftmp10], %[ftmp7], %[ftmp8] \n\t"
530 "punpcklwd %[ftmp7], %[ftmp9], %[ftmp10] \n\t"
531 "punpckhwd %[ftmp8], %[ftmp9], %[ftmp10] \n\t"
532 "paddw %[ftmp12], %[ftmp7], %[ftmp8] \n\t"
533 "paddw %[ftmp12], %[ftmp12], %[ff_pw_4] \n\t"
536 MMI_LDC1(%[ftmp3], %[
coeff], 0x40)
537 MMI_LDC1(%[ftmp4], %[
coeff], 0x48)
538 MMI_LDC1(%[ftmp5], %[
coeff], 0x50)
539 MMI_LDC1(%[ftmp6], %[
coeff], 0x58)
540 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t"
541 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp4] \n\t"
542 "paddw %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
543 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp5] \n\t"
544 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp6] \n\t"
545 "paddw %[ftmp10], %[ftmp7], %[ftmp8] \n\t"
546 "punpcklwd %[ftmp7], %[ftmp9], %[ftmp10] \n\t"
547 "punpckhwd %[ftmp8], %[ftmp9], %[ftmp10] \n\t"
548 "paddw %[ftmp13], %[ftmp7], %[ftmp8] \n\t"
549 "paddw %[ftmp13], %[ftmp13], %[ff_pw_4] \n\t"
552 MMI_LDC1(%[ftmp3], %[
coeff], 0x60)
553 MMI_LDC1(%[ftmp4], %[
coeff], 0x68)
554 MMI_LDC1(%[ftmp5], %[
coeff], 0x70)
555 MMI_LDC1(%[ftmp6], %[
coeff], 0x78)
556 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t"
557 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp4] \n\t"
558 "paddw %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
559 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp5] \n\t"
560 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp6] \n\t"
561 "paddw %[ftmp10], %[ftmp7], %[ftmp8] \n\t"
562 "punpcklwd %[ftmp7], %[ftmp9], %[ftmp10] \n\t"
563 "punpckhwd %[ftmp8], %[ftmp9], %[ftmp10] \n\t"
564 "paddw %[ftmp14], %[ftmp7], %[ftmp8] \n\t"
565 "paddw %[ftmp14], %[ftmp14], %[ff_pw_4] \n\t"
568 "psraw %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
569 "psraw %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
570 "psraw %[ftmp13], %[ftmp13], %[ftmp0] \n\t"
571 "psraw %[ftmp14], %[ftmp14], %[ftmp0] \n\t"
572 "punpcklhw %[ftmp7], %[ftmp11], %[ftmp12] \n\t"
573 "punpckhhw %[ftmp8], %[ftmp11], %[ftmp12] \n\t"
574 "punpcklhw %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
575 "punpcklhw %[ftmp7], %[ftmp13], %[ftmp14] \n\t"
576 "punpckhhw %[ftmp8], %[ftmp13], %[ftmp14] \n\t"
577 "punpcklhw %[ftmp10], %[ftmp7], %[ftmp8] \n\t"
578 MMI_SDC1(%[ftmp9], %[dst], 0x00)
579 MMI_SDC1(%[ftmp10], %[dst], 0x08)
583 "addiu %[count], %[count], -0x01 \n\t"
584 "bnez %[count], 1b \n\t"
585 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
586 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
587 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
588 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
589 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
590 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
591 [ftmp12]
"=&f"(ftmp[12]), [ftmp13]
"=&f"(ftmp[13]),
592 [ftmp14]
"=&f"(ftmp[14]), [tmp0]
"=&r"(
tmp[0]),
593 [
src]
"+&r"(
src), [dst]
"+&r"(dst), [count]
"+&r"(count)
602 "li %[tmp0], 0x44 \n\t"
603 "mtc1 %[tmp0], %[ftmp15] \n\t"
606 "li %[tmp0], 0x07 \n\t"
607 "mtc1 %[tmp0], %[ftmp0] \n\t"
608 MMI_LDC1(%[ftmp1], %[
src], 0x00)
609 MMI_LDC1(%[ftmp2], %[
src], 0x10)
610 MMI_LDC1(%[ftmp3], %[
src], 0x20)
611 MMI_LDC1(%[ftmp4], %[
src], 0x30)
612 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
613 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
614 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
615 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t"
618 "li %[tmp0], 0x00160011 \n\t"
619 "mtc1 %[tmp0], %[ftmp3] \n\t"
620 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
621 "li %[tmp0], 0x000a0011 \n\t"
622 "mtc1 %[tmp0], %[ftmp4] \n\t"
623 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t"
624 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t"
625 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t"
626 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
627 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t"
628 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t"
629 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
630 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t"
631 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t"
632 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
633 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
634 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t"
635 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t"
636 "punpcklhw %[ftmp11], %[ftmp1], %[ftmp2] \n\t"
639 "li %[tmp0], 0x000a0011 \n\t"
640 "mtc1 %[tmp0], %[ftmp3] \n\t"
641 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
642 "li %[tmp0], 0xffeaffef \n\t"
643 "mtc1 %[tmp0], %[ftmp4] \n\t"
644 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t"
645 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t"
646 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t"
647 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
648 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t"
649 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t"
650 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
651 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t"
652 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t"
653 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
654 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
655 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t"
656 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t"
657 "punpcklhw %[ftmp12], %[ftmp1], %[ftmp2] \n\t"
660 "li %[tmp0], 0xfff60011 \n\t"
661 "mtc1 %[tmp0], %[ftmp3] \n\t"
662 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
663 "li %[tmp0], 0x0016ffef \n\t"
664 "mtc1 %[tmp0], %[ftmp4] \n\t"
665 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t"
666 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t"
667 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t"
668 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
669 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t"
670 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t"
671 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
672 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t"
673 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t"
674 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
675 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
676 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t"
677 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t"
678 "punpcklhw %[ftmp13], %[ftmp1], %[ftmp2] \n\t"
681 "li %[tmp0], 0xffea0011 \n\t"
682 "mtc1 %[tmp0], %[ftmp3] \n\t"
683 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
684 "li %[tmp0], 0xfff60011 \n\t"
685 "mtc1 %[tmp0], %[ftmp4] \n\t"
686 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t"
687 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t"
688 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t"
689 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
690 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t"
691 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t"
692 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
693 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t"
694 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t"
695 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
696 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
697 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t"
698 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t"
699 "punpcklhw %[ftmp14], %[ftmp1], %[ftmp2] \n\t"
701 MMI_LWC1(%[ftmp1], %[dest], 0x00)
702 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t"
703 MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
704 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
705 MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
706 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
707 MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
708 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
709 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
710 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
711 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
712 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
713 "paddh %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
714 "paddh %[ftmp2], %[ftmp2], %[ftmp12] \n\t"
715 "paddh %[ftmp3], %[ftmp3], %[ftmp13] \n\t"
716 "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t"
717 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
718 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
719 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
720 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
721 MMI_SWC1(%[ftmp1], %[dest], 0x00)
722 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t"
723 MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
724 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
725 MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
726 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
727 MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
730 "li %[tmp0], 0x07 \n\t"
731 "mtc1 %[tmp0], %[ftmp0] \n\t"
732 MMI_LDC1(%[ftmp1], %[
src], 0x08)
733 MMI_LDC1(%[ftmp2], %[
src], 0x18)
734 MMI_LDC1(%[ftmp3], %[
src], 0x28)
735 MMI_LDC1(%[ftmp4], %[
src], 0x38)
736 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
737 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
738 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
739 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t"
742 "li %[tmp0], 0x00160011 \n\t"
743 "mtc1 %[tmp0], %[ftmp3] \n\t"
744 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
745 "li %[tmp0], 0x000a0011 \n\t"
746 "mtc1 %[tmp0], %[ftmp4] \n\t"
747 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t"
748 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t"
749 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t"
750 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
751 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t"
752 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t"
753 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
754 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t"
755 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t"
756 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
757 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
758 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t"
759 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t"
760 "punpcklhw %[ftmp11], %[ftmp1], %[ftmp2] \n\t"
763 "li %[tmp0], 0x000a0011 \n\t"
764 "mtc1 %[tmp0], %[ftmp3] \n\t"
765 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
766 "li %[tmp0], 0xffeaffef \n\t"
767 "mtc1 %[tmp0], %[ftmp4] \n\t"
768 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t"
769 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t"
770 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t"
771 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
772 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t"
773 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t"
774 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
775 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t"
776 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t"
777 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
778 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
779 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t"
780 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t"
781 "punpcklhw %[ftmp12], %[ftmp1], %[ftmp2] \n\t"
784 "li %[tmp0], 0xfff60011 \n\t"
785 "mtc1 %[tmp0], %[ftmp3] \n\t"
786 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
787 "li %[tmp0], 0x0016ffef \n\t"
788 "mtc1 %[tmp0], %[ftmp4] \n\t"
789 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t"
790 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t"
791 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t"
792 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
793 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t"
794 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t"
795 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
796 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t"
797 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t"
798 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
799 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
800 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t"
801 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t"
802 "punpcklhw %[ftmp13], %[ftmp1], %[ftmp2] \n\t"
805 "li %[tmp0], 0xffea0011 \n\t"
806 "mtc1 %[tmp0], %[ftmp3] \n\t"
807 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
808 "li %[tmp0], 0xfff60011 \n\t"
809 "mtc1 %[tmp0], %[ftmp4] \n\t"
810 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t"
811 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t"
812 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t"
813 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
814 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t"
815 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t"
816 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
817 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t"
818 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t"
819 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
820 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
821 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t"
822 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t"
823 "punpcklhw %[ftmp14], %[ftmp1], %[ftmp2] \n\t"
825 MMI_LWC1(%[ftmp1], %[dest], 0x04)
826 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t"
827 MMI_LWC1(%[ftmp2], %[tmp0], 0x04)
828 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
829 MMI_LWC1(%[ftmp3], %[tmp0], 0x04)
830 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
831 MMI_LWC1(%[ftmp4], %[tmp0], 0x04)
832 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
833 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
834 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
835 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
836 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
837 "paddh %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
838 "paddh %[ftmp2], %[ftmp2], %[ftmp12] \n\t"
839 "paddh %[ftmp3], %[ftmp3], %[ftmp13] \n\t"
840 "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t"
841 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
842 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
843 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
844 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
845 MMI_SWC1(%[ftmp1], %[dest], 0x04)
846 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t"
847 MMI_SWC1(%[ftmp2], %[tmp0], 0x04)
848 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
849 MMI_SWC1(%[ftmp3], %[tmp0], 0x04)
850 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
851 MMI_SWC1(%[ftmp4], %[tmp0], 0x04)
853 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
854 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
855 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
856 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
857 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
858 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
859 [ftmp12]
"=&f"(ftmp[12]), [ftmp13]
"=&f"(ftmp[13]),
860 [ftmp14]
"=&f"(ftmp[14]), [ftmp15]
"=&f"(ftmp[15]),
863 [
src]
"r"(
src), [dest]
"r"(dest), [linesize]
"r"(linesize)
876 dc = (17 *
dc + 4) >> 3;
877 dc = (12 *
dc + 64) >> 7;
880 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
881 "pshufh %[dc], %[dc], %[ftmp0] \n\t"
883 MMI_LWC1(%[ftmp1], %[dest0], 0x00)
884 MMI_LWC1(%[ftmp2], %[dest1], 0x00)
885 MMI_LWC1(%[ftmp3], %[dest2], 0x00)
886 MMI_LWC1(%[ftmp4], %[dest3], 0x00)
887 MMI_LWC1(%[ftmp5], %[dest4], 0x00)
888 MMI_LWC1(%[ftmp6], %[dest5], 0x00)
889 MMI_LWC1(%[ftmp7], %[dest6], 0x00)
890 MMI_LWC1(%[ftmp8], %[dest7], 0x00)
892 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
893 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
894 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
895 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
896 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
897 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
898 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
899 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
901 "paddsh %[ftmp1], %[ftmp1], %[dc] \n\t"
902 "paddsh %[ftmp2], %[ftmp2], %[dc] \n\t"
903 "paddsh %[ftmp3], %[ftmp3], %[dc] \n\t"
904 "paddsh %[ftmp4], %[ftmp4], %[dc] \n\t"
905 "paddsh %[ftmp5], %[ftmp5], %[dc] \n\t"
906 "paddsh %[ftmp6], %[ftmp6], %[dc] \n\t"
907 "paddsh %[ftmp7], %[ftmp7], %[dc] \n\t"
908 "paddsh %[ftmp8], %[ftmp8], %[dc] \n\t"
910 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
911 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
912 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
913 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
914 "packushb %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
915 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
916 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
917 "packushb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
919 MMI_SWC1(%[ftmp1], %[dest0], 0x00)
920 MMI_SWC1(%[ftmp2], %[dest1], 0x00)
921 MMI_SWC1(%[ftmp3], %[dest2], 0x00)
922 MMI_SWC1(%[ftmp4], %[dest3], 0x00)
923 MMI_SWC1(%[ftmp5], %[dest4], 0x00)
924 MMI_SWC1(%[ftmp6], %[dest5], 0x00)
925 MMI_SWC1(%[ftmp7], %[dest6], 0x00)
926 MMI_SWC1(%[ftmp8], %[dest7], 0x00)
927 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
928 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
929 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
930 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
932 [ftmp8]
"=&f"(ftmp[8])
933 : [dest0]
"r"(dest+0*linesize), [dest1]
"r"(dest+1*linesize),
934 [dest2]
"r"(dest+2*linesize), [dest3]
"r"(dest+3*linesize),
935 [dest4]
"r"(dest+4*linesize), [dest5]
"r"(dest+5*linesize),
936 [dest6]
"r"(dest+6*linesize), [dest7]
"r"(dest+7*linesize),
942 #if _MIPS_SIM != _ABIO32
946 int16_t *dst =
block;
948 uint32_t count = 8,
tmp[1];
949 int16_t
coeff[16] = {17, 22, 17, 10,
953 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
954 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
955 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
960 "li %[tmp0], 0x03 \n\t"
961 "mtc1 %[tmp0], %[ftmp0] \n\t"
963 MMI_LDC1(%[ftmp2], %[
coeff], 0x00)
964 MMI_LDC1(%[ftmp3], %[
coeff], 0x08)
965 MMI_LDC1(%[ftmp4], %[
coeff], 0x10)
966 MMI_LDC1(%[ftmp5], %[
coeff], 0x18)
969 MMI_LDC1(%[ftmp1], %[
src], 0x00)
970 "pmaddhw %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
971 "pmaddhw %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
972 "pmaddhw %[ftmp8], %[ftmp4], %[ftmp1] \n\t"
973 "pmaddhw %[ftmp9], %[ftmp5], %[ftmp1] \n\t"
974 "punpcklwd %[ftmp10], %[ftmp6], %[ftmp7] \n\t"
975 "punpckhwd %[ftmp11], %[ftmp6], %[ftmp7] \n\t"
976 "punpcklwd %[ftmp6], %[ftmp8], %[ftmp9] \n\t"
977 "punpckhwd %[ftmp7], %[ftmp8], %[ftmp9] \n\t"
978 "paddw %[ftmp8], %[ftmp10], %[ftmp11] \n\t"
979 "paddw %[ftmp9], %[ftmp6], %[ftmp7] \n\t"
980 "paddw %[ftmp8], %[ftmp8], %[ff_pw_4] \n\t"
981 "paddw %[ftmp9], %[ftmp9], %[ff_pw_4] \n\t"
982 "psraw %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
983 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
984 "punpcklhw %[ftmp6], %[ftmp8], %[ftmp9] \n\t"
985 "punpckhhw %[ftmp7], %[ftmp8], %[ftmp9] \n\t"
986 "punpcklhw %[ftmp8], %[ftmp6], %[ftmp7] \n\t"
987 MMI_SDC1(%[ftmp8], %[dst], 0x00)
991 "addiu %[count], %[count], -0x01 \n\t"
992 "bnez %[count], 1b \n\t"
993 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
994 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
995 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
996 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
997 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
998 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
999 [tmp0]
"=&r"(
tmp[0]), [count]
"+&r"(count),
1000 [
src]
"+&r"(
src), [dst]
"+&r"(dst)
1009 "li %[tmp0], 0x07 \n\t"
1010 "mtc1 %[tmp0], %[ftmp0] \n\t"
1012 MMI_LDC1(%[ftmp1], %[
src], 0x00)
1013 MMI_LDC1(%[ftmp2], %[
src], 0x20)
1014 MMI_LDC1(%[ftmp3], %[
src], 0x40)
1015 MMI_LDC1(%[ftmp4], %[
src], 0x60)
1016 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1017 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1018 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1019 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t"
1021 MMI_LDC1(%[ftmp1], %[
src], 0x10)
1022 MMI_LDC1(%[ftmp2], %[
src], 0x30)
1023 MMI_LDC1(%[ftmp3], %[
src], 0x50)
1024 MMI_LDC1(%[ftmp4], %[
src], 0x70)
1025 "punpcklhw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
1026 "punpckhhw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
1027 "punpcklhw %[ftmp11], %[ftmp3], %[ftmp4] \n\t"
1028 "punpckhhw %[ftmp12], %[ftmp3], %[ftmp4] \n\t"
1046 MMI_LWC1(%[ftmp1], %[dest], 0x00)
1047 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t"
1048 MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
1049 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1050 MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
1051 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1052 MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
1053 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1054 MMI_LWC1(%[ftmp5], %[tmp0], 0x00)
1055 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1056 MMI_LWC1(%[ftmp6], %[tmp0], 0x00)
1057 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1058 MMI_LWC1(%[ftmp7], %[tmp0], 0x00)
1059 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1060 MMI_LWC1(%[ftmp8], %[tmp0], 0x00)
1061 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1062 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1063 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1064 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1065 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1066 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1067 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1068 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1069 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1071 "paddh %[ftmp1], %[ftmp1], %[ftmp15] \n\t"
1072 "paddh %[ftmp2], %[ftmp2], %[ftmp16] \n\t"
1073 "paddh %[ftmp3], %[ftmp3], %[ftmp17] \n\t"
1074 "paddh %[ftmp4], %[ftmp4], %[ftmp18] \n\t"
1075 "paddh %[ftmp5], %[ftmp5], %[ftmp19] \n\t"
1076 "paddh %[ftmp6], %[ftmp6], %[ftmp20] \n\t"
1077 "paddh %[ftmp7], %[ftmp7], %[ftmp21] \n\t"
1078 "paddh %[ftmp8], %[ftmp8], %[ftmp22] \n\t"
1080 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1081 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1082 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1083 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1084 "packushb %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1085 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1086 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1087 "packushb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1089 MMI_SWC1(%[ftmp1], %[dest], 0x00)
1090 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t"
1091 MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
1092 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1093 MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
1094 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1095 MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
1096 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1097 MMI_SWC1(%[ftmp5], %[tmp0], 0x00)
1098 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1099 MMI_SWC1(%[ftmp6], %[tmp0], 0x00)
1100 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1101 MMI_SWC1(%[ftmp7], %[tmp0], 0x00)
1102 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1103 MMI_SWC1(%[ftmp8], %[tmp0], 0x00)
1105 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1106 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1107 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1108 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1109 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1110 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
1111 [ftmp12]
"=&f"(ftmp[12]), [ftmp13]
"=&f"(ftmp[13]),
1112 [ftmp14]
"=&f"(ftmp[14]), [ftmp15]
"=&f"(ftmp[15]),
1113 [ftmp16]
"=&f"(ftmp[16]), [ftmp17]
"=&f"(ftmp[17]),
1114 [ftmp18]
"=&f"(ftmp[18]), [ftmp19]
"=&f"(ftmp[19]),
1115 [ftmp20]
"=&f"(ftmp[20]), [ftmp21]
"=&f"(ftmp[21]),
1116 [ftmp22]
"=&f"(ftmp[22]),
1119 [
src]
"r"(
src), [dest]
"r"(dest), [linesize]
"r"(linesize)
1132 dc = (17 *
dc + 4) >> 3;
1133 dc = (17 *
dc + 64) >> 7;
1136 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1137 "pshufh %[dc], %[dc], %[ftmp0] \n\t"
1139 MMI_LWC1(%[ftmp1], %[dest0], 0x00)
1140 MMI_LWC1(%[ftmp2], %[dest1], 0x00)
1141 MMI_LWC1(%[ftmp3], %[dest2], 0x00)
1142 MMI_LWC1(%[ftmp4], %[dest3], 0x00)
1144 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1145 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1146 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1147 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1149 "paddsh %[ftmp1], %[ftmp1], %[dc] \n\t"
1150 "paddsh %[ftmp2], %[ftmp2], %[dc] \n\t"
1151 "paddsh %[ftmp3], %[ftmp3], %[dc] \n\t"
1152 "paddsh %[ftmp4], %[ftmp4], %[dc] \n\t"
1154 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1155 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1156 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1157 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1159 MMI_SWC1(%[ftmp1], %[dest0], 0x00)
1160 MMI_SWC1(%[ftmp2], %[dest1], 0x00)
1161 MMI_SWC1(%[ftmp3], %[dest2], 0x00)
1162 MMI_SWC1(%[ftmp4], %[dest3], 0x00)
1163 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1164 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1166 [ftmp4]
"=&f"(ftmp[4])
1167 : [dest0]
"r"(dest+0*linesize), [dest1]
"r"(dest+1*linesize),
1168 [dest2]
"r"(dest+2*linesize), [dest3]
"r"(dest+3*linesize),
1177 int16_t *dst =
block;
1179 uint32_t count = 4,
tmp[1];
1180 int16_t
coeff[16] = {17, 22, 17, 10,
1184 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
1185 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
1189 "li %[tmp0], 0x03 \n\t"
1190 "mtc1 %[tmp0], %[ftmp0] \n\t"
1191 MMI_LDC1(%[ftmp2], %[
coeff], 0x00)
1192 MMI_LDC1(%[ftmp3], %[
coeff], 0x08)
1193 MMI_LDC1(%[ftmp4], %[
coeff], 0x10)
1194 MMI_LDC1(%[ftmp5], %[
coeff], 0x18)
1197 MMI_LDC1(%[ftmp1], %[
src], 0x00)
1198 "pmaddhw %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
1199 "pmaddhw %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
1200 "pmaddhw %[ftmp8], %[ftmp4], %[ftmp1] \n\t"
1201 "pmaddhw %[ftmp9], %[ftmp5], %[ftmp1] \n\t"
1202 "punpcklwd %[ftmp10], %[ftmp6], %[ftmp7] \n\t"
1203 "punpckhwd %[ftmp11], %[ftmp6], %[ftmp7] \n\t"
1204 "punpcklwd %[ftmp6], %[ftmp8], %[ftmp9] \n\t"
1205 "punpckhwd %[ftmp7], %[ftmp8], %[ftmp9] \n\t"
1206 "paddw %[ftmp8], %[ftmp10], %[ftmp11] \n\t"
1207 "paddw %[ftmp9], %[ftmp6], %[ftmp7] \n\t"
1208 "paddw %[ftmp8], %[ftmp8], %[ff_pw_4] \n\t"
1209 "paddw %[ftmp9], %[ftmp9], %[ff_pw_4] \n\t"
1210 "psraw %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1211 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
1212 "punpcklhw %[ftmp6], %[ftmp8], %[ftmp9] \n\t"
1213 "punpckhhw %[ftmp7], %[ftmp8], %[ftmp9] \n\t"
1214 "punpcklhw %[ftmp8], %[ftmp6], %[ftmp7] \n\t"
1215 MMI_SDC1(%[ftmp8], %[dst], 0x00)
1219 "addiu %[count], %[count], -0x01 \n\t"
1220 "bnez %[count], 1b \n\t"
1221 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1222 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1223 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1224 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1225 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1226 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
1227 [tmp0]
"=&r"(
tmp[0]), [count]
"+&r"(count),
1228 [
src]
"+&r"(
src), [dst]
"+&r"(dst)
1237 "li %[tmp0], 0x07 \n\t"
1238 "mtc1 %[tmp0], %[ftmp0] \n\t"
1239 "li %[tmp0], 0x44 \n\t"
1240 "mtc1 %[tmp0], %[ftmp15] \n\t"
1242 MMI_LDC1(%[ftmp1], %[
src], 0x00)
1243 MMI_LDC1(%[ftmp2], %[
src], 0x10)
1244 MMI_LDC1(%[ftmp3], %[
src], 0x20)
1245 MMI_LDC1(%[ftmp4], %[
src], 0x30)
1246 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1247 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1248 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1249 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t"
1252 "li %[tmp0], 0x00160011 \n\t"
1253 "mtc1 %[tmp0], %[ftmp3] \n\t"
1254 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
1255 "li %[tmp0], 0x000a0011 \n\t"
1256 "mtc1 %[tmp0], %[ftmp4] \n\t"
1257 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t"
1258 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t"
1259 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t"
1260 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
1261 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t"
1262 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t"
1263 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
1264 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t"
1265 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t"
1266 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
1267 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
1268 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t"
1269 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t"
1270 "punpcklhw %[ftmp11], %[ftmp1], %[ftmp2] \n\t"
1273 "li %[tmp0], 0x000a0011 \n\t"
1274 "mtc1 %[tmp0], %[ftmp3] \n\t"
1275 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
1276 "li %[tmp0], 0xffeaffef \n\t"
1277 "mtc1 %[tmp0], %[ftmp4] \n\t"
1278 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t"
1279 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t"
1280 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t"
1281 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
1282 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t"
1283 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t"
1284 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
1285 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t"
1286 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t"
1287 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
1288 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
1289 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t"
1290 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t"
1291 "punpcklhw %[ftmp12], %[ftmp1], %[ftmp2] \n\t"
1294 "li %[tmp0], 0xfff60011 \n\t"
1295 "mtc1 %[tmp0], %[ftmp3] \n\t"
1296 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
1297 "li %[tmp0], 0x0016ffef \n\t"
1298 "mtc1 %[tmp0], %[ftmp4] \n\t"
1299 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t"
1300 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t"
1301 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t"
1302 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
1303 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t"
1304 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t"
1305 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
1306 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t"
1307 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t"
1308 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
1309 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
1310 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t"
1311 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t"
1312 "punpcklhw %[ftmp13], %[ftmp1], %[ftmp2] \n\t"
1315 "li %[tmp0], 0xffea0011 \n\t"
1316 "mtc1 %[tmp0], %[ftmp3] \n\t"
1317 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
1318 "li %[tmp0], 0xfff60011 \n\t"
1319 "mtc1 %[tmp0], %[ftmp4] \n\t"
1320 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t"
1321 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t"
1322 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t"
1323 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t"
1324 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t"
1325 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t"
1326 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t"
1327 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t"
1328 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t"
1329 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
1330 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
1331 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t"
1332 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t"
1333 "punpcklhw %[ftmp14], %[ftmp1], %[ftmp2] \n\t"
1335 MMI_LWC1(%[ftmp1], %[dest], 0x00)
1336 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t"
1337 MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
1338 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1339 MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
1340 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1341 MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
1342 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1343 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1344 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1345 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1346 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1347 "paddh %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
1348 "paddh %[ftmp2], %[ftmp2], %[ftmp12] \n\t"
1349 "paddh %[ftmp3], %[ftmp3], %[ftmp13] \n\t"
1350 "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t"
1351 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1352 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1353 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1354 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1356 MMI_SWC1(%[ftmp1], %[dest], 0x00)
1357 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t"
1358 MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
1359 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1360 MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
1361 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t"
1362 MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
1364 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1365 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1366 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1367 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1368 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1369 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
1370 [ftmp12]
"=&f"(ftmp[12]), [ftmp13]
"=&f"(ftmp[13]),
1371 [ftmp14]
"=&f"(ftmp[14]), [ftmp15]
"=&f"(ftmp[15]),
1374 [
src]
"r"(
src), [dest]
"r"(dest), [linesize]
"r"(linesize)
1386 for (
i = 0;
i < 8;
i++) {
1391 d1 = (
a - d + 3 +
rnd) >> 3;
1392 d2 = (
a - d +
b -
c + 4 -
rnd) >> 3;
1408 int rnd1 =
flags & 2 ? 3 : 4;
1409 int rnd2 = 7 - rnd1;
1410 for (
i = 0;
i < 8;
i++) {
1418 left[6] = ((
a << 3) - d1 + rnd1) >> 3;
1419 left[7] = ((
b << 3) - d2 + rnd2) >> 3;
1420 right[0] = ((c << 3) + d2 + rnd1) >> 3;
1421 right[1] = ((d << 3) + d1 + rnd2) >> 3;
1423 right += right_stride;
1424 left += left_stride;
1439 for (
i = 0;
i < 8;
i++) {
1444 d1 = (
a - d + 3 +
rnd) >> 3;
1445 d2 = (
a - d +
b -
c + 4 -
rnd) >> 3;
1461 int rnd1 = 4, rnd2 = 3;
1462 for (
i = 0;
i < 8;
i++) {
1470 top[48] = ((
a << 3) - d1 + rnd1) >> 3;
1471 top[56] = ((
b << 3) - d2 + rnd2) >> 3;
1472 bottom[0] = ((c << 3) + d2 + rnd1) >> 3;
1473 bottom[8] = ((d << 3) + d1 + rnd2) >> 3;
1494 int a0_sign =
a0 >> 31;
1496 a0 = (
a0 ^ a0_sign) - a0_sign;
1504 int clip_sign =
clip >> 31;
1506 clip = ((
clip ^ clip_sign) - clip_sign) >> 1;
1509 int d = 5 * (
a3 -
a0);
1510 int d_sign = (d >> 31);
1512 d = ((d ^ d_sign) - d_sign) >> 3;
1515 if (d_sign ^ clip_sign)
1519 d = (d ^ d_sign) - d_sign;
1545 for (
i = 0;
i <
len;
i += 4) {
/* Store hook for the plain "put" case: expands to nothing, so a TRANSFER_*
 * macro instantiated with OP_PUT emits no extra instructions for OP(...). */
1607 #define OP_PUT(S, D)
/* Store hook for the averaging case: loads 8 bytes from memory operand S into
 * $f16, then replaces register D with the byte-wise average (pavgb) of D and
 * $f16. Overwrites $f16. */
1608 #define OP_AVG(S, D) \
1609 "ldc1 $f16, "#S" \n\t" \
1610 "pavgb "#D", "#D", $f16 \n\t"
/* Adds the contents of $f14 (the register LOAD_ROUNDER_MMI fills) to $f6 and
 * $f8, then shifts every halfword of both right arithmetically by SHIFT.
 * SHIFT is spliced in as a string literal, so it must be passed quoted,
 * e.g. "$f16" or "%[shift]". */
1613 #define NORMALIZE_MMI(SHIFT) \
1614 "paddh $f6, $f6, $f14 \n\t" \
1615 "paddh $f8, $f8, $f14 \n\t" \
1616 "psrah $f6, $f6, "SHIFT" \n\t" \
1617 "psrah $f8, $f8, "SHIFT" \n\t"
/* Packs the halfwords in $f6 and $f8 into bytes in $f6 (packushb) and stores
 * the resulting 8 bytes at dst.
 * NOTE(review): OP is not referenced by the lines shown here; confirm whether
 * an OP(...) invocation belongs between the pack and the store. */
1619 #define TRANSFER_DO_PACK(OP) \
1620 "packushb $f6, $f6, $f8 \n\t" \
1622 "sdc1 $f6, 0x00(%[dst]) \n\t"
/* Applies OP to (dst+0, $f6) and (dst+8, $f8), then stores $f6 and $f8 to
 * dst+0 and dst+8 without packing (16 bytes in total). */
1624 #define TRANSFER_DONT_PACK(OP) \
1625 OP(0(%[dst]), $f6) \
1626 OP(8(%[dst]), $f8) \
1627 "sdc1 $f6, 0x00(%[dst]) \n\t" \
1628 "sdc1 $f8, 0x08(%[dst]) \n\t"
/* Interleaves the low bytes of reg with the bytes of $f0 (punpcklbh). reg is
 * spliced in as a string literal. The filter macros in this file clear $f0
 * before using this, which makes the result the bytes widened to halfwords. */
1631 #define DO_UNPACK(reg) \
1632 "punpcklbh "reg", "reg", $f0 \n\t"
/* No-op counterpart of DO_UNPACK for callers whose input is already 16-bit. */
1633 #define DONT_UNPACK(reg)
/* Loads the 32-bit word at memory operand ROUND into $f14 and replicates its
 * low halfword into all four halfword lanes (punpcklhw, then punpcklwd, each
 * of $f14 with itself). ROUND is spliced in as a string literal. */
1636 #define LOAD_ROUNDER_MMI(ROUND) \
1637 "lwc1 $f14, "ROUND" \n\t" \
1638 "punpcklhw $f14, $f14, $f14 \n\t" \
1639 "punpcklwd $f14, $f14, $f14 \n\t"
/**
 * One output row of the (-1, 9, 9, -1) filter on 16-bit lanes.
 *
 * On entry R1 and R2 hold the two centre rows, already widened to halfwords.
 * The macro loads the row at src + stride1 into R0 and the row at src + stride
 * into R3, and computes
 *     R1 = ((R1 + R2) * 9 - R0 - R3 + $f14) >> shift
 * which it stores at dst + OFF before advancing src by stride.
 *
 * Fixed registers: $f0 must be zero, $f12 must hold ff_pw_9 and $f14 the
 * rounder; $9 is used as an address scratch register.
 *
 * The multiply used to take $f6 as its second operand. $f6 is one of the
 * pixel registers rotated through R0..R3, so the sum was scaled by image data
 * instead of by the constant 9 that the enclosing asm loads into $f12.
 */
1642 #define SHIFT2_LINE(OFF, R0, R1, R2, R3) \
1643 "paddh "#R1", "#R1", "#R2" \n\t" \
1644 PTR_ADDU "$9, %[src], %[stride1] \n\t" \
1645 MMI_ULWC1(R0, $9, 0x00) \
1646 "pmullh "#R1", "#R1", $f12 \n\t" /* * 9 */ \
1647 "punpcklbh "#R0", "#R0", $f0 \n\t" \
1648 PTR_ADDU "$9, %[src], %[stride] \n\t" \
1649 MMI_ULWC1(R3, $9, 0x00) \
1650 "psubh "#R1", "#R1", "#R0" \n\t" \
1651 "punpcklbh "#R3", "#R3", $f0 \n\t" \
1652 "paddh "#R1", "#R1", $f14 \n\t" \
1653 "psubh "#R1", "#R1", "#R3" \n\t" \
1654 "psrah "#R1", "#R1", %[shift] \n\t" \
1655 MMI_SDC1(R1, %[dst], OFF) \
1656 PTR_ADDU "%[src], %[src], %[stride] \n\t"
1667 "xor $f0, $f0, $f0 \n\t"
1670 "ldc1 $f12, %[ff_pw_9] \n\t"
1672 MMI_ULWC1($f4, %[
src], 0x00)
1673 PTR_ADDU "%[src], %[src], %[stride] \n\t"
1674 MMI_ULWC1($f6, %[
src], 0x00)
1675 "punpcklbh $f4, $f4, $f0 \n\t"
1676 "punpcklbh $f6, $f6, $f0 \n\t"
1685 PTR_SUBU "%[src], %[src], %[stride2] \n\t"
1687 "addiu $8, $8, -0x01 \n\t"
1689 : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT
1690 [
src]
"+r"(
src), [dst]
"+r"(dst)
1694 :
"$8",
"$9",
"$f0",
"$f2",
"$f4",
"$f6",
"$f8",
"$f10",
"$f12",
1695 "$f14",
"$f16",
"memory"
/**
 * Generates OPNAME##vc1_hor_16b_shift2_mmi(): the horizontal (-1, 9, 9, -1)
 * pass over 16-bit intermediate rows, writing 8 output bytes per row.
 *
 * dst/stride: 8-bit destination and its row pitch in bytes.
 * src:        int16_t rows; src advances 0x18 bytes per output row.
 * rnd:        rounder; (-1+9+9-1)*1024 is subtracted from it before use.
 * OP:         forwarded to TRANSFER_DO_PACK.
 *
 * Per row, $f2/$f4 accumulate the two outer taps and $f6/$f8 the two inner
 * taps for outputs 0..3 and 4..7 respectively. The inner sums are multiplied
 * by ff_pw_9 ($f10), the outer sums subtracted, the rounder added and the
 * result shifted right by 7; ff_pw_128 ($f12) is then added before packing.
 *
 * Tap byte offsets are 0x00/0x02/0x04/0x06 for outputs 0..3 and the same
 * plus 8 for outputs 4..7. The last load previously used 0x0b, an odd byte
 * offset that straddles two int16 samples; it is 0x0c.
 */
1703 #define VC1_HOR_16B_SHIFT2(OP, OPNAME) \
1704 static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
1705 const int16_t *src, int rnd) \
1708 DECLARE_VAR_ALL64; \
1709 DECLARE_VAR_ADDRT; \
1712 rnd -= (-1+9+9-1)*1024; \
1715 LOAD_ROUNDER_MMI("%[rnd]") \
1716 "ldc1 $f12, %[ff_pw_128] \n\t" \
1717 "ldc1 $f10, %[ff_pw_9] \n\t" \
1719 MMI_ULDC1($f2, %[src], 0x00) \
1720 MMI_ULDC1($f4, %[src], 0x08) \
1721 MMI_ULDC1($f6, %[src], 0x02) \
1722 MMI_ULDC1($f8, %[src], 0x0a) \
1723 MMI_ULDC1($f0, %[src], 0x06) \
1724 "paddh $f2, $f2, $f0 \n\t" \
1725 MMI_ULDC1($f0, %[src], 0x0e) \
1726 "paddh $f4, $f4, $f0 \n\t" \
1727 MMI_ULDC1($f0, %[src], 0x04) \
1728 "paddh $f6, $f6, $f0 \n\t" \
1729 MMI_ULDC1($f0, %[src], 0x0c) \
1730 "paddh $f8, $f8, $f0 \n\t" \
1731 "pmullh $f6, $f6, $f10 \n\t" \
1732 "pmullh $f8, $f8, $f10 \n\t" \
1733 "psubh $f6, $f6, $f2 \n\t" \
1734 "psubh $f8, $f8, $f4 \n\t" \
1735 "li $8, 0x07 \n\t" \
1736 "mtc1 $8, $f16 \n\t" \
1737 NORMALIZE_MMI("$f16") \
1739 "paddh $f6, $f6, $f12 \n\t" \
1740 "paddh $f8, $f8, $f12 \n\t" \
1741 TRANSFER_DO_PACK(OP) \
1742 "addiu %[h], %[h], -0x01 \n\t" \
1743 PTR_ADDIU "%[src], %[src], 0x18 \n\t" \
1744 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \
1745 "bnez %[h], 1b \n\t" \
1746 : RESTRICT_ASM_ALL64 RESTRICT_ASM_ADDRT \
1748 [src]"+r"(src), [dst]"+r"(dst) \
1749 : [stride]"r"(stride), [rnd]"m"(rnd), \
1750 [ff_pw_9]"m"(ff_pw_9), [ff_pw_128]"m"(ff_pw_128) \
1751 : "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", \
/**
 * Generates OPNAME##vc1_shift2_mmi(): the 8-bit (-1, 9, 9, -1) filter applied
 * along `offset`, producing 8 bytes per iteration for 8 iterations ($10 = 8).
 *
 * Per iteration, with p = src on entry:
 *   - loads p[0..7] and p[offset..] and sums them ($f6/$f8),
 *   - multiplies the sum by ff_pw_9 ($f12),
 *   - subtracts the pixels at p - offset (src was already advanced by offset,
 *     hence the offset_x2n = -2*offset operand) and at p + 2*offset,
 *   - adds the rounder from $f14, shifts right by 4 and packs to bytes,
 *   - stores 8 bytes at dst, then advances src by stride1 = stride - offset
 *     (net: one stride per iteration) and dst by stride.
 *
 * $f0 is cleared up front and used as the zero source for byte unpacking.
 */
1763 #define VC1_SHIFT2(OP, OPNAME)\
1764 static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
1765 mips_reg stride, int rnd, \
1768 DECLARE_VAR_LOW32; \
1769 DECLARE_VAR_ADDRT; \
1774 "xor $f0, $f0, $f0 \n\t" \
1775 "li $10, 0x08 \n\t" \
1776 LOAD_ROUNDER_MMI("%[rnd]") \
1777 "ldc1 $f12, %[ff_pw_9] \n\t" \
1779 MMI_ULWC1($f6, %[src], 0x00) \
1780 MMI_ULWC1($f8, %[src], 0x04) \
1781 PTR_ADDU "$9, %[src], %[offset] \n\t" \
1782 MMI_ULWC1($f2, $9, 0x00) \
1783 MMI_ULWC1($f4, $9, 0x04) \
1784 PTR_ADDU "%[src], %[src], %[offset] \n\t" \
1785 "punpcklbh $f6, $f6, $f0 \n\t" \
1786 "punpcklbh $f8, $f8, $f0 \n\t" \
1787 "punpcklbh $f2, $f2, $f0 \n\t" \
1788 "punpcklbh $f4, $f4, $f0 \n\t" \
1789 "paddh $f6, $f6, $f2 \n\t" \
1790 "paddh $f8, $f8, $f4 \n\t" \
1791 PTR_ADDU "$9, %[src], %[offset_x2n] \n\t" \
1792 MMI_ULWC1($f2, $9, 0x00) \
1793 MMI_ULWC1($f4, $9, 0x04) \
1794 "pmullh $f6, $f6, $f12 \n\t" \
1795 "pmullh $f8, $f8, $f12 \n\t" \
1796 "punpcklbh $f2, $f2, $f0 \n\t" \
1797 "punpcklbh $f4, $f4, $f0 \n\t" \
1798 "psubh $f6, $f6, $f2 \n\t" \
1799 "psubh $f8, $f8, $f4 \n\t" \
1800 PTR_ADDU "$9, %[src], %[offset] \n\t" \
1801 MMI_ULWC1($f2, $9, 0x00) \
1802 MMI_ULWC1($f4, $9, 0x04) \
1803 "punpcklbh $f2, $f2, $f0 \n\t" \
1804 "punpcklbh $f4, $f4, $f0 \n\t" \
1805 "psubh $f6, $f6, $f2 \n\t" \
1806 "psubh $f8, $f8, $f4 \n\t" \
1807 "li $8, 0x04 \n\t" \
1808 "mtc1 $8, $f16 \n\t" \
1809 NORMALIZE_MMI("$f16") \
1810 "packushb $f6, $f6, $f8 \n\t" \
1812 "sdc1 $f6, 0x00(%[dst]) \n\t" \
1813 "addiu $10, $10, -0x01 \n\t" \
1814 PTR_ADDU "%[src], %[src], %[stride1] \n\t" \
1815 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \
1816 "bnez $10, 1b \n\t" \
1817 : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT \
1818 [src]"+r"(src), [dst]"+r"(dst) \
1819 : [offset]"r"(offset), [offset_x2n]"r"(-2*offset), \
1820 [stride]"r"(stride), [rnd]"m"(rnd), \
1821 [stride1]"r"(stride-offset), \
1822 [ff_pw_9]"m"(ff_pw_9) \
1823 : "$8", "$9", "$10", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", \
1824 "$f12", "$f14", "$f16", "memory" \
/**
 * Shared four-tap core of the quarter-pel filters. Leaves the unrounded sum
 * for the low and high halves of the row in $f6 and $f8:
 *
 *     -ff_pw_3 * [A1] + $f12 * [A2] + $f10 * [A3] - 4 * [A4]
 *
 * The instantiating macros load ff_pw_18 into $f12 and ff_pw_53 into $f10.
 * A1..A4 are asm operands added to src to address each tap; LOAD is the load
 * macro to use and M scales the second load's offset (M*4).
 * Scratch: $8, $9, $f2, $f4, $f16 ($f16 is left holding the shift count 2).
 * NOTE(review): UNPACK is not referenced by the lines shown here; confirm that
 * UNPACK($f2)/UNPACK($f4)/... invocations follow each pair of loads.
 */
1842 #define MSPEL_FILTER13_CORE(UNPACK, LOAD, M, A1, A2, A3, A4) \
1843 PTR_ADDU "$9, %[src], "#A1" \n\t" \
1844 LOAD($f2, $9, M*0) \
1845 LOAD($f4, $9, M*4) \
1848 "pmullh $f2, $f2, %[ff_pw_3] \n\t" \
1849 "pmullh $f4, $f4, %[ff_pw_3] \n\t" \
1850 PTR_ADDU "$9, %[src], "#A2" \n\t" \
1851 LOAD($f6, $9, M*0) \
1852 LOAD($f8, $9, M*4) \
1855 "pmullh $f6, $f6, $f12 \n\t" \
1856 "pmullh $f8, $f8, $f12 \n\t" \
1857 "psubh $f6, $f6, $f2 \n\t" \
1858 "psubh $f8, $f8, $f4 \n\t" \
1859 PTR_ADDU "$9, %[src], "#A4" \n\t" \
1860 LOAD($f2, $9, M*0) \
1861 LOAD($f4, $9, M*4) \
1864 "li $8, 0x02 \n\t" \
1865 "mtc1 $8, $f16 \n\t" \
1866 "psllh $f2, $f2, $f16 \n\t" \
1867 "psllh $f4, $f4, $f16 \n\t" \
1868 "psubh $f6, $f6, $f2 \n\t" \
1869 "psubh $f8, $f8, $f4 \n\t" \
1870 PTR_ADDU "$9, %[src], "#A3" \n\t" \
1871 LOAD($f2, $9, M*0) \
1872 LOAD($f4, $9, M*4) \
1875 "pmullh $f2, $f2, $f10 \n\t" \
1876 "pmullh $f4, $f4, $f10 \n\t" \
1877 "paddh $f6, $f6, $f2 \n\t" \
1878 "paddh $f8, $f8, $f4 \n\t"
/**
 * Generates vc1_put_ver_16b_<NAME>_mmi(): the vertical quarter-pel pass that
 * writes 16-bit intermediates, 12 halfwords (0x18 bytes) per row.
 *
 * dst:        int16_t output, advanced 0x18 bytes per row.
 * src:        8-bit input; moved back one row (src -= src_stride) before the
 *             loop, so A1..A4 are expressed relative to that row.
 * src_stride: input row pitch.
 * rnd, shift: rounder (splatted into $f14) and right shift applied to every
 *             output halfword.
 *
 * Columns 0..7 go through MSPEL_FILTER13_CORE + NORMALIZE_MMI("%[shift]").
 * Columns 8..11 are filtered by the scalar sequence below with the same taps
 * (3*[A1] formed as x+x+x, 18*[A2], 53*[A3], 4*[A4]) and stored at dst+0x10.
 *
 * The trailing columns previously used a hard-coded shift of 6 while sharing
 * the rounder computed for `shift`. They were therefore scaled differently
 * from columns 0..7 of the same row. They now use %[shift] as well.
 */
1888 #define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4) \
1890 vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
1891 mips_reg src_stride, \
1892 int rnd, int64_t shift) \
1895 DECLARE_VAR_LOW32; \
1896 DECLARE_VAR_ADDRT; \
1898 src -= src_stride; \
1901 "xor $f0, $f0, $f0 \n\t" \
1902 LOAD_ROUNDER_MMI("%[rnd]") \
1903 "ldc1 $f10, %[ff_pw_53] \n\t" \
1904 "ldc1 $f12, %[ff_pw_18] \n\t" \
1907 MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \
1908 NORMALIZE_MMI("%[shift]") \
1909 TRANSFER_DONT_PACK(OP_PUT) \
1911 PTR_ADDU "$9, %[src], "#A1" \n\t" \
1912 MMI_ULWC1($f2, $9, 0x08) \
1914 "mov.d $f6, $f2 \n\t" \
1915 "paddh $f2, $f2, $f2 \n\t" \
1916 "paddh $f2, $f2, $f6 \n\t" /* 3 * [A1] */ \
1917 PTR_ADDU "$9, %[src], "#A2" \n\t" \
1918 MMI_ULWC1($f6, $9, 0x08) \
1920 "pmullh $f6, $f6, $f12 \n\t" \
1921 "psubh $f6, $f6, $f2 \n\t" \
1922 PTR_ADDU "$9, %[src], "#A3" \n\t" \
1923 MMI_ULWC1($f2, $9, 0x08) \
1925 "pmullh $f2, $f2, $f10 \n\t" \
1926 "paddh $f6, $f6, $f2 \n\t" \
1927 PTR_ADDU "$9, %[src], "#A4" \n\t" \
1928 MMI_ULWC1($f2, $9, 0x08) \
1930 "li $8, 0x02 \n\t" \
1931 "mtc1 $8, $f16 \n\t" \
1932 "psllh $f2, $f2, $f16 \n\t" \
1933 "psubh $f6, $f6, $f2 \n\t" \
1934 "paddh $f6, $f6, $f14 \n\t" \
1937 "psrah $f6, $f6, %[shift] \n\t" /* same shift as columns 0..7 */ \
1938 "sdc1 $f6, 0x10(%[dst]) \n\t" \
1939 "addiu %[h], %[h], -0x01 \n\t" \
1940 PTR_ADDU "%[src], %[src], %[stride_x1] \n\t" \
1941 PTR_ADDIU "%[dst], %[dst], 0x18 \n\t" \
1942 "bnez %[h], 1b \n\t" \
1943 : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT \
1945 [src]"+r"(src), [dst]"+r"(dst) \
1946 : [stride_x1]"r"(src_stride), [stride_x2]"r"(2*src_stride), \
1947 [stride_x3]"r"(3*src_stride), \
1948 [rnd]"m"(rnd), [shift]"f"(shift), \
1949 [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \
1950 [ff_pw_3]"f"(ff_pw_3) \
1951 : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \
1952 "$f14", "$f16", "memory" \
/**
 * Generates OPNAME##vc1_hor_16b_<NAME>_mmi(): the horizontal quarter-pel pass
 * that reads 16-bit intermediate rows and writes 8 output bytes per row.
 *
 * dst/stride: 8-bit destination and its row pitch.
 * src:        int16_t rows, 0x18 bytes apart.
 * rnd:        rounder; (-4+58+13-3)*256 is subtracted from it before use.
 * OP:         forwarded to TRANSFER_DO_PACK.
 *
 * Each row runs MSPEL_FILTER13_CORE on already-widened data (DONT_UNPACK,
 * 64-bit loads, M = 2), normalises with a shift of 7, adds ff_pw_128 and
 * packs to bytes.
 *
 * The src advance uses PTR_ADDIU, since its third operand is the immediate
 * 0x18. This matches the identical step in VC1_HOR_16B_SHIFT2 and no longer
 * relies on the assembler rewriting an add-register mnemonic that was given
 * an immediate.
 */
1963 #define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME) \
1965 OPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride, \
1966 const int16_t *src, int rnd) \
1969 DECLARE_VAR_ALL64; \
1970 DECLARE_VAR_ADDRT; \
1973 rnd -= (-4+58+13-3)*256; \
1976 "xor $f0, $f0, $f0 \n\t" \
1977 LOAD_ROUNDER_MMI("%[rnd]") \
1978 "ldc1 $f10, %[ff_pw_53] \n\t" \
1979 "ldc1 $f12, %[ff_pw_18] \n\t" \
1982 MSPEL_FILTER13_CORE(DONT_UNPACK, MMI_ULDC1, 2, A1, A2, A3, A4) \
1983 "li $8, 0x07 \n\t" \
1984 "mtc1 $8, $f16 \n\t" \
1985 NORMALIZE_MMI("$f16") \
1987 "paddh $f6, $f6, %[ff_pw_128] \n\t" \
1988 "paddh $f8, $f8, %[ff_pw_128] \n\t" \
1989 TRANSFER_DO_PACK(OP) \
1990 "addiu %[h], %[h], -0x01 \n\t" \
1991 PTR_ADDIU "%[src], %[src], 0x18 \n\t" \
1992 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \
1993 "bnez %[h], 1b \n\t" \
1994 : RESTRICT_ASM_ALL64 RESTRICT_ASM_ADDRT \
1996 [src]"+r"(src), [dst]"+r"(dst) \
1997 : [stride]"r"(stride), [rnd]"m"(rnd), \
1998 [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \
1999 [ff_pw_3]"f"(ff_pw_3), [ff_pw_128]"f"(ff_pw_128) \
2000 : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \
2001 "$f14", "$f16", "memory" \
/**
 * Generates OPNAME##vc1_<NAME>_mmi(): the single-pass 8-bit quarter-pel
 * filter, 8 output bytes per row.
 *
 * dst, src: 8-bit destination and source; both advance by stride per row.
 * offset:   distance between taps; offset, 2*offset and 3*offset are exposed
 *           as the offset_x1/x2/x3 operands for A1..A4 to name.
 * rnd:      rounder, splatted into $f14 by LOAD_ROUNDER_MMI.
 * OP:       forwarded to TRANSFER_DO_PACK.
 *
 * Each row runs MSPEL_FILTER13_CORE on byte input (DO_UNPACK, 32-bit loads),
 * normalises with a shift of 6 and packs to bytes.
 */
2013 #define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME) \
2015 OPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src, \
2016 mips_reg stride, int rnd, mips_reg offset) \
2019 DECLARE_VAR_LOW32; \
2020 DECLARE_VAR_ADDRT; \
2025 __asm__ volatile ( \
2026 "xor $f0, $f0, $f0 \n\t" \
2027 LOAD_ROUNDER_MMI("%[rnd]") \
2028 "ldc1 $f10, %[ff_pw_53] \n\t" \
2029 "ldc1 $f12, %[ff_pw_18] \n\t" \
2032 MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \
2033 "li $8, 0x06 \n\t" \
2034 "mtc1 $8, $f16 \n\t" \
2035 NORMALIZE_MMI("$f16") \
2036 TRANSFER_DO_PACK(OP) \
2037 "addiu %[h], %[h], -0x01 \n\t" \
2038 PTR_ADDU "%[src], %[src], %[stride] \n\t" \
2039 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \
2040 "bnez %[h], 1b \n\t" \
2041 : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT \
2043 [src]"+r"(src), [dst]"+r"(dst) \
2044 : [offset_x1]"r"(offset), [offset_x2]"r"(2*offset), \
2045 [offset_x3]"r"(3*offset), [stride]"r"(stride), \
2047 [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \
2048 [ff_pw_3]"f"(ff_pw_3) \
2049 : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \
2050 "$f14", "$f16", "memory" \
/**
 * Generates OP##vc1_mspel_mc() and OP##vc1_mspel_mc_16().
 *
 * OP##vc1_mspel_mc dispatches through three tables indexed by the sub-pel
 * mode (entry 0 is NULL):
 *   - vertical 16-bit pass, always the "put" variant, writing into tmp;
 *   - horizontal 16-bit pass (OP variant), reading tmp + 1;
 *   - single-pass 8-bit filter (OP variant).
 * For the two-pass path the intermediate shift is
 * (shift_value[hmode] + shift_value[vmode]) >> 1, the vertical rounder is
 * (1 << (shift - 1)) + rnd - 1 and the horizontal rounder is 64 - rnd. The
 * vertical pass starts at src - 1 and tmp is 12 x 8 int16_t.
 * The single-pass calls use (rounder 1 - rnd, offset = stride) with the
 * vmode table entry and (rounder rnd, offset = 1) with the hmode entry.
 * NOTE(review): the conditions selecting between these three calls are on
 * lines not shown here.
 *
 * OP##vc1_mspel_mc_16 covers a 16x16 area with four calls to the 8x8 routine:
 * columns +0 and +8, then the same again after advancing dst and src by
 * 8 * stride.
 */
2089 #define VC1_MSPEL_MC(OP) \
2090 static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
2091 int hmode, int vmode, int rnd) \
2093 static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\
2094 { NULL, vc1_put_ver_16b_shift1_mmi, \
2095 vc1_put_ver_16b_shift2_mmi, \
2096 vc1_put_ver_16b_shift3_mmi }; \
2097 static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\
2098 { NULL, OP ## vc1_hor_16b_shift1_mmi, \
2099 OP ## vc1_hor_16b_shift2_mmi, \
2100 OP ## vc1_hor_16b_shift3_mmi }; \
2101 static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] = \
2102 { NULL, OP ## vc1_shift1_mmi, \
2103 OP ## vc1_shift2_mmi, \
2104 OP ## vc1_shift3_mmi }; \
2108 static const int shift_value[] = { 0, 5, 1, 5 }; \
2109 int shift = (shift_value[hmode]+shift_value[vmode])>>1; \
2111 LOCAL_ALIGNED(16, int16_t, tmp, [12*8]); \
2113 r = (1<<(shift-1)) + rnd-1; \
2114 vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift); \
2116 vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd); \
2120 vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride); \
2126 vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1); \
2128 static void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src, \
2129 int stride, int hmode, int vmode, int rnd)\
2131 OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd); \
2132 OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \
2133 dst += 8*stride; src += 8*stride; \
2134 OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd); \
2135 OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \
/**
 * Emits the four exported entry points for one (a, b) sub-pel position:
 * ff_put_/ff_avg_vc1_mspel_mc<a><b>_mmi and their _16_mmi counterparts. Each
 * forwards to the matching put_/avg_vc1_mspel_mc[_16] helper, passing a as
 * hmode and b as vmode.
 */
2142 #define DECLARE_FUNCTION(a, b) \
2143 void ff_put_vc1_mspel_mc ## a ## b ## _mmi(uint8_t *dst, \
2144 const uint8_t *src, \
2148 put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
2150 void ff_avg_vc1_mspel_mc ## a ## b ## _mmi(uint8_t *dst, \
2151 const uint8_t *src, \
2155 avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
2157 void ff_put_vc1_mspel_mc ## a ## b ## _16_mmi(uint8_t *dst, \
2158 const uint8_t *src, \
2162 put_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \
2164 void ff_avg_vc1_mspel_mc ## a ## b ## _16_mmi(uint8_t *dst, \
2165 const uint8_t *src, \
2169 avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \
/**
 * Bilinear chroma kernel for 8 pixels.
 *
 * Inputs: ftmp1..ftmp4 hold 8 bytes each from the four neighbouring sample
 * positions, ftmp0 is zero, and A..D hold the four weights replicated across
 * halfwords. Each input is split into low (ftmp1..4) and high (ftmp5..8)
 * halves widened to halfwords, then
 *     (A*ftmp1 + B*ftmp2 + C*ftmp3 + D*ftmp4 + ff_pw_28) >> ftmp9
 * is evaluated for both halves (logical shift) and the two results are packed
 * back to 8 bytes in ftmp1. The users of this macro in this file set ftmp9
 * to 6.
 */
2191 #define CHROMA_MC_8_MMI \
2192 "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \
2193 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \
2194 "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" \
2195 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \
2196 "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \
2197 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
2198 "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" \
2199 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
2201 "pmullh %[ftmp1], %[ftmp1], %[A] \n\t" \
2202 "pmullh %[ftmp5], %[ftmp5], %[A] \n\t" \
2203 "pmullh %[ftmp2], %[ftmp2], %[B] \n\t" \
2204 "pmullh %[ftmp6], %[ftmp6], %[B] \n\t" \
2205 "pmullh %[ftmp3], %[ftmp3], %[C] \n\t" \
2206 "pmullh %[ftmp7], %[ftmp7], %[C] \n\t" \
2207 "pmullh %[ftmp4], %[ftmp4], %[D] \n\t" \
2208 "pmullh %[ftmp8], %[ftmp8], %[D] \n\t" \
2210 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
2211 "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
2212 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
2213 "paddh %[ftmp1], %[ftmp1], %[ff_pw_28] \n\t" \
2215 "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \
2216 "paddh %[ftmp7], %[ftmp7], %[ftmp8] \n\t" \
2217 "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
2218 "paddh %[ftmp5], %[ftmp5], %[ff_pw_28] \n\t" \
2220 "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t" \
2221 "psrlh %[ftmp5], %[ftmp5], %[ftmp9] \n\t" \
2222 "packushb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
/**
 * Bilinear chroma kernel for 4 pixels.
 *
 * Same computation as CHROMA_MC_8_MMI on the low four bytes only:
 *     (A*ftmp1 + B*ftmp2 + C*ftmp3 + D*ftmp4 + ff_pw_28) >> ftmp5
 * with ftmp0 zero on entry. The result is packed to bytes in ftmp1 (the upper
 * half is packed from ftmp0). The shift count lives in ftmp5 here, not ftmp9;
 * the user of this macro in this file sets it to 6.
 */
2225 #define CHROMA_MC_4_MMI \
2226 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \
2227 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \
2228 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
2229 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
2231 "pmullh %[ftmp1], %[ftmp1], %[A] \n\t" \
2232 "pmullh %[ftmp2], %[ftmp2], %[B] \n\t" \
2233 "pmullh %[ftmp3], %[ftmp3], %[C] \n\t" \
2234 "pmullh %[ftmp4], %[ftmp4], %[D] \n\t" \
2236 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
2237 "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
2238 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
2239 "paddh %[ftmp1], %[ftmp1], %[ff_pw_28] \n\t" \
2241 "psrlh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" \
2242 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
2247 ptrdiff_t
stride,
int h,
int x,
int y)
2249 const int A = (8 - x) * (8 - y);
2250 const int B = (x) * (8 - y);
2251 const int C = (8 - x) * (y);
2252 const int D = (x) * (y);
2258 av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
2261 "li %[tmp0], 0x06 \n\t"
2262 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2263 "mtc1 %[tmp0], %[ftmp9] \n\t"
2264 "pshufh %[A], %[A], %[ftmp0] \n\t"
2265 "pshufh %[B], %[B], %[ftmp0] \n\t"
2266 "pshufh %[C], %[C], %[ftmp0] \n\t"
2267 "pshufh %[D], %[D], %[ftmp0] \n\t"
2270 MMI_ULDC1(%[ftmp1], %[
src], 0x00)
2271 MMI_ULDC1(%[ftmp2], %[
src], 0x01)
2272 PTR_ADDU "%[src], %[src], %[stride] \n\t"
2273 MMI_ULDC1(%[ftmp3], %[
src], 0x00)
2274 MMI_ULDC1(%[ftmp4], %[
src], 0x01)
2278 MMI_SDC1(%[ftmp1], %[dst], 0x00)
2279 "addiu %[h], %[h], -0x01 \n\t"
2280 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
2281 "bnez %[h], 1b \n\t"
2282 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2283 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2284 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2285 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2286 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
2289 [tmp0]
"=&r"(
tmp[0]),
2290 [
src]
"+&r"(
src), [dst]
"+&r"(dst),
2293 [
A]
"f"(
A), [
B]
"f"(
B),
2294 [
C]
"f"(
C), [
D]
"f"(
D),
2302 ptrdiff_t
stride,
int h,
int x,
int y)
/*
 * Excerpt of a chroma interpolation routine that handles 4 bytes per row.
 * The function name, its leading parameters, the opening of the asm
 * statement, the loop label and the arithmetic between the loads and the
 * store are not visible in this excerpt.
 * x and y are the fractional offsets (asserted below to be in 0..7);
 * h is counted down by one for every row stored.
 */
/* Bilinear weights of the four neighbouring samples; A + B + C + D == 64. */
2304 const int A = (8 - x) * (8 - y);
2305 const int B = (x) * (8 - y);
2306 const int C = (8 - x) * (y);
2307 const int D = (x) * (y);
/* Both offsets must lie in 0..7. */
2313 av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
/*
 * Setup: tmp0 = 6 is moved into ftmp5 and ftmp0 is cleared to zero.
 * CHROMA_MC_4_MMI uses ftmp5 as its psrlh shift count and ftmp0 as the
 * zero vector; its invocation is presumably in the lines elided from this
 * excerpt -- confirm.
 */
2316 "li %[tmp0], 0x06 \n\t"
2317 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2318 "mtc1 %[tmp0], %[ftmp5] \n\t"
/*
 * Shuffle each weight with the all-zero selector in ftmp0; presumably this
 * replicates the low halfword into all four 16-bit lanes -- confirm against
 * the Loongson pshufh definition.
 * NOTE(review): these instructions write A..D, yet the operands are listed
 * below with a plain "f" constraint (no '=' or '+' modifier).
 */
2319 "pshufh %[A], %[A], %[ftmp0] \n\t"
2320 "pshufh %[B], %[B], %[ftmp0] \n\t"
2321 "pshufh %[C], %[C], %[ftmp0] \n\t"
2322 "pshufh %[D], %[D], %[ftmp0] \n\t"
/*
 * Loads (MMI_ULWC1 is defined elsewhere; by its name an unaligned 32-bit
 * load): ftmp1/ftmp2 from src+0 / src+1, then src is advanced by stride
 * and ftmp3/ftmp4 are loaded from the same two offsets of the next row.
 */
2325 MMI_ULWC1(%[ftmp1], %[
src], 0x00)
2326 MMI_ULWC1(%[ftmp2], %[
src], 0x01)
2327 PTR_ADDU "%[src], %[src], %[stride] \n\t"
2328 MMI_ULWC1(%[ftmp3], %[
src], 0x00)
2329 MMI_ULWC1(%[ftmp4], %[
src], 0x01)
/*
 * Store ftmp1 at dst, decrement h, advance dst by stride and branch back
 * to local label 1 while h is non-zero.
 */
2333 MMI_SWC1(%[ftmp1], %[dst], 0x00)
2334 "addiu %[h], %[h], -0x01 \n\t"
2335 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
2336 "bnez %[h], 1b \n\t"
/* Outputs: early-clobber FP scratch registers ftmp0..ftmp5. */
2337 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2338 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2339 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
/* Integer scratch register, then src and dst as read-write pointers. */
2340 [tmp0]
"=&r"(
tmp[0]),
2343 [
src]
"+&r"(
src), [dst]
"+&r"(dst),
/* The four weights, bound to FP registers. */
2346 [
A]
"f"(
A), [
B]
"f"(
B),
2347 [
C]
"f"(
C), [
D]
"f"(
D),
2355 ptrdiff_t
stride,
int h,
int x,
int y)
/*
 * Excerpt of a chroma interpolation routine that handles 8 bytes per row
 * and averages its result with the bytes already present at dst.
 * The function name, its leading parameters, the opening of the asm
 * statement, the loop label and the arithmetic between the loads and the
 * averaging step are not visible in this excerpt.
 * x and y are the fractional offsets (asserted below to be in 0..7);
 * h is counted down by one for every row stored.
 */
/* Bilinear weights of the four neighbouring samples; A + B + C + D == 64. */
2357 const int A = (8 - x) * (8 - y);
2358 const int B = (x) * (8 - y);
2359 const int C = (8 - x) * (y);
2360 const int D = (x) * (y);
/* Both offsets must lie in 0..7. */
2366 av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
/*
 * Setup: tmp0 = 6 is moved into ftmp9 and ftmp0 is cleared to zero.
 * NOTE(review): ftmp9 is presumably the right-shift count used by the
 * 8-wide kernel, whose code is outside this excerpt -- confirm.
 */
2369 "li %[tmp0], 0x06 \n\t"
2370 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2371 "mtc1 %[tmp0], %[ftmp9] \n\t"
/*
 * Shuffle each weight with the all-zero selector in ftmp0; presumably this
 * replicates the low halfword into all four 16-bit lanes -- confirm against
 * the Loongson pshufh definition.
 * NOTE(review): these instructions write A..D, yet the operands are listed
 * below with a plain "f" constraint (no '=' or '+' modifier).
 */
2372 "pshufh %[A], %[A], %[ftmp0] \n\t"
2373 "pshufh %[B], %[B], %[ftmp0] \n\t"
2374 "pshufh %[C], %[C], %[ftmp0] \n\t"
2375 "pshufh %[D], %[D], %[ftmp0] \n\t"
/*
 * Loads (MMI_ULDC1 is defined elsewhere; by its name an unaligned 64-bit
 * load): ftmp1/ftmp2 from src+0 / src+1, then src is advanced by stride
 * and ftmp3/ftmp4 are loaded from the same two offsets of the next row.
 */
2378 MMI_ULDC1(%[ftmp1], %[
src], 0x00)
2379 MMI_ULDC1(%[ftmp2], %[
src], 0x01)
2380 PTR_ADDU "%[src], %[src], %[stride] \n\t"
2381 MMI_ULDC1(%[ftmp3], %[
src], 0x00)
2382 MMI_ULDC1(%[ftmp4], %[
src], 0x01)
/*
 * Fetch the current destination row into ftmp2 and combine it with the
 * new row in ftmp1 using pavgb (packed byte average).
 */
2386 MMI_LDC1(%[ftmp2], %[dst], 0x00)
2387 "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
/*
 * Store ftmp1 at dst, decrement h, advance dst by stride and branch back
 * to local label 1 while h is non-zero.
 */
2389 MMI_SDC1(%[ftmp1], %[dst], 0x00)
2390 "addiu %[h], %[h], -0x01 \n\t"
2391 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
2392 "bnez %[h], 1b \n\t"
/* Outputs: early-clobber FP scratch registers ftmp0..ftmp9. */
2393 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2394 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2395 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2396 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2397 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
/* Integer scratch register, then src and dst as read-write pointers. */
2398 [tmp0]
"=&r"(
tmp[0]),
2401 [
src]
"+&r"(
src), [dst]
"+&r"(dst),
/* The four weights, bound to FP registers. */
2404 [
A]
"f"(
A), [
B]
"f"(
B),
2405 [
C]
"f"(
C), [
D]
"f"(
D),
2413 ptrdiff_t
stride,
int h,
int x,
int y)
/*
 * Excerpt of a chroma interpolation routine that handles 4 bytes per row
 * and averages its result with the bytes already present at dst.
 * The function name, its leading parameters, the opening of the asm
 * statement, the loop label, the arithmetic between the loads and the
 * averaging step, and the end of the operand list are not visible in this
 * excerpt.
 * x and y are the fractional offsets (asserted below to be in 0..7);
 * h is counted down by one for every row stored.
 */
/* Bilinear weights of the four neighbouring samples; A + B + C + D == 64. */
2415 const int A = (8 - x) * (8 - y);
2416 const int B = ( x) * (8 - y);
2417 const int C = (8 - x) * ( y);
2418 const int D = ( x) * ( y);
/* Both offsets must lie in 0..7. */
2424 av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
/*
 * Setup: tmp0 = 6 is moved into ftmp5 and ftmp0 is cleared to zero.
 * CHROMA_MC_4_MMI uses ftmp5 as its psrlh shift count and ftmp0 as the
 * zero vector; its invocation is presumably in the lines elided from this
 * excerpt -- confirm.
 */
2427 "li %[tmp0], 0x06 \n\t"
2428 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2429 "mtc1 %[tmp0], %[ftmp5] \n\t"
/*
 * Shuffle each weight with the all-zero selector in ftmp0; presumably this
 * replicates the low halfword into all four 16-bit lanes -- confirm against
 * the Loongson pshufh definition.
 * NOTE(review): these instructions write A..D, yet the operands are listed
 * below with a plain "f" constraint (no '=' or '+' modifier).
 */
2430 "pshufh %[A], %[A], %[ftmp0] \n\t"
2431 "pshufh %[B], %[B], %[ftmp0] \n\t"
2432 "pshufh %[C], %[C], %[ftmp0] \n\t"
2433 "pshufh %[D], %[D], %[ftmp0] \n\t"
/*
 * Loads (MMI_ULWC1 is defined elsewhere; by its name an unaligned 32-bit
 * load): ftmp1/ftmp2 from src+0 / src+1, then src is advanced by stride
 * and ftmp3/ftmp4 are loaded from the same two offsets of the next row.
 */
2436 MMI_ULWC1(%[ftmp1], %[
src], 0x00)
2437 MMI_ULWC1(%[ftmp2], %[
src], 0x01)
2438 PTR_ADDU "%[src], %[src], %[stride] \n\t"
2439 MMI_ULWC1(%[ftmp3], %[
src], 0x00)
2440 MMI_ULWC1(%[ftmp4], %[
src], 0x01)
/*
 * Fetch the current destination row into ftmp2 and combine it with the
 * new row in ftmp1 using pavgb (packed byte average).
 */
2444 MMI_LWC1(%[ftmp2], %[dst], 0x00)
2445 "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
/*
 * Store ftmp1 at dst, decrement h, advance dst by stride and branch back
 * to local label 1 while h is non-zero.
 */
2447 MMI_SWC1(%[ftmp1], %[dst], 0x00)
2448 "addiu %[h], %[h], -0x01 \n\t"
2449 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
2450 "bnez %[h], 1b \n\t"
/* Outputs: early-clobber FP scratch registers ftmp0..ftmp5. */
2451 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2452 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2453 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
/* Integer scratch register, then src and dst as read-write pointers. */
2454 [tmp0]
"=&r"(
tmp[0]),
2457 [
src]
"+&r"(
src), [dst]
"+&r"(dst),
/* The four weights, bound to FP registers. */
2460 [
A]
"f"(
A), [
B]
"f"(
B),
2461 [
C]
"f"(
C), [
D]
"f"(
D),