/*
 * Excerpt of an inline-asm statement. The enclosing function header, the asm
 * opener and part of the operand lists are not visible in this view (the
 * embedded line numbers skip), so only what the visible lines show is
 * described.
 *
 * Visible behaviour: for four destination rows (dst0..dst3) the low bytes of
 * each row are widened to 16-bit lanes, added to one group of 16-bit values
 * read from src, packed back to bytes with unsigned saturation and stored to
 * the same row.
 *
 * The MMI_* names are macros defined outside this excerpt; judging by their
 * names they wrap ldc1/ulwc1/swc1-style loads and stores -- TODO confirm in
 * the macro header.
 */
/* ftmp0 = 0: used as the zero source for the interleaves and packs below. */
37 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
/* Four loads from src at byte offsets 0x00, 0x08, 0x10 and 0x18. */
38 MMI_LDC1(%[ftmp1], %[src], 0x00)
39 MMI_LDC1(%[ftmp2], %[src], 0x08)
40 MMI_LDC1(%[ftmp3], %[src], 0x10)
41 MMI_LDC1(%[ftmp4], %[src], 0x18)
/* One load per destination row (presumably 4 pixels each -- confirm). */
42 MMI_ULWC1(%[ftmp5], %[dst0], 0x00)
43 MMI_ULWC1(%[ftmp6], %[dst1], 0x00)
44 MMI_ULWC1(%[ftmp7], %[dst2], 0x00)
45 MMI_ULWC1(%[ftmp8], %[dst3], 0x00)
/* Interleave the low bytes with zero bytes: zero-extend to 16-bit lanes. */
46 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
47 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
48 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
49 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
/* Packed 16-bit add: src values + widened destination pixels. */
50 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
51 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
52 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
53 "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
/* Pack the 16-bit lanes back to bytes with unsigned saturation. */
54 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
55 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
56 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
57 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
/* Write each row's result back to dst0..dst3. */
58 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
59 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
60 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
61 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
/*
 * Output operands: scratch FP registers, each declared early-clobber ("=&f").
 * NOTE(review): ftmp8 and src are used above but their operands are not among
 * the lines visible in this excerpt.
 */
62 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
63 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
64 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
65 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
/* Input operands: the four row addresses dst + k*stride for k = 0..3. */
69 : [dst0]
"r"(dst), [dst1]
"r"(dst+
stride),
70 [dst2]
"r"(dst+2*stride), [dst3]
"r"(dst+3*
stride),
/*
 * Excerpt of an inline-asm statement plus the memset that follows it. The
 * function header, the asm opener, the integer/input operands and the clobber
 * list are not visible in this view.
 *
 * Visible behaviour: four groups of 16-bit values are loaded from block
 * (byte offsets 0x00..0x18), run through a packed add/sub/shift butterfly,
 * interleaved, run through the same butterfly again, shifted right by 6 and
 * added (with unsigned saturation) to four rows at dst. The first 32 bytes of
 * block are overwritten with zeros on the way.
 *
 * MMI_* and PTR_ADDU are macros defined outside this excerpt -- TODO confirm
 * their exact expansion there.
 */
/* ftmp8 = 1 and ftmp9 = 6: shift counts for the psrah instructions below. */
87 "dli %[tmp0], 0x01 \n\t"
88 MMI_LDC1(%[ftmp0], %[block], 0x00)
89 "mtc1 %[tmp0], %[ftmp8] \n\t"
90 MMI_LDC1(%[ftmp1], %[block], 0x08)
91 "dli %[tmp0], 0x06 \n\t"
92 MMI_LDC1(%[ftmp2], %[block], 0x10)
93 "mtc1 %[tmp0], %[ftmp9] \n\t"
/*
 * First butterfly, with r0..r3 = the groups loaded into ftmp0..ftmp3:
 *   ftmp4 = (r1 >> 1) - r3      ftmp5  = (r3 >> 1) + r1
 *   ftmp10 = r0 + r2            ftmp0  = r0 - r2
 * then sums/differences of those pairs into ftmp11, ftmp2, ftmp10, ftmp0.
 */
94 "psrah %[ftmp4], %[ftmp1], %[ftmp8] \n\t"
95 MMI_LDC1(%[ftmp3], %[block], 0x18)
96 "psrah %[ftmp5], %[ftmp3], %[ftmp8] \n\t"
97 "psubh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
98 "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
99 "paddh %[ftmp10], %[ftmp2], %[ftmp0] \n\t"
100 "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
101 "paddh %[ftmp11], %[ftmp5], %[ftmp10] \n\t"
102 "psubh %[ftmp2], %[ftmp10], %[ftmp5] \n\t"
103 "paddh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
104 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
/* Halfword then word interleaves (looks like a 4x4 transpose of the 16-bit lanes). */
105 "punpckhhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t"
106 "punpcklhw %[ftmp5], %[ftmp11], %[ftmp10] \n\t"
107 "punpckhhw %[ftmp4], %[ftmp0], %[ftmp2] \n\t"
108 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
109 "punpckhwd %[ftmp2], %[ftmp5], %[ftmp0] \n\t"
110 "punpcklwd %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
111 "punpcklwd %[ftmp10], %[ftmp1], %[ftmp4] \n\t"
112 "punpckhwd %[ftmp0], %[ftmp1], %[ftmp4] \n\t"
/* Add the ff_pw_32 operand (defined outside this excerpt; presumably the rounding bias for the later >> 6 -- confirm). */
113 "paddh %[ftmp5], %[ftmp5], %[ff_pw_32] \n\t"
/* Second butterfly, same shape as the first, on the interleaved vectors. */
114 "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
115 "psrah %[ftmp3], %[ftmp0], %[ftmp8] \n\t"
116 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
117 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
118 "paddh %[ftmp1], %[ftmp10], %[ftmp5] \n\t"
119 "psubh %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
120 "paddh %[ftmp10], %[ftmp3], %[ftmp1] \n\t"
121 "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
122 "paddh %[ftmp11], %[ftmp4], %[ftmp5] \n\t"
/* ftmp7 = 0: stored over block[0x00..0x1f] and reused as the zero for widen/pack. */
123 "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
124 "psubh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
125 MMI_SDC1(%[ftmp7], %[block], 0x00)
126 MMI_SDC1(%[ftmp7], %[block], 0x08)
127 MMI_SDC1(%[ftmp7], %[block], 0x10)
128 MMI_SDC1(%[ftmp7], %[block], 0x18)
/*
 * Rows at dst and (presumably) dst + stride: shift the results right by 6,
 * widen the loaded pixels to 16 bits, add, pack with unsigned saturation and
 * store back.
 */
129 MMI_ULWC1(%[ftmp2], %[dst], 0x00)
130 "psrah %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
131 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
132 "psrah %[ftmp4], %[ftmp11], %[ftmp9] \n\t"
133 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
134 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
135 "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
136 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
137 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
138 "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
139 MMI_SWC1(%[ftmp2], %[dst], 0x00)
140 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
/* Advance dst by two rows, then repeat for the next pair of rows. */
141 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
142 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
143 MMI_ULWC1(%[ftmp2], %[dst], 0x00)
144 "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
145 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
146 "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
147 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
148 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
149 "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
150 "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
151 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
152 MMI_SWC1(%[ftmp2], %[dst], 0x00)
153 "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
154 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
/*
 * Output operands: twelve scratch FP registers, all early-clobber ("=&f").
 * The operands for tmp0, dst, block, stride and ff_pw_32 are not visible in
 * this excerpt.
 */
155 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
156 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
157 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
158 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
159 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
160 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
/* Clears the first 32 bytes of block; the asm above already stored zeros to offsets 0x00..0x18. */
170 memset(block, 0, 32);
/*
 * Excerpt of a large inline-asm statement plus the memset that follows it.
 * The function header, the asm opener, some instructions and some operands
 * are not visible in this view (the embedded line numbers skip).
 *
 * Structure as far as the visible lines show:
 *  - eight groups of 16-bit values are loaded from block at 0x00..0x70,
 *    combined with packed 16-bit add/sub/shift, interleaved, and parked in
 *    0x00..0x18($29) and in tmp1..tmp4;
 *  - the same is done for the groups at 0x08..0x78, with the results parked
 *    in ftmp10..ftmp15, tmp5 and tmp6;
 *  - the arithmetic is run again on the parked values, the results are
 *    shifted right by 6 and added, with unsigned saturation, to rows at dst
 *    and then to rows at addr0;
 *  - zeros are stored over block[0x00..0x7f].
 *
 * MMI_* and PTR_ADDU are macros defined outside this excerpt.
 * NOTE(review): 0x00..0x18($29) is used as scratch memory; any adjustment of
 * $29 around these accesses is not visible here -- confirm it exists.
 */
/*
 * tmp0 = first 16-bit value of block, written back by the `sh` below. The
 * instructions between load and store are elided in this view (embedded
 * numbering jumps from 183 to 186).
 */
183 "lhu %[tmp0], 0x00(%[block]) \n\t"
186 MMI_LDC1(%[ftmp1], %[block], 0x10)
187 "sh %[tmp0], 0x00(%[block]) \n\t"
188 MMI_LDC1(%[ftmp2], %[block], 0x20)
/* ftmp8 = 1: shift count for the ">> 1" steps. */
189 "dli %[tmp0], 0x01 \n\t"
190 MMI_LDC1(%[ftmp3], %[block], 0x30)
191 "mtc1 %[tmp0], %[ftmp8] \n\t"
192 MMI_LDC1(%[ftmp5], %[block], 0x50)
193 MMI_LDC1(%[ftmp6], %[block], 0x60)
194 MMI_LDC1(%[ftmp7], %[block], 0x70)
/* Add/sub/shift network on the groups loaded from 0x10, 0x30, 0x50 and 0x70. */
195 "mov.d %[ftmp0], %[ftmp1] \n\t"
196 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
197 "psrah %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
198 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
199 "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
200 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
201 "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
202 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
203 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
204 "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
205 "psubh %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
206 "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
207 "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
208 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
209 "psrah %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
210 "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
/* ftmp9 = 2: shift count for the ">> 2" steps. */
211 "dli %[tmp0], 0x02 \n\t"
212 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
213 "mtc1 %[tmp0], %[ftmp9] \n\t"
214 "mov.d %[ftmp7], %[ftmp1] \n\t"
215 "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
216 "psrah %[ftmp3], %[ftmp4], %[ftmp9] \n\t"
217 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
218 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
219 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
220 "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
221 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
222 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
/* Groups from 0x20 and 0x60, then 0x00 and 0x40, folded into the running sums. */
223 "mov.d %[ftmp5], %[ftmp6] \n\t"
224 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
225 "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
226 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
227 "psubh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
228 MMI_LDC1(%[ftmp2], %[block], 0x00)
229 MMI_LDC1(%[ftmp5], %[block], 0x40)
230 "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
231 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
232 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
233 "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
234 "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
235 "paddh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
236 "psubh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
237 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
238 "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
239 "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
240 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
241 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
242 "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
243 "paddh %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
244 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
245 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
246 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
247 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
248 "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
249 "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
/* ftmp6 is parked at block+0x00 and reloaded into ftmp0 further down. */
250 MMI_SDC1(%[ftmp6], %[block], 0x00)
251 "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
/* Halfword then word interleaves (transpose-like reshuffle of the 16-bit lanes). */
252 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
253 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
254 "punpckhhw %[ftmp0], %[ftmp3], %[ftmp1] \n\t"
255 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
256 "punpckhwd %[ftmp1], %[ftmp7], %[ftmp3] \n\t"
257 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
258 "punpckhwd %[ftmp3], %[ftmp6], %[ftmp0] \n\t"
259 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
260 MMI_LDC1(%[ftmp0], %[block], 0x00)
/* Park results at 0x00($29)/0x10($29) and in general registers tmp1/tmp3. */
261 MMI_SDC1(%[ftmp7], $29, 0x00)
262 MMI_SDC1(%[ftmp1], $29, 0x10)
263 "dmfc1 %[tmp1], %[ftmp6] \n\t"
264 "dmfc1 %[tmp3], %[ftmp3] \n\t"
265 "punpckhhw %[ftmp3], %[ftmp5], %[ftmp2] \n\t"
266 "punpcklhw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
267 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp0] \n\t"
268 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
269 "punpckhwd %[ftmp0], %[ftmp5], %[ftmp4] \n\t"
270 "punpcklwd %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
271 "punpckhwd %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
272 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
/* Park results at 0x08($29)/0x18($29) and in tmp2/tmp4. */
273 MMI_SDC1(%[ftmp5], $29, 0x08)
274 MMI_SDC1(%[ftmp0], $29, 0x18)
275 "dmfc1 %[tmp2], %[ftmp3] \n\t"
276 "dmfc1 %[tmp4], %[ftmp4] \n\t"
/* Same network for the groups at block offsets 0x08..0x78. */
277 MMI_LDC1(%[ftmp1], %[block], 0x18)
278 MMI_LDC1(%[ftmp6], %[block], 0x28)
279 MMI_LDC1(%[ftmp2], %[block], 0x38)
280 MMI_LDC1(%[ftmp0], %[block], 0x58)
281 MMI_LDC1(%[ftmp3], %[block], 0x68)
282 MMI_LDC1(%[ftmp4], %[block], 0x78)
283 "mov.d %[ftmp7], %[ftmp1] \n\t"
284 "psrah %[ftmp5], %[ftmp0], %[ftmp8] \n\t"
285 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
286 "paddh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
287 "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
288 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
289 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
290 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
291 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
292 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
293 "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
294 "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
295 "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
296 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
297 "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
298 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
299 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
300 "mov.d %[ftmp4], %[ftmp1] \n\t"
301 "psrah %[ftmp2], %[ftmp5], %[ftmp9] \n\t"
302 "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
303 "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
304 "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
305 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
306 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
307 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
308 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
309 "mov.d %[ftmp0], %[ftmp3] \n\t"
310 "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
311 "psrah %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
312 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
313 "psubh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
314 MMI_LDC1(%[ftmp6], %[block], 0x08)
315 MMI_LDC1(%[ftmp0], %[block], 0x48)
316 "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
317 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
318 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
319 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
320 "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
321 "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
322 "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
323 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
324 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
325 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
326 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
327 "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
328 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
329 "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
330 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
331 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
332 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
333 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
334 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
335 "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
/* ftmp3 is parked at block+0x08 and reloaded into ftmp7 further down. */
336 MMI_SDC1(%[ftmp3], %[block], 0x08)
337 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
338 "punpckhhw %[ftmp3], %[ftmp4], %[ftmp7] \n\t"
339 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
340 "punpckhhw %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
341 "punpcklhw %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
342 "punpckhwd %[ftmp1], %[ftmp4], %[ftmp2] \n\t"
343 "punpcklwd %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
344 "punpckhwd %[ftmp2], %[ftmp3], %[ftmp7] \n\t"
345 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
346 MMI_LDC1(%[ftmp7], %[block], 0x08)
/* Park this half's results in tmp5 and ftmp10/ftmp12/ftmp14. */
347 "dmfc1 %[tmp5], %[ftmp4] \n\t"
348 "mov.d %[ftmp10], %[ftmp1] \n\t"
349 "mov.d %[ftmp12], %[ftmp3] \n\t"
350 "mov.d %[ftmp14], %[ftmp2] \n\t"
351 "punpckhhw %[ftmp2], %[ftmp0], %[ftmp6] \n\t"
352 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
353 "punpckhhw %[ftmp6], %[ftmp5], %[ftmp7] \n\t"
354 "punpcklhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
355 "punpckhwd %[ftmp7], %[ftmp0], %[ftmp5] \n\t"
356 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
357 "punpckhwd %[ftmp5], %[ftmp2], %[ftmp6] \n\t"
358 "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
/* ... and in tmp6 and ftmp11/ftmp13/ftmp15. */
359 "dmfc1 %[tmp6], %[ftmp0] \n\t"
360 "mov.d %[ftmp11], %[ftmp7] \n\t"
361 "mov.d %[ftmp13], %[ftmp2] \n\t"
362 "mov.d %[ftmp15], %[ftmp5] \n\t"
/*
 * Second run of the network, on the values parked in ftmp10/ftmp12/ftmp14,
 * tmp1/tmp3/tmp5 and 0x00($29)/0x10($29).
 */
364 "mov.d %[ftmp7], %[ftmp10] \n\t"
365 "dmtc1 %[tmp3], %[ftmp6] \n\t"
366 MMI_LDC1(%[ftmp1], $29, 0x10)
367 "dmtc1 %[tmp1], %[ftmp3] \n\t"
368 "mov.d %[ftmp4], %[ftmp1] \n\t"
369 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
370 "psrah %[ftmp0], %[ftmp7], %[ftmp8] \n\t"
371 "paddh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
372 "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
373 "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
374 "paddh %[ftmp0], %[ftmp0], %[ftmp14] \n\t"
375 "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
376 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
377 "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
378 "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
379 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
380 "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t"
381 "psubh %[ftmp7], %[ftmp7], %[ftmp14] \n\t"
382 "psrah %[ftmp5], %[ftmp14], %[ftmp8] \n\t"
383 "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
384 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
385 "mov.d %[ftmp5], %[ftmp1] \n\t"
386 "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
387 "psrah %[ftmp6], %[ftmp0], %[ftmp9] \n\t"
388 "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
389 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
390 "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
391 "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
392 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
393 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
394 "mov.d %[ftmp7], %[ftmp12] \n\t"
395 "psrah %[ftmp2], %[ftmp12], %[ftmp8] \n\t"
396 "psrah %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
397 "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
398 "psubh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
399 MMI_LDC1(%[ftmp3], $29, 0x00)
400 "dmtc1 %[tmp5], %[ftmp7] \n\t"
401 "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
402 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
403 "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
404 "psubh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
405 "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
406 "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
407 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
408 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
409 "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
410 "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
411 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
412 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
413 "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
414 "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
415 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
416 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
417 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
418 "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
419 "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
420 "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
/* Three results are parked again (0x00($29), 0x10($29), tmp1) until their rows are written. */
421 MMI_SDC1(%[ftmp3], $29, 0x00)
422 "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
423 MMI_SDC1(%[ftmp0], $29, 0x10)
424 "dmfc1 %[tmp1], %[ftmp2] \n\t"
/* ftmp2 = 0; sixteen 8-byte stores of it cover block[0x00..0x7f]. */
425 "xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
426 MMI_SDC1(%[ftmp2], %[block], 0x00)
427 MMI_SDC1(%[ftmp2], %[block], 0x08)
428 MMI_SDC1(%[ftmp2], %[block], 0x10)
429 MMI_SDC1(%[ftmp2], %[block], 0x18)
430 MMI_SDC1(%[ftmp2], %[block], 0x20)
431 MMI_SDC1(%[ftmp2], %[block], 0x28)
432 MMI_SDC1(%[ftmp2], %[block], 0x30)
433 MMI_SDC1(%[ftmp2], %[block], 0x38)
434 MMI_SDC1(%[ftmp2], %[block], 0x40)
435 MMI_SDC1(%[ftmp2], %[block], 0x48)
436 MMI_SDC1(%[ftmp2], %[block], 0x50)
437 MMI_SDC1(%[ftmp2], %[block], 0x58)
438 MMI_SDC1(%[ftmp2], %[block], 0x60)
439 MMI_SDC1(%[ftmp2], %[block], 0x68)
440 MMI_SDC1(%[ftmp2], %[block], 0x70)
441 MMI_SDC1(%[ftmp2], %[block], 0x78)
/* ftmp10 = 6: shift count applied to every result before it is added to the pixels. */
442 "dli %[tmp3], 0x06 \n\t"
443 "mtc1 %[tmp3], %[ftmp10] \n\t"
/*
 * Output to dst, two rows per step: load the pixels, widen them to 16 bits,
 * add the shifted result, pack with unsigned saturation, store, then advance
 * dst by two strides.
 */
444 MMI_ULWC1(%[ftmp3], %[dst], 0x00)
445 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
446 "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
447 "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
448 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
449 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
450 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
451 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
452 "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
453 "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
454 MMI_SWC1(%[ftmp3], %[dst], 0x00)
455 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
456 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
457 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
458 MMI_ULWC1(%[ftmp3], %[dst], 0x00)
459 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
460 "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
461 "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
462 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
463 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
464 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
465 "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
466 "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
467 "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
468 MMI_SWC1(%[ftmp3], %[dst], 0x00)
469 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
/* Bring back the three results parked before the block was zeroed. */
470 MMI_LDC1(%[ftmp5], $29, 0x00)
471 MMI_LDC1(%[ftmp4], $29, 0x10)
472 "dmtc1 %[tmp1], %[ftmp6] \n\t"
473 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
474 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
475 MMI_ULWC1(%[ftmp3], %[dst], 0x00)
476 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
477 "psrah %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
478 "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
479 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
480 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
481 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
482 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
483 "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
484 "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
485 MMI_SWC1(%[ftmp3], %[dst], 0x00)
486 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
487 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
488 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
489 MMI_ULWC1(%[ftmp3], %[dst], 0x00)
490 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
491 "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
492 "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
493 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
494 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
495 "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
496 "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
497 "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
498 "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
499 MMI_SWC1(%[ftmp3], %[dst], 0x00)
500 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
/*
 * Second run of the network for the other half, on the values parked in
 * ftmp11/ftmp13/ftmp15, tmp2/tmp4/tmp6 and 0x08($29)/0x18($29).
 */
501 "dmtc1 %[tmp4], %[ftmp1] \n\t"
502 "dmtc1 %[tmp2], %[ftmp6] \n\t"
503 MMI_LDC1(%[ftmp4], $29, 0x18)
504 "mov.d %[ftmp5], %[ftmp4] \n\t"
505 "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
506 "psrah %[ftmp7], %[ftmp11], %[ftmp8] \n\t"
507 "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
508 "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
509 "paddh %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
510 "paddh %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
511 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
512 "paddh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
513 "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
514 "psubh %[ftmp3], %[ftmp11], %[ftmp1] \n\t"
515 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
516 "paddh %[ftmp5], %[ftmp5], %[ftmp15] \n\t"
517 "psubh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
518 "psrah %[ftmp2], %[ftmp15], %[ftmp8] \n\t"
519 "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
520 "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
521 "mov.d %[ftmp2], %[ftmp4] \n\t"
522 "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
523 "psrah %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
524 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
525 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
526 "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
527 "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
528 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
529 "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
530 "mov.d %[ftmp3], %[ftmp13] \n\t"
531 "psrah %[ftmp0], %[ftmp13], %[ftmp8] \n\t"
532 "psrah %[ftmp7], %[ftmp6], %[ftmp8] \n\t"
533 "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
534 "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
535 MMI_LDC1(%[ftmp6], $29, 0x08)
536 "dmtc1 %[tmp6], %[ftmp3] \n\t"
537 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
538 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
539 "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
540 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
541 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
542 "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
543 "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
544 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
545 "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
546 "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
547 "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
548 "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
549 "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
550 "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
551 "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
552 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
553 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
554 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
555 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
556 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
/* Park three results (0x08($29), 0x18($29), tmp2) until their rows are written. */
557 MMI_SDC1(%[ftmp6], $29, 0x08)
558 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
559 MMI_SDC1(%[ftmp7], $29, 0x18)
560 "dmfc1 %[tmp2], %[ftmp0] \n\t"
/* ftmp0 = 0: zero source for the widen/pack steps of this half. */
561 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
/*
 * Output through addr0, same two-rows-per-step pattern as for dst above.
 * addr0 is initialised by code not visible in this excerpt.
 */
562 MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
563 MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
564 "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
565 "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
566 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
567 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
568 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
569 "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
570 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
571 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
572 MMI_SWC1(%[ftmp6], %[addr0], 0x00)
573 MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
574 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
575 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
576 MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
577 MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
578 "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
579 "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
580 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
581 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
582 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
583 "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
584 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
585 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
586 MMI_SWC1(%[ftmp6], %[addr0], 0x00)
587 MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
/* Bring back the three parked results for the last four rows. */
588 MMI_LDC1(%[ftmp2], $29, 0x08)
589 MMI_LDC1(%[ftmp5], $29, 0x18)
590 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
591 "dmtc1 %[tmp2], %[ftmp1] \n\t"
592 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
593 MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
594 MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
595 "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
596 "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
597 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
598 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
599 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
600 "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
601 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
602 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
603 MMI_SWC1(%[ftmp6], %[addr0], 0x00)
604 MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
605 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
606 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
607 MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
608 MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
609 "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
610 "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
611 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
612 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
613 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
614 "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
615 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
616 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
617 MMI_SWC1(%[ftmp6], %[addr0], 0x00)
618 MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
/*
 * Output operands: sixteen scratch FP registers ("=&f"), general-purpose
 * scratch registers ("=&r") and addr0, all early-clobber. The operands for
 * tmp6 and stride are used above but are not among the lines visible here.
 */
620 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
621 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
622 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
623 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
624 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
625 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
626 [ftmp12]
"=&f"(ftmp[12]), [ftmp13]
"=&f"(ftmp[13]),
627 [ftmp14]
"=&f"(ftmp[14]), [ftmp15]
"=&f"(ftmp[15]),
628 [tmp0]
"=&r"(tmp[0]), [tmp1]
"=&r"(tmp[1]),
629 [tmp2]
"=&r"(tmp[2]), [tmp3]
"=&r"(tmp[3]),
630 [tmp4]
"=&r"(tmp[4]), [tmp5]
"=&r"(tmp[5]),
635 [addr0]
"=&r"(addr[0])
/*
 * Input operands.
 * NOTE(review): %[dst] is written by the PTR_ADDU instructions above but is
 * listed here as an input-only "r" operand. GCC's extended-asm rules do not
 * allow an asm to modify an input-only operand; a read-write ("+r") output
 * operand would express this -- confirm and fix in the full source.
 */
636 : [dst]
"r"(dst), [block]
"r"(block),
/* Clears the first 128 bytes of block; the asm above already stored zeros to offsets 0x00..0x78. */
641 memset(block, 0, 128);
/*
 * Excerpt of a DC-only add routine; the function header, the asm opener and
 * the remaining operands (e.g. the one for dc) are not visible in this view.
 *
 * dc is the first coefficient of block, rounded: (block[0] + 32) >> 6.
 */
646 int dc = (block[0] + 32) >> 6;
/*
 * Move dc into ftmp5, zero ftmp0, then shuffle ftmp5 with an all-zero
 * selector so that every 16-bit lane of ftmp5 holds the low halfword (dc).
 */
653 "mtc1 %[dc], %[ftmp5] \n\t"
654 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
655 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
/* One load per destination row dst0..dst3 (MMI_ULWC1 is a macro defined elsewhere; presumably an unaligned 32-bit load -- confirm). */
656 MMI_ULWC1(%[ftmp1], %[dst0], 0x00)
657 MMI_ULWC1(%[ftmp2], %[dst1], 0x00)
658 MMI_ULWC1(%[ftmp3], %[dst2], 0x00)
659 MMI_ULWC1(%[ftmp4], %[dst3], 0x00)
/* Interleave the low bytes with zero bytes: zero-extend to 16-bit lanes. */
660 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
661 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
662 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
663 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
/* Signed saturating 16-bit add of the broadcast dc to every lane. */
664 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
665 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
666 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
667 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
/* Pack back to bytes with unsigned saturation. */
668 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
669 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
670 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
671 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
/* Write each row's result back to dst0..dst3. */
672 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
673 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
674 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
675 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
/* Output operands: six scratch FP registers, all early-clobber ("=&f"). */
676 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
677 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
678 [ftmp4]
"=&f"(ftmp[4]),
680 [ftmp5]
"=&f"(ftmp[5])
/* Input operands: the four row addresses dst + k*stride for k = 0..3. */
681 : [dst0]
"r"(dst), [dst1]
"r"(dst+stride),
682 [dst2]
"r"(dst+2*
stride), [dst3]
"r"(dst+3*stride),
/*
 * Excerpt of a DC-only add routine working on eight rows; the function
 * header, the asm opener and the remaining operands (e.g. the one for dc)
 * are not visible in this view.
 *
 * dc is the first coefficient of block, rounded: (block[0] + 32) >> 6.
 */
690 int dc = (block[0] + 32) >> 6;
/*
 * Move dc into ftmp5, zero ftmp0, then shuffle ftmp5 with an all-zero
 * selector so that every 16-bit lane of ftmp5 holds the low halfword (dc).
 */
697 "mtc1 %[dc], %[ftmp5] \n\t"
698 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
699 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
/* Rows dst0..dst3: one load per row (MMI_LDC1 is a macro defined elsewhere; presumably a 64-bit load -- confirm). */
700 MMI_LDC1(%[ftmp1], %[dst0], 0x00)
701 MMI_LDC1(%[ftmp2], %[dst1], 0x00)
702 MMI_LDC1(%[ftmp3], %[dst2], 0x00)
703 MMI_LDC1(%[ftmp4], %[dst3], 0x00)
/* Zero-extend the high bytes into ftmp6..ftmp9 and the low bytes in place. */
704 "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
705 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
706 "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
707 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
708 "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
709 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
710 "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
711 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
/* Signed saturating 16-bit add of the broadcast dc to both halves of each row. */
712 "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
713 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
714 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
715 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
716 "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
717 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
718 "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
719 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
/* Re-pack low and high halves into one register per row, with unsigned saturation. */
720 "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
721 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
722 "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
723 "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
/* Write rows dst0..dst3 back. */
724 MMI_SDC1(%[ftmp1], %[dst0], 0x00)
725 MMI_SDC1(%[ftmp2], %[dst1], 0x00)
726 MMI_SDC1(%[ftmp3], %[dst2], 0x00)
727 MMI_SDC1(%[ftmp4], %[dst3], 0x00)
/* Rows dst4..dst7: identical load / widen / add / pack / store sequence. */
729 MMI_LDC1(%[ftmp1], %[dst4], 0x00)
730 MMI_LDC1(%[ftmp2], %[dst5], 0x00)
731 MMI_LDC1(%[ftmp3], %[dst6], 0x00)
732 MMI_LDC1(%[ftmp4], %[dst7], 0x00)
733 "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
734 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
735 "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
736 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
737 "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
738 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
739 "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
740 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
741 "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
742 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
743 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
744 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
745 "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
746 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
747 "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
748 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
749 "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
750 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
751 "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
752 "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
753 MMI_SDC1(%[ftmp1], %[dst4], 0x00)
754 MMI_SDC1(%[ftmp2], %[dst5], 0x00)
755 MMI_SDC1(%[ftmp3], %[dst6], 0x00)
756 MMI_SDC1(%[ftmp4], %[dst7], 0x00)
/* Output operands: ten scratch FP registers, all early-clobber ("=&f"). */
757 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
758 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
759 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
760 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
761 [ftmp8]
"=&f"(ftmp[8]),
763 [ftmp9]
"=&f"(ftmp[9])
/* Input operands: the eight row addresses dst + k*stride for k = 0..7. */
764 : [dst0]
"r"(dst), [dst1]
"r"(dst+stride),
765 [dst2]
"r"(dst+2*
stride), [dst3]
"r"(dst+3*stride),
766 [dst4]
"r"(dst+4*
stride), [dst5]
"r"(dst+5*stride),
767 [dst6]
"r"(dst+6*
stride), [dst7]
"r"(dst+7*stride),
778 int nnz = nnzc[
scan8[i] ];
780 if(nnz==1 && ((int16_t*)block)[i*16])
797 else if(((int16_t*)block)[i*16])
807 for(i=0; i<16; i+=4){
808 int nnz = nnzc[
scan8[i] ];
810 if(nnz==1 && ((int16_t*)block)[i*16])
812 block + i*16, stride);
825 for(i=j*16; i<j*16+4; i++){
828 block + i*16, stride);
829 else if(((int16_t*)block)[i*16])
831 block + i*16, stride);
842 for(i=j*16; i<j*16+4; i++){
845 block + i*16, stride);
846 else if(((int16_t*)block)[i*16])
848 block + i*16, stride);
853 for(i=j*16+4; i<j*16+8; i++){
854 if(nnzc[
scan8[i+4] ])
856 block + i*16, stride);
857 else if(((int16_t*)block)[i*16])
859 block + i*16, stride);
872 ".set noreorder \n\t"
873 "dli %[tmp0], 0x08 \n\t"
874 MMI_LDC1(%[ftmp3], %[input], 0x18)
875 "mtc1 %[tmp0], %[ftmp8] \n\t"
876 MMI_LDC1(%[ftmp2], %[input], 0x10)
877 "dli %[tmp0], 0x20 \n\t"
878 MMI_LDC1(%[ftmp1], %[input], 0x08)
879 "mtc1 %[tmp0], %[ftmp9] \n\t"
880 MMI_LDC1(%[ftmp0], %[input], 0x00)
881 "mov.d %[ftmp4], %[ftmp3] \n\t"
882 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
883 "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
884 "mov.d %[ftmp4], %[ftmp1] \n\t"
885 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
886 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
887 "mov.d %[ftmp4], %[ftmp3] \n\t"
888 "paddh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
889 "psubh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
890 "mov.d %[ftmp4], %[ftmp2] \n\t"
891 "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
892 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
893 "mov.d %[ftmp4], %[ftmp3] \n\t"
894 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
895 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
896 "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
897 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
898 "punpckhwd %[ftmp2], %[ftmp3], %[ftmp0] \n\t"
899 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
900 "mov.d %[ftmp0], %[ftmp4] \n\t"
901 "punpcklwd %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
902 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
903 "mov.d %[ftmp1], %[ftmp0] \n\t"
904 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
905 "psubh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
906 "mov.d %[ftmp1], %[ftmp2] \n\t"
907 "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
908 "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
909 "mov.d %[ftmp1], %[ftmp0] \n\t"
910 "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
911 "psubh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
912 "mov.d %[ftmp1], %[ftmp4] \n\t"
913 "daddi %[tmp0], %[qmul], -0x7fff \n\t"
914 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
915 "bgtz %[tmp0], 1f \n\t"
916 "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
917 "ori %[tmp0], $0, 0x80 \n\t"
918 "dsll %[tmp0], %[tmp0], 0x10 \n\t"
919 "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
920 "daddu %[qmul], %[qmul], %[tmp0] \n\t"
921 "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
922 "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
923 "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
924 "mtc1 %[qmul], %[ftmp7] \n\t"
925 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
926 "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
927 "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
928 "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
929 "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
930 "psraw %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
931 "psraw %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
932 "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
933 "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
934 "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
935 "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
936 "dmfc1 %[tmp1], %[ftmp0] \n\t"
937 "dsrl %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
938 "mfc1 %[input], %[ftmp0] \n\t"
939 "sh %[tmp1], 0x00(%[output]) \n\t"
940 "sh %[input], 0x80(%[output]) \n\t"
941 "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
942 PTR_SRL "%[input], %[input], 0x10 \n\t"
943 "sh %[tmp1], 0x20(%[output]) \n\t"
944 "sh %[input], 0xa0(%[output]) \n\t"
945 "dmfc1 %[tmp1], %[ftmp2] \n\t"
946 "dsrl %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
947 "mfc1 %[input], %[ftmp2] \n\t"
948 "sh %[tmp1], 0x40(%[output]) \n\t"
949 "sh %[input], 0xc0(%[output]) \n\t"
950 "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
951 PTR_SRL "%[input], %[input], 0x10 \n\t"
952 "sh %[tmp1], 0x60(%[output]) \n\t"
953 "sh %[input], 0xe0(%[output]) \n\t"
954 "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
955 "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
956 "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
957 "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
958 "mtc1 %[qmul], %[ftmp7] \n\t"
959 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
960 "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
961 "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
962 "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
963 "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
964 "psraw %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
965 "psraw %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
966 "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
967 "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
968 "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
969 "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
970 "dmfc1 %[tmp1], %[ftmp3] \n\t"
971 "dsrl %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
972 "mfc1 %[input], %[ftmp3] \n\t"
973 "sh %[tmp1], 0x100(%[output]) \n\t"
974 "sh %[input], 0x180(%[output]) \n\t"
975 "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
976 PTR_SRL "%[input], %[input], 0x10 \n\t"
977 "sh %[tmp1], 0x120(%[output]) \n\t"
978 "sh %[input], 0x1a0(%[output]) \n\t"
979 "dmfc1 %[tmp1], %[ftmp4] \n\t"
980 "dsrl %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
981 "mfc1 %[input], %[ftmp4] \n\t"
982 "sh %[tmp1], 0x140(%[output]) \n\t"
983 "sh %[input], 0x1c0(%[output]) \n\t"
984 "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
985 PTR_SRL "%[input], %[input], 0x10 \n\t"
986 "sh %[tmp1], 0x160(%[output]) \n\t"
988 "sh %[input], 0x1e0(%[output]) \n\t"
990 "ori %[tmp0], $0, 0x1f \n\t"
992 "clz %[tmp1], %[qmul] \n\t"
995 "ori %[input], $0, 0x07 \n\t"
996 "dsubu %[tmp1], %[tmp0], %[tmp1] \n\t"
997 "ori %[tmp0], $0, 0x80 \n\t"
998 "dsll %[tmp0], %[tmp0], 0x10 \n\t"
999 "daddu %[qmul], %[qmul], %[tmp0] \n\t"
1000 "dsubu %[tmp0], %[tmp1], %[input] \n\t"
1001 "movn %[tmp1], %[input], %[tmp0] \n\t"
1002 PTR_ADDIU "%[input], %[input], 0x01 \n\t"
1003 "andi %[tmp0], %[tmp1], 0xff \n\t"
1004 "srlv %[qmul], %[qmul], %[tmp0] \n\t"
1005 PTR_SUBU "%[input], %[input], %[tmp1] \n\t"
1006 "mtc1 %[input], %[ftmp6] \n\t"
1007 "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
1008 "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
1009 "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
1010 "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
1011 "mtc1 %[qmul], %[ftmp7] \n\t"
1012 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1013 "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1014 "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1015 "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1016 "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1017 "psraw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1018 "psraw %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1019 "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1020 "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1021 "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1022 "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1023 "dmfc1 %[tmp1], %[ftmp0] \n\t"
1024 "dsrl %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1025 "sh %[tmp1], 0x00(%[output]) \n\t"
1026 "mfc1 %[input], %[ftmp0] \n\t"
1027 "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1028 "sh %[input], 0x80(%[output]) \n\t"
1029 "sh %[tmp1], 0x20(%[output]) \n\t"
1030 PTR_SRL "%[input], %[input], 0x10 \n\t"
1031 "dmfc1 %[tmp1], %[ftmp2] \n\t"
1032 "sh %[input], 0xa0(%[output]) \n\t"
1033 "dsrl %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
1034 "sh %[tmp1], 0x40(%[output]) \n\t"
1035 "mfc1 %[input], %[ftmp2] \n\t"
1036 "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1037 "sh %[input], 0xc0(%[output]) \n\t"
1038 "sh %[tmp1], 0x60(%[output]) \n\t"
1039 PTR_SRL "%[input], %[input], 0x10 \n\t"
1040 "sh %[input], 0xe0(%[output]) \n\t"
1041 "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
1042 "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
1043 "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
1044 "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
1045 "mtc1 %[qmul], %[ftmp7] \n\t"
1046 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1047 "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1048 "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1049 "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1050 "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1051 "psraw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
1052 "psraw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1053 "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1054 "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1055 "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1056 "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1057 "dmfc1 %[tmp1], %[ftmp3] \n\t"
1058 "dsrl %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1059 "mfc1 %[input], %[ftmp3] \n\t"
1060 "sh %[tmp1], 0x100(%[output]) \n\t"
1061 "sh %[input], 0x180(%[output]) \n\t"
1062 "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1063 PTR_SRL "%[input], %[input], 0x10 \n\t"
1064 "sh %[tmp1], 0x120(%[output]) \n\t"
1065 "sh %[input], 0x1a0(%[output]) \n\t"
1066 "dmfc1 %[tmp1], %[ftmp4] \n\t"
1067 "dsrl %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1068 "mfc1 %[input], %[ftmp4] \n\t"
1069 "sh %[tmp1], 0x140(%[output]) \n\t"
1070 "sh %[input], 0x1c0(%[output]) \n\t"
1071 "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1072 PTR_SRL "%[input], %[input], 0x10 \n\t"
1073 "sh %[tmp1], 0x160(%[output]) \n\t"
1074 "sh %[input], 0x1e0(%[output]) \n\t"
1077 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1078 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1079 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1080 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1081 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1082 [tmp0]
"=&r"(tmp[0]), [tmp1]
"=&r"(tmp[1]),
1084 [output]
"+&r"(output), [input]
"+&r"(input),
/*
 * Fragment of a DC dequantisation + inverse-transform routine.  The
 * signature and the declarations of temp[] and t[] are outside this view
 * (presumably the 4:2:2 chroma DC variant -- TODO confirm against the
 * complete file).
 *
 * Stage 1: pairwise sum / difference of the eight coefficients that sit
 * 16 entries apart in block[].  Even temp[] slots hold sums, odd slots
 * hold differences.
 */
1096 temp[0] = block[0] + block[16];
1097 temp[1] = block[0] - block[16];
1098 temp[2] = block[32] + block[48];
1099 temp[3] = block[32] - block[48];
1100 temp[4] = block[64] + block[80];
1101 temp[5] = block[64] - block[80];
1102 temp[6] = block[96] + block[112];
1103 temp[7] = block[96] - block[112];
/*
 * Stage 2: combine the four sums (t[0..3]) and the four differences
 * (t[4..7]) with the same four +/- sign patterns.
 */
1105 t[0] = temp[0] + temp[4] + temp[2] + temp[6];
1106 t[1] = temp[0] - temp[4] + temp[2] - temp[6];
1107 t[2] = temp[0] - temp[4] - temp[2] + temp[6];
1108 t[3] = temp[0] + temp[4] - temp[2] - temp[6];
1109 t[4] = temp[1] + temp[5] + temp[3] + temp[7];
1110 t[5] = temp[1] - temp[5] + temp[3] - temp[7];
1111 t[6] = temp[1] - temp[5] - temp[3] + temp[7];
1112 t[7] = temp[1] + temp[5] - temp[3] - temp[7];
/*
 * Stage 3: scale by qmul, add the rounding term 128 and shift right by 8.
 * Results built from the sums land in block[0,32,64,96]; results built
 * from the differences land in block[16,48,80,112].
 */
1114 block[ 0]= (t[0]*qmul + 128) >> 8;
1115 block[ 32]= (t[1]*qmul + 128) >> 8;
1116 block[ 64]= (t[2]*qmul + 128) >> 8;
1117 block[ 96]= (t[3]*qmul + 128) >> 8;
1118 block[ 16]= (t[4]*qmul + 128) >> 8;
1119 block[ 48]= (t[5]*qmul + 128) >> 8;
1120 block[ 80]= (t[6]*qmul + 128) >> 8;
1121 block[112]= (t[7]*qmul + 128) >> 8;
/*
 * Fragment of a 2x2 DC dequantisation + inverse-transform routine.  The
 * signature and the declarations of a, b, c, d are outside this view
 * (presumably the 4:2:0 chroma DC variant -- TODO confirm).
 *
 * First form the sum and difference of each coefficient pair:
 * (block[0], block[16]) -> a (sum), d (difference)
 * (block[32], block[48]) -> c (sum), b (difference)
 */
1128 d = block[0] - block[16];
1129 a = block[0] + block[16];
1130 b = block[32] - block[48];
1131 c = block[32] + block[48];
/*
 * Second butterfly, then scale by qmul and shift right by 7.  Note that,
 * unlike a rounded shift, no rounding term is added before the shift.
 */
1132 block[0] = ((a+
c)*qmul) >> 7;
1133 block[16]= ((d+
b)*qmul) >> 7;
1134 block[32]= ((a-
c)*qmul) >> 7;
1135 block[48]= ((d-
b)*qmul) >> 7;
/*
 * Fragment of a weighted-prediction kernel working on two 8-byte groups
 * (block0 and block1) per asm statement.  The signature, the row loop, the
 * asm statement opener and the block0/block1/weight/offset operand
 * bindings are outside this view (presumably the 16-pixel-wide variant --
 * TODO confirm).
 *
 * The rounding offset is pre-scaled on the C side.  Whether the second
 * statement is guarded by a condition cannot be seen from here.
 */
1145 offset <<= log2_denom;
1148 offset += 1 << (log2_denom - 1);
/* ftmp0 = 0 (used as zero source and as pshufh selector); load both groups. */
1152 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1153 MMI_LDC1(%[ftmp1], %[block0], 0x00)
1154 MMI_LDC1(%[ftmp2], %[
block1], 0x00)
/*
 * Move weight, offset and the shift count into FP registers.  pshufh with
 * the all-zero selector in ftmp0 replicates halfword 0 into all four
 * 16-bit lanes, so weight and offset become per-lane constants.
 */
1155 "mtc1 %[weight], %[ftmp3] \n\t"
1156 "mtc1 %[offset], %[ftmp4] \n\t"
1157 "mtc1 %[log2_denom], %[ftmp5] \n\t"
1158 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1159 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
/* Zero-extend the upper and lower four bytes of each group to halfwords. */
1160 "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
1161 "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
1162 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1163 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
/* lane = lane * weight (low 16 bits of the product). */
1164 "pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1165 "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1166 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1167 "pmullh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
/* lane += offset, with signed saturation. */
1168 "paddsh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1169 "paddsh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1170 "paddsh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1171 "paddsh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
/* lane >>= log2_denom (arithmetic shift). */
1172 "psrah %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1173 "psrah %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1174 "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1175 "psrah %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
/* Pack low/high halves back to bytes with unsigned saturation; store in place. */
1176 "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1177 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1178 MMI_SDC1(%[ftmp1], %[block0], 0x00)
1179 MMI_SDC1(%[ftmp2], %[
block1], 0x00)
/* Outputs: every FP temporary is an early-clobber scratch register. */
1180 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1181 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1182 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1183 [ftmp6]
"=&f"(ftmp[6]),
1185 [ftmp7]
"=&f"(ftmp[7])
/* The shift count is passed unmodified (compare the bi-weighted kernels). */
1188 [log2_denom]
"r"(log2_denom)
/*
 * Tail of a parameter list followed by the body of a bi-weighted
 * prediction kernel; the function name, the leading parameters, the row
 * loop and the asm opener are outside this view (presumably the
 * 16-pixel-wide variant -- TODO confirm).
 */
1195 ptrdiff_t
stride,
int height,
int log2_denom,
int weightd,
int weights,
/*
 * Rounding term: add one, force bit 0 on, then scale by log2_denom.  The
 * asm shifts the final sum right by log2_denom + 1 (see the log2_denom
 * operand at the end of this statement).
 */
1202 offset = ((offset + 1) | 1) << log2_denom;
/* First group of 8 pixels: ftmp0 = 0, load source and destination. */
1206 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1207 MMI_LDC1(%[ftmp1], %[
src0], 0x00)
1208 MMI_LDC1(%[ftmp2], %[dst0], 0x00)
/*
 * Move both weights, the offset and the shift count into FP registers and
 * replicate halfword 0 of weights/weightd/offset into all four lanes
 * (pshufh with the all-zero selector in ftmp0).
 */
1209 "mtc1 %[weights], %[ftmp3] \n\t"
1210 "mtc1 %[weightd], %[ftmp4] \n\t"
1211 "mtc1 %[offset], %[ftmp5] \n\t"
1212 "mtc1 %[log2_denom], %[ftmp6] \n\t"
1213 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1214 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1215 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
/* Zero-extend source (ftmp1/ftmp7) and destination (ftmp2/ftmp8) bytes. */
1216 "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1217 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1218 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1219 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
/* src * weights and dst * weightd (low 16 bits of each product). */
1220 "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1221 "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1222 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1223 "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
/* sum = src*weights + offset + dst*weightd, using saturating adds. */
1224 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1225 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1226 "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1227 "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
/* Shift down, pack to bytes with unsigned saturation, store to dst0. */
1228 "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1229 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1230 "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1231 MMI_SDC1(%[ftmp1], %[dst0], 0x00)
/*
 * Second group of 8 pixels: identical computation on src1/dst1, reusing
 * the broadcast constants still held in ftmp3..ftmp6.
 */
1232 MMI_LDC1(%[ftmp1], %[
src1], 0x00)
1233 MMI_LDC1(%[ftmp2], %[dst1], 0x00)
1234 "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1235 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1236 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1237 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1238 "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1239 "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1240 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1241 "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1242 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1243 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1244 "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1245 "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1246 "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1247 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1248 "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1249 MMI_SDC1(%[ftmp1], %[dst1], 0x00)
/* Outputs: every FP temporary is an early-clobber scratch register. */
1250 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1251 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1252 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1253 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1255 [ftmp8]
"=&f"(ftmp[8])
/*
 * Inputs: dst1 addresses the second 8-byte group (dst + 8).  The
 * src0/src1 bindings are not visible in this view.
 */
1256 : [dst0]
"r"(dst), [dst1]
"r"(dst+8),
1258 [weights]
"r"(weights), [weightd]
"r"(weightd),
/* The shift count handed to the asm is log2_denom + 1. */
1259 [offset]
"r"(offset), [log2_denom]
"r"(log2_denom+1)
/*
 * Fragment of a weighted-prediction kernel that processes one 8-byte
 * group per asm statement.  Signature, row loop, asm opener and the input
 * operand list are outside this view (presumably the 8-pixel-wide
 * variant -- TODO confirm).
 *
 * The rounding offset is pre-scaled on the C side; whether the second
 * statement is guarded by a condition cannot be seen from here.
 */
1272 offset <<= log2_denom;
1275 offset += 1 << (log2_denom - 1);
/* ftmp0 = 0; load the 8 pixels. */
1279 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1280 MMI_LDC1(%[ftmp1], %[block], 0x00)
/*
 * Move weight, offset and shift count into FP registers; pshufh with the
 * all-zero selector replicates halfword 0 of weight and offset to all
 * four 16-bit lanes.
 */
1281 "mtc1 %[weight], %[ftmp2] \n\t"
1282 "mtc1 %[offset], %[ftmp3] \n\t"
1283 "mtc1 %[log2_denom], %[ftmp5] \n\t"
1284 "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1285 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
/* Zero-extend upper (ftmp4) and lower (ftmp1) four bytes to halfwords. */
1286 "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
1287 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
/* lane = (lane * weight + offset) >> log2_denom; the add saturates. */
1288 "pmullh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
1289 "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1290 "paddsh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1291 "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1292 "psrah %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1293 "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
/* Pack to bytes with unsigned saturation and store in place. */
1294 "packushb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1295 MMI_SDC1(%[ftmp1], %[block], 0x00)
/* Outputs: every FP temporary is an early-clobber scratch register. */
1296 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1297 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1298 [ftmp4]
"=&f"(ftmp[4]),
1300 [ftmp5]
"=&f"(ftmp[5])
/*
 * Tail of a parameter list followed by the body of a bi-weighted
 * prediction kernel for one 8-byte group; the function name, leading
 * parameters, row loop and asm opener are outside this view (presumably
 * the 8-pixel-wide variant -- TODO confirm).
 */
1309 ptrdiff_t
stride,
int height,
int log2_denom,
int weightd,
int weights,
/*
 * Rounding term: add one, force bit 0 on, then scale by log2_denom.  The
 * asm shifts the final sum right by log2_denom + 1 (see the operand list).
 */
1316 offset = ((offset + 1) | 1) << log2_denom;
/* ftmp0 = 0; load 8 source and 8 destination pixels. */
1320 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1321 MMI_LDC1(%[ftmp1], %[src], 0x00)
1322 MMI_LDC1(%[ftmp2], %[dst], 0x00)
/*
 * Move the weights, the offset and the shift count into FP registers and
 * replicate halfword 0 of weights/weightd/offset into all four lanes.
 */
1323 "mtc1 %[weights], %[ftmp3] \n\t"
1324 "mtc1 %[weightd], %[ftmp4] \n\t"
1325 "mtc1 %[offset], %[ftmp5] \n\t"
1326 "mtc1 %[log2_denom], %[ftmp6] \n\t"
1327 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1328 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1329 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
/* Zero-extend source (ftmp1/ftmp7) and destination (ftmp2/ftmp8) bytes. */
1330 "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1331 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1332 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1333 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
/* src * weights and dst * weightd (low 16 bits of each product). */
1334 "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1335 "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1336 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1337 "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
/* sum = src*weights + offset + dst*weightd, using saturating adds. */
1338 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1339 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1340 "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1341 "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
/* Shift down, pack to bytes with unsigned saturation, store to dst. */
1342 "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1343 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1344 "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1345 MMI_SDC1(%[ftmp1], %[dst], 0x00)
/* Outputs: every FP temporary is an early-clobber scratch register. */
1346 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1347 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1348 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1349 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1351 [ftmp8]
"=&f"(ftmp[8])
/* Inputs; the shift count handed to the asm is log2_denom + 1. */
1352 : [dst]
"r"(dst), [
src]
"r"(
src),
1353 [weights]
"r"(weights), [weightd]
"r"(weightd),
1354 [offset]
"r"(offset), [log2_denom]
"r"(log2_denom+1)
/*
 * Fragment of a weighted-prediction kernel that handles one 4-byte group
 * per asm statement.  Signature, row loop, asm opener and the
 * block/weight operand bindings are outside this view (presumably the
 * 4-pixel-wide variant -- TODO confirm).
 *
 * The rounding offset is pre-scaled on the C side; whether the second
 * statement is guarded by a condition cannot be seen from here.
 */
1367 offset <<= log2_denom;
1370 offset += 1 << (log2_denom - 1);
/*
 * ftmp0 = 0; load the pixels with MMI_ULWC1 (by its name an unaligned
 * 32-bit load -- macro body not visible here).
 */
1374 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1375 MMI_ULWC1(%[ftmp1], %[block], 0x00)
/*
 * Move weight, offset and shift count into FP registers; pshufh with the
 * all-zero selector replicates halfword 0 of weight and offset to all
 * four 16-bit lanes.
 */
1376 "mtc1 %[weight], %[ftmp2] \n\t"
1377 "mtc1 %[offset], %[ftmp3] \n\t"
1378 "mtc1 %[log2_denom], %[ftmp4] \n\t"
1379 "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1380 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
/* Only the low four bytes are used: zero-extend them to halfwords. */
1381 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
/* lane = (lane * weight + offset) >> log2_denom; the add saturates. */
1382 "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1383 "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1384 "psrah %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
/* Pack to bytes (upper half filled from the zero register) and store. */
1385 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1386 MMI_SWC1(%[ftmp1], %[block], 0x00)
/* Outputs: every FP temporary is an early-clobber scratch register. */
1387 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1388 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1390 [ftmp4]
"=&f"(ftmp[4])
/* The shift count is passed unmodified. */
1392 [offset]
"r"(offset), [log2_denom]
"r"(log2_denom)
/*
 * Tail of a parameter list followed by the body of a bi-weighted
 * prediction kernel for one 4-byte group; the function name, leading
 * parameters, row loop and asm opener are outside this view (presumably
 * the 4-pixel-wide variant -- TODO confirm).
 */
1399 ptrdiff_t
stride,
int height,
int log2_denom,
int weightd,
int weights,
/*
 * Rounding term: add one, force bit 0 on, then scale by log2_denom.  The
 * asm shifts the final sum right by log2_denom + 1 (see the operand list).
 */
1406 offset = ((offset + 1) | 1) << log2_denom;
/*
 * ftmp0 = 0; load source and destination pixels with MMI_ULWC1 (by its
 * name an unaligned 32-bit load -- macro body not visible here).
 */
1410 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1411 MMI_ULWC1(%[ftmp1], %[src], 0x00)
1412 MMI_ULWC1(%[ftmp2], %[dst], 0x00)
/*
 * Note: the source weight operand is named %[weight] in this statement;
 * it is bound to the C variable `weights` in the input list below.
 */
1413 "mtc1 %[weight], %[ftmp3] \n\t"
1414 "mtc1 %[weightd], %[ftmp4] \n\t"
1415 "mtc1 %[offset], %[ftmp5] \n\t"
1416 "mtc1 %[log2_denom], %[ftmp6] \n\t"
/* Replicate halfword 0 of both weights and the offset to all four lanes. */
1417 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1418 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1419 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
/* Zero-extend the low four bytes of source and destination. */
1420 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1421 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
/* sum = src*weight + offset + dst*weightd, using saturating adds. */
1422 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1423 "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1424 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1425 "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
/* Shift down, pack to bytes with unsigned saturation, store to dst. */
1426 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1427 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1428 MMI_SWC1(%[ftmp1], %[dst], 0x00)
/* Outputs: every FP temporary is an early-clobber scratch register. */
1429 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1430 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1431 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1433 [ftmp6]
"=&f"(ftmp[6])
/* Inputs; the shift count handed to the asm is log2_denom + 1. */
1434 : [dst]
"r"(dst), [
src]
"r"(
src),
1435 [
weight]
"r"(weights), [weightd]
"r"(weightd),
1436 [offset]
"r"(offset), [log2_denom]
"r"(log2_denom+1)
/*
 * Asm body of a deblocking filter that works on 8 pixels across a
 * horizontal edge located at pix, with per-group clipping taken from
 * tc0[].  The signature, the asm opener and the input operand list are
 * outside this view (presumably the "vertical, 8-wide, luma" filter --
 * TODO confirm).  Row labels p2/p1/p0 (above pix) and q0/q1/q2 (from pix
 * downwards) follow the usual H.264 deblocking notation.
 *
 * Address setup: addr0 = 2*stride, addr1 = pix - 3*stride.  alpha and
 * beta are each decremented so the saturating subtractions below test
 * "difference <= threshold - 1".
 */
1452 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1453 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1454 PTR_ADDU "%[addr1], %[stride], %[addr0] \n\t"
1455 "addi %[alpha], %[alpha], -0x01 \n\t"
1456 PTR_SUBU "%[addr1], $0, %[addr1] \n\t"
1457 "addi %[beta], %[beta], -0x01 \n\t"
1458 PTR_ADDU "%[addr1], %[addr1], %[pix] \n\t"
/*
 * Load four rows: ftmp3 = q0 (pix), ftmp1 = p1 (pix - 2*stride),
 * ftmp2 = p0 (pix - stride), ftmp4 = q1 (pix + stride).
 */
1459 MMI_LDC1(%[ftmp3], %[pix], 0x00)
1460 MMI_LDXC1(%[ftmp1], %[addr1], %[stride], 0x00)
1461 MMI_LDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1462 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
/* Broadcast alpha-1 and beta-1 to every byte lane of ftmp5 / ftmp6. */
1463 "mtc1 %[alpha], %[ftmp5] \n\t"
1464 "mtc1 %[beta], %[ftmp6] \n\t"
1465 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1466 "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1467 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1468 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
/*
 * Build the filter-enable mask.  Each absolute difference is formed as
 * the OR of two saturating subtractions, then the threshold is subtracted
 * with saturation; a lane stays zero only if the difference is within the
 * threshold.  After the final compare against zero, ftmp8 is 0xff where
 * |q0-p0| <= alpha-1, |p1-p0| <= beta-1 and |q1-q0| <= beta-1.
 */
1469 "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1470 "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1471 "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1472 "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1473 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1474 "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1475 "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1476 "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1477 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1478 "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1479 "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1480 "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1481 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1482 "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1483 "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
/*
 * ftmp4 = all ones (-1 per byte).  Load four tc0 bytes and duplicate
 * twice so that ftmp9 holds tc0[0] x4 followed by tc0[1] x4; ftmp5 then
 * flags lanes whose tc0 is greater than -1 (signed byte compare).
 */
1484 "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
1485 MMI_ULWC1(%[ftmp5], %[tc0], 0x00)
1486 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1487 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp5] \n\t"
1488 "pcmpgtb %[ftmp5], %[ftmp9], %[ftmp4] \n\t"
/*
 * ftmp4 = p2 (pix - 3*stride).  ftmp10 = enable mask restricted to lanes
 * with tc0 > -1.  ftmp7 becomes the mask of lanes where additionally
 * |p2-p0| <= beta-1 (the two saturated differences compare equal only
 * when both are zero).
 */
1489 MMI_LDC1(%[ftmp4], %[addr1], 0x00)
1490 "and %[ftmp10], %[ftmp5], %[ftmp8] \n\t"
1491 "psubusb %[ftmp8], %[ftmp4], %[ftmp2] \n\t"
1492 "psubusb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1493 "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1494 "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1495 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1496 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
/*
 * ftmp5 = tc0 in enabled lanes.  ftmp8 = ftmp5 minus the 0/-1 mask, i.e.
 * the clip value grows by one where the p-side condition holds.  ftmp7 =
 * tc0 in lanes where p1 will be modified.
 */
1497 "and %[ftmp5], %[ftmp10], %[ftmp9] \n\t"
1498 "psubb %[ftmp8], %[ftmp5], %[ftmp7] \n\t"
1499 "and %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
/*
 * New p1: average p2 with avg(p0, q0), subtract a one-bit correction
 * derived from the xor with p2 (& ff_pb_1), then clamp to
 * [p1 - ftmp7, p1 + ftmp7] and store to the p1 row (pix - 2*stride).
 */
1500 "pavgb %[ftmp5], %[ftmp2], %[ftmp3] \n\t"
1501 MMI_LDC1(%[ftmp11], %[addr1], 0x00)
1502 "pavgb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1503 "xor %[ftmp5], %[ftmp5], %[ftmp11] \n\t"
1504 "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1505 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1506 "psubusb %[ftmp5], %[ftmp1], %[ftmp7] \n\t"
1507 "paddusb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1508 "pmaxub %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1509 "pminub %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1510 MMI_SDXC1(%[ftmp4], %[addr1], %[stride], 0x00)
/*
 * Same on the q side: ftmp5 = q2 (pix + 2*stride); ftmp7 = mask of lanes
 * where |q2-q0| <= beta-1 (within ftmp10); the clip value in ftmp8 grows
 * by one in those lanes; ftmp6 = tc0 in lanes where q1 will be modified.
 */
1511 MMI_LDXC1(%[ftmp5], %[pix], %[addr0], 0x00)
1512 "psubusb %[ftmp4], %[ftmp5], %[ftmp3] \n\t"
1513 "psubusb %[ftmp7], %[ftmp3], %[ftmp5] \n\t"
1514 "psubusb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1515 "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1516 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1517 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1518 "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1519 "and %[ftmp6], %[ftmp9], %[ftmp7] \n\t"
/*
 * New q1: reload q1, average q2 with avg(p0, q0), apply the one-bit
 * correction, clamp to [q1 - ftmp6, q1 + ftmp6] and store to pix + stride.
 */
1520 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1521 "pavgb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1522 MMI_LDXC1(%[ftmp11], %[pix], %[addr0], 0x00)
1523 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1524 "xor %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1525 "and %[ftmp7], %[ftmp7], %[ff_pb_1] \n\t"
1526 "psubusb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1527 "psubusb %[ftmp7], %[ftmp4], %[ftmp6] \n\t"
1528 "paddusb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1529 "pmaxub %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1530 "pminub %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1531 MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
/*
 * p0/q0 update.  A biased delta is built from p1, p0, q0 and the
 * complement of q1 using byte averages and the constants ff_pb_1,
 * ff_pb_3 and ff_pb_A1; its positive and negative parts (ftmp4 / ftmp7)
 * are each limited to the clip value in ftmp8 and then applied with
 * saturating add/sub to p0 (ftmp2) and q0 (ftmp3).
 * NOTE(review): the exact arithmetic identity behind the averaging
 * sequence is not derivable from this view alone -- check against the C
 * reference filter before changing any instruction here.
 */
1532 "xor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1533 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1534 "and %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1535 "xor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1536 "xor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1537 "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1538 "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1539 "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1540 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1541 "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1542 "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1543 "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1544 "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1545 "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1546 "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1547 "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1548 "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1549 "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
/* Store the filtered p0 (pix - stride) and q0 (pix) rows. */
1550 MMI_SDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1551 MMI_SDC1(%[ftmp3], %[pix], 0x00)
/* Outputs: FP temporaries and the two address scratch registers, all early-clobber. */
1552 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1553 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1554 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1555 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1556 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1557 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
1561 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1])
/*
 * Asm body of a deblocking filter without tc0 clipping that rewrites up
 * to three rows on each side of a horizontal edge at pix (8 pixels wide).
 * Signature, asm opener, the input operand list and the branch target
 * label "1" are outside this view (presumably the strong / intra luma
 * filter -- TODO confirm).  Row labels p3..p0 (above pix) and q0..q3
 * (from pix downwards) follow the usual H.264 deblocking notation.
 *
 * Setup: ftmp9 = 1 (shift count), ftmp0 = 0, addr0 = 4*stride,
 * addr2 = 2*stride, addr1 = 3*stride; alpha and beta are decremented and
 * the code branches to label 1 when either becomes negative.  Finally
 * addr0 = pix - 4*stride.
 * NOTE(review): whether the instruction following each bltz executes in a
 * branch delay slot depends on the assembler reorder mode, which is not
 * visible here.
 * NOTE(review): the PTR_SLL line applies an integer shift macro to FP
 * operands to derive ftmp11 from ftmp9; ftmp11 is later used as a psrlh
 * shift count (presumably 2) -- confirm against the target assembler.
 */
1581 "ori %[tmp0], $0, 0x01 \n\t"
1582 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1583 "mtc1 %[tmp0], %[ftmp9] \n\t"
1584 PTR_SLL "%[addr0], %[stride], 0x02 \n\t"
1585 PTR_ADDU "%[addr2], %[stride], %[stride] \n\t"
1586 PTR_ADDIU "%[alpha], %[alpha], -0x01 \n\t"
1587 PTR_SLL "%[ftmp11], %[ftmp9], %[ftmp9] \n\t"
1588 "bltz %[alpha], 1f \n\t"
1589 PTR_ADDU "%[addr1], %[addr2], %[stride] \n\t"
1590 PTR_ADDIU "%[beta], %[beta], -0x01 \n\t"
1591 "bltz %[beta], 1f \n\t"
1592 PTR_SUBU "%[addr0], $0, %[addr0] \n\t"
1593 PTR_ADDU "%[addr0], %[addr0], %[pix] \n\t"
/*
 * Load ftmp3 = q0 (pix), ftmp1 = p1 (pix - 2*stride),
 * ftmp2 = p0 (pix - stride), ftmp4 = q1 (pix + stride).
 */
1594 MMI_LDC1(%[ftmp3], %[pix], 0x00)
1595 MMI_LDXC1(%[ftmp1], %[addr0], %[addr2], 0x00)
1596 MMI_LDXC1(%[ftmp2], %[addr0], %[addr1], 0x00)
1597 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
/*
 * Broadcast alpha-1 / beta-1 to all byte lanes and build the basic
 * enable mask: 0xff where |q0-p0| <= alpha-1, |p1-p0| <= beta-1 and
 * |q1-q0| <= beta-1.  The alpha bytes are spilled to stack+0x10 because
 * ftmp5 is reused as scratch; the finished mask is kept at stack+0x20.
 */
1598 "mtc1 %[alpha], %[ftmp5] \n\t"
1599 "mtc1 %[beta], %[ftmp6] \n\t"
1600 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1601 "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1602 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1603 "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1604 "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1605 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1606 "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1607 MMI_SDC1(%[ftmp5], %[stack], 0x10)
1608 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1609 "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1610 "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1611 "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1612 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1613 "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1614 "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1615 "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1616 "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1617 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1618 "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1619 "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1620 MMI_LDC1(%[ftmp5], %[stack], 0x10)
1621 "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1622 "ldc1 %[ftmp10], %[ff_pb_1] \n\t"
1623 MMI_SDC1(%[ftmp8], %[stack], 0x20)
/*
 * Tighter threshold: ftmp5 = avg(avg(alpha-1, 0), ff_pb_1).  ftmp7 =
 * lanes where |q0-p0| is within that tighter threshold, restricted to
 * the basic mask reloaded from stack+0x20.
 */
1624 "pavgb %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1625 "psubusb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1626 "pavgb %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
1627 "psubusb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1628 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1629 "psubusb %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1630 MMI_LDC1(%[ftmp15], %[stack], 0x20)
1631 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1632 "and %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
/*
 * ftmp15 = p2 (pix - 3*stride).  P-side strong mask = ftmp7 restricted to
 * lanes with |p2-p0| <= beta-1; saved at stack+0x30.
 */
1633 MMI_LDXC1(%[ftmp15], %[addr0], %[stride], 0x00)
1634 "psubusb %[ftmp8], %[ftmp15], %[ftmp2] \n\t"
1635 "psubusb %[ftmp5], %[ftmp2], %[ftmp15] \n\t"
1636 "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1637 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1638 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1639 "and %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
/*
 * ftmp14 = q2 (pix + 2*stride).  Q-side strong mask = ftmp7 restricted to
 * lanes with |q2-q0| <= beta-1; saved at stack+0x40.
 */
1640 MMI_LDXC1(%[ftmp14], %[pix], %[addr2], 0x00)
1641 MMI_SDC1(%[ftmp5], %[stack], 0x30)
1642 "psubusb %[ftmp8], %[ftmp14], %[ftmp3] \n\t"
1643 "psubusb %[ftmp5], %[ftmp3], %[ftmp14] \n\t"
1644 "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1645 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1646 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1647 "and %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1648 MMI_SDC1(%[ftmp5], %[stack], 0x40)
/*
 * ---- P side ----
 * Candidate values are assembled from byte averages; wrapping byte sums
 * (kept at stack+0x00) are used only to recover the low bits that the
 * cascaded averages round incorrectly (xor / and ff_pb_1 / psubb).
 * stack+0x10 is reused for avg(p0, q0).
 * NOTE(review): the exact tap weights implemented by each average chain
 * are not derivable from this view alone -- check against the C
 * reference filter before editing.
 */
1649 "pavgb %[ftmp5], %[ftmp15], %[ftmp1] \n\t"
1650 "pavgb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1651 "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1652 MMI_SDC1(%[ftmp6], %[stack], 0x10)
1653 "paddb %[ftmp7], %[ftmp15], %[ftmp1] \n\t"
1654 "paddb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1655 "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1656 "mov.d %[ftmp8], %[ftmp7] \n\t"
1657 MMI_SDC1(%[ftmp7], %[stack], 0x00)
1658 "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1659 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1660 "xor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1661 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1662 "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1663 "pavgb %[ftmp6], %[ftmp15], %[ftmp4] \n\t"
1664 "psubb %[ftmp7], %[ftmp15], %[ftmp4] \n\t"
1665 "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1666 "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1667 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1668 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1669 MMI_LDC1(%[ftmp13], %[stack], 0x10)
1670 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1671 "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1672 "pavgb %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1673 "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1674 "xor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1675 "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1676 "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1677 "xor %[ftmp8], %[ftmp2], %[ftmp4] \n\t"
1678 "pavgb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1679 "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1680 "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
/*
 * Select the new p0: the strong candidate (ftmp6) where the P-side mask
 * (stack+0x30) is set, the weaker candidate (ftmp7) where only the basic
 * mask (stack+0x20) is set, and the original p0 elsewhere.  The
 * xor / and / xor triple is a branch-free per-lane select.
 */
1681 MMI_LDC1(%[ftmp13], %[stack], 0x30)
1682 "pavgb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1683 MMI_LDC1(%[ftmp12], %[stack], 0x20)
1684 "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1685 "xor %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
1686 "and %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1687 "and %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1688 "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1689 "xor %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
/* Store new p0 (pix - stride); then load p3 (pix - 4*stride) for p2. */
1690 MMI_SDXC1(%[ftmp6], %[addr0], %[addr1], 0x00)
1691 MMI_LDC1(%[ftmp6], %[addr0], 0x00)
1692 "paddb %[ftmp7], %[ftmp15], %[ftmp6] \n\t"
1693 "pavgb %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1694 MMI_LDC1(%[ftmp12], %[stack], 0x00)
1695 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1696 "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1697 "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1698 "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1699 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1700 "xor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1701 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
/*
 * New p1 (ftmp5) and new p2 (ftmp6) replace the originals only where the
 * P-side strong mask (stack+0x30) is set.
 */
1702 MMI_LDC1(%[ftmp12], %[stack], 0x30)
1703 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1704 "xor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1705 "xor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1706 "and %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1707 "and %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1708 "xor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1709 "xor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
/* Store p1 (pix - 2*stride) and p2 (pix - 3*stride). */
1710 MMI_SDXC1(%[ftmp5], %[addr0], %[addr2], 0x00)
1711 MMI_SDXC1(%[ftmp6], %[addr0], %[stride], 0x00)
/*
 * ---- Q side ----
 * Mirror of the P-side computation with q2 (ftmp14), q1 (ftmp4),
 * q0 (ftmp3) taking the roles of p2, p1, p0, and the Q-side strong mask
 * at stack+0x40.  The original p0/p1 are still held in ftmp2/ftmp1.
 */
1712 "pavgb %[ftmp5], %[ftmp14], %[ftmp4] \n\t"
1713 "pavgb %[ftmp6], %[ftmp3], %[ftmp2] \n\t"
1714 "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1715 MMI_SDC1(%[ftmp6], %[stack], 0x10)
1716 "paddb %[ftmp7], %[ftmp14], %[ftmp4] \n\t"
1717 "paddb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1718 "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1719 "mov.d %[ftmp8], %[ftmp7] \n\t"
1720 MMI_SDC1(%[ftmp7], %[stack], 0x00)
1721 "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1722 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1723 "xor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1724 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1725 "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1726 "pavgb %[ftmp6], %[ftmp14], %[ftmp1] \n\t"
1727 "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1728 "psubb %[ftmp7], %[ftmp14], %[ftmp1] \n\t"
1729 "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1730 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1731 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1732 MMI_LDC1(%[ftmp12], %[stack], 0x10)
1733 "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1734 "pavgb %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1735 "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1736 "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1737 "xor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1738 "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1739 "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1740 "xor %[ftmp8], %[ftmp3], %[ftmp1] \n\t"
1741 "pavgb %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
1742 "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
/*
 * Select the new q0: strong candidate under the Q-side mask (stack+0x40),
 * weaker candidate under the basic mask (stack+0x20), else original q0.
 */
1743 MMI_LDC1(%[ftmp12], %[stack], 0x40)
1744 "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1745 MMI_LDC1(%[ftmp13], %[stack], 0x20)
1746 "pavgb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1747 "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1748 "xor %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1749 "and %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1750 "and %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
1751 "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1752 "xor %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
/* Store new q0 (pix); then load q3 (pix + 3*stride) for q2. */
1753 MMI_SDC1(%[ftmp6], %[pix], 0x00)
1754 MMI_LDXC1(%[ftmp6], %[pix], %[addr1], 0x00)
1755 "paddb %[ftmp7], %[ftmp14], %[ftmp6] \n\t"
1756 "pavgb %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1757 MMI_LDC1(%[ftmp12], %[stack], 0x00)
1758 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1759 "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1760 "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1761 "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1762 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1763 "xor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1764 "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
/*
 * New q1 (ftmp5) and new q2 (ftmp6) replace the originals only where the
 * Q-side strong mask (stack+0x40) is set.
 */
1765 MMI_LDC1(%[ftmp12], %[stack], 0x40)
1766 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1767 "xor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1768 "xor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1769 "and %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1770 "and %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1771 "xor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1772 "xor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
/* Store q1 (pix + stride) and q2 (pix + 2*stride). */
1773 MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1774 MMI_SDXC1(%[ftmp6], %[pix], %[addr2], 0x00)
/*
 * Outputs: sixteen FP temporaries, one integer temporary and three
 * address scratch registers, all early-clobber.
 */
1776 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1777 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1778 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1779 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1780 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1781 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
1782 [ftmp12]
"=&f"(ftmp[12]), [ftmp13]
"=&f"(ftmp[13]),
1783 [ftmp14]
"=&f"(ftmp[14]), [ftmp15]
"=&f"(ftmp[15]),
1784 [tmp0]
"=&r"(tmp[0]),
1787 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1788 [addr2]
"=&r"(addr[2]),
1797 int beta, int8_t *tc0)
/*
 * Inline-asm fragment: filters the two rows adjacent to pix (pix - stride and
 * pix) across 8 byte lanes, using the rows at pix - 2*stride and pix + stride
 * as outer neighbours, and clips the correction per lane with tc0.
 * Comments below assume the MMI_* load/store macros access
 * base (+ index) + offset as their names suggest -- TODO confirm against the
 * macro definitions, which are outside this fragment.
 *
 * NOTE(review): alpha and beta are decremented in place here, but the operand
 * list at the end of this fragment binds them as input-only r operands. GCC
 * extended asm does not allow an input-only operand to be modified; they
 * should be read-write operands or be copied to a scratch register first.
 */
1806 "addi %[alpha], %[alpha], -0x01 \n\t"
1807 "addi %[beta], %[beta], -0x01 \n\t"
/* addr0 = pix - 2 * stride */
1808 "or %[addr0], $0, %[pix] \n\t"
1809 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1810 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
/*
 * ftmp1 = row at pix - 2*stride, ftmp2 = row at pix - stride,
 * ftmp3 = row at pix,            ftmp4 = row at pix + stride.
 */
1811 MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1812 MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1813 MMI_LDC1(%[ftmp3], %[pix], 0x00)
1814 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
/*
 * ftmp0 = 0 serves as the pshufh selector, so halfword 0 is replicated into
 * every halfword; packushb then narrows to bytes. Result: ftmp5 holds
 * alpha - 1 and ftmp6 holds beta - 1 in every byte lane.
 */
1816 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1817 "mtc1 %[alpha], %[ftmp5] \n\t"
1818 "mtc1 %[beta], %[ftmp6] \n\t"
1819 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1820 "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1821 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1822 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
/* ftmp8 = saturate(|ftmp2 - ftmp3| - (alpha - 1)); zero iff the difference is below alpha. */
1823 "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1824 "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1825 "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1826 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
/* OR in saturate(|ftmp1 - ftmp2| - (beta - 1)). ftmp5 is reused as scratch from here on. */
1827 "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1828 "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1829 "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1830 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1831 "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
/* OR in saturate(|ftmp3 - ftmp4| - (beta - 1)). */
1832 "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1833 "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1834 "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1835 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1836 "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
/* ftmp8 = 0xFF in each lane where all three threshold tests passed, else 0. */
1837 "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1838 "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
/*
 * Load four tc0 bytes and duplicate each one into a pair of adjacent lanes
 * (punpcklbh of the register with itself), then AND with the lane mask:
 * ftmp8 becomes the per-lane clip limit, zero where filtering is disabled.
 */
1839 MMI_ULWC1(%[ftmp7], %[tc0], 0x00)
1840 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1841 "and %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
/*
 * Correction term built only from byte averages: ftmp5 starts as all-ones so
 * the xor instructions produce the bitwise complements of ftmp4 and ftmp2;
 * ftmp6 = (ftmp2 ^ ftmp3) & ff_pb_1 is a rounding-correction bit. The
 * saturating sum ends up in ftmp4, biased around ff_pb_A1 (constant defined
 * outside this fragment -- presumably 0xA1 in every byte, TODO confirm).
 */
1842 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1843 "xor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1844 "xor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1845 "and %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1846 "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1847 "xor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1848 "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1849 "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1850 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1851 "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
/*
 * Split the biased value into its two one-sided parts (ftmp7 = bias - value,
 * ftmp4 = value - bias, both saturating at 0) and clamp each to the clip limit.
 */
1852 "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1853 "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1854 "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1855 "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
/* Apply with opposite signs: ftmp2 = ftmp2 - ftmp7 + ftmp4, ftmp3 = ftmp3 - ftmp4 + ftmp7 (saturating). */
1856 "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1857 "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1858 "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1859 "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
/* Write back the two filtered rows: addr0 + stride is pix - stride. */
1861 MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1862 MMI_SDC1(%[ftmp3], %[pix], 0x00)
/* Outputs: all FP scratch registers and addr0 are early-clobber temporaries. */
1863 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1864 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1865 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1866 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1867 [ftmp8]
"=&f"(ftmp[8]),
1871 [addr0]
"=&r"(addr[0])
/* NOTE(review): alpha/beta are inputs here although the asm body writes them (see top). */
1873 [alpha]
"r"(alpha), [beta]
"r"(beta),
/*
 * Inline-asm fragment: unclipped variant of the row-direction chroma edge
 * filter. It replaces the rows at pix - stride and pix with a 3-tap smoothed
 * value in every byte lane that passes the alpha/beta threshold tests.
 * Comments assume the MMI_* load/store macros access base (+ index) + offset
 * as their names suggest -- TODO confirm against the macro definitions.
 *
 * NOTE(review): alpha and beta are decremented in place, but the operand list
 * at the end of this fragment binds them as input-only r operands; GCC
 * extended asm does not allow input-only operands to be modified.
 */
1889 "addi %[alpha], %[alpha], -0x01 \n\t"
1890 "addi %[beta], %[beta], -0x01 \n\t"
/* addr0 = pix - 2 * stride */
1891 "or %[addr0], $0, %[pix] \n\t"
1892 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1893 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
/*
 * ftmp1 = row at pix - 2*stride, ftmp2 = row at pix - stride,
 * ftmp3 = row at pix,            ftmp4 = row at pix + stride.
 */
1894 MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1895 MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1896 MMI_LDC1(%[ftmp3], %[pix], 0x00)
1897 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
/* Broadcast alpha - 1 (ftmp5) and beta - 1 (ftmp6) to every byte lane; ftmp0 = 0 is the pshufh selector. */
1899 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1900 "mtc1 %[alpha], %[ftmp5] \n\t"
1901 "mtc1 %[beta], %[ftmp6] \n\t"
1902 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1903 "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1904 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1905 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
/* ftmp8 = saturate(|ftmp2 - ftmp3| - (alpha - 1)) */
1906 "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1907 "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1908 "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1909 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
/* OR in saturate(|ftmp1 - ftmp2| - (beta - 1)) */
1910 "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1911 "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1912 "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1913 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1914 "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
/* OR in saturate(|ftmp3 - ftmp4| - (beta - 1)) */
1915 "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1916 "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1917 "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1918 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1919 "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
/* ftmp8 = 0xFF in each lane where all three tests passed, else 0. */
1920 "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1921 "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
/* Keep the unfiltered rows (ftmp6, ftmp7) for the masked blend below. */
1922 "mov.d %[ftmp6], %[ftmp2] \n\t"
1923 "mov.d %[ftmp7], %[ftmp3] \n\t"
/*
 * ftmp2 = pavgb(pavgb(ftmp2, ftmp4) - ((ftmp2 ^ ftmp4) & 1), ftmp1).
 * Subtracting the xor bit turns the first rounded-up average into a
 * truncating one, so the result equals (2*ftmp1 + ftmp2 + ftmp4 + 2) >> 2.
 */
1924 "xor %[ftmp5], %[ftmp2], %[ftmp4] \n\t"
1925 "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1926 "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1927 "psubusb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1928 "pavgb %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
/* Mirror image for the other row: ftmp3 = (2*ftmp4 + ftmp3 + ftmp1 + 2) >> 2. */
1929 "xor %[ftmp5], %[ftmp3], %[ftmp1] \n\t"
1930 "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1931 "pavgb %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1932 "psubusb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1933 "pavgb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
/*
 * Masked blend: result = old + ((new - old) & mask) with wrapping byte
 * arithmetic, i.e. the filtered value where the mask is 0xFF and the
 * original value elsewhere.
 */
1934 "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1935 "psubb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1936 "and %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
1937 "and %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1938 "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1939 "paddb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
/* Write back the two rows: addr0 + stride is pix - stride. */
1941 MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1942 MMI_SDC1(%[ftmp3], %[pix], 0x00)
/* Outputs: all FP scratch registers and addr0 are early-clobber temporaries. */
1943 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1944 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1945 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1946 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1947 [ftmp8]
"=&f"(ftmp[8]),
1950 [addr0]
"=&r"(addr[0])
/* NOTE(review): alpha/beta are inputs here although the asm body writes them (see top). */
1952 [alpha]
"r"(alpha), [beta]
"r"(beta),
/*
 * Inline-asm fragment: column-direction chroma edge filter with tc0 clipping.
 * It reads a strip 4 bytes wide and 8 rows tall starting at pix - 2,
 * transposes it so each register holds one column, runs the byte-lane filter
 * on the two middle columns, then transposes back and stores the strip.
 * Comments assume MMI_ULWC1 / MMI_USWC1 are unaligned 32-bit load/store
 * macros -- TODO confirm against the macro definitions.
 *
 * NOTE(review): alpha, beta and pix are all written by this body. Their
 * constraints are not visible in this fragment; each must be a read-write
 * operand for the asm to be valid under GCC extended-asm rules.
 */
1966 "addi %[alpha], %[alpha], -0x01 \n\t"
1967 "addi %[beta], %[beta], -0x01 \n\t"
/*
 * addr0 = 2*stride, addr1 = 3*stride, addr2 = 4*stride.
 * pix is moved 2 bytes left; addr5 keeps that row-0 address and pix is then
 * advanced to row 3.
 */
1968 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1969 PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
1970 PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
1971 PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
1972 "or %[addr5], $0, %[pix] \n\t"
1973 PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
/* Rows 0..3: ftmp0 = row 0, ftmp2 = row 1, ftmp1 = row 2, ftmp3 = row 3. */
1974 MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
1975 PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
1976 MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
1977 PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
1978 MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
1979 MMI_ULWC1(%[ftmp3], %[pix], 0x00)
/* Interleave bytes then halfwords: ftmp0 = columns 0,1 and ftmp2 = columns 2,3 of rows 0..3. */
1980 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1981 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1982 PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
1983 "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
1984 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
/* Rows 4..7: ftmp4 = row 4, ftmp6 = row 5, ftmp5 = row 6, ftmp7 = row 7. */
1985 MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
1986 PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
1987 MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
1988 PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
1989 MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
1990 PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
1991 MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
/* Same interleave for rows 4..7, then merge halves: ftmp0..ftmp3 = columns 0..3, 8 rows each. */
1992 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1993 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1994 "mov.d %[ftmp6], %[ftmp4] \n\t"
1995 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1996 "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1997 "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
1998 "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
1999 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2000 "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
/* Save the outer columns; ftmp3 is overwritten by the filter and both are needed for the write-back. */
2001 "mov.d %[ftmp9], %[ftmp0] \n\t"
2002 "mov.d %[ftmp10], %[ftmp3] \n\t"
/* Broadcast alpha - 1 (ftmp4) and beta - 1 (ftmp5) to every byte lane; ftmp8 = 0 is the pshufh selector. */
2004 "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2005 "mtc1 %[alpha], %[ftmp4] \n\t"
2006 "mtc1 %[beta], %[ftmp5] \n\t"
2007 "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2008 "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2009 "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2010 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
/* ftmp7 = saturate(|ftmp1 - ftmp2| - (alpha - 1)) */
2011 "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2012 "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2013 "or %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2014 "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
/* OR in saturate(|ftmp0 - ftmp1| - (beta - 1)) */
2015 "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2016 "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2017 "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2018 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2019 "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
/* OR in saturate(|ftmp2 - ftmp3| - (beta - 1)) */
2020 "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2021 "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2022 "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2023 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2024 "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
/* ftmp7 = 0xFF in each lane where all three tests passed, else 0. */
2025 "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2026 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
/* Load four tc0 bytes, duplicate each into two adjacent lanes, AND with the mask: ftmp7 = per-lane clip limit. */
2027 MMI_ULWC1(%[ftmp6], %[tc0], 0x00)
2028 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2029 "and %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
/*
 * Correction term built from byte averages (ftmp4 starts as all-ones so the
 * xor instructions form complements); the saturating sum lands in ftmp3,
 * biased around ff_pb_A1 (defined outside this fragment -- TODO confirm value).
 */
2030 "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2031 "xor %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
2032 "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2033 "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
2034 "pavgb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
2035 "xor %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
2036 "pavgb %[ftmp3], %[ftmp3], %[ff_pb_3] \n\t"
2037 "pavgb %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
2038 "pavgb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2039 "paddusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
/* Split into one-sided parts around the bias and clamp each to the clip limit. */
2040 "psubusb %[ftmp6], %[ff_pb_A1], %[ftmp3] \n\t"
2041 "psubusb %[ftmp3], %[ftmp3], %[ff_pb_A1] \n\t"
2042 "pminub %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
2043 "pminub %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
/* ftmp1 = ftmp1 - ftmp6 + ftmp3, ftmp2 = ftmp2 - ftmp3 + ftmp6 (saturating). */
2044 "psubusb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
2045 "psubusb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2046 "paddusb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2047 "paddusb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
/*
 * Transpose back. The upper words (rows 4..7) of columns 0..2 go to
 * ftmp4..ftmp6; the lower words are interleaved so ftmp1 = rows 0,1 and
 * ftmp0 = rows 2,3 (one 4-byte row per 32-bit word).
 */
2049 "punpckhwd %[ftmp4], %[ftmp9], %[ftmp9] \n\t"
2050 "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2051 "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2052 "punpcklbh %[ftmp0], %[ftmp9], %[ftmp1] \n\t"
2053 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
2054 "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2055 "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
/* Store rows 0..3 at addr5, addr5 + stride, addr5 + 2*stride and pix (row 3). */
2056 MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2057 PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2058 "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2059 MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2060 PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2061 MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2062 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2063 "punpckhwd %[ftmp3], %[ftmp10], %[ftmp10] \n\t"
2064 MMI_USWC1(%[ftmp0], %[pix], 0x00)
/* Same interleave for the upper halves, then store rows 4..7 at pix + stride .. pix + 4*stride. */
2065 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2066 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2067 PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2068 "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2069 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2070 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2071 "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2072 PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2073 PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2074 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2075 MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2076 PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2077 "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2078 MMI_USWC1(%[ftmp9], %[addr3], 0x00)
/* Outputs: FP scratch registers and address temporaries, all early-clobber. */
2079 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2080 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2081 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2082 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2083 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
2084 [ftmp10]
"=&f"(ftmp[10]),
2086 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
2087 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
2088 [addr4]
"=&r"(addr[4]), [addr5]
"=&r"(addr[5]),
/*
 * Inline-asm fragment: unclipped variant of the column-direction chroma edge
 * filter. It reads a strip 4 bytes wide and 8 rows tall starting at pix - 2,
 * transposes it into one register per column, smooths the two middle columns
 * where the alpha/beta tests pass, then transposes back and stores.
 * Comments assume MMI_ULWC1 / MMI_USWC1 are unaligned 32-bit load/store
 * macros -- TODO confirm against the macro definitions.
 *
 * NOTE(review): alpha, beta and pix are written by this body; their
 * constraints are not visible in this fragment and must be read-write.
 * NOTE(review): ftmp10 is declared in the output list below but never
 * referenced by the visible asm body; ftmp9 is used only as scratch for the
 * final store.
 */
2106 "addi %[alpha], %[alpha], -0x01 \n\t"
2107 "addi %[beta], %[beta], -0x01 \n\t"
/*
 * addr0 = 2*stride, addr1 = 3*stride, addr2 = 4*stride.
 * pix is moved 2 bytes left; addr5 keeps the row-0 address and pix advances to row 3.
 */
2108 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2109 PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
2110 PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
2111 PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
2112 "or %[addr5], $0, %[pix] \n\t"
2113 PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
/* Rows 0..3: ftmp0 = row 0, ftmp2 = row 1, ftmp1 = row 2, ftmp3 = row 3. */
2114 MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
2115 PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2116 MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
2117 PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2118 MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
2119 MMI_ULWC1(%[ftmp3], %[pix], 0x00)
/* Interleave bytes then halfwords: ftmp0 = columns 0,1 and ftmp2 = columns 2,3 of rows 0..3. */
2120 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2121 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2122 PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2123 "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
2124 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
/* Rows 4..7: ftmp4 = row 4, ftmp6 = row 5, ftmp5 = row 6, ftmp7 = row 7. */
2125 MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
2126 PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
2127 MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
2128 PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
2129 MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
2130 PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
2131 MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
/* Same interleave for rows 4..7, then merge halves: ftmp0..ftmp3 = columns 0..3, 8 rows each. */
2132 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2133 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
2134 "mov.d %[ftmp6], %[ftmp4] \n\t"
2135 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2136 "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
2137 "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
2138 "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
2139 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2140 "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
/* Broadcast alpha - 1 (ftmp4) and beta - 1 (ftmp5) to every byte lane; ftmp8 = 0 is the pshufh selector. */
2142 "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2143 "mtc1 %[alpha], %[ftmp4] \n\t"
2144 "mtc1 %[beta], %[ftmp5] \n\t"
2145 "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2146 "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2147 "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2148 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
/* ftmp7 = saturate(|ftmp1 - ftmp2| - (alpha - 1)) */
2149 "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2150 "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2151 "or %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2152 "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
/* OR in saturate(|ftmp0 - ftmp1| - (beta - 1)) */
2153 "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2154 "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2155 "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2156 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2157 "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
/* OR in saturate(|ftmp2 - ftmp3| - (beta - 1)) */
2158 "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2159 "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2160 "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2161 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2162 "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
/* ftmp7 = 0xFF in each lane where all three tests passed, else 0. */
2163 "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2164 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
/* Keep the unfiltered middle columns (ftmp5, ftmp6) for the masked blend. */
2165 "mov.d %[ftmp5], %[ftmp1] \n\t"
2166 "mov.d %[ftmp6], %[ftmp2] \n\t"
/* ftmp1 = (2*ftmp0 + ftmp1 + ftmp3 + 2) >> 2, built from a truncating average followed by a rounding one. */
2167 "xor %[ftmp4], %[ftmp1], %[ftmp3] \n\t"
2168 "and %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2169 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2170 "psubusb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
2171 "pavgb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
/* ftmp2 = (2*ftmp3 + ftmp2 + ftmp0 + 2) >> 2, same construction mirrored. */
2172 "xor %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2173 "and %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2174 "pavgb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2175 "psubusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
2176 "pavgb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
/* Masked blend: result = old + ((new - old) & mask), wrapping byte arithmetic. */
2177 "psubb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2178 "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2179 "and %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
2180 "and %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2181 "paddb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2182 "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
/*
 * Transpose back: upper words (rows 4..7) of columns 0..2 go to ftmp4..ftmp6,
 * lower words are interleaved so ftmp1 = rows 0,1 and ftmp0 = rows 2,3.
 */
2184 "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2185 "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2186 "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2187 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2188 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2189 "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2190 "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
/* Store rows 0..3 at addr5, addr5 + stride, addr5 + 2*stride and pix (row 3). */
2191 MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2192 PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2193 "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2194 PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2195 MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2196 MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2197 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2198 "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2199 MMI_USWC1(%[ftmp0], %[pix], 0x00)
/* Same interleave for the upper halves, then store rows 4..7 at pix + stride .. pix + 4*stride. */
2200 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2201 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2202 PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2203 "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2204 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2205 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2206 "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2207 PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2208 PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2209 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2210 PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2211 MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2212 "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2213 MMI_USWC1(%[ftmp9], %[addr3], 0x00)
/* Outputs: FP scratch registers and address temporaries, all early-clobber. */
2214 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2215 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2216 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2217 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2218 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
2219 [ftmp10]
"=&f"(ftmp[10]),
2221 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
2222 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
2223 [addr4]
"=&r"(addr[4]), [addr5]
"=&r"(addr[5]),
/*
 * True unless both tc0[0] and tc0[1] are negative: the sign bit survives a
 * bitwise AND only when it is set in both operands. (Assumes tc0 elements
 * are a signed type -- the declaration is outside this fragment; the guarded
 * statement is not visible here either.)
 */
2234 if ((tc0[0] & tc0[1]) >= 0)
/* Same sign test for the tc0[2] / tc0[3] pair. */
2236 if ((tc0[2] & tc0[3]) >= 0)
/*
 * Inline-asm fragment: transposes a block 8 bytes wide and 16 rows tall,
 * starting at pix - 4, into the stack buffer, in two passes of 8 rows.
 * Only columns 1..6 are kept: column k is stored at stack + (k - 1) * 0x10,
 * with rows 0..7 in the first 8 bytes and rows 8..15 at offset + 8.
 * Comments assume MMI_ULDC1 is an unaligned 64-bit load and MMI_SDC1 /
 * MMI_LDC1 are aligned 64-bit store/load macros -- TODO confirm.
 */
/* addr0 = 2*stride, addr1 = pix - 4 (row 0), addr2 = 3*stride, addr4 = row 3. */
2257 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2258 PTR_ADDI "%[addr1], %[pix], -0x4 \n\t"
2259 PTR_ADDU "%[addr2], %[stride], %[addr0] \n\t"
/* Load rows 0..6 into ftmp0..ftmp6 (row n at addr1 + n*stride). */
2260 MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2261 PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2262 PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2263 MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2264 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2265 MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2266 MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2267 PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2268 MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2269 PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2270 MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2271 PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2272 MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
/* addr6 = 4*stride */
2273 PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
/* First transpose stage: interleave bytes of adjacent row pairs. */
2274 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2275 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2276 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2277 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2278 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2279 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
/* Not enough FP temporaries: spill ftmp1 to stack + 0x10 (reloaded below), then load row 7 into ftmp8. */
2280 PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2281 MMI_SDC1(%[ftmp1], %[stack], 0x10)
2282 MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
/* addr7 = 8*stride, the step to the second group of 8 rows. */
2283 PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
/* Second and third stages: interleave halfwords, then words, to obtain whole columns. */
2284 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2285 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2286 "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2287 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2288 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2289 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2290 MMI_LDC1(%[ftmp8], %[stack], 0x10)
/* Column 1 -> stack + 0x00 (column 0 is discarded). */
2291 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2292 MMI_SDC1(%[ftmp0], %[stack], 0x00)
2293 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2294 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2295 "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2296 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2297 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2298 "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2299 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2300 "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2301 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
/* Columns 2..6 -> stack + 0x10 .. 0x50 (column 7 is discarded). */
2302 MMI_SDC1(%[ftmp1], %[stack], 0x10)
2303 MMI_SDC1(%[ftmp3], %[stack], 0x20)
2304 MMI_SDC1(%[ftmp7], %[stack], 0x30)
2305 MMI_SDC1(%[ftmp5], %[stack], 0x40)
2306 MMI_SDC1(%[ftmp6], %[stack], 0x50)
/* Second pass: advance both row bases by 8 rows and repeat for rows 8..15. */
2307 PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2308 PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
2309 MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2310 PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2311 MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2312 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2313 MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2314 MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2315 PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2316 MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2317 PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2318 MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2319 PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2320 MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2321 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2322 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2323 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2324 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2325 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2326 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
/* Spill slot for this pass is stack + 0x18; it is overwritten with final data below. */
2327 PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2328 MMI_SDC1(%[ftmp1], %[stack], 0x18)
2329 MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2330 "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2331 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2332 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2333 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2334 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2335 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2336 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2337 MMI_LDC1(%[ftmp8], %[stack], 0x18)
/* Column 1, rows 8..15 -> stack + 0x08. */
2338 MMI_SDC1(%[ftmp0], %[stack], 0x08)
2339 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2340 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2341 "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2342 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2343 "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2344 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2345 "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2346 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2347 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
/* Columns 2..6, rows 8..15 -> stack + 0x18 .. 0x58. */
2348 MMI_SDC1(%[ftmp1], %[stack], 0x18)
2349 MMI_SDC1(%[ftmp3], %[stack], 0x28)
2350 MMI_SDC1(%[ftmp7], %[stack], 0x38)
2351 MMI_SDC1(%[ftmp5], %[stack], 0x48)
2352 MMI_SDC1(%[ftmp6], %[stack], 0x58)
/* Outputs: FP scratch registers and address temporaries, all early-clobber. */
2353 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2354 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2355 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2356 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2357 [ftmp8]
"=&f"(ftmp[8]),
2359 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
2360 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
2361 [addr4]
"=&r"(addr[4]), [addr5]
"=&r"(addr[5]),
2362 [addr6]
"=&r"(addr[6]), [addr7]
"=&r"(addr[7])
/*
 * Inline-asm fragment: reads four 16-byte column vectors from the stack
 * buffer (offsets 0x10, 0x20, 0x30, 0x40, low half then high half),
 * transposes them back to rows and writes 4 bytes per row to 16 rows
 * starting at pix - 2.
 * Comments assume MMI_LDC1 is a 64-bit load and MMI_USWC1 an unaligned
 * 32-bit store -- TODO confirm against the macro definitions.
 *
 * NOTE(review): ftmp7 and ftmp8 are declared in the output list below but
 * never referenced by this asm body.
 */
/* addr0 = 2*stride, addr1 = pix - 2 (row 0), addr6 = 4*stride, addr2 = 3*stride, addr7 = 8*stride, addr4 = row 3. */
2371 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2372 PTR_ADDI "%[addr1], %[pix], -0x02 \n\t"
2373 PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2374 PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2375 PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2376 PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
/* Low halves (rows 0..7) of the four columns. */
2377 MMI_LDC1(%[ftmp0], %[stack], 0x10)
2378 MMI_LDC1(%[ftmp1], %[stack], 0x20)
2379 MMI_LDC1(%[ftmp2], %[stack], 0x30)
2380 MMI_LDC1(%[ftmp3], %[stack], 0x40)
/* Upper words (rows 4..7) are set aside in ftmp4..ftmp6; the lower words are interleaved into rows 0..3. */
2381 "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2382 "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2383 "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2384 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2385 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2386 "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2387 "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
/* Store rows 0..3 (addr1, addr1 + stride, addr1 + 2*stride, addr4). */
2388 MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2389 PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2390 "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2391 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2392 MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2393 MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2394 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2395 "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2396 MMI_USWC1(%[ftmp0], %[addr4], 0x00)
/* Interleave the saved upper words and store rows 4..7 (addr4 + stride .. addr4 + 4*stride). */
2397 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2398 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2399 "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2400 PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2401 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2402 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2403 PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2404 "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2405 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2406 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2407 MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2408 PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2409 "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
/* Advance both row bases by 8 rows for the second pass. */
2410 PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2411 MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2412 PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
/* High halves (rows 8..15) of the same four columns. */
2413 MMI_LDC1(%[ftmp0], %[stack], 0x18)
2414 MMI_LDC1(%[ftmp1], %[stack], 0x28)
2415 MMI_LDC1(%[ftmp2], %[stack], 0x38)
2416 MMI_LDC1(%[ftmp3], %[stack], 0x48)
/*
 * NOTE(review): addr0 and addr6 are recomputed here (and two lines below)
 * although nothing in this body has modified them since the setup at the
 * top; the recomputation looks redundant.
 */
2417 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2418 "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2419 PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2420 "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2421 "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2422 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2423 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2424 PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2425 "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2426 "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
/* Store rows 8..11. */
2427 MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2428 "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2429 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2430 MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2431 MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2432 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2433 "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2434 MMI_USWC1(%[ftmp0], %[addr4], 0x00)
/* Store rows 12..15. */
2435 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2436 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2437 PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2438 "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2439 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2440 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2441 PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2442 "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2443 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2444 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2445 MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2446 PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2447 "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2448 MMI_USWC1(%[ftmp4], %[addr3], 0x00)
/* Outputs: FP scratch registers and address temporaries, all early-clobber. */
2449 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2450 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2451 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2452 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2453 [ftmp8]
"=&f"(ftmp[8]),
2456 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
2457 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
2458 [addr4]
"=&r"(addr[4]), [addr5]
"=&r"(addr[5]),
2459 [addr6]
"=&r"(addr[6]), [addr7]
"=&r"(addr[7])
/*
 * Inline-asm fragment: transposes a block 8 bytes wide and 16 rows tall,
 * starting at pix - 4, into the ptmp buffer. All eight columns are kept:
 * column k is stored at ptmp + k * 0x10, with rows 0..7 in the first 8 bytes
 * and rows 8..15 at offset + 8 (128 bytes in total). At the end, four address
 * registers are saved to pdat at offsets 0x00..0x18.
 * Comments assume MMI_ULDC1 is an unaligned 64-bit load and MMI_SDC1 /
 * MMI_LDC1 are 64-bit store/load macros -- TODO confirm.
 */
/* addr0 = 2*stride, addr1 = pix - 4 (row 0), addr2 = 3*stride, addr3 = 4*stride, addr4 = row 3. */
2476 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2477 PTR_ADDI "%[addr1], %[pix], -0x04 \n\t"
2478 PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2479 PTR_ADDU "%[addr3], %[addr0], %[addr0] \n\t"
2480 PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
/* Load rows 0..6 into ftmp0..ftmp6 (row n at addr1 + n*stride). */
2481 PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2482 MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2483 PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2484 MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2485 MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2486 PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2487 MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2488 PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2489 MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2490 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2491 MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2492 MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2493 PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
/* First transpose stage: interleave bytes of adjacent row pairs; row 7 arrives in ftmp8. */
2494 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2495 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2496 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2497 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2498 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2499 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2500 MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2501 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2502 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
/* ptmp + 0x00 and ptmp + 0x20 double as spill slots until their final contents are written below. */
2503 MMI_SDC1(%[ftmp3], %[ptmp], 0x00)
/* Second stage: interleave halfwords. */
2504 "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2505 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2506 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2507 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2508 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2509 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2510 MMI_SDC1(%[ftmp2], %[ptmp], 0x20)
2511 MMI_LDC1(%[ftmp2], %[ptmp], 0x00)
2512 "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2513 "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
/* Third stage: interleave words to obtain whole columns. */
2514 "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2515 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2516 "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2517 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
/* Columns 0, 1, 4, 5 (rows 0..7). */
2518 MMI_SDC1(%[ftmp0], %[ptmp], 0x00)
2519 MMI_SDC1(%[ftmp5], %[ptmp], 0x10)
2520 MMI_SDC1(%[ftmp7], %[ptmp], 0x40)
2521 MMI_SDC1(%[ftmp4], %[ptmp], 0x50)
2522 MMI_LDC1(%[ftmp8], %[ptmp], 0x20)
2523 "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2524 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2525 "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2526 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
/* addr5 = 8*stride, the step to the second group of 8 rows. */
2527 PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
/* Columns 2, 3, 6, 7 (rows 0..7). */
2528 MMI_SDC1(%[ftmp3], %[ptmp], 0x20)
2529 MMI_SDC1(%[ftmp0], %[ptmp], 0x30)
2530 MMI_SDC1(%[ftmp6], %[ptmp], 0x60)
2531 MMI_SDC1(%[ftmp5], %[ptmp], 0x70)
/* Second pass: advance both row bases by 8 rows and repeat for rows 8..15. */
2532 PTR_ADDU "%[addr1], %[addr1], %[addr5] \n\t"
2533 PTR_ADDU "%[addr4], %[addr4], %[addr5] \n\t"
2534 PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2535 MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2536 PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2537 MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2538 MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2539 PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2540 MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2541 PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2542 MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2543 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2544 MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2545 MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2546 PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
2547 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2548 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2549 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2550 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2551 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2552 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2553 MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2554 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2555 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
/* Spill slots for this pass are ptmp + 0x08 and ptmp + 0x28. */
2556 MMI_SDC1(%[ftmp3], %[ptmp], 0x08)
2557 "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2558 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2559 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2560 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2561 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2562 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2563 MMI_SDC1(%[ftmp2], %[ptmp], 0x28)
2564 MMI_LDC1(%[ftmp2], %[ptmp], 0x08)
2565 "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2566 "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2567 "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2568 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2569 "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2570 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
/* Columns 0, 1, 4, 5 (rows 8..15). */
2571 MMI_SDC1(%[ftmp0], %[ptmp], 0x08)
2572 MMI_SDC1(%[ftmp5], %[ptmp], 0x18)
2573 MMI_SDC1(%[ftmp7], %[ptmp], 0x48)
2574 MMI_SDC1(%[ftmp4], %[ptmp], 0x58)
2575 MMI_LDC1(%[ftmp8], %[ptmp], 0x28)
2576 "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2577 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2578 "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2579 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
/* Columns 2, 3, 6, 7 (rows 8..15). */
2580 MMI_SDC1(%[ftmp3], %[ptmp], 0x28)
2581 MMI_SDC1(%[ftmp0], %[ptmp], 0x38)
2582 MMI_SDC1(%[ftmp6], %[ptmp], 0x68)
2583 MMI_SDC1(%[ftmp5], %[ptmp], 0x78)
/*
 * Save address registers to pdat: addr1 (now pointing at row 8 of the strip),
 * addr2 = 3*stride, addr0 = 2*stride, addr3 = 4*stride.
 * NOTE(review): slots are spaced 8 bytes apart; this presumably relies on
 * PTR_S storing at most 8 bytes -- confirm for all supported ABIs.
 */
2584 PTR_S "%[addr1], 0x00(%[pdat]) \n\t"
2585 PTR_S "%[addr2], 0x08(%[pdat]) \n\t"
2586 PTR_S "%[addr0], 0x10(%[pdat]) \n\t"
2587 PTR_S "%[addr3], 0x18(%[pdat]) \n\t"
/* Outputs: FP scratch registers and address temporaries, all early-clobber. */
2588 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2589 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2590 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2591 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2592 [ftmp8]
"=&f"(ftmp[8]),
2594 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
2595 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
2596 [addr4]
"=&r"(addr[4]), [addr5]
"=&r"(addr[5]),
2597 [addr6]
"=&r"(addr[6])
/* Inputs: base pointers of the transpose buffer and of the address save area. */
2599 [ptmp]
"r"(ptmp), [pdat]
"r"(pdat)
2606 PTR_L "%[addr1], 0x00(%[pdat]) \n\t"
2607 PTR_L "%[addr2], 0x08(%[pdat]) \n\t"
2608 PTR_L "%[addr0], 0x10(%[pdat]) \n\t"
2609 PTR_L "%[addr3], 0x18(%[pdat]) \n\t"
2610 PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2611 MMI_LDC1(%[ftmp0], %[ptmp], 0x08)
2612 MMI_LDC1(%[ftmp1], %[ptmp], 0x18)
2613 MMI_LDC1(%[ftmp2], %[ptmp], 0x28)
2614 MMI_LDC1(%[ftmp3], %[ptmp], 0x38)
2615 MMI_LDC1(%[ftmp4], %[ptmp], 0x48)
2616 MMI_LDC1(%[ftmp5], %[ptmp], 0x58)
2617 MMI_LDC1(%[ftmp6], %[ptmp], 0x68)
2618 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2619 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2620 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2621 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2622 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2623 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2624 MMI_LDC1(%[ftmp8], %[ptmp], 0x78)
2625 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2626 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2627 MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2628 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2629 "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2630 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2631 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2632 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2633 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2634 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2635 MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2636 MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2637 "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2638 "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2639 "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2640 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2641 "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2642 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2643 PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2644 MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2645 PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2646 MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2647 PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2648 MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2649 PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2650 MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2651 MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2652 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2653 "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2654 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2655 "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2656 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2657 MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2658 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2659 MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2660 PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2661 MMI_USDC1(%[ftmp6], %[addr5], 0x00)
2662 PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
2663 MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2664 PTR_SUBU "%[addr1], %[addr1], %[addr5] \n\t"
2665 PTR_SUBU "%[addr4], %[addr4], %[addr5] \n\t"
2666 MMI_LDC1(%[ftmp0], %[ptmp], 0x00)
2667 MMI_LDC1(%[ftmp1], %[ptmp], 0x10)
2668 MMI_LDC1(%[ftmp2], %[ptmp], 0x20)
2669 MMI_LDC1(%[ftmp3], %[ptmp], 0x30)
2670 MMI_LDC1(%[ftmp4], %[ptmp], 0x40)
2671 MMI_LDC1(%[ftmp5], %[ptmp], 0x50)
2672 MMI_LDC1(%[ftmp6], %[ptmp], 0x60)
2673 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2674 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2675 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2676 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2677 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2678 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2679 MMI_LDC1(%[ftmp8], %[ptmp], 0x70)
2680 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2681 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2682 MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2683 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2684 "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2685 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2686 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2687 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2688 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2689 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2690 MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2691 MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2692 "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2693 "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2694 "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2695 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2696 "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2697 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2698 PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2699 MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2700 PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2701 MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2702 PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2703 MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2704 PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2705 MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2706 MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2707 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2708 "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2709 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2710 "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2711 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2712 MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2713 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2714 MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2715 PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2716 MMI_USDC1(%[ftmp6], %[addr5], 0x00)
2717 MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2718 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2719 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2720 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2721 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2722 [ftmp8]
"=&f"(ftmp[8]),
2724 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
2725 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
2726 [addr4]
"=&r"(addr[4]), [addr5]
"=&r"(addr[5]),
2727 [addr6]
"=&r"(addr[6])
2729 [ptmp]
"r"(ptmp), [pdat]
"r"(pdat)
void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
static float alpha(float a)
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
void ff_deblock_v_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input, int qmul)
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
void ff_deblock_h_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
void ff_h264_weight_pixels4_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
static const uint8_t offset[127][2]
void ff_h264_weight_pixels8_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
void ff_h264_weight_pixels16_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
void ff_deblock_v8_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
static const uint8_t scan8[16 *3+3]
static int weight(int i, int blen, int offset)
void ff_deblock_h_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
GLint GLenum GLboolean GLsizei stride
void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
static int16_t block1[64]
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> dc
void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)