28 #define DECLARE_DOUBLE_1 double db_1
29 #define DECLARE_DOUBLE_2 double db_2
30 #define DECLARE_UINT32_T uint32_t it_1
31 #define RESTRICT_ASM_DOUBLE_1 [db_1]"=&f"(db_1)
32 #define RESTRICT_ASM_DOUBLE_2 [db_2]"=&f"(db_2)
33 #define RESTRICT_ASM_UINT32_T [it_1]"=&r"(it_1)
35 #define MMI_PCMPGTUB(dst, src1, src2) \
36 "pcmpeqb %[db_1], "#src1", "#src2" \n\t" \
37 "pmaxub %[db_2], "#src1", "#src2" \n\t" \
38 "pcmpeqb %[db_2], %[db_2], "#src1" \n\t" \
39 "xor "#dst", %[db_2], %[db_1] \n\t"
41 #define MMI_BTOH(dst_l, dst_r, src) \
42 "xor %[db_1], %[db_1], %[db_1] \n\t" \
43 "pcmpgtb %[db_2], %[db_1], "#src" \n\t" \
44 "punpcklbh "#dst_r", "#src", %[db_2] \n\t" \
45 "punpckhbh "#dst_l", "#src", %[db_2] \n\t"
47 #define MMI_VP8_LOOP_FILTER \
49 "dmtc1 %[thresh], %[ftmp3] \n\t" \
50 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
51 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
52 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
53 "pasubub %[ftmp0], %[p1], %[p0] \n\t" \
54 "pasubub %[ftmp1], %[q1], %[q0] \n\t" \
55 "pmaxub %[ftmp0], %[ftmp0], %[ftmp1] \n\t" \
56 MMI_PCMPGTUB(%[hev], %[ftmp0], %[ftmp3]) \
58 "pasubub %[ftmp1], %[p0], %[q0] \n\t" \
59 "paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
60 "pasubub %[ftmp2], %[p1], %[q1] \n\t" \
61 "li %[tmp0], 0x09 \n\t" \
62 "dmtc1 %[tmp0], %[ftmp3] \n\t" \
63 PSRLB_MMI(%[ftmp2], %[ftmp3], %[ftmp4], %[ftmp5], %[ftmp2]) \
64 "paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
65 "dmtc1 %[e], %[ftmp3] \n\t" \
66 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
67 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
68 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
69 MMI_PCMPGTUB(%[mask], %[ftmp1], %[ftmp3]) \
70 "pmaxub %[mask], %[mask], %[ftmp0] \n\t" \
71 "pasubub %[ftmp1], %[p3], %[p2] \n\t" \
72 "pasubub %[ftmp2], %[p2], %[p1] \n\t" \
73 "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
74 "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \
75 "pasubub %[ftmp1], %[q3], %[q2] \n\t" \
76 "pasubub %[ftmp2], %[q2], %[q1] \n\t" \
77 "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
78 "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \
79 "dmtc1 %[i], %[ftmp3] \n\t" \
80 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
81 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
82 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
83 MMI_PCMPGTUB(%[mask], %[mask], %[ftmp3]) \
84 "pcmpeqw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
85 "xor %[mask], %[mask], %[ftmp3] \n\t" \
87 "li %[tmp0], 0x80808080 \n\t" \
88 "dmtc1 %[tmp0], %[ftmp7] \n\t" \
89 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t" \
90 "xor %[p2], %[p2], %[ftmp7] \n\t" \
91 "xor %[p1], %[p1], %[ftmp7] \n\t" \
92 "xor %[p0], %[p0], %[ftmp7] \n\t" \
93 "xor %[q0], %[q0], %[ftmp7] \n\t" \
94 "xor %[q1], %[q1], %[ftmp7] \n\t" \
95 "xor %[q2], %[q2], %[ftmp7] \n\t" \
96 "psubsb %[ftmp4], %[p1], %[q1] \n\t" \
97 "psubb %[ftmp5], %[q0], %[p0] \n\t" \
98 MMI_BTOH(%[ftmp1], %[ftmp0], %[ftmp5]) \
99 MMI_BTOH(%[ftmp3], %[ftmp2], %[ftmp4]) \
101 "paddh %[ftmp5], %[ftmp0], %[ftmp0] \n\t" \
102 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" \
103 "paddh %[ftmp0], %[ftmp2], %[ftmp0] \n\t" \
105 "paddh %[ftmp5], %[ftmp1], %[ftmp1] \n\t" \
106 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" \
107 "paddh %[ftmp1], %[ftmp3], %[ftmp1] \n\t" \
109 "packsshb %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \
110 "and %[ftmp1], %[ftmp1], %[mask] \n\t" \
111 "and %[ftmp2], %[ftmp1], %[hev] \n\t" \
112 "li %[tmp0], 0x04040404 \n\t" \
113 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
114 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
115 "paddsb %[ftmp3], %[ftmp2], %[ftmp0] \n\t" \
116 "li %[tmp0], 0x0B \n\t" \
117 "dmtc1 %[tmp0], %[ftmp4] \n\t" \
118 PSRAB_MMI(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], %[ftmp3]) \
119 "li %[tmp0], 0x03030303 \n\t" \
120 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
121 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
122 "paddsb %[ftmp4], %[ftmp2], %[ftmp0] \n\t" \
123 "li %[tmp0], 0x0B \n\t" \
124 "dmtc1 %[tmp0], %[ftmp2] \n\t" \
125 PSRAB_MMI(%[ftmp4], %[ftmp2], %[ftmp5], %[ftmp6], %[ftmp4]) \
126 "psubsb %[q0], %[q0], %[ftmp3] \n\t" \
127 "paddsb %[p0], %[p0], %[ftmp4] \n\t" \
129 "pcmpeqw %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
130 "xor %[hev], %[hev], %[ftmp0] \n\t" \
131 "and %[ftmp1], %[ftmp1], %[hev] \n\t" \
132 MMI_BTOH(%[ftmp5], %[ftmp6], %[ftmp1]) \
133 "li %[tmp0], 0x07 \n\t" \
134 "dmtc1 %[tmp0], %[ftmp2] \n\t" \
135 "li %[tmp0], 0x001b001b \n\t" \
136 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
137 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
138 "li %[tmp0], 0x003f003f \n\t" \
139 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
140 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
142 "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
143 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
144 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
146 "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
147 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
148 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
150 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
151 "psubsb %[q0], %[q0], %[ftmp4] \n\t" \
152 "xor %[q0], %[q0], %[ftmp7] \n\t" \
153 "paddsb %[p0], %[p0], %[ftmp4] \n\t" \
154 "xor %[p0], %[p0], %[ftmp7] \n\t" \
155 "li %[tmp0], 0x00120012 \n\t" \
156 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
157 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
159 "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
160 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
161 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
163 "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
164 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
165 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
167 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
168 "psubsb %[q1], %[q1], %[ftmp4] \n\t" \
169 "xor %[q1], %[q1], %[ftmp7] \n\t" \
170 "paddsb %[p1], %[p1], %[ftmp4] \n\t" \
171 "xor %[p1], %[p1], %[ftmp7] \n\t" \
172 "li %[tmp0], 0x03 \n\t" \
173 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
175 "psllh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
176 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t" \
177 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
178 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
180 "psllh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
181 "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \
182 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
183 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
185 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
186 "psubsb %[q2], %[q2], %[ftmp4] \n\t" \
187 "xor %[q2], %[q2], %[ftmp7] \n\t" \
188 "paddsb %[p2], %[p2], %[ftmp4] \n\t" \
189 "xor %[p2], %[p2], %[ftmp7] \n\t"
191 #define PUT_VP8_EPEL4_H6_MMI(src, dst) \
192 MMI_ULWC1(%[ftmp1], src, 0x00) \
193 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
194 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
196 MMI_ULWC1(%[ftmp1], src, -0x01) \
197 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
198 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
199 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
201 MMI_ULWC1(%[ftmp1], src, -0x02) \
202 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
203 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
204 "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
206 MMI_ULWC1(%[ftmp1], src, 0x01) \
207 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
208 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
210 MMI_ULWC1(%[ftmp1], src, 0x02) \
211 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
212 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
213 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
215 MMI_ULWC1(%[ftmp1], src, 0x03) \
216 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
217 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
218 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
220 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
221 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
222 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
223 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
225 MMI_SWC1(%[ftmp1], dst, 0x00)
228 #define PUT_VP8_EPEL4_H4_MMI(src, dst) \
229 MMI_ULWC1(%[ftmp1], src, 0x00) \
230 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
231 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
233 MMI_ULWC1(%[ftmp1], src, -0x01) \
234 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
235 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
236 "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
238 MMI_ULWC1(%[ftmp1], src, 0x01) \
239 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
240 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
242 MMI_ULWC1(%[ftmp1], src, 0x02) \
243 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
244 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
245 "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
247 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
249 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
250 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
252 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
253 MMI_SWC1(%[ftmp1], dst, 0x00)
256 #define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride) \
257 MMI_ULWC1(%[ftmp1], src, 0x00) \
258 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
259 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
261 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
262 MMI_ULWC1(%[ftmp1], src1, 0x00) \
263 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
264 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
265 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
267 PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \
268 MMI_ULWC1(%[ftmp1], src1, 0x00) \
269 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
270 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
271 "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
273 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
274 MMI_ULWC1(%[ftmp1], src1, 0x00) \
275 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
276 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
278 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
279 MMI_ULWC1(%[ftmp1], src1, 0x00) \
280 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
281 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
282 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
284 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
285 MMI_ULWC1(%[ftmp1], src1, 0x00) \
286 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
287 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
288 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
290 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
292 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
293 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
294 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
296 MMI_SWC1(%[ftmp1], dst, 0x00)
299 #define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride) \
300 MMI_ULWC1(%[ftmp1], src, 0x00) \
301 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
302 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
304 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
305 MMI_ULWC1(%[ftmp1], src1, 0x00) \
306 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
307 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
308 "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
310 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
311 MMI_ULWC1(%[ftmp1], src1, 0x00) \
312 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
313 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
315 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
316 MMI_ULWC1(%[ftmp1], src1, 0x00) \
317 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
318 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
319 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
321 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
323 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
324 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
325 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
327 MMI_SWC1(%[ftmp1], dst, 0x00)
330 #define PUT_VP8_EPEL8_H6_MMI(src, dst) \
331 MMI_ULDC1(%[ftmp1], src, 0x00) \
332 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
333 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
334 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
335 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
337 MMI_ULDC1(%[ftmp1], src, -0x01) \
338 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
339 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
340 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
341 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
342 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
343 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
345 MMI_ULDC1(%[ftmp1], src, -0x02) \
346 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
347 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
348 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
349 "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \
350 "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
351 "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
353 MMI_ULDC1(%[ftmp1], src, 0x01) \
354 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
355 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
356 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
357 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
359 MMI_ULDC1(%[ftmp1], src, 0x02) \
360 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
361 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
362 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
363 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
364 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
365 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
367 MMI_ULDC1(%[ftmp1], src, 0x03) \
368 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
369 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
370 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
371 "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \
372 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
373 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
375 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
376 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
378 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
379 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
380 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
381 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
382 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
384 MMI_SDC1(%[ftmp1], dst, 0x00)
387 #define PUT_VP8_EPEL8_H4_MMI(src, dst) \
388 MMI_ULDC1(%[ftmp1], src, 0x00) \
389 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
390 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
391 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
392 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
394 MMI_ULDC1(%[ftmp1], src, -0x01) \
395 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
396 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
397 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
398 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
399 "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
400 "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
402 MMI_ULDC1(%[ftmp1], src, 0x01) \
403 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
404 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
405 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
406 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
408 MMI_ULDC1(%[ftmp1], src, 0x02) \
409 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
410 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
411 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
412 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
413 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
414 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
416 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
417 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
419 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
420 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
421 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
422 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
424 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
425 MMI_SDC1(%[ftmp1], dst, 0x00)
428 #define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride) \
429 MMI_ULDC1(%[ftmp1], src, 0x00) \
430 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
431 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
432 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
433 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
435 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
436 MMI_ULDC1(%[ftmp1], src1, 0x00) \
437 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
438 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
439 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
440 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
441 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
442 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
444 PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \
445 MMI_ULDC1(%[ftmp1], src1, 0x00) \
446 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
447 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
448 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
449 "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \
450 "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
451 "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
453 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
454 MMI_ULDC1(%[ftmp1], src1, 0x00) \
455 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
456 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
457 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
458 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
460 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
461 MMI_ULDC1(%[ftmp1], src1, 0x00) \
462 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
463 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
464 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
465 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
466 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
467 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
469 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
470 MMI_ULDC1(%[ftmp1], src1, 0x00) \
471 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
472 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
473 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
474 "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \
475 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
476 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
478 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
479 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
481 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
482 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
483 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
484 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
485 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
487 MMI_SDC1(%[ftmp1], dst, 0x00)
490 #define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride) \
491 MMI_ULDC1(%[ftmp1], src, 0x00) \
492 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
493 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
494 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
495 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
497 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
498 MMI_ULDC1(%[ftmp1], src1, 0x00) \
499 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
500 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
501 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
502 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
503 "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
504 "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
506 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
507 MMI_ULDC1(%[ftmp1], src1, 0x00) \
508 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
509 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
510 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
511 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
513 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
514 MMI_ULDC1(%[ftmp1], src1, 0x00) \
515 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
516 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
517 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
518 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
519 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
520 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
522 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
523 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
525 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
526 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
527 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
528 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
529 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
531 MMI_SDC1(%[ftmp1], dst, 0x00)
534 #define PUT_VP8_BILINEAR8_H_MMI(src, dst) \
535 MMI_ULDC1(%[ftmp1], src, 0x00) \
536 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
537 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
538 "pmullh %[ftmp5], %[ftmp2], %[a] \n\t" \
539 "pmullh %[ftmp6], %[ftmp3], %[a] \n\t" \
541 MMI_ULDC1(%[ftmp1], src, 0x01) \
542 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
543 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
544 "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \
545 "pmullh %[ftmp3], %[ftmp3], %[b] \n\t" \
546 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
547 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
549 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \
550 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \
551 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
552 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
554 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
555 MMI_SDC1(%[ftmp1], dst, 0x00)
558 #define PUT_VP8_BILINEAR4_H_MMI(src, dst) \
559 MMI_ULWC1(%[ftmp1], src, 0x00) \
560 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
561 "pmullh %[ftmp3], %[ftmp2], %[a] \n\t" \
563 MMI_ULWC1(%[ftmp1], src, 0x01) \
564 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
565 "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \
566 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
568 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \
569 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
571 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
572 MMI_SWC1(%[ftmp1], dst, 0x00)
575 #define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride) \
576 MMI_ULDC1(%[ftmp1], src, 0x00) \
577 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
578 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
579 "pmullh %[ftmp5], %[ftmp2], %[c] \n\t" \
580 "pmullh %[ftmp6], %[ftmp3], %[c] \n\t" \
582 PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \
583 MMI_ULDC1(%[ftmp1], src1, 0x00) \
584 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
585 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
586 "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \
587 "pmullh %[ftmp3], %[ftmp3], %[d] \n\t" \
588 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
589 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
591 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \
592 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \
593 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
594 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
596 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
597 MMI_SDC1(%[ftmp1], dst, 0x00)
600 #define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride) \
601 MMI_ULWC1(%[ftmp1], src, 0x00) \
602 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
603 "pmullh %[ftmp3], %[ftmp2], %[c] \n\t" \
605 PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \
606 MMI_ULWC1(%[ftmp1], src1, 0x00) \
607 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
608 "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \
609 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
611 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \
612 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
614 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
615 MMI_SWC1(%[ftmp1], dst, 0x00)
619 {0x0000000000000000, 0x0006000600060006, 0x007b007b007b007b,
620 0x000c000c000c000c, 0x0001000100010001, 0x0000000000000000},
622 {0x0002000200020002, 0x000b000b000b000b, 0x006c006c006c006c,
623 0x0024002400240024, 0x0008000800080008, 0x0001000100010001},
625 {0x0000000000000000, 0x0009000900090009, 0x005d005d005d005d,
626 0x0032003200320032, 0x0006000600060006, 0x0000000000000000},
628 {0x0003000300030003, 0x0010001000100010, 0x004d004d004d004d,
629 0x004d004d004d004d, 0x0010001000100010, 0x0003000300030003},
631 {0x0000000000000000, 0x0006000600060006, 0x0032003200320032,
632 0x005d005d005d005d, 0x0009000900090009, 0x0000000000000000},
634 {0x0001000100010001, 0x0008000800080008, 0x0024002400240024,
635 0x006c006c006c006c, 0x000b000b000b000b, 0x0002000200020002},
637 {0x0000000000000000, 0x0001000100010001, 0x000c000c000c000c,
638 0x007b007b007b007b, 0x0006000600060006, 0x0000000000000000}
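These 64-bit constants are the scalar six-tap coefficients from subpel_filters (listed further below) replicated into four 16-bit lanes, e.g. 123 becomes 0x007b007b007b007b. A hypothetical helper that builds such a splat constant:

    #include <stdint.h>
    /* replicate a 16-bit filter tap into all four halfword lanes */
    static inline uint64_t splat_h4(uint16_t tap)
    {
        return tap * 0x0001000100010001ULL;
    }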
642 #define FILTER_6TAP(src, F, stride) \
643 cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \
644 F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] - \
645 F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7]
647 #define FILTER_4TAP(src, F, stride) \
648 cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \
649 F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7]
652 { 0, 6, 123, 12, 1, 0 },
653 { 2, 11, 108, 36, 8, 1 },
654 { 0, 9, 93, 50, 6, 0 },
655 { 3, 16, 77, 77, 16, 3 },
656 { 0, 6, 50, 93, 9, 0 },
657 { 1, 8, 36, 108, 11, 2 },
658 { 0, 1, 12, 123, 6, 0 },
661 #define MUL_20091(a) ((((a) * 20091) >> 16) + (a))
662 #define MUL_35468(a) (((a) * 35468) >> 16)
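MUL_20091 and MUL_35468 are Q16 fixed-point multiplies used by the VP8 inverse transform: 20091/65536 approximates sqrt(2)*cos(pi/8) - 1 and 35468/65536 approximates sqrt(2)*sin(pi/8). A small self-contained check of that approximation (illustrative, not part of the file):

    #include <math.h>
    #include <stdio.h>
    int main(void)
    {
        const double pi = 3.14159265358979323846;
        printf("%.5f vs %.5f\n", 20091.0 / 65536.0, sqrt(2.0) * cos(pi / 8.0) - 1.0);
        printf("%.5f vs %.5f\n", 35468.0 / 65536.0, sqrt(2.0) * sin(pi / 8.0));
        return 0;
    }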
665 #define clip_int8(n) (cm[(n) + 0x80] - 0x80)
682     f1 = FFMIN(a + 4, 127) >> 3;
683     f2 = FFMIN(a + 3, 127) >> 3;
706     f1 = FFMIN(a + 4, 127) >> 3;
707     f2 = FFMIN(a + 3, 127) >> 3;
754     a0 = (27 * w + 63) >> 7;
755     a1 = (18 * w + 63) >> 7;
756     a2 = (9 * w + 63) >> 7;
785     ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
794 "gsldlc1 %[q0], 0x07(%[dst]) \n\t"
795 "gsldrc1 %[q0], 0x00(%[dst]) \n\t"
796 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t"
797 "gsldlc1 %[p0], 0x07(%[tmp0]) \n\t"
798 "gsldrc1 %[p0], 0x00(%[tmp0]) \n\t"
799 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
800 "gsldlc1 %[p1], 0x07(%[tmp0]) \n\t"
801 "gsldrc1 %[p1], 0x00(%[tmp0]) \n\t"
802 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
803 "gsldlc1 %[p2], 0x07(%[tmp0]) \n\t"
804 "gsldrc1 %[p2], 0x00(%[tmp0]) \n\t"
805 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
806 "gsldlc1 %[p3], 0x07(%[tmp0]) \n\t"
807 "gsldrc1 %[p3], 0x00(%[tmp0]) \n\t"
808 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
809 "gsldlc1 %[q1], 0x07(%[tmp0]) \n\t"
810 "gsldrc1 %[q1], 0x00(%[tmp0]) \n\t"
811 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
812 "gsldlc1 %[q2], 0x07(%[tmp0]) \n\t"
813 "gsldrc1 %[q2], 0x00(%[tmp0]) \n\t"
814 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
815 "gsldlc1 %[q3], 0x07(%[tmp0]) \n\t"
816 "gsldrc1 %[q3], 0x00(%[tmp0]) \n\t"
819 "gssdlc1 %[q0], 0x07(%[dst]) \n\t"
820 "gssdrc1 %[q0], 0x00(%[dst]) \n\t"
821 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t"
822 "gssdlc1 %[p0], 0x07(%[tmp0]) \n\t"
823 "gssdrc1 %[p0], 0x00(%[tmp0]) \n\t"
824 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
825 "gssdlc1 %[p1], 0x07(%[tmp0]) \n\t"
826 "gssdrc1 %[p1], 0x00(%[tmp0]) \n\t"
827 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
828 "gssdlc1 %[p2], 0x07(%[tmp0]) \n\t"
829 "gssdrc1 %[p2], 0x00(%[tmp0]) \n\t"
830 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
831 "gssdlc1 %[q1], 0x07(%[tmp0]) \n\t"
832 "gssdrc1 %[q1], 0x00(%[tmp0]) \n\t"
833 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
834 "gssdlc1 %[q2], 0x07(%[tmp0]) \n\t"
835 "gssdrc1 %[q2], 0x00(%[tmp0]) \n\t"
836     : [p3]"=&f"(ftmp[0]), [p2]"=&f"(ftmp[1]),
837       [p1]"=&f"(ftmp[2]), [p0]"=&f"(ftmp[3]),
838       [q0]"=&f"(ftmp[4]), [q1]"=&f"(ftmp[5]),
839       [q2]"=&f"(ftmp[6]), [q3]"=&f"(ftmp[7]),
840       [ftmp0]"=&f"(ftmp[8]), [ftmp1]"=&f"(ftmp[9]),
841       [ftmp2]"=&f"(ftmp[10]), [ftmp3]"=&f"(ftmp[11]),
842       [hev]"=&f"(ftmp[12]), [mask]"=&f"(ftmp[13]),
843       [ftmp4]"=&f"(ftmp[14]), [ftmp5]"=&f"(ftmp[15]),
844       [ftmp6]"=&f"(ftmp[16]), [ftmp7]"=&f"(ftmp[17]),
845       [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]),
855     ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
859     for (i = 0; i < 8; i++)
861         int hv = hev(dst + i * 1, stride, hev_thresh);
870     ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
879 "gsldlc1 %[p3], 0x03(%[dst]) \n\t"
880 "gsldrc1 %[p3], -0x04(%[dst]) \n\t"
881 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
882 "gsldlc1 %[p2], 0x03(%[tmp0]) \n\t"
883 "gsldrc1 %[p2], -0x04(%[tmp0]) \n\t"
884 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
885 "gsldlc1 %[p1], 0x03(%[tmp0]) \n\t"
886 "gsldrc1 %[p1], -0x04(%[tmp0]) \n\t"
887 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
888 "gsldlc1 %[p0], 0x03(%[tmp0]) \n\t"
889 "gsldrc1 %[p0], -0x04(%[tmp0]) \n\t"
890 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
891 "gsldlc1 %[q0], 0x03(%[tmp0]) \n\t"
892 "gsldrc1 %[q0], -0x04(%[tmp0]) \n\t"
893 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
894 "gsldlc1 %[q1], 0x03(%[tmp0]) \n\t"
895 "gsldrc1 %[q1], -0x04(%[tmp0]) \n\t"
896 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
897 "gsldlc1 %[q2], 0x03(%[tmp0]) \n\t"
898 "gsldrc1 %[q2], -0x04(%[tmp0]) \n\t"
899 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
900 "gsldlc1 %[q3], 0x03(%[tmp0]) \n\t"
901 "gsldrc1 %[q3], -0x04(%[tmp0]) \n\t"
904                   %[q0], %[q1], %[q2], %[q3],
905                   %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
909                   %[q0], %[q1], %[q2], %[q3],
910                   %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
912 "gssdlc1 %[p3], 0x03(%[dst]) \n\t"
913 "gssdrc1 %[p3], -0x04(%[dst]) \n\t"
914 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
915 "gssdlc1 %[p2], 0x03(%[dst]) \n\t"
916 "gssdrc1 %[p2], -0x04(%[dst]) \n\t"
917 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
918 "gssdlc1 %[p1], 0x03(%[dst]) \n\t"
919 "gssdrc1 %[p1], -0x04(%[dst]) \n\t"
920 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
921 "gssdlc1 %[p0], 0x03(%[dst]) \n\t"
922 "gssdrc1 %[p0], -0x04(%[dst]) \n\t"
923 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
924 "gssdlc1 %[q0], 0x03(%[dst]) \n\t"
925 "gssdrc1 %[q0], -0x04(%[dst]) \n\t"
926 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
927 "gssdlc1 %[q1], 0x03(%[dst]) \n\t"
928 "gssdrc1 %[q1], -0x04(%[dst]) \n\t"
929 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
930 "gssdlc1 %[q2], 0x03(%[dst]) \n\t"
931 "gssdrc1 %[q2], -0x04(%[dst]) \n\t"
932 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
933 "gssdlc1 %[q3], 0x03(%[dst]) \n\t"
934 "gssdrc1 %[q3], -0x04(%[dst]) \n\t"
935     : [p3]"=&f"(ftmp[0]), [p2]"=&f"(ftmp[1]),
936       [p1]"=&f"(ftmp[2]), [p0]"=&f"(ftmp[3]),
937       [q0]"=&f"(ftmp[4]), [q1]"=&f"(ftmp[5]),
938       [q2]"=&f"(ftmp[6]), [q3]"=&f"(ftmp[7]),
939       [ftmp0]"=&f"(ftmp[8]), [ftmp1]"=&f"(ftmp[9]),
940       [ftmp2]"=&f"(ftmp[10]), [ftmp3]"=&f"(ftmp[11]),
941       [hev]"=&f"(ftmp[12]), [mask]"=&f"(ftmp[13]),
942       [ftmp4]"=&f"(ftmp[14]), [ftmp5]"=&f"(ftmp[15]),
943       [ftmp6]"=&f"(ftmp[16]), [ftmp7]"=&f"(ftmp[17]),
944       [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]),
954     ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
958     for (i = 0; i < 8; i++)
960         int hv = hev(dst + i * stride, 1, hev_thresh);
975     MMI_LDC1(%[ftmp0], %[dc], 0x00)
976     MMI_LDC1(%[ftmp1], %[dc], 0x08)
977     MMI_LDC1(%[ftmp2], %[dc], 0x10)
978     MMI_LDC1(%[ftmp3], %[dc], 0x18)
979 "paddsh %[ftmp4], %[ftmp0], %[ftmp3] \n\t"
980 "psubsh %[ftmp5], %[ftmp0], %[ftmp3] \n\t"
981 "paddsh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
982 "psubsh %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
983 "paddsh %[ftmp0], %[ftmp4], %[ftmp6] \n\t"
984 "paddsh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
985 "psubsh %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
986 "psubsh %[ftmp3], %[ftmp5], %[ftmp7] \n\t"
987     MMI_SDC1(%[ftmp0], %[dc], 0x00)
988     MMI_SDC1(%[ftmp1], %[dc], 0x08)
989     MMI_SDC1(%[ftmp2], %[dc], 0x10)
990     MMI_SDC1(%[ftmp3], %[dc], 0x18)
991     : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
992       [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
993       [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
994       [ftmp6]"=&f"(ftmp[6]),
996       [ftmp7]"=&f"(ftmp[7])
1011    block[2][0][0] = (dc[8] + dc[11] + 3 + dc[9] + dc[10]) >> 3;
1012    block[2][1][0] = (dc[8] - dc[11] + 3 + dc[9] - dc[10]) >> 3;
1013    block[2][2][0] = (dc[8] + dc[11] + 3 - dc[9] - dc[10]) >> 3;
1014    block[2][3][0] = (dc[8] - dc[11] + 3 - dc[9] + dc[10]) >> 3;
1016    block[3][0][0] = (dc[12] + dc[15] + 3 + dc[13] + dc[14]) >> 3;
1017    block[3][1][0] = (dc[12] - dc[15] + 3 + dc[13] - dc[14]) >> 3;
1018    block[3][2][0] = (dc[12] + dc[15] + 3 - dc[13] - dc[14]) >> 3;
1019    block[3][3][0] = (dc[12] - dc[15] + 3 - dc[13] + dc[14]) >> 3;
1022 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1023    MMI_SDC1(%[ftmp0], %[dc], 0x00)
1024    MMI_SDC1(%[ftmp0], %[dc], 0x08)
1025    MMI_SDC1(%[ftmp0], %[dc], 0x10)
1026    MMI_SDC1(%[ftmp0], %[dc], 0x18)
1027    : RESTRICT_ASM_ALL64
1028      [ftmp0]"=&f"(ftmp[0])
1033    int t00, t01, t02, t03, t10, t11, t12, t13, t20, t21, t22, t23, t30, t31, t32, t33;
1035    t00 = dc[0] + dc[12];
1037    t20 = dc[2] + dc[14];
1038    t30 = dc[3] + dc[15];
1040    t03 = dc[0] - dc[12];
1041    t13 = dc[1] - dc[13];
1042    t23 = dc[2] - dc[14];
1043    t33 = dc[3] - dc[15];
1045    t01 = dc[4] + dc[ 8];
1047    t21 = dc[6] + dc[10];
1048    t31 = dc[7] + dc[11];
1050    t02 = dc[4] - dc[ 8];
1052    t22 = dc[6] - dc[10];
1053    t32 = dc[7] - dc[11];
1085    block[2][0][0] = (dc[8] + dc[11] + 3 + dc[9] + dc[10]) >> 3;
1086    block[2][1][0] = (dc[8] - dc[11] + 3 + dc[9] - dc[10]) >> 3;
1087    block[2][2][0] = (dc[8] + dc[11] + 3 - dc[9] - dc[10]) >> 3;
1088    block[2][3][0] = (dc[8] - dc[11] + 3 - dc[9] + dc[10]) >> 3;
1090    block[3][0][0] = (dc[12] + dc[15] + 3 + dc[13] + dc[14]) >> 3;
1091    block[3][1][0] = (dc[12] - dc[15] + 3 + dc[13] - dc[14]) >> 3;
1092    block[3][2][0] = (dc[12] + dc[15] + 3 - dc[13] - dc[14]) >> 3;
1093    block[3][3][0] = (dc[12] - dc[15] + 3 - dc[13] + dc[14]) >> 3;
1104    int val = (dc[0] + 3) >> 3;
1129    DECLARE_ALIGNED(8, const uint64_t, ff_ph_4e7b) = {0x4e7b4e7b4e7b4e7bULL};
1130    DECLARE_ALIGNED(8, const uint64_t, ff_ph_22a3) = {0x22a322a322a322a3ULL};
1137 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1138    MMI_LDC1(%[ftmp1], %[block], 0x00)
1139    MMI_LDC1(%[ftmp2], %[block], 0x08)
1140    MMI_LDC1(%[ftmp3], %[block], 0x10)
1141    MMI_LDC1(%[ftmp4], %[block], 0x18)
1143 "li %[tmp0], 0x02 \n\t"
1144 "mtc1 %[tmp0], %[ftmp11] \n\t"
1147 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
1149 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
1151 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
1152 "pmulhh %[ftmp7], %[ftmp9], %[ff_ph_22a3] \n\t"
1154 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
1155 "pmulhh %[ftmp8], %[ftmp9], %[ff_ph_22a3] \n\t"
1157 "pmulhh %[ftmp9], %[ftmp2], %[ff_ph_4e7b] \n\t"
1158 "paddh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
1160 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
1161 "paddh %[ftmp10], %[ftmp10], %[ftmp4] \n\t"
1164 "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
1165 "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
1167 "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t"
1168 "psubh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
1170 "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t"
1171 "paddh %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
1173 "psubh %[ftmp4], %[ftmp5], %[ftmp7] \n\t"
1174 "psubh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1176    MMI_SDC1(%[ftmp0], %[block], 0x00)
1177    MMI_SDC1(%[ftmp0], %[block], 0x08)
1178    MMI_SDC1(%[ftmp0], %[block], 0x10)
1179    MMI_SDC1(%[ftmp0], %[block], 0x18)
1182 %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1185 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
1187 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
1189 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
1190 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
1191 "psubh %[ftmp7], %[ftmp9], %[ftmp4] \n\t"
1192 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
1193 "psubh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1195 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
1196 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
1197 "paddh %[ftmp8], %[ftmp9], %[ftmp2] \n\t"
1198 "pmulhh %[ftmp10], %[ftmp2], %[ff_ph_4e7b] \n\t"
1199 "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1201 "li %[tmp0], 0x03 \n\t"
1202 "mtc1 %[tmp0], %[ftmp11] \n\t"
1203 "paddh %[ftmp1], %[ftmp5], %[ftmp8] \n\t"
1204 "paddh %[ftmp1], %[ftmp1], %[ff_pw_4] \n\t"
1205 "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
1206 "paddh %[ftmp2], %[ftmp6], %[ftmp7] \n\t"
1207 "paddh %[ftmp2], %[ftmp2], %[ff_pw_4] \n\t"
1208 "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
1209 "psubh %[ftmp3], %[ftmp6], %[ftmp7] \n\t"
1210 "paddh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t"
1211 "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
1212 "psubh %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
1213 "paddh %[ftmp4], %[ftmp4], %[ff_pw_4] \n\t"
1214 "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
1217 %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1219 MMI_LWC1(%[ftmp5], %[dst0], 0x00)
1220 MMI_LWC1(%[ftmp6], %[dst1], 0x00)
1221 MMI_LWC1(%[ftmp7], %[dst2], 0x00)
1222 MMI_LWC1(%[ftmp8], %[dst3], 0x00)
1224 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1225 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1226 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1227 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1229 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1230 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1231 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1232 "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1234 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1235 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1236 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1237 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1239 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1240 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1241 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1242 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1243    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1244      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1245      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1246      [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1247      [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1248      [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1252    : [dst0]"r"(dst), [dst1]"r"(dst+stride),
1255      [ff_ph_4e7b]"f"(ff_ph_4e7b), [ff_ph_22a3]"f"(ff_ph_22a3)
1262    for (i = 0; i < 4; i++) {
1278    for (i = 0; i < 4; i++) {
1281        t2 = MUL_35468(tmp[4 + i]) - MUL_20091(tmp[12 + i]);
1282        t3 = MUL_20091(tmp[4 + i]) + MUL_35468(tmp[12 + i]);
1284        dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
1285        dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
1286        dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
1287        dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
1303 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1304 "mtc1 %[dc], %[ftmp5] \n\t"
1305 MMI_LWC1(%[ftmp1], %[dst0], 0x00)
1306 MMI_LWC1(%[ftmp2], %[dst1], 0x00)
1307 MMI_LWC1(%[ftmp3], %[dst2], 0x00)
1308 MMI_LWC1(%[ftmp4], %[dst3], 0x00)
1309 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1310 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1311 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1312 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1313 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1314 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1315 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1316 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1317 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1318 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1319 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1320 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1321 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1322 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1323 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1324 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1325 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1326    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1327      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1328      [ftmp4]"=&f"(ftmp[4]),
1330      [ftmp5]"=&f"(ftmp[5])
1331    : [dst0]"r"(dst), [dst1]"r"(dst+stride),
1341    for (i = 0; i < 4; i++) {
1342        dst[0] = av_clip_uint8(dst[0] + dc);
1343        dst[1] = av_clip_uint8(dst[1] + dc);
1344        dst[2] = av_clip_uint8(dst[2] + dc);
1345        dst[3] = av_clip_uint8(dst[3] + dc);
1371    int flim_I, int hev_thresh)
1378    int flim_I, int hev_thresh)
1386    int flim_E, int flim_I, int hev_thresh)
1393    int flim_E, int flim_I, int hev_thresh)
1401    int flim_E, int flim_I, int hev_thresh)
1405    for (i = 0; i < 16; i++)
1407        int hv = hev(dst + i * 1, stride, hev_thresh);
1416    int flim_E, int flim_I, int hev_thresh)
1420    for (i = 0; i < 16; i++)
1422        int hv = hev(dst + i * stride, 1, hev_thresh);
1431    ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
1438    ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
1448    for (i = 0; i < 16; i++)
1457    for (i = 0; i < 16; i++)
1463    ptrdiff_t srcstride, int h, int x, int y)
1473 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1474    MMI_ULDC1(%[ftmp0], %[src], 0x00)
1475 "ldl %[tmp0], 0x0f(%[src]) \n\t"
1476 "ldr %[tmp0], 0x08(%[src]) \n\t"
1477 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
1478 "ldl %[tmp1], 0x0f(%[addr0]) \n\t"
1479 "ldr %[tmp1], 0x08(%[addr0]) \n\t"
1480 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1481 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1482 "sdl %[tmp0], 0x0f(%[dst]) \n\t"
1483 "sdr %[tmp0], 0x08(%[dst]) \n\t"
1484 "addiu %[h], %[h], -0x02 \n\t"
1485 MMI_SDC1(%[ftmp1], %[addr1], 0x00)
1486 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1487 "sdl %[tmp1], 0x0f(%[addr1]) \n\t"
1488 "sdr %[tmp1], 0x08(%[addr1]) \n\t"
1489 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1490 "bnez %[h], 1b \n\t"
1491    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1492      [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
1494      [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1495      [dst]"+&r"(dst), [src]"+&r"(src),
1497    : [dststride]"r"((mips_reg)dststride),
1498      [srcstride]"r"((mips_reg)srcstride)
1504    for (i = 0; i < h; i++, dst += dststride, src += srcstride)
1505        memcpy(dst, src, 16);
1510    ptrdiff_t srcstride, int h, int x, int y)
1520 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1521    MMI_ULDC1(%[ftmp0], %[src], 0x00)
1522 "ldl %[tmp0], 0x07(%[addr0]) \n\t"
1523 "ldr %[tmp0], 0x00(%[addr0]) \n\t"
1524 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1525 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1526 "addiu %[h], %[h], -0x02 \n\t"
1527 "sdl %[tmp0], 0x07(%[addr1]) \n\t"
1528 "sdr %[tmp0], 0x00(%[addr1]) \n\t"
1529 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1530 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1531 "bnez %[h], 1b \n\t"
1532    : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
1534      [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1535      [dst]"+&r"(dst), [src]"+&r"(src),
1537    : [dststride]"r"((mips_reg)dststride),
1538      [srcstride]"r"((mips_reg)srcstride)
1544    for (i = 0; i < h; i++, dst += dststride, src += srcstride)
1545        memcpy(dst, src, 8);
1550    ptrdiff_t srcstride, int h, int x, int y)
1560 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1561    MMI_LWC1(%[ftmp0], %[src], 0x00)
1562 "lwl %[tmp0], 0x03(%[addr0]) \n\t"
1563 "lwr %[tmp0], 0x00(%[addr0]) \n\t"
1564 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1565 MMI_SWC1(%[ftmp0], %[dst], 0x00)
1566 "addiu %[h], %[h], -0x02 \n\t"
1567 "swl %[tmp0], 0x03(%[addr1]) \n\t"
1568 "swr %[tmp0], 0x00(%[addr1]) \n\t"
1569 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1570 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1571 "bnez %[h], 1b \n\t"
1572    : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
1574      [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1575      [dst]"+&r"(dst), [src]"+&r"(src),
1577    : [dststride]"r"((mips_reg)dststride),
1578      [srcstride]"r"((mips_reg)srcstride)
1584    for (i = 0; i < h; i++, dst += dststride, src += srcstride)
1585        memcpy(dst, src, 4);
1590    ptrdiff_t srcstride, int h, int mx, int my)
1619 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1620 "li %[tmp0], 0x07 \n\t"
1621 "mtc1 %[tmp0], %[ftmp4] \n\t"
1631 "addiu %[h], %[h], -0x01 \n\t"
1632 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1633 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1634 "bnez %[h], 1b \n\t"
1635    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1636      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1637      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1638      [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1639      [ftmp8]"=&f"(ftmp[8]),
1640      [tmp0]"=&r"(tmp[0]),
1642      [dst1]"=&r"(dst1), [src1]"=&r"(src1),
1644      [dst]"+&r"(dst), [src]"+&r"(src)
1646      [srcstride]"r"((mips_reg)srcstride),
1647      [dststride]"r"((mips_reg)dststride),
1657    for (y = 0; y < h; y++) {
1658 for (x = 0; x < 16; x++)
1667    ptrdiff_t srcstride, int h, int mx, int my)
1686 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1687 "li %[tmp0], 0x07 \n\t"
1688 "mtc1 %[tmp0], %[ftmp4] \n\t"
1693 "addiu %[h], %[h], -0x01 \n\t"
1694 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1695 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1696 "bnez %[h], 1b \n\t"
1697    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1698      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1699      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1700      [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1701      [ftmp8]"=&f"(ftmp[8]),
1702      [tmp0]"=&r"(tmp[0]),
1705      [dst]"+&r"(dst), [src]"+&r"(src)
1707      [srcstride]"r"((mips_reg)srcstride),
1708      [dststride]"r"((mips_reg)dststride),
1718    for (y = 0; y < h; y++) {
1719 for (x = 0; x < 8; x++)
1728    ptrdiff_t srcstride, int h, int mx, int my)
1743 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1744 "li %[tmp0], 0x07 \n\t"
1745 "mtc1 %[tmp0], %[ftmp4] \n\t"
1750 "addiu %[h], %[h], -0x01 \n\t"
1751 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1752 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1753 "bnez %[h], 1b \n\t"
1754    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1755      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1756      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1757      [tmp0]"=&r"(tmp[0]),
1760      [dst]"+&r"(dst), [src]"+&r"(src)
1762      [srcstride]"r"((mips_reg)srcstride),
1763      [dststride]"r"((mips_reg)dststride),
1773    for (y = 0; y < h; y++) {
1774 for (x = 0; x < 4; x++)
1783    ptrdiff_t srcstride, int h, int mx, int my)
1812 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1813 "li %[tmp0], 0x07 \n\t"
1814 "mtc1 %[tmp0], %[ftmp4] \n\t"
1824 "addiu %[h], %[h], -0x01 \n\t"
1825 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1826 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1827 "bnez %[h], 1b \n\t"
1828    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1829      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1830      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1831      [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1832      [ftmp8]"=&f"(ftmp[8]),
1833      [tmp0]"=&r"(tmp[0]),
1835      [dst1]"=&r"(dst1), [src1]"=&r"(src1),
1837      [dst]"+&r"(dst), [src]"+&r"(src)
1839      [srcstride]"r"((mips_reg)srcstride),
1840      [dststride]"r"((mips_reg)dststride),
1851    for (y = 0; y < h; y++) {
1852 for (x = 0; x < 16; x++)
1861    ptrdiff_t srcstride, int h, int mx, int my)
1880 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1881 "li %[tmp0], 0x07 \n\t"
1882 "mtc1 %[tmp0], %[ftmp4] \n\t"
1887 "addiu %[h], %[h], -0x01 \n\t"
1888 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1889 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1890 "bnez %[h], 1b \n\t"
1891    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1892      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1893      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1894      [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1895      [ftmp8]"=&f"(ftmp[8]),
1896      [tmp0]"=&r"(tmp[0]),
1899      [dst]"+&r"(dst), [src]"+&r"(src)
1901      [srcstride]"r"((mips_reg)srcstride),
1902      [dststride]"r"((mips_reg)dststride),
1913    for (y = 0; y < h; y++) {
1914 for (x = 0; x < 8; x++)
1923    ptrdiff_t srcstride, int h, int mx, int my)
1938 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1939 "li %[tmp0], 0x07 \n\t"
1940 "mtc1 %[tmp0], %[ftmp4] \n\t"
1945 "addiu %[h], %[h], -0x01 \n\t"
1946 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1947 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1948 "bnez %[h], 1b \n\t"
1949    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1950      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1951      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1952      [tmp0]"=&r"(tmp[0]),
1955      [dst]"+&r"(dst), [src]"+&r"(src)
1957      [srcstride]"r"((mips_reg)srcstride),
1958      [dststride]"r"((mips_reg)dststride),
1969    for (y = 0; y < h; y++) {
1970 for (x = 0; x < 4; x++)
1979    ptrdiff_t srcstride, int h, int mx, int my)
2008 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2009 "li %[tmp0], 0x07 \n\t"
2010 "mtc1 %[tmp0], %[ftmp4] \n\t"
2020 "addiu %[h], %[h], -0x01 \n\t"
2021 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2022 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2023 "bnez %[h], 1b \n\t"
2024    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2025      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2026      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2027      [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2028      [ftmp8]"=&f"(ftmp[8]),
2029      [tmp0]"=&r"(tmp[0]),
2031      [src0]"=&r"(src0), [dst0]"=&r"(dst0),
2034      [dst]"+&r"(dst), [src]"+&r"(src)
2036      [srcstride]"r"((mips_reg)srcstride),
2037      [dststride]"r"((mips_reg)dststride),
2047    for (y = 0; y < h; y++) {
2048 for (x = 0; x < 16; x++)
2057    ptrdiff_t srcstride, int h, int mx, int my)
2077 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2078 "li %[tmp0], 0x07 \n\t"
2079 "mtc1 %[tmp0], %[ftmp4] \n\t"
2084 "addiu %[h], %[h], -0x01 \n\t"
2085 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2086 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2087 "bnez %[h], 1b \n\t"
2088    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2089      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2090      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2091      [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2092      [ftmp8]"=&f"(ftmp[8]),
2093      [tmp0]"=&r"(tmp[0]),
2097      [dst]"+&r"(dst), [src]"+&r"(src)
2099      [srcstride]"r"((mips_reg)srcstride),
2100      [dststride]"r"((mips_reg)dststride),
2110    for (y = 0; y < h; y++) {
2111 for (x = 0; x < 8; x++)
2120    ptrdiff_t srcstride, int h, int mx, int my)
2136 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2137 "li %[tmp0], 0x07 \n\t"
2138 "mtc1 %[tmp0], %[ftmp4] \n\t"
2143 "addiu %[h], %[h], -0x01 \n\t"
2144 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2145 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2146 "bnez %[h], 1b \n\t"
2147    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2148      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2149      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2150      [tmp0]"=&r"(tmp[0]),
2154      [dst]"+&r"(dst), [src]"+&r"(src)
2156      [srcstride]"r"((mips_reg)srcstride),
2157      [dststride]"r"((mips_reg)dststride),
2167    for (y = 0; y < h; y++) {
2168 for (x = 0; x < 4; x++)
2177    ptrdiff_t srcstride, int h, int mx, int my)
2206 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2207 "li %[tmp0], 0x07 \n\t"
2208 "mtc1 %[tmp0], %[ftmp4] \n\t"
2218 "addiu %[h], %[h], -0x01 \n\t"
2219 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2220 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2221 "bnez %[h], 1b \n\t"
2222    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2223      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2224      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2225      [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2226      [ftmp8]"=&f"(ftmp[8]),
2227      [tmp0]"=&r"(tmp[0]),
2229      [src0]"=&r"(src0), [dst0]"=&r"(dst0),
2232      [dst]"+&r"(dst), [src]"+&r"(src)
2234      [srcstride]"r"((mips_reg)srcstride),
2235      [dststride]"r"((mips_reg)dststride),
2246    for (y = 0; y < h; y++) {
2247 for (x = 0; x < 16; x++)
2256    ptrdiff_t srcstride, int h, int mx, int my)
2276 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2277 "li %[tmp0], 0x07 \n\t"
2278 "mtc1 %[tmp0], %[ftmp4] \n\t"
2283 "addiu %[h], %[h], -0x01 \n\t"
2284 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2285 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2286 "bnez %[h], 1b \n\t"
2287    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2288      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2289      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2290      [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2291      [ftmp8]"=&f"(ftmp[8]),
2292      [tmp0]"=&r"(tmp[0]),
2296      [dst]"+&r"(dst), [src]"+&r"(src)
2298      [srcstride]"r"((mips_reg)srcstride),
2299      [dststride]"r"((mips_reg)dststride),
2310    for (y = 0; y < h; y++) {
2311 for (x = 0; x < 8; x++)
2320    ptrdiff_t srcstride, int h, int mx, int my)
2336 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2337 "li %[tmp0], 0x07 \n\t"
2338 "mtc1 %[tmp0], %[ftmp4] \n\t"
2343 "addiu %[h], %[h], -0x01 \n\t"
2344 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2345 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2346 "bnez %[h], 1b \n\t"
2347    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2348      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2349      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2350      [tmp0]"=&r"(tmp[0]),
2354      [dst]"+&r"(dst), [src]"+&r"(src)
2356      [srcstride]"r"((mips_reg)srcstride),
2357      [dststride]"r"((mips_reg)dststride),
2368    for (y = 0; y < h; y++) {
2369 for (x = 0; x < 4; x++)
2378    ptrdiff_t srcstride, int h, int mx, int my)
2386 tmp = tmp_array + 16;
2397    for (y = 0; y < h + 3; y++) {
2398 for (x = 0; x < 16; x++)
2404 tmp = tmp_array + 16;
2407    for (y = 0; y < h; y++) {
2408 for (x = 0; x < 16; x++)
2417    ptrdiff_t srcstride, int h, int mx, int my)
2425 tmp = tmp_array + 8;
2436    for (y = 0; y < h + 3; y++) {
2437 for (x = 0; x < 8; x++)
2443 tmp = tmp_array + 8;
2446    for (y = 0; y < h; y++) {
2447 for (x = 0; x < 8; x++)
2456    ptrdiff_t srcstride, int h, int mx, int my)
2464 tmp = tmp_array + 4;
2475    for (y = 0; y < h + 3; y++) {
2476 for (x = 0; x < 4; x++)
2481 tmp = tmp_array + 4;
2484    for (y = 0; y < h; y++) {
2485 for (x = 0; x < 4; x++)
2494    ptrdiff_t srcstride, int h, int mx, int my)
2500 src -= 2 * srcstride;
2502 tmp = tmp_array + 32;
2511 src -= 2 * srcstride;
2513    for (y = 0; y < h + 5; y++) {
2514 for (x = 0; x < 16; x++)
2520 tmp = tmp_array + 32;
2523    for (y = 0; y < h; y++) {
2524 for (x = 0; x < 16; x++)
2533    ptrdiff_t srcstride, int h, int mx, int my)
2539 src -= 2 * srcstride;
2541 tmp = tmp_array + 16;
2550 src -= 2 * srcstride;
2552    for (y = 0; y < h + 5; y++) {
2553 for (x = 0; x < 8; x++)
2559 tmp = tmp_array + 16;
2562    for (y = 0; y < h; y++) {
2563 for (x = 0; x < 8; x++)
2572    ptrdiff_t srcstride, int h, int mx, int my)
2578 src -= 2 * srcstride;
2580 tmp = tmp_array + 8;
2589 src -= 2 * srcstride;
2591    for (y = 0; y < h + 5; y++) {
2592 for (x = 0; x < 4; x++)
2598 tmp = tmp_array + 8;
2601    for (y = 0; y < h; y++) {
2602 for (x = 0; x < 4; x++)
2611    ptrdiff_t srcstride, int h, int mx, int my)
2619 tmp = tmp_array + 16;
2630    for (y = 0; y < h + 3; y++) {
2631 for (x = 0; x < 16; x++)
2637 tmp = tmp_array + 16;
2640    for (y = 0; y < h; y++) {
2641 for (x = 0; x < 16; x++)
2650    ptrdiff_t srcstride, int h, int mx, int my)
2658 tmp = tmp_array + 8;
2669    for (y = 0; y < h + 3; y++) {
2670 for (x = 0; x < 8; x++)
2676 tmp = tmp_array + 8;
2679    for (y = 0; y < h; y++) {
2680 for (x = 0; x < 8; x++)
2689    ptrdiff_t srcstride, int h, int mx, int my)
2697 tmp = tmp_array + 4;
2708    for (y = 0; y < h + 3; y++) {
2709 for (x = 0; x < 4; x++)
2715 tmp = tmp_array + 4;
2718    for (y = 0; y < h; y++) {
2719 for (x = 0; x < 4; x++)
2728    ptrdiff_t srcstride, int h, int mx, int my)
2734 src -= 2 * srcstride;
2736 tmp = tmp_array + 32;
2745 src -= 2 * srcstride;
2747    for (y = 0; y < h + 5; y++) {
2748 for (x = 0; x < 16; x++)
2754 tmp = tmp_array + 32;
2757    for (y = 0; y < h; y++) {
2758 for (x = 0; x < 16; x++)
2767    ptrdiff_t srcstride, int h, int mx, int my)
2773 src -= 2 * srcstride;
2775 tmp = tmp_array + 16;
2784 src -= 2 * srcstride;
2786    for (y = 0; y < h + 5; y++) {
2787 for (x = 0; x < 8; x++)
2793 tmp = tmp_array + 16;
2796    for (y = 0; y < h; y++) {
2797 for (x = 0; x < 8; x++)
2806    ptrdiff_t srcstride, int h, int mx, int my)
2812 src -= 2 * srcstride;
2814 tmp = tmp_array + 8;
2823 src -= 2 * srcstride;
2825    for (y = 0; y < h + 5; y++) {
2826 for (x = 0; x < 4; x++)
2832 tmp = tmp_array + 8;
2835    for (y = 0; y < h; y++) {
2836 for (x = 0; x < 4; x++)
2845    ptrdiff_t sstride, int h, int mx, int my)
2848    int a = 8 - mx, b = mx;
2874 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2875 "li %[tmp0], 0x03 \n\t"
2876 "mtc1 %[tmp0], %[ftmp4] \n\t"
2877 "pshufh %[a], %[a], %[ftmp0] \n\t"
2878 "pshufh %[b], %[b], %[ftmp0] \n\t"
2888 "addiu %[h], %[h], -0x01 \n\t"
2889 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
2890 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
2891 "bnez %[h], 1b \n\t"
2892    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2893      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2894      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2895      [ftmp6]"=&f"(ftmp[6]),
2896      [tmp0]"=&r"(tmp[0]),
2898      [dst0]"=&r"(dst0), [src0]"=&r"(src0),
2900      [dst]"+&r"(dst), [src]"+&r"(src),
2901      [a]"+&f"(a), [b]"+&f"(b)
2908    int a = 8 - mx, b = mx;
2911    for (y = 0; y < h; y++) {
2912        for (x = 0; x < 16; x++)
2913            dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
2921    ptrdiff_t sstride, int h, int mx, int my)
2924 int c = 8 - my, d = my;
2941 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2942 "li %[tmp0], 0x03 \n\t"
2943 "mtc1 %[tmp0], %[ftmp4] \n\t"
2944 "pshufh %[c], %[c], %[ftmp0] \n\t"
2945 "pshufh %[d], %[d], %[ftmp0] \n\t"
2955 "addiu %[h], %[h], -0x01 \n\t"
2956 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
2957 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
2958 "bnez %[h], 1b \n\t"
2959    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2960      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2961      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2962      [ftmp6]"=&f"(ftmp[6]),
2963      [tmp0]"=&r"(tmp[0]),
2965      [src0]"=&r"(src0), [dst0]"=&r"(dst0),
2968      [dst]"+&r"(dst), [src]"+&r"(src),
2969      [c]"+&f"(c), [d]"+&f"(d)
2976 int c = 8 - my, d = my;
2979    for (y = 0; y < h; y++) {
2980        for (x = 0; x < 16; x++)
2981            dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
2989    ptrdiff_t sstride, int h, int mx, int my)
2998    int a = 8 - mx, b = mx;
2999 int c = 8 - my, d = my;
3004    for (y = 0; y < h + 1; y++) {
3005        for (x = 0; x < 16; x++)
3013    for (y = 0; y < h; y++) {
3014        for (x = 0; x < 16; x++)
3015            dst[x] = (c * tmp[x] + d * tmp[x + 16] + 4) >> 3;
3023    ptrdiff_t sstride, int h, int mx, int my)
3026    int a = 8 - mx, b = mx;
3042 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3043 "li %[tmp0], 0x03 \n\t"
3044 "mtc1 %[tmp0], %[ftmp4] \n\t"
3045 "pshufh %[a], %[a], %[ftmp0] \n\t"
3046 "pshufh %[b], %[b], %[ftmp0] \n\t"
3051 "addiu %[h], %[h], -0x01 \n\t"
3052 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3053 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3054 "bnez %[h], 1b \n\t"
3055    : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
3056      [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
3057      [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
3058      [ftmp6]"=&f"(ftmp[6]),
3059      [tmp0]"=&r"(tmp[0]),
3062      [dst]"+&r"(dst), [src]"+&r"(src),
3063      [a]"+&f"(a), [b]"+&f"(b)
3070 int a = 8 - mx, b = mx;
3073 for (y = 0; y < h; y++) {
3074 for (x = 0; x < 8; x++)
3075 dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3083 ptrdiff_t sstride, int h, int mx, int my)
3086 int c = 8 - my, d = my;
3103 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3104 "li %[tmp0], 0x03 \n\t"
3105 "mtc1 %[tmp0], %[ftmp4] \n\t"
3106 "pshufh %[c], %[c], %[ftmp0] \n\t"
3107 "pshufh %[d], %[d], %[ftmp0] \n\t"
3112 "addiu %[h], %[h], -0x01 \n\t"
3113 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3114 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3115 "bnez %[h], 1b \n\t"
3116 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
3117 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
3118 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
3119 [ftmp6]"=&f"(ftmp[6]),
3120 [tmp0]"=&r"(tmp[0]),
3124 [dst]"+&r"(dst), [src]"+&r"(src),
3125 [c]"+&f"(c), [d]"+&f"(d)
3132 int c = 8 - my, d = my;
3135 for (y = 0; y < h; y++) {
3136 for (x = 0; x < 8; x++)
3137 dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
3145 ptrdiff_t sstride, int h, int mx, int my)
3154 int a = 8 - mx, b = mx;
3155 int c = 8 - my, d = my;
3160 for (y = 0; y < h + 1; y++) {
3161 for (x = 0; x < 8; x++)
3169 for (y = 0; y < h; y++) {
3170 for (x = 0; x < 8; x++)
3171 dst[x] = (c * tmp[x] + d * tmp[x + 8] + 4) >> 3;
3179 ptrdiff_t sstride, int h, int mx, int my)
3182 int a = 8 - mx, b = mx;
3195 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3196 "li %[tmp0], 0x03 \n\t"
3197 "mtc1 %[tmp0], %[ftmp4] \n\t"
3198 "pshufh %[a], %[a], %[ftmp0] \n\t"
3199 "pshufh %[b], %[b], %[ftmp0] \n\t"
3204 "addiu %[h], %[h], -0x01 \n\t"
3205 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3206 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3207 "bnez %[h], 1b \n\t"
3208 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
3209 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
3210 [ftmp4]"=&f"(ftmp[4]),
3211 [tmp0]"=&r"(tmp[0]),
3215 [dst]"+&r"(dst), [src]"+&r"(src),
3216 [a]"+&f"(a), [b]"+&f"(b)
3223 int a = 8 - mx, b = mx;
3226 for (y = 0; y < h; y++) {
3227 for (x = 0; x < 4; x++)
3228 dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3236 ptrdiff_t sstride, int h, int mx, int my)
3239 int c = 8 - my, d = my;
3253 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3254 "li %[tmp0], 0x03 \n\t"
3255 "mtc1 %[tmp0], %[ftmp4] \n\t"
3256 "pshufh %[c], %[c], %[ftmp0] \n\t"
3257 "pshufh %[d], %[d], %[ftmp0] \n\t"
3262 "addiu %[h], %[h], -0x01 \n\t"
3263 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3264 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3265 "bnez %[h], 1b \n\t"
3266 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
3267 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
3268 [ftmp4]"=&f"(ftmp[4]),
3269 [tmp0]"=&r"(tmp[0]),
3274 [dst]"+&r"(dst), [src]"+&r"(src),
3275 [c]"+&f"(c), [d]"+&f"(d)
3282 int c = 8 - my, d = my;
3285 for (y = 0; y < h; y++) {
3286 for (x = 0; x < 4; x++)
3287 dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
3295 ptrdiff_t sstride, int h, int mx, int my)
3304 int a = 8 - mx, b = mx;
3305 int c = 8 - my, d = my;
3310 for (y = 0; y < h + 1; y++) {
3311 for (x = 0; x < 4; x++)
3319 for (y = 0; y < h; y++) {
3320 for (x = 0; x < 4; x++)
3321 dst[x] = (c * tmp[x] + d * tmp[x + 4] + 4) >> 3;
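A quick numeric check of the rounding shared by all of these kernels: with mx = 3 the weights are a = 5 and b = 3, so neighbouring pixels 100 and 105 give (5 * 100 + 3 * 105 + 4) >> 3 = 819 >> 3 = 102, the exact value 101.875 rounded to nearest. A call like ff_put_vp8_bilinear4_hv_mmi(dst, dstride, src, sstride, 4, 3, 5) (block height and strides here purely illustrative) would apply that 3/8 horizontal offset together with a 5/8 vertical one to a 4-pixel-wide block.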
#define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride)
static const uint8_t q1[256]
void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define FILTER_4TAP(src, F, stride)
static av_always_inline void vp8_filter_common_isnot4tap(uint8_t *p, ptrdiff_t stride)
void ff_vp8_h_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
void ff_vp8_v_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
void ff_put_vp8_pixels16_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define RESTRICT_ASM_DOUBLE_1
void ff_vp8_h_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
#define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride)
void ff_vp8_luma_dc_wht_mmi(int16_t block[4][4][16], int16_t dc[16])
static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
#define PUT_VP8_BILINEAR4_H_MMI(src, dst)
#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, fr_t0, fr_t1, fr_t2, fr_t3)
brief: Transpose 4x4 half-word packed data.
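TRANSPOSE_4H rearranges four 64-bit registers, each packing four 16-bit values, into their transpose; the net data movement is the same as this plain-C reference (an illustration only, not the macro itself):

#include <stdint.h>

/* Scalar picture of a 4x4 transpose of 16-bit elements:
 * output row r is input column r. */
static void transpose_4h_ref(int16_t out[4][4], const int16_t in[4][4])
{
    int r, c;
    for (r = 0; r < 4; r++)
        for (c = 0; c < 4; c++)
            out[r][c] = in[c][r];
}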
void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
static av_always_inline void vp8_filter_common_is4tap(uint8_t *p, ptrdiff_t stride)
void ff_put_vp8_epel8_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static double val(void *priv, double ch)
static const uint64_t fourtap_subpel_filters[7][6]
void ff_put_vp8_epel16_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel8_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static const uint16_t mask[17]
static av_always_inline void vp8_v_loop_filter8_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel4_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride)
#define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride)
void ff_vp8_h_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
void ff_put_vp8_epel4_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static const uint8_t q0[256]
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
#define FILTER_6TAP(src, F, stride)
void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_BILINEAR8_H_MMI(src, dst)
#define RESTRICT_ASM_UINT32_T
#define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride)
void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel8_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define MMI_VP8_LOOP_FILTER
void ff_put_vp8_epel4_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline void vp8_v_loop_filter8_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_EPEL4_H4_MMI(src, dst)
void ff_vp8_v_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
#define RESTRICT_ASM_DOUBLE_2
void ff_vp8_v_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_idct_dc_add4y_mmi(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
void ff_put_vp8_epel8_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear8_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_idct_dc_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
void ff_vp8_idct_dc_add4uv_mmi(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
static av_always_inline int vp8_normal_limit(uint8_t *p, ptrdiff_t stride, int E, int I)
void ff_put_vp8_epel16_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define DECLARE_ALIGNED(n, t, v)
void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear16_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define i(width, name, range_min, range_max)
void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_vp8_v_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_v_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear4_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride)
void ff_put_vp8_epel16_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_EPEL8_H4_MMI(src, dst)
void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline void vp8_h_loop_filter8_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_h_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16])
void ff_put_vp8_epel16_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_pixels4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
#define TRANSPOSE_8B(fr_i0, fr_i1, fr_i2, fr_i3, fr_i4, fr_i5, fr_i6, fr_i7, fr_t0, fr_t1, fr_t2, fr_t3)
brief: Transpose 8x8 byte packed data.
#define PUT_VP8_EPEL4_H6_MMI(src, dst)
void ff_put_vp8_epel4_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_pixels8_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
#define PUT_VP8_EPEL8_H6_MMI(src, dst)
void ff_vp8_h_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static const uint8_t subpel_filters[7][6]