FFmpeg
h264qpel_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized h264qpel
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "h264dsp_mips.h"
25 #include "hpeldsp_mips.h"
28 
/*
 * Copy a block that is 4 bytes wide and h rows tall from src to dst.
 *
 * dst/src               start of the destination / source block
 * dstStride/srcStride   byte distance between consecutive rows
 * h                     number of rows to copy
 *
 * The loop body runs once before h is tested (decrement, then bnez), so the
 * caller must pass h >= 1.
 * MMI_ULWC1 / MMI_SWC1 are macros defined outside this file; judging by
 * their names they presumably do an unaligned 32-bit load into, and a 32-bit
 * store from, an FP register -- confirm against their definition.
 */
29 static inline void copy_block4_mmi(uint8_t *dst, const uint8_t *src,
30  int dstStride, int srcStride, int h)
31 {
32  double ftmp[1];
34 
35  __asm__ volatile (
36  "1: \n\t"
 /* One row: load from src, store to dst. */
37  MMI_ULWC1(%[ftmp0], %[src], 0x00)
38  MMI_SWC1(%[ftmp0], %[dst], 0x00)
 /* Count the row, step both pointers to the next row, loop while h != 0. */
39  "addi %[h], %[h], -0x01 \n\t"
40  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
41  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
42  "bnez %[h], 1b \n\t"
 /* dst, src and h are read-write operands: the asm modifies all three. */
43  : [ftmp0]"=&f"(ftmp[0]),
44  [dst]"+&r"(dst), [src]"+&r"(src),
46  [h]"+&r"(h)
47  : [dstStride]"r"((mips_reg)dstStride),
48  [srcStride]"r"((mips_reg)srcStride)
49  : "memory"
50  );
51 }
52 
/*
 * Copy a block that is 8 bytes wide and h rows tall from src to dst.
 *
 * dst/src               start of the destination / source block
 * dstStride/srcStride   byte distance between consecutive rows
 * h                     number of rows to copy
 *
 * The loop body runs once before h is tested, so the caller must pass
 * h >= 1.
 * MMI_ULDC1 / MMI_SDC1 are macros defined outside this file; judging by
 * their names they presumably do an unaligned 64-bit load into, and a 64-bit
 * store from, an FP register -- confirm against their definition.
 */
53 static inline void copy_block8_mmi(uint8_t *dst, const uint8_t *src,
54  int dstStride, int srcStride, int h)
55 {
56  double ftmp[1];
58 
59  __asm__ volatile (
60  "1: \n\t"
 /* One row: load from src, store to dst. */
61  MMI_ULDC1(%[ftmp0], %[src], 0x00)
62  MMI_SDC1(%[ftmp0], %[dst], 0x00)
 /* Count the row, step both pointers to the next row, loop while h != 0. */
63  "addi %[h], %[h], -0x01 \n\t"
64  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
65  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
66  "bnez %[h], 1b \n\t"
 /* dst, src and h are read-write operands: the asm modifies all three. */
67  : [ftmp0]"=&f"(ftmp[0]),
69  [dst]"+&r"(dst), [src]"+&r"(src),
70  [h]"+&r"(h)
71  : [dstStride]"r"((mips_reg)dstStride),
72  [srcStride]"r"((mips_reg)srcStride)
73  : "memory"
74  );
75 }
76 
/*
 * Copy a block that is 16 bytes wide and h rows tall from src to dst.
 *
 * dst/src               start of the destination / source block
 * dstStride/srcStride   byte distance between consecutive rows
 * h                     number of rows to copy
 *
 * Each row is moved as two 8-byte halves: bytes 0..7 go through the FP
 * register ftmp0 (MMI_ULDC1 / MMI_SDC1, macros defined outside this file),
 * bytes 8..15 go through the general-purpose register tmp0 using the
 * ldl/ldr and sdl/sdr pairs at offsets 0x0f/0x08.
 * The loop body runs once before h is tested, so the caller must pass
 * h >= 1.
 */
77 static inline void copy_block16_mmi(uint8_t *dst, const uint8_t *src,
78  int dstStride, int srcStride, int h)
79 {
80  double ftmp[1];
81  uint64_t tmp[1];
83 
84  __asm__ volatile (
85  "1: \n\t"
 /* Load both halves of the source row. */
86  MMI_ULDC1(%[ftmp0], %[src], 0x00)
87  "ldl %[tmp0], 0x0f(%[src]) \n\t"
88  "ldr %[tmp0], 0x08(%[src]) \n\t"
 /* Store both halves to the destination row. */
89  MMI_SDC1(%[ftmp0], %[dst], 0x00)
90  "sdl %[tmp0], 0x0f(%[dst]) \n\t"
91  "sdr %[tmp0], 0x08(%[dst]) \n\t"
 /* Count the row, step both pointers to the next row, loop while h != 0. */
92  "addi %[h], %[h], -0x01 \n\t"
93  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
94  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
95  "bnez %[h], 1b \n\t"
96  : [ftmp0]"=&f"(ftmp[0]),
97  [tmp0]"=&r"(tmp[0]),
99  [dst]"+&r"(dst), [src]"+&r"(src),
100  [h]"+&r"(h)
101  : [dstStride]"r"((mips_reg)dstStride),
102  [srcStride]"r"((mips_reg)srcStride)
103  : "memory"
104  );
105 }
106 
/*
 * Store helpers for an intermediate value b that carries a scale of 1024:
 * b is rounded and scaled down with ((b) + 512) >> 10 and passed through
 * CLIP (defined outside this file).
 *   op2_put(a, b)  assigns that result to a.
 *   op2_avg(a, b)  assigns the rounded-up average of that result and the
 *                  current value of a, i.e. (a + result + 1) >> 1.
 * Both expand to an assignment to a, so a must be an lvalue; op2_avg
 * evaluates a twice.
 */
107 #define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
108 #define op2_put(a, b) a = CLIP(((b) + 512)>>10)
/*
 * Horizontal 6-tap lowpass filter over a 4x4 block; the result overwrites
 * dst.
 *
 * For each of 4 rows and each of 4 pixels x the asm computes
 *   ((src[x] + src[x+1]) * ff_pw_20 - (src[x-1] + src[x+2]) * ff_pw_5
 *    + (src[x-2] + src[x+3]) + ff_pw_16) >> shift
 * and packs the result to unsigned bytes with saturation (packushb).
 * ff_pw_20 / ff_pw_5 / ff_pw_16 are constants defined outside this file;
 * their names suggest packed 16-bit words holding 20, 5 and 16 -- confirm.
 * The shift count is taken from the ff_pw_5 register.
 *
 * The source is read from src - 2 through src + 6 on every row.
 *
 * dst/src               top-left pixel of the destination / source block
 * dstStride/srcStride   byte distance between consecutive rows
 */
109 static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
110  int dstStride, int srcStride)
111 {
112  double ftmp[10];
113  uint64_t tmp[1];
115 
116  __asm__ volatile (
 /* ftmp0 = 0 (used to widen bytes to halfwords); tmp0 = row counter. */
117  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
118  "dli %[tmp0], 0x04 \n\t"
119  "1: \n\t"
 /* Six overlapping 4-byte windows starting at src-2 .. src+3. */
120  MMI_ULWC1(%[ftmp1], %[src], -0x02)
121  MMI_ULWC1(%[ftmp2], %[src], -0x01)
122  MMI_ULWC1(%[ftmp3], %[src], 0x00)
123  MMI_ULWC1(%[ftmp4], %[src], 0x01)
124  MMI_ULWC1(%[ftmp5], %[src], 0x02)
125  MMI_ULWC1(%[ftmp6], %[src], 0x03)
126 
 /* Interleave each window with zero bytes to get 16-bit lanes. */
127  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
128  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
129  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
130  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
131  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
132  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
 /* Pair up the symmetric taps: centre, inner and outer. */
133  "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
134  "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
135  "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
 /* Weight, combine, add the rounding term, shift and pack to bytes. */
136  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
137  "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
138  "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
139  "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
140  "paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t"
141  "psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t"
142  "packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
143  MMI_SWC1(%[ftmp9], %[dst], 0x00)
 /* Next row; loop until the counter reaches zero. */
144  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
145  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
146  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
147  "bnez %[tmp0], 1b \n\t"
148  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
149  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
150  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
151  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
152  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
153  [tmp0]"=&r"(tmp[0]),
155  [dst]"+&r"(dst), [src]"+&r"(src)
156  : [dstStride]"r"((mips_reg)dstStride),
157  [srcStride]"r"((mips_reg)srcStride),
158  [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
159  [ff_pw_16]"f"(ff_pw_16.f)
160  : "memory"
161  );
162 }
163 
/*
 * Horizontal 6-tap lowpass filter over an 8x8 block; the result overwrites
 * dst.
 *
 * For each of 8 rows and each of 8 pixels x the asm computes
 *   ((src[x] + src[x+1]) * ff_pw_20 - (src[x-1] + src[x+2]) * ff_pw_5
 *    + (src[x-2] + src[x+3]) + ff_pw_16) >> shift
 * and packs the result to unsigned bytes with saturation (packushb).
 * Every 8-byte window is widened into a low half (punpcklbh) and a high
 * half (punpckhbh) of four 16-bit lanes each; the two halves are filtered
 * separately and re-joined by the final pack.
 * ff_pw_20 / ff_pw_5 / ff_pw_16 are constants defined outside this file;
 * their names suggest packed 16-bit words holding 20, 5 and 16 -- confirm.
 * The shift count is taken from the ff_pw_5 register.
 *
 * The source is read from src - 2 through src + 10 on every row.
 *
 * dst/src               top-left pixel of the destination / source block
 * dstStride/srcStride   byte distance between consecutive rows
 */
164 static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
165  int dstStride, int srcStride)
166 {
167  double ftmp[11];
168  uint64_t tmp[1];
170 
171  __asm__ volatile (
 /* ftmp0 = 0 (used to widen bytes to halfwords); tmp0 = row counter. */
172  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
173  "dli %[tmp0], 0x08 \n\t"
174  "1: \n\t"
 /* Six overlapping 8-byte windows starting at src-2 .. src+3. */
175  MMI_ULDC1(%[ftmp1], %[src], -0x02)
176  MMI_ULDC1(%[ftmp2], %[src], -0x01)
177  MMI_ULDC1(%[ftmp3], %[src], 0x00)
178  MMI_ULDC1(%[ftmp4], %[src], 0x01)
179  MMI_ULDC1(%[ftmp5], %[src], 0x02)
180  MMI_ULDC1(%[ftmp6], %[src], 0x03)
 /* Centre taps: ftmp3/ftmp4 = (src[x] + src[x+1]) * ff_pw_20, low/high. */
181  "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
182  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
183  "punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
184  "punpckhbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
185  "paddsh %[ftmp3], %[ftmp7], %[ftmp9] \n\t"
186  "paddsh %[ftmp4], %[ftmp8], %[ftmp10] \n\t"
187  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_20] \n\t"
188  "pmullh %[ftmp4], %[ftmp4], %[ff_pw_20] \n\t"
 /* Inner taps: ftmp2/ftmp5 = (src[x-1] + src[x+2]) * ff_pw_5, low/high. */
189  "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
190  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
191  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
192  "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
193  "paddsh %[ftmp2], %[ftmp7], %[ftmp9] \n\t"
194  "paddsh %[ftmp5], %[ftmp8], %[ftmp10] \n\t"
195  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
196  "pmullh %[ftmp5], %[ftmp5], %[ff_pw_5] \n\t"
 /* Outer taps: ftmp1/ftmp6 = src[x-2] + src[x+3], low/high. */
197  "punpcklbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
198  "punpckhbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t"
199  "punpcklbh %[ftmp9], %[ftmp6], %[ftmp0] \n\t"
200  "punpckhbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t"
201  "paddsh %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
202  "paddsh %[ftmp6], %[ftmp8], %[ftmp10] \n\t"
 /* Combine, add the rounding term, shift, pack both halves and store. */
203  "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
204  "psubsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
205  "paddsh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
206  "paddsh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
207  "paddsh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
208  "paddsh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
209  "psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
210  "psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t"
211  "packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t"
212  MMI_SDC1(%[ftmp9], %[dst], 0x00)
 /* Next row; loop until the counter reaches zero. */
213  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
214  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
215  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
216  "bnez %[tmp0], 1b \n\t"
217  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
218  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
219  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
220  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
221  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
222  [ftmp10]"=&f"(ftmp[10]),
223  [tmp0]"=&r"(tmp[0]),
225  [dst]"+&r"(dst), [src]"+&r"(src)
226  : [dstStride]"r"((mips_reg)dstStride),
227  [srcStride]"r"((mips_reg)srcStride),
228  [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
229  [ff_pw_16]"f"(ff_pw_16.f)
230  : "memory"
231  );
232 }
233 
/*
 * Horizontal lowpass over a 16x16 block, writing the result to dst.
 *
 * The block is covered by four 8x8 calls to put_h264_qpel8_h_lowpass_mmi():
 * upper-left, upper-right, lower-left, lower-right, in that order.
 *
 * dst/src               top-left pixel of the destination / source block
 * dstStride/srcStride   byte distance between consecutive rows
 */
static void put_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    /* Start of the lower 8 rows in each buffer. */
    const uint8_t *src_lower = src + 8 * srcStride;
    uint8_t *dst_lower = dst + 8 * dstStride;

    /* Upper half: left 8 columns, then right 8 columns. */
    put_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
    put_h264_qpel8_h_lowpass_mmi(dst + 8, src + 8, dstStride, srcStride);

    /* Lower half: left 8 columns, then right 8 columns. */
    put_h264_qpel8_h_lowpass_mmi(dst_lower, src_lower, dstStride, srcStride);
    put_h264_qpel8_h_lowpass_mmi(dst_lower + 8, src_lower + 8, dstStride,
                                 srcStride);
}
244 
/*
 * Horizontal 6-tap lowpass filter over a 4x4 block, averaged into dst.
 *
 * For each of 4 rows and each of 4 pixels x the asm computes
 *   ((src[x] + src[x+1]) * ff_pw_20 - (src[x-1] + src[x+2]) * ff_pw_5
 *    + (src[x-2] + src[x+3]) + ff_pw_16) >> shift
 * packs it to unsigned bytes with saturation (packushb), then combines it
 * with the bytes already in dst using pavgb and stores the result back.
 * ff_pw_20 / ff_pw_5 / ff_pw_16 are constants defined outside this file;
 * their names suggest packed 16-bit words holding 20, 5 and 16 -- confirm.
 * The shift count is taken from the ff_pw_5 register.
 *
 * The source is read from src - 2 through src + 6 on every row. dst is read
 * with MMI_LWC1 rather than the MMI_ULWC1 used for src; presumably that
 * requires dst to be 4-byte aligned -- confirm against the macro.
 *
 * dst/src               top-left pixel of the destination / source block
 * dstStride/srcStride   byte distance between consecutive rows
 */
245 static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
246  int dstStride, int srcStride)
247 {
248  double ftmp[11];
249  uint64_t tmp[1];
251 
252  __asm__ volatile (
 /* ftmp0 = 0 (used to widen bytes to halfwords); tmp0 = row counter. */
253  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
254  "dli %[tmp0], 0x04 \n\t"
255  "1: \n\t"
 /* Six overlapping 4-byte windows starting at src-2 .. src+3. */
256  MMI_ULWC1(%[ftmp1], %[src], -0x02)
257  MMI_ULWC1(%[ftmp2], %[src], -0x01)
258  MMI_ULWC1(%[ftmp3], %[src], 0x00)
259  MMI_ULWC1(%[ftmp4], %[src], 0x01)
260  MMI_ULWC1(%[ftmp5], %[src], 0x02)
261  MMI_ULWC1(%[ftmp6], %[src], 0x03)
 /* Interleave each window with zero bytes to get 16-bit lanes. */
262  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
263  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
264  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
265  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
266  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
267  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
 /* Pair up the symmetric taps: centre, inner and outer. */
268  "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
269  "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
270  "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
 /* Weight, combine, add the rounding term, shift and pack to bytes. */
271  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
272  "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
273  "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
274  "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
275  "paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t"
276  "psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t"
277  "packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
 /* Average the filtered row with the existing dst row, then store. */
278  MMI_LWC1(%[ftmp10], %[dst], 0x00)
279  "pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
280  MMI_SWC1(%[ftmp9], %[dst], 0x00)
 /* Next row; loop until the counter reaches zero. */
281  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
282  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
283  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
284  "bnez %[tmp0], 1b \n\t"
285  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
286  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
287  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
288  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
289  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
290  [ftmp10]"=&f"(ftmp[10]),
291  [tmp0]"=&r"(tmp[0]),
293  [dst]"+&r"(dst), [src]"+&r"(src)
294  : [dstStride]"r"((mips_reg)dstStride),
295  [srcStride]"r"((mips_reg)srcStride),
296  [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
297  [ff_pw_16]"f"(ff_pw_16.f)
298  : "memory"
299  );
300 }
301 
/*
 * Horizontal 6-tap lowpass filter over an 8x8 block, averaged into dst.
 *
 * For each of 8 rows and each of 8 pixels x the asm computes
 *   ((src[x] + src[x+1]) * ff_pw_20 - (src[x-1] + src[x+2]) * ff_pw_5
 *    + (src[x-2] + src[x+3]) + ff_pw_16) >> shift
 * packs it to unsigned bytes with saturation (packushb), then combines it
 * with the bytes already in dst using pavgb and stores the result back.
 * Every 8-byte window is widened into a low half (punpcklbh) and a high
 * half (punpckhbh) that are filtered separately.
 * ff_pw_20 / ff_pw_5 / ff_pw_16 are constants defined outside this file;
 * their names suggest packed 16-bit words holding 20, 5 and 16 -- confirm.
 * The shift count is taken from the ff_pw_5 register.
 *
 * The source is read from src - 2 through src + 10 on every row. dst is
 * read with MMI_LDC1 rather than the MMI_ULDC1 used for src; presumably
 * that requires dst to be 8-byte aligned -- confirm against the macro.
 *
 * dst/src               top-left pixel of the destination / source block
 * dstStride/srcStride   byte distance between consecutive rows
 */
302 static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
303  int dstStride, int srcStride)
304 {
305  double ftmp[11];
306  uint64_t tmp[1];
308 
309  __asm__ volatile (
 /* ftmp0 = 0 (used to widen bytes to halfwords); tmp0 = row counter. */
310  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
311  "dli %[tmp0], 0x08 \n\t"
312  "1: \n\t"
 /* Six overlapping 8-byte windows starting at src-2 .. src+3. */
313  MMI_ULDC1(%[ftmp1], %[src], -0x02)
314  MMI_ULDC1(%[ftmp2], %[src], -0x01)
315  MMI_ULDC1(%[ftmp3], %[src], 0x00)
316  MMI_ULDC1(%[ftmp4], %[src], 0x01)
317  MMI_ULDC1(%[ftmp5], %[src], 0x02)
318  MMI_ULDC1(%[ftmp6], %[src], 0x03)
 /* Centre taps: ftmp3/ftmp4 = (src[x] + src[x+1]) * ff_pw_20, low/high. */
319  "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
320  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
321  "punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
322  "punpckhbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
323  "paddsh %[ftmp3], %[ftmp7], %[ftmp9] \n\t"
324  "paddsh %[ftmp4], %[ftmp8], %[ftmp10] \n\t"
325  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_20] \n\t"
326  "pmullh %[ftmp4], %[ftmp4], %[ff_pw_20] \n\t"
 /* Inner taps: ftmp2/ftmp5 = (src[x-1] + src[x+2]) * ff_pw_5, low/high. */
327  "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
328  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
329  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
330  "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
331  "paddsh %[ftmp2], %[ftmp7], %[ftmp9] \n\t"
332  "paddsh %[ftmp5], %[ftmp8], %[ftmp10] \n\t"
333  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
334  "pmullh %[ftmp5], %[ftmp5], %[ff_pw_5] \n\t"
 /* Outer taps: ftmp1/ftmp6 = src[x-2] + src[x+3], low/high. */
335  "punpcklbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
336  "punpckhbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t"
337  "punpcklbh %[ftmp9], %[ftmp6], %[ftmp0] \n\t"
338  "punpckhbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t"
339  "paddsh %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
340  "paddsh %[ftmp6], %[ftmp8], %[ftmp10] \n\t"
 /* Combine, add the rounding term, shift and pack both halves. */
341  "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
342  "psubsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
343  "paddsh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
344  "paddsh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
345  "paddsh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
346  "paddsh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
347  "psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
348  "psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t"
349  "packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t"
 /* Average the filtered row with the existing dst row, then store. */
350  MMI_LDC1(%[ftmp10], %[dst], 0x00)
351  "pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
352  MMI_SDC1(%[ftmp9], %[dst], 0x00)
 /* Next row; loop until the counter reaches zero. */
353  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
354  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
355  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
356  "bnez %[tmp0], 1b \n\t"
357  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
358  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
359  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
360  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
361  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
362  [ftmp10]"=&f"(ftmp[10]),
363  [tmp0]"=&r"(tmp[0]),
365  [dst]"+&r"(dst), [src]"+&r"(src)
366  : [dstStride]"r"((mips_reg)dstStride),
367  [srcStride]"r"((mips_reg)srcStride),
368  [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
369  [ff_pw_16]"f"(ff_pw_16.f)
370  : "memory"
371  );
372 }
373 
/*
 * Horizontal lowpass over a 16x16 block, averaged into dst.
 *
 * The block is covered by four 8x8 calls to avg_h264_qpel8_h_lowpass_mmi():
 * upper-left, upper-right, lower-left, lower-right, in that order.
 *
 * dst/src               top-left pixel of the destination / source block
 * dstStride/srcStride   byte distance between consecutive rows
 */
static void avg_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    /* Start of the lower 8 rows in each buffer. */
    const uint8_t *src_lower = src + 8 * srcStride;
    uint8_t *dst_lower = dst + 8 * dstStride;

    /* Upper half: left 8 columns, then right 8 columns. */
    avg_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
    avg_h264_qpel8_h_lowpass_mmi(dst + 8, src + 8, dstStride, srcStride);

    /* Lower half: left 8 columns, then right 8 columns. */
    avg_h264_qpel8_h_lowpass_mmi(dst_lower, src_lower, dstStride, srcStride);
    avg_h264_qpel8_h_lowpass_mmi(dst_lower + 8, src_lower + 8, dstStride,
                                 srcStride);
}
384 
/*
 * Vertical 6-tap lowpass filter over a 4x4 block; the result overwrites dst.
 *
 * src is first moved up two rows, then nine source rows are consumed in
 * total. With a..f being six consecutive source rows (widened to 16-bit
 * lanes), each output row is
 *   ((((c + d) << 2) - b - e) * ff_pw_5 + a + f + ff_pw_16) >> 5
 * packed to unsigned bytes with saturation (packushb). The four output rows
 * are fully unrolled and the registers ftmp1..ftmp6 are reused as a sliding
 * window: the oldest row's register receives the next loaded row.
 * ff_pw_5 / ff_pw_16 are constants defined outside this file; their names
 * suggest packed 16-bit words holding 5 and 16 -- confirm.
 *
 * ftmp10 and ftmp11 hold the shift counts 2 and 5 (moved in from tmp0).
 * ftmp8 and ftmp9 are declared as outputs but are not referenced by the asm
 * template.
 *
 * dst/src               top-left pixel of the destination / source block
 * dstStride/srcStride   byte distance between consecutive rows
 */
385 static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
386  int dstStride, int srcStride)
387 {
388  double ftmp[12];
389  uint64_t tmp[1];
391 
 /* The filter needs two rows above the first output row. */
392  src -= 2 * srcStride;
393 
394  __asm__ volatile (
395  ".set push \n\t"
396  ".set noreorder \n\t"
 /* Set-up: ftmp0 = 0, ftmp10 = 2, ftmp11 = 5; load the first five rows. */
397  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
398  "dli %[tmp0], 0x02 \n\t"
399  MMI_LWC1(%[ftmp1], %[src], 0x00)
400  "mtc1 %[tmp0], %[ftmp10] \n\t"
401  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
402  "dli %[tmp0], 0x05 \n\t"
403  MMI_LWC1(%[ftmp2], %[src], 0x00)
404  "mtc1 %[tmp0], %[ftmp11] \n\t"
405  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
406  MMI_LWC1(%[ftmp3], %[src], 0x00)
407  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
408  MMI_LWC1(%[ftmp4], %[src], 0x00)
409  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
410  MMI_LWC1(%[ftmp5], %[src], 0x00)
411  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
412  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
413  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
414  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
415  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
416  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
 /* Output row 0: window a..f = ftmp1, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6. */
417  MMI_LWC1(%[ftmp6], %[src], 0x00)
418  "paddh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
419  "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
420  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
421  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
422  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
423  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
424  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
425  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
426  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
427  "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
428  "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
429  "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
430  MMI_SWC1(%[ftmp7], %[dst], 0x00)
431  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 1: window a..f = ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, ftmp1. */
432  MMI_LWC1(%[ftmp1], %[src], 0x00)
433  "paddh %[ftmp7], %[ftmp4], %[ftmp5] \n\t"
434  "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
435  "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
436  "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
437  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
438  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
439  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
440  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
441  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
442  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
443  "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
444  "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
445  MMI_SWC1(%[ftmp7], %[dst], 0x00)
446  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 2: window a..f = ftmp3, ftmp4, ftmp5, ftmp6, ftmp1, ftmp2. */
447  MMI_LWC1(%[ftmp2], %[src], 0x00)
448  "paddh %[ftmp7], %[ftmp5], %[ftmp6] \n\t"
449  "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
450  "psubh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
451  "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
452  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
453  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
454  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
455  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
456  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
457  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
458  "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
459  "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
460  MMI_SWC1(%[ftmp7], %[dst], 0x00)
461  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 3: window a..f = ftmp4, ftmp5, ftmp6, ftmp1, ftmp2, ftmp3. */
462  MMI_LWC1(%[ftmp3], %[src], 0x00)
463  "paddh %[ftmp7], %[ftmp6], %[ftmp1] \n\t"
464  "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
465  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
466  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
467  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
468  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
469  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
470  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
471  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
472  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
473  "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
474  "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
475  MMI_SWC1(%[ftmp7], %[dst], 0x00)
476  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
477  ".set pop \n\t"
478  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
479  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
480  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
481  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
482  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
483  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
484  [tmp0]"=&r"(tmp[0]),
486  [dst]"+&r"(dst), [src]"+&r"(src)
487  : [dstStride]"r"((mips_reg)dstStride),
488  [srcStride]"r"((mips_reg)srcStride),
489  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
490  : "memory"
491  );
492 }
493 
/*
 * Vertical 6-tap lowpass filter over an 8x8 block; the result overwrites
 * dst.
 *
 * The block is processed as w = 2 vertical strips of 4 pixels each. For one
 * strip the asm loads source rows one at a time (starting two rows above the
 * block), widens them to 16-bit lanes and, with a..f being six consecutive
 * rows, produces each output row as
 *   ((((c + d) << 2) - b - e) * ff_pw_5 + a + f + ff_pw_16) >> 5
 * packed to unsigned bytes with saturation (packushb). The rows are fully
 * unrolled and ftmp0..ftmp5 are reused as a sliding window.
 * ff_pw_5 / ff_pw_16 are constants defined outside this file; their names
 * suggest packed 16-bit words holding 5 and 16 -- confirm.
 *
 * ftmp7 is the zero register used for widening; ftmp8 and ftmp9 hold the
 * shift counts 2 and 5 (moved in from tmp0).
 *
 * The asm contains 16 unrolled output rows, but a branch after the eighth
 * row skips the rest unless h equals 0x10. h is initialised to 8 and is
 * never written, so in this function only the first eight rows execute.
 *
 * dst/src               top-left pixel of the destination / source block
 * dstStride/srcStride   byte distance between consecutive rows
 */
494 static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
495  int dstStride, int srcStride)
496 {
497  int w = 2;
498  int h = 8;
499  double ftmp[10];
500  uint64_t tmp[1];
502 
 /* The filter needs two rows above the first output row. */
503  src -= 2 * srcStride;
504 
 /* One iteration per 4-pixel-wide strip. */
505  while (w--) {
506  __asm__ volatile (
507  ".set push \n\t"
508  ".set noreorder \n\t"
 /* Set-up: ftmp8 = 2, ftmp9 = 5, ftmp7 = 0; load the first five rows. */
509  "dli %[tmp0], 0x02 \n\t"
510  MMI_LWC1(%[ftmp0], %[src], 0x00)
511  "mtc1 %[tmp0], %[ftmp8] \n\t"
512  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
513  "dli %[tmp0], 0x05 \n\t"
514  MMI_LWC1(%[ftmp1], %[src], 0x00)
515  "mtc1 %[tmp0], %[ftmp9] \n\t"
516  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
517  MMI_LWC1(%[ftmp2], %[src], 0x00)
518  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
519  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
520  MMI_LWC1(%[ftmp3], %[src], 0x00)
521  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
522  MMI_LWC1(%[ftmp4], %[src], 0x00)
523  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
 /* Output row 0 (the remaining widening is interleaved with the maths). */
524  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
525  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
526  MMI_LWC1(%[ftmp5], %[src], 0x00)
527  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
528  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
529  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
530  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
531  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
532  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
533  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
534  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
535  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
536  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
537  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
538  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
539  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
540  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
541  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
542  MMI_SWC1(%[ftmp6], %[dst], 0x00)
543  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 1. */
544  MMI_LWC1(%[ftmp0], %[src], 0x00)
545  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
546  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
547  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
548  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
549  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
550  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
551  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
552  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
553  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
554  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
555  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
556  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
557  MMI_SWC1(%[ftmp6], %[dst], 0x00)
558  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 2. */
559  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
560  MMI_LWC1(%[ftmp1], %[src], 0x00)
561  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
562  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
563  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
564  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
565  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
566  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
567  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
568  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
569  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
570  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
571  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
572  MMI_SWC1(%[ftmp6], %[dst], 0x00)
 /* Output row 3. */
573  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
574  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
575  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
576  MMI_LWC1(%[ftmp2], %[src], 0x00)
577  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
578  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
579  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
580  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
581  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
582  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
583  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
584  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
585  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
586  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
587  MMI_SWC1(%[ftmp6], %[dst], 0x00)
 /* Output row 4. */
588  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
589  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
590  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
591  MMI_LWC1(%[ftmp3], %[src], 0x00)
592  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
593  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
594  "punpcklbh %[ftmp3] , %[ftmp3], %[ftmp7] \n\t"
595  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
596  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
597  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
598  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
599  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
600  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
601  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
602  MMI_SWC1(%[ftmp6], %[dst], 0x00)
 /* Output row 5. */
603  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
604  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
605  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
606  MMI_LWC1(%[ftmp4], %[src], 0x00)
607  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
608  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
609  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
610  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
611  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
612  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
613  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
614  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
615  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
616  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
617  MMI_SWC1(%[ftmp6], %[dst], 0x00)
 /* Output row 6. */
618  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
619  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
620  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
621  MMI_LWC1(%[ftmp5], %[src], 0x00)
622  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
623  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
624  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
625  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
626  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
627  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
628  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
629  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
630  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
631  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
632  MMI_SWC1(%[ftmp6], %[dst], 0x00)
 /* Output row 7. */
633  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
634  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
635  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
636  MMI_LWC1(%[ftmp0], %[src], 0x00)
637  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
638  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
639  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
640  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
641  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
642  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
643  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
644  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
645  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
646  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
647  MMI_SWC1(%[ftmp6], %[dst], 0x00)
 /*
  * Leave after eight rows unless h == 0x10. Under .set noreorder the
  * dst advance that follows sits in the branch delay slot and therefore
  * runs whether or not the branch is taken.
  */
648  "bne %[h], 0x10, 2f \n\t"
649  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output rows 8..15: only reached when h == 0x10. */
650  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
651  MMI_LWC1(%[ftmp1], %[src], 0x00)
652  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
653  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
654  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
655  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
656  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
657  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
658  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
659  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
660  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
661  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
662  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
663  MMI_SWC1(%[ftmp6], %[dst], 0x00)
664  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
665  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
666  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
667  MMI_LWC1(%[ftmp2], %[src], 0x00)
668  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
669  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
670  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
671  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
672  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
673  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
674  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
675  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
676  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
677  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
678  MMI_SWC1(%[ftmp6], %[dst], 0x00)
679  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
680  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
681  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
682  MMI_LWC1(%[ftmp3], %[src], 0x00)
683  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
684  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
685  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
686  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
687  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
688  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
689  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
690  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
691  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
692  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
693  MMI_SWC1(%[ftmp6], %[dst], 0x00)
694  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
695  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
696  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
697  MMI_LWC1(%[ftmp4], %[src], 0x00)
698  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
699  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
700  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
701  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
702  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
703  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
704  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
705  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
706  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
707  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
708  MMI_SWC1(%[ftmp6], %[dst], 0x00)
709  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
710  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
711  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
712  MMI_LWC1(%[ftmp5], %[src], 0x00)
713  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
714  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
715  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
716  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
717  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
718  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
719  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
720  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
721  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
722  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
723  MMI_SWC1(%[ftmp6], %[dst], 0x00)
724  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
725  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
726  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
727  MMI_LWC1(%[ftmp0], %[src], 0x00)
728  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
729  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
730  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
731  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
732  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
733  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
734  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
735  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
736  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
737  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
738  MMI_SWC1(%[ftmp6], %[dst], 0x00)
739  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
740  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
741  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
742  MMI_LWC1(%[ftmp1], %[src], 0x00)
743  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
744  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
745  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
746  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
747  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
748  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
749  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
750  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
751  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
752  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
753  MMI_SWC1(%[ftmp6], %[dst], 0x00)
754  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
755  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
756  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
757  MMI_LWC1(%[ftmp2], %[src], 0x00)
758  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
759  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
760  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
761  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
762  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
763  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
764  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
765  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
766  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
767  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
768  MMI_SWC1(%[ftmp6], %[dst], 0x00)
769  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
770  "2: \n\t"
771  ".set pop \n\t"
772  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
773  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
774  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
775  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
776  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
777  [tmp0]"=&r"(tmp[0]),
779  [src]"+&r"(src), [dst]"+&r"(dst),
780  [h]"+&r"(h)
781  : [dstStride]"r"((mips_reg)dstStride),
782  [srcStride]"r"((mips_reg)srcStride),
783  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
784  : "memory"
785  );
786 
 /*
  * The asm advanced src by h + 5 rows and dst by h rows. Rewind both to
  * the strip's starting row and step 4 pixels right for the next strip.
  */
787  src += 4 - (h + 5) * srcStride;
788  dst += 4 - h * dstStride;
789  }
790 }
791 
/*
 * Vertical lowpass over a 16x16 block, writing the result to dst.
 *
 * The block is covered by four 8x8 calls to put_h264_qpel8_v_lowpass_mmi():
 * upper-left, upper-right, lower-left, lower-right, in that order.
 *
 * dst/src               top-left pixel of the destination / source block
 * dstStride/srcStride   byte distance between consecutive rows
 */
static void put_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    /* Start of the lower 8 rows in each buffer. */
    const uint8_t *src_lower = src + 8 * srcStride;
    uint8_t *dst_lower = dst + 8 * dstStride;

    /* Upper half: left 8 columns, then right 8 columns. */
    put_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride);
    put_h264_qpel8_v_lowpass_mmi(dst + 8, src + 8, dstStride, srcStride);

    /* Lower half: left 8 columns, then right 8 columns. */
    put_h264_qpel8_v_lowpass_mmi(dst_lower, src_lower, dstStride, srcStride);
    put_h264_qpel8_v_lowpass_mmi(dst_lower + 8, src_lower + 8, dstStride,
                                 srcStride);
}
802 
/*
 * Vertical 6-tap lowpass for a block that is one 32-bit word wide (4 bytes per
 * row, going by the LWC1/SWC1 macro names) and 4 rows tall, averaged into dst.
 *
 * For every output row the asm evaluates, per 16-bit lane,
 *     (((c + d) << 2) - b - e) * ff_pw_5 + a + f + ff_pw_16
 * where a..f are six consecutive source rows beginning two rows above the
 * output row.  The sum is shifted right arithmetically by 5, packed to
 * unsigned bytes with saturation, combined with the bytes already in dst via
 * pavgb, and stored back to dst.
 * NOTE(review): ff_pw_5 / ff_pw_16 are defined elsewhere; presumably packed
 * halfword constants 5 and 16, which makes this the 20/-5/1 filter with
 * rounding -- confirm.
 *
 * dst        destination rows; read and rewritten in place (4 rows)
 * src        source pixels; rows src - 2*srcStride .. src + 6*srcStride are read
 * dstStride  byte distance between dst rows
 * srcStride  byte distance between src rows
 */
803 static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
804  int dstStride, int srcStride)
805 {
806  double ftmp[10];
807  uint64_t tmp[1];
808 
 /* The first output row needs the two source rows above it. */
809  src -= 2 * srcStride;
810 
811  __asm__ volatile (
812  ".set push \n\t"
813  ".set noreorder \n\t"
 /* Constants: ftmp9 = 2 (shift used for the "<< 2" step), ftmp7 = 0 (zero
  * operand for the byte -> halfword unpack), ftmp8 = 5 (final right shift). */
814  "dli %[tmp0], 0x02 \n\t"
815  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
816  "mtc1 %[tmp0], %[ftmp9] \n\t"
817  "dli %[tmp0], 0x05 \n\t"
 /* Prime the filter window: five source rows into ftmp0..ftmp4. */
818  MMI_LWC1(%[ftmp0], %[src], 0x00)
819  "mtc1 %[tmp0], %[ftmp8] \n\t"
820  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
821  MMI_LWC1(%[ftmp1], %[src], 0x00)
822  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
823  MMI_LWC1(%[ftmp2], %[src], 0x00)
824  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
825  MMI_LWC1(%[ftmp3], %[src], 0x00)
826  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
827  MMI_LWC1(%[ftmp4], %[src], 0x00)
828  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
 /* Widen the loaded bytes to 16-bit lanes by interleaving with zero. */
829  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
830  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
831  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
832  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
833  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
 /* Output row 0: the sixth tap is loaded into ftmp5; ftmp6 accumulates.
  * From here on ftmp0..ftmp5 rotate: each row reloads the register that
  * held the oldest tap of the previous row. */
834  MMI_LWC1(%[ftmp5], %[src], 0x00)
835  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
836  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
837  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
838  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
839  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
840  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
841  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
842  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
843  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
844  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
845  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
846  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
 /* Average with the existing destination row, then write it back.
  * ftmp0 is free to use as scratch here; it is reloaded for the next row. */
847  MMI_LWC1(%[ftmp0], %[dst], 0x00)
848  "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
849  MMI_SWC1(%[ftmp6], %[dst], 0x00)
850  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 1. */
851  MMI_LWC1(%[ftmp0], %[src], 0x00)
852  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
853  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
854  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
855  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
856  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
857  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
858  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
859  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
860  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
861  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
862  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
863  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
864  MMI_LWC1(%[ftmp1], %[dst], 0x00)
865  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
866  MMI_SWC1(%[ftmp6], %[dst], 0x00)
867  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 2. */
868  MMI_LWC1(%[ftmp1], %[src], 0x00)
869  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
870  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
871  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
872  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
873  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
874  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
875  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
876  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
877  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
878  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
879  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
880  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
881  MMI_LWC1(%[ftmp2], %[dst], 0x00)
882  "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
883  MMI_SWC1(%[ftmp6], %[dst], 0x00)
884  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 3 (last). */
885  MMI_LWC1(%[ftmp2], %[src], 0x00)
886  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
887  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
888  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
889  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
890  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
891  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
892  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
893  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
894  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
895  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
896  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
897  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
898  MMI_LWC1(%[ftmp3], %[dst], 0x00)
899  "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
900  MMI_SWC1(%[ftmp6], %[dst], 0x00)
901  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
902  ".set pop \n\t"
 /* All scratch registers are early-clobber outputs; src and dst are
  * advanced inside the asm and therefore read-write operands. */
903  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
904  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
905  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
906  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
907  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
908  [tmp0]"=&r"(tmp[0]),
909  [src]"+&r"(src), [dst]"+&r"(dst)
910  : [dstStride]"r"((mips_reg)dstStride),
911  [srcStride]"r"((mips_reg)srcStride),
912  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
913  : "memory"
914  );
915 }
916 
/*
 * Vertical 6-tap lowpass for an 8-byte-wide, 8-row block, averaged into dst.
 *
 * The block is processed as w = 2 column strips, each one 32-bit word wide
 * (going by the LWC1/SWC1 macro names).  Within a strip every output row is,
 * per 16-bit lane,
 *     (((c + d) << 2) - b - e) * ff_pw_5 + a + f + ff_pw_16
 * over six consecutive source rows a..f, shifted right arithmetically by 5,
 * packed to unsigned bytes with saturation, combined with the existing dst
 * row via pavgb, and stored back.
 * NOTE(review): ff_pw_5 / ff_pw_16 are defined elsewhere; presumably packed
 * halfword constants 5 and 16 -- confirm.
 *
 * The asm also contains the code for rows 8..15, guarded by a comparison of h
 * against 0x10.  h is initialised to 8 and no instruction in the template
 * writes it, so that tail is always skipped when entered from this function.
 *
 * dst        destination rows; read and rewritten in place
 * src        source pixels; rows from src - 2*srcStride onwards are read
 * dstStride  byte distance between dst rows
 * srcStride  byte distance between src rows
 */
917 static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
918  int dstStride, int srcStride)
919 {
920  int w = 2;
921  int h = 8;
922  double ftmp[10];
923  uint64_t tmp[1];
925 
 /* The first output row needs the two source rows above it. */
926  src -= 2 * srcStride;
927 
 /* One iteration per 4-byte-wide column strip. */
928  while (w--) {
929  __asm__ volatile (
930  ".set push \n\t"
931  ".set noreorder \n\t"
 /* Constants: ftmp9 = 2 (shift for the "<< 2" step), ftmp7 = 0 (zero
  * operand for the byte -> halfword unpack), ftmp8 = 5 (final right shift). */
932  "dli %[tmp0], 0x02 \n\t"
933  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
934  "mtc1 %[tmp0], %[ftmp9] \n\t"
935  "dli %[tmp0], 0x05 \n\t"
 /* Prime the filter window: five source rows into ftmp0..ftmp4. */
936  MMI_LWC1(%[ftmp0], %[src], 0x00)
937  "mtc1 %[tmp0], %[ftmp8] \n\t"
938  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
939  MMI_LWC1(%[ftmp1], %[src], 0x00)
940  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
941  MMI_LWC1(%[ftmp2], %[src], 0x00)
942  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
943  MMI_LWC1(%[ftmp3], %[src], 0x00)
944  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
945  MMI_LWC1(%[ftmp4], %[src], 0x00)
946  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
 /* Widen the loaded bytes to 16-bit lanes by interleaving with zero. */
947  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
948  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
949  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
950  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
951  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
 /* Output row 0: the sixth tap is loaded into ftmp5; ftmp6 accumulates.
  * From here on ftmp0..ftmp5 rotate: each row reloads the register that
  * held the oldest tap of the previous row, and that same register is
  * briefly reused as scratch for the dst row fed to pavgb. */
952  MMI_LWC1(%[ftmp5], %[src], 0x00)
953  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
954  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
955  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
956  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
957  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
958  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
959  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
960  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
961  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
962  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
963  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
964  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
965  MMI_LWC1(%[ftmp0], %[dst], 0x00)
966  "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
967  MMI_SWC1(%[ftmp6], %[dst], 0x00)
968  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 1. */
969  MMI_LWC1(%[ftmp0], %[src], 0x00)
970  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
971  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
972  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
973  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
974  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
975  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
976  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
977  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
978  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
979  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
980  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
981  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
982  MMI_LWC1(%[ftmp1], %[dst], 0x00)
983  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
984  MMI_SWC1(%[ftmp6], %[dst], 0x00)
985  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 2. */
986  MMI_LWC1(%[ftmp1], %[src], 0x00)
987  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
988  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
989  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
990  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
991  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
992  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
993  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
994  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
995  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
996  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
997  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
998  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
999  MMI_LWC1(%[ftmp2], %[dst], 0x00)
1000  "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1001  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1002  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 3. */
1003  MMI_LWC1(%[ftmp2], %[src], 0x00)
1004  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1005  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1006  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1007  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1008  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1009  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1010  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1011  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1012  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1013  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1014  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1015  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1016  MMI_LWC1(%[ftmp3], %[dst], 0x00)
1017  "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1018  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1019  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 4. */
1020  MMI_LWC1(%[ftmp3], %[src], 0x00)
1021  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1022  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1023  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1024  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1025  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1026  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1027  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1028  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1029  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1030  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1031  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1032  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1033  MMI_LWC1(%[ftmp4], %[dst], 0x00)
1034  "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1035  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1036  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 5. */
1037  MMI_LWC1(%[ftmp4], %[src], 0x00)
1038  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1039  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1040  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1041  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1042  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1043  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1044  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1045  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1046  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1047  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1048  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1049  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1050  MMI_LWC1(%[ftmp5], %[dst], 0x00)
1051  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1052  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1053  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 6. */
1054  MMI_LWC1(%[ftmp5], %[src], 0x00)
1055  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1056  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1057  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1058  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1059  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1060  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1061  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1062  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1063  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1064  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1065  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1066  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1067  MMI_LWC1(%[ftmp0], %[dst], 0x00)
1068  "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1069  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1070  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 7. */
1071  MMI_LWC1(%[ftmp0], %[src], 0x00)
1072  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1073  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1074  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1075  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1076  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1077  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1078  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1079  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1080  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1081  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1082  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1083  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1084  MMI_LWC1(%[ftmp1], %[dst], 0x00)
1085  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1086  MMI_SWC1(%[ftmp6], %[dst], 0x00)
 /* Stop after 8 rows unless h == 16.  Because of ".set noreorder" the
  * dst advance on the next line sits in the branch delay slot and is
  * executed on both paths, so dst has moved exactly h rows at label 2. */
1087  "bne %[h], 0x10, 2f \n\t"
1088  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 8 (rows 8..15 are only reached when h == 16). */
1089  MMI_LWC1(%[ftmp1], %[src], 0x00)
1090  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1091  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1092  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1093  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1094  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1095  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1096  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1097  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1098  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1099  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1100  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1101  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1102  MMI_LWC1(%[ftmp2], %[dst], 0x00)
1103  "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1104  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1105  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 9. */
1106  MMI_LWC1(%[ftmp2], %[src], 0x00)
1107  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1108  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1109  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1110  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1111  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1112  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1113  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1114  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1115  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1116  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1117  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1118  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1119  MMI_LWC1(%[ftmp3], %[dst], 0x00)
1120  "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1121  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1122  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 10. */
1123  MMI_LWC1(%[ftmp3], %[src], 0x00)
1124  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1125  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1126  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1127  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1128  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1129  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1130  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1131  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1132  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1133  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1134  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1135  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1136  MMI_LWC1(%[ftmp4], %[dst], 0x00)
1137  "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1138  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1139  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 11. */
1140  MMI_LWC1(%[ftmp4], %[src], 0x00)
1141  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1142  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1143  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1144  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1145  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1146  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1147  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1148  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1149  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1150  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1151  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1152  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1153  MMI_LWC1(%[ftmp5], %[dst], 0x00)
1154  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1155  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1156  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 12. */
1157  MMI_LWC1(%[ftmp5], %[src], 0x00)
1158  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1159  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1160  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1161  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1162  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1163  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1164  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1165  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1166  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1167  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1168  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1169  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1170  MMI_LWC1(%[ftmp0], %[dst], 0x00)
1171  "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1172  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1173  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 13. */
1174  MMI_LWC1(%[ftmp0], %[src], 0x00)
1175  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1176  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1177  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1178  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1179  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1180  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1181  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1182  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1183  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1184  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1185  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1186  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1187  MMI_LWC1(%[ftmp1], %[dst], 0x00)
1188  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1189  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1190  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 14. */
1191  MMI_LWC1(%[ftmp1], %[src], 0x00)
1192  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1193  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1194  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1195  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1196  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1197  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1198  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1199  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1200  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1201  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1202  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1203  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1204  MMI_LWC1(%[ftmp2], %[dst], 0x00)
1205  "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1206  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1207  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 15. */
1208  MMI_LWC1(%[ftmp2], %[src], 0x00)
1209  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1210  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1211  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1212  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1213  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1214  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1215  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1216  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1217  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1218  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1219  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1220  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1221  MMI_LWC1(%[ftmp3], %[dst], 0x00)
1222  "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1223  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1224  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1225  "2: \n\t"
1226  ".set pop \n\t"
 /* All scratch registers are early-clobber outputs; src and dst are
  * advanced inside the asm, so they are read-write operands.  h is also
  * declared read-write although the template only reads it. */
1227  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1228  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1229  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1230  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1231  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1232  [tmp0]"=&r"(tmp[0]),
1234  [src]"+&r"(src), [dst]"+&r"(dst),
1235  [h]"+&r"(h)
1236  : [dstStride]"r"((mips_reg)dstStride),
1237  [srcStride]"r"((mips_reg)srcStride),
1238  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
1239  : "memory"
1240  );
1241 
 /* The asm left src (h + 5) rows and dst h rows further down.  Rewind
  * both to the top and step 4 bytes right for the next column strip. */
1242  src += 4 - (h + 5) * srcStride;
1243  dst += 4 - h * dstStride;
1244  }
1245 }
1246 
/*
 * Averaging vertical lowpass over a 16-wide, 16-row area, assembled from four
 * calls to avg_h264_qpel8_v_lowpass_mmi(), one per 8x8 quadrant.
 *
 * Call order is: upper-left, upper-right, lower-left, lower-right.
 *
 * dst        destination pixels (read and rewritten by the helper)
 * src        source pixels
 * dstStride  byte distance between destination rows
 * srcStride  byte distance between source rows
 */
static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride)
{
    /* Start of the lower half: eight rows further down in each buffer. */
    uint8_t *const lower_dst       = dst + 8 * dstStride;
    const uint8_t *const lower_src = src + 8 * srcStride;

    /* Upper eight rows: left quadrant, then the one 8 bytes to its right. */
    avg_h264_qpel8_v_lowpass_mmi(dst,     src,     dstStride, srcStride);
    avg_h264_qpel8_v_lowpass_mmi(dst + 8, src + 8, dstStride, srcStride);

    /* Lower eight rows, same left/right split. */
    avg_h264_qpel8_v_lowpass_mmi(lower_dst,     lower_src,     dstStride, srcStride);
    avg_h264_qpel8_v_lowpass_mmi(lower_dst + 8, lower_src + 8, dstStride, srcStride);
}
1257 
/*
 * Combined horizontal + vertical 6-tap lowpass for a 4x4 block.
 *
 * Pass 1 (asm loop, 9 iterations): for nine consecutive source rows starting
 * at src - 2*srcStride, six overlapping words are loaded at byte offsets
 * -2..+3, widened to 16-bit lanes, and
 *     (ftmp3 + ftmp4) * ff_pw_20 - (ftmp2 + ftmp5) * ff_pw_5 + (ftmp1 + ftmp6)
 * is stored as one 8-byte group (four int16_t values) per row into _tmp.
 * No rounding or shift is applied in this pass.
 *
 * Pass 2 (C loop): for each of the 4 columns, the same 6-tap pattern is
 * applied vertically to the intermediate rows and handed to op2_put.
 * NOTE(review): INIT_CLIP and op2_put are macros defined elsewhere;
 * presumably op2_put rounds, shifts and clips to 8 bits -- confirm.
 * NOTE(review): ff_pw_20 / ff_pw_5 are defined elsewhere; presumably packed
 * halfword constants 20 and 5 -- confirm.
 *
 * dst        destination pixels (4 columns x 4 rows are written)
 * src        source pixels
 * dstStride  distance between dst rows, in elements of dst
 * srcStride  byte distance between src rows
 */
1258 static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1259  int dstStride, int srcStride)
1260 {
1261  INIT_CLIP
1262  int i;
 /* 9 intermediate rows x 4 columns of 16-bit horizontal filter sums. */
1263  int16_t _tmp[36];
1264  int16_t *tmp = _tmp;
1265  double ftmp[10];
1266  uint64_t tmp0;
1268 
 /* The vertical pass needs two rows above the block (and three below). */
1269  src -= 2*srcStride;
1270 
1271  __asm__ volatile (
 /* ftmp0 = 0 for the byte -> halfword unpack; tmp0 = row counter (9). */
1272  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1273  "dli %[tmp0], 0x09 \n\t"
1274  "1: \n\t"
 /* Six unaligned loads, each shifted by one byte, give the six taps
  * for four adjacent output pixels. */
1275  MMI_ULWC1(%[ftmp1], %[src], -0x02)
1276  MMI_ULWC1(%[ftmp2], %[src], -0x01)
1277  MMI_ULWC1(%[ftmp3], %[src], 0x00)
1278  MMI_ULWC1(%[ftmp4], %[src], 0x01)
1279  MMI_ULWC1(%[ftmp5], %[src], 0x02)
1280  MMI_ULWC1(%[ftmp6], %[src], 0x03)
1281  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1282  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1283  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1284  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1285  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1286  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
 /* Pair the taps symmetrically: centre, inner and outer pairs. */
1287  "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1288  "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
1289  "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
1290  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
1291  "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
1292  "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1293  "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
 /* Store four 16-bit sums, then step to the next source / tmp row. */
1294  MMI_SDC1(%[ftmp9], %[tmp], 0x00)
1295  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
1296  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1297  PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t"
1298  "bnez %[tmp0], 1b \n\t"
1299  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1300  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1301  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1302  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1303  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1304  [tmp0]"=&r"(tmp0),
1306  [tmp]"+&r"(tmp), [src]"+&r"(src)
 /* tmpStride is 8 bytes: one row of four int16_t values. */
1307  : [tmpStride]"r"(8),
1308  [srcStride]"r"((mips_reg)srcStride),
1309  [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f)
1310  : "memory"
1311  );
1312 
 /* The asm advanced tmp by 9 rows * 8 bytes = 36 elements, i.e. to
  * _tmp + 36.  Stepping back 28 leaves it at _tmp + 8 (intermediate row 2),
  * so tmp[-8] below is row 0 and tmp[24] is row 8 of the current column. */
1313  tmp -= 28;
1314 
 /* One iteration per output column; consecutive rows are 4 elements apart.
  * Note that the block-local tmp0 below shadows the uint64_t tmp0 above. */
1315  for (i=0; i<4; i++) {
1316  const int16_t tmpB= tmp[-8];
1317  const int16_t tmpA= tmp[-4];
1318  const int16_t tmp0= tmp[ 0];
1319  const int16_t tmp1= tmp[ 4];
1320  const int16_t tmp2= tmp[ 8];
1321  const int16_t tmp3= tmp[12];
1322  const int16_t tmp4= tmp[16];
1323  const int16_t tmp5= tmp[20];
1324  const int16_t tmp6= tmp[24];
1325  op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1326  op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1327  op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1328  op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1329  dst++;
1330  tmp++;
1331  }
1332 }
1333 
1335  const uint8_t *src, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
1336 {
 /*
  * First (vertical) pass of the two-stage hv lowpass for 8- or 16-row blocks.
  *
  * NOTE(review): the opening line of this function's signature (return type,
  * name and the declaration of `tmp`) is not part of this excerpt; `tmp` is
  * used below as the intermediate output buffer -- confirm its element type.
  *
  * The area is processed in w = (size + 8) >> 2 column strips, each one
  * 32-bit word wide (going by the ULWC1 macro name).  For every output row
  * the asm computes, per 16-bit lane,
  *     (((c + d) << 2) - b - e) * ff_pw_5 + a + f + ff_pw_16
  * over six consecutive source rows a..f and stores the unshifted sum as an
  * 8-byte group at tmp + row * 0x30.  Eight rows are always produced; eight
  * more follow only when size == 0x10.
  * NOTE(review): ff_pw_5 / ff_pw_16 are defined elsewhere; presumably packed
  * halfword constants 5 and 16 -- confirm.
  *
  * tmpStride is not referenced in this body; the row pitch of tmp is the
  * hard-coded 0x30 bytes used in the store offsets.
  */
1337  int w = (size + 8) >> 2;
1338  double ftmp[11];
1339  uint64_t tmp0;
1341 
 /* Start two rows above and two bytes left of the block origin. */
1342  src -= 2 * srcStride + 2;
1343 
 /* One iteration per 4-byte-wide column strip. */
1344  while (w--) {
1345  __asm__ volatile (
 /* ftmp10 = 2 (shift for the "<< 2" step), ftmp7 = 0 (zero operand for
  * the byte -> halfword unpack); five source rows prime ftmp0..ftmp4. */
1346  "dli %[tmp0], 0x02 \n\t"
1347  MMI_ULWC1(%[ftmp0], %[src], 0x00)
1348  "mtc1 %[tmp0], %[ftmp10] \n\t"
1349  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1350  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1351  MMI_ULWC1(%[ftmp1], %[src], 0x00)
1352  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1353  MMI_ULWC1(%[ftmp2], %[src], 0x00)
1354  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1355  MMI_ULWC1(%[ftmp3], %[src], 0x00)
1356  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1357  MMI_ULWC1(%[ftmp4], %[src], 0x00)
1358  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1359  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1360  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1361  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1362  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1363  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
 /* Output row 0 -> tmp + 0x00.  The sixth tap goes into ftmp5; ftmp6
  * accumulates.  ftmp0..ftmp5 then rotate from row to row. */
1364  MMI_ULWC1(%[ftmp5], %[src], 0x00)
1365  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1366  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1367  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1368  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1369  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1370  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1371  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1372  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1373  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1374  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1375  MMI_SDC1(%[ftmp6], %[tmp], 0x00)
 /* Output row 1 -> tmp + 0x30. */
1376  MMI_ULWC1(%[ftmp0], %[src], 0x00)
1377  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1378  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1379  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1380  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1381  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1382  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1383  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1384  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1385  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1386  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1387  MMI_SDC1(%[ftmp6], %[tmp], 0x30)
 /* Output row 2 -> tmp + 0x60. */
1388  MMI_ULWC1(%[ftmp1], %[src], 0x00)
1389  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1390  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1391  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1392  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1393  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1394  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1395  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1396  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1397  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1398  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1399  MMI_SDC1(%[ftmp6], %[tmp], 0x60)
 /* Output row 3 -> tmp + 0x90. */
1400  MMI_ULWC1(%[ftmp2], %[src], 0x00)
1401  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1402  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1403  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1404  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1405  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1406  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1407  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1408  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1409  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1410  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1411  MMI_SDC1(%[ftmp6], %[tmp], 0x90)
 /* Output row 4 -> tmp + 0xc0. */
1412  MMI_ULWC1(%[ftmp3], %[src], 0x00)
1413  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1414  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1415  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1416  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1417  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1418  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1419  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1420  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1421  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1422  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1423  MMI_SDC1(%[ftmp6], %[tmp], 0xc0)
 /* Output row 5 -> tmp + 0xf0. */
1424  MMI_ULWC1(%[ftmp4], %[src], 0x00)
1425  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1426  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1427  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1428  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1429  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1430  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1431  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1432  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1433  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1434  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1435  MMI_SDC1(%[ftmp6], %[tmp], 0xf0)
 /* Output row 6 -> tmp + 0x120. */
1436  MMI_ULWC1(%[ftmp5], %[src], 0x00)
1437  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1438  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1439  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1440  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1441  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1442  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1443  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1444  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1445  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1446  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1447  MMI_SDC1(%[ftmp6], %[tmp], 0x120)
 /* Output row 7 -> tmp + 0x150. */
1448  MMI_ULWC1(%[ftmp0], %[src], 0x00)
1449  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1450  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1451  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1452  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1453  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1454  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1455  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1456  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1457  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1458  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1459  MMI_SDC1(%[ftmp6], %[tmp], 0x150)
 /* Eight rows done; skip the remaining eight unless size == 16. */
1460  "bne %[size], 0x10, 2f \n\t"
1461 
 /* Output row 8 -> tmp + 0x180. */
1462  MMI_ULWC1(%[ftmp1], %[src], 0x00)
1463  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1464  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1465  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1466  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1467  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1468  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1469  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1470  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1471  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1472  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1473  MMI_SDC1(%[ftmp6], %[tmp], 0x180)
 /* Output row 9 -> tmp + 0x1b0. */
1474  MMI_ULWC1(%[ftmp2], %[src], 0x00)
1475  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1476  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1477  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1478  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1479  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1480  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1481  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1482  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1483  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1484  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1485  MMI_SDC1(%[ftmp6], %[tmp], 0x1b0)
 /* Output row 10 -> tmp + 0x1e0. */
1486  MMI_ULWC1(%[ftmp3], %[src], 0x00)
1487  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1488  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1489  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1490  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1491  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1492  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1493  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1494  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1495  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1496  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1497  MMI_SDC1(%[ftmp6], %[tmp], 0x1e0)
 /* Output row 11 -> tmp + 0x210. */
1498  MMI_ULWC1(%[ftmp4], %[src], 0x00)
1499  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1500  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1501  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1502  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1503  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1504  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1505  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1506  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1507  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1508  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1509  MMI_SDC1(%[ftmp6], %[tmp], 0x210)
 /* Output row 12 -> tmp + 0x240. */
1510  MMI_ULWC1(%[ftmp5], %[src], 0x00)
1511  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1512  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1513  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1514  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1515  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1516  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1517  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1518  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1519  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1520  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1521  MMI_SDC1(%[ftmp6], %[tmp], 0x240)
 /* Output row 13 -> tmp + 0x270. */
1522  MMI_ULWC1(%[ftmp0], %[src], 0x00)
1523  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1524  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1525  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1526  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1527  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1528  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1529  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1530  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1531  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1532  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1533  MMI_SDC1(%[ftmp6], %[tmp], 0x270)
 /* Output row 14 -> tmp + 0x2a0. */
1534  MMI_ULWC1(%[ftmp1], %[src], 0x00)
1535  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1536  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1537  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1538  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1539  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1540  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1541  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1542  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1543  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1544  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1545  MMI_SDC1(%[ftmp6], %[tmp], 0x2a0)
 /* Output row 15 -> tmp + 0x2d0. */
1546  MMI_ULWC1(%[ftmp2], %[src], 0x00)
1547  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1548  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1549  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1550  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1551  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1552  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1553  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1554  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1555  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1556  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1557  MMI_SDC1(%[ftmp6], %[tmp], 0x2d0)
1558  "2: \n\t"
 /* ftmp8 and ftmp9 are declared as outputs but never referenced in the
  * template above.  tmp is an input only: the asm addresses rows through
  * constant offsets and the pointer itself is advanced in C below. */
1559  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1560  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1561  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1562  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1563  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1564  [ftmp10]"=&f"(ftmp[10]),
1565  [tmp0]"=&r"(tmp0),
1567  [src]"+&r"(src)
1568  : [tmp]"r"(tmp), [size]"r"(size),
1569  [srcStride]"r"((mips_reg)srcStride),
1570  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
1571  : "memory"
1572  );
1573 
 /* Next column strip: advance tmp by 4 elements, and rewind src by the
  * (size + 5) rows the asm consumed while stepping 4 bytes to the right. */
1574  tmp += 4;
1575  src += 4 - (size + 5) * srcStride;
1576  }
1577 }
1578 
1579 static void put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
1580  int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
1581 {
1582  int w = size >> 4;
1583  double ftmp[10];
1584  uint64_t tmp0;
1586 
1587  do {
1588  int h = size;
1589 
1590  __asm__ volatile (
1591  "dli %[tmp0], 0x02 \n\t"
1592  "mtc1 %[tmp0], %[ftmp8] \n\t"
1593  "dli %[tmp0], 0x06 \n\t"
1594  "mtc1 %[tmp0], %[ftmp9] \n\t"
1595  "1: \n\t"
1596  MMI_LDC1(%[ftmp0], %[tmp], 0x00)
1597  MMI_LDC1(%[ftmp3], %[tmp], 0x08)
1598  MMI_LDC1(%[ftmp6], %[tmp], 0x10)
1599  MMI_ULDC1(%[ftmp1], %[tmp], 0x02)
1600  MMI_ULDC1(%[ftmp4], %[tmp], 0x0a)
1601  MMI_ULDC1(%[ftmp5], %[tmp], 0x12)
1602  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1603  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1604  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1605  "paddh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1606  MMI_ULDC1(%[ftmp2], %[tmp], 0x04)
1607  MMI_ULDC1(%[ftmp6], %[tmp], 0x06)
1608  MMI_ULDC1(%[ftmp5], %[tmp], 0x0c)
1609  MMI_ULDC1(%[ftmp7], %[tmp], 0x0e)
1610  "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1611  "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1612  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1613  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1614  "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
1615  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1616  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1617  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1618  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1619  "paddsh %[ftmp3] , %[ftmp3], %[ftmp5] \n\t"
1620  "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
1621  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1622  "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1623  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1624  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1625  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1626  "packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
1627  "addi %[h], %[h], -0x01 \n\t"
1628  MMI_SDC1(%[ftmp0], %[dst], 0x00)
1629  PTR_ADDIU "%[tmp], %[tmp], 0x30 \n\t"
1630  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1631  "bnez %[h], 1b \n\t"
1632  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1633  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1634  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1635  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1636  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1637  [tmp0]"=&r"(tmp0),
1639  [tmp]"+&r"(tmp), [dst]"+&r"(dst),
1640  [h]"+&r"(h)
1641  : [dstStride]"r"((mips_reg)dstStride)
1642  : "memory"
1643  );
1644 
1645  tmp += 8 - size * 24;
1646  dst += 8 - size * dstStride;
1647  } while (w--);
1648 }
1649 
/*
 * Two-pass hv lowpass ("put"): the hv1 pass writes 16-bit intermediates
 * from src into tmp, the hv2 pass turns tmp into 8-bit pixels at dst.
 * `size` is forwarded unchanged to both passes.
 */
static void put_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride, int size)
{
    /* pass 1: src -> tmp */
    put_h264_qpel8or16_hv1_lowpass_mmi(tmp, src,
                                       tmpStride, srcStride, size);
    /* pass 2: tmp -> dst */
    put_h264_qpel8or16_hv2_lowpass_mmi(dst, tmp,
                                       dstStride, tmpStride, size);
}
1657 
/* 8-wide entry point: the shared 8/16 hv lowpass with size fixed to 8. */
static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride)
{
    const int block_size = 8;

    put_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src,
                                      dstStride, tmpStride, srcStride,
                                      block_size);
}
1665 
/* 16-wide entry point: the shared 8/16 hv lowpass with size fixed to 16. */
static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride)
{
    const int block_size = 16;

    put_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src,
                                      dstStride, tmpStride, srcStride,
                                      block_size);
}
1673 
1674 static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
1675  const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
1676 {
1677  int h = 8;
1678  double ftmp[9];
1679  uint64_t tmp[1];
1682 
1683  __asm__ volatile (
1684  "dli %[tmp0], 0x02 \n\t"
1685  "mtc1 %[tmp0], %[ftmp7] \n\t"
1686  "dli %[tmp0], 0x05 \n\t"
1687  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1688  "mtc1 %[tmp0], %[ftmp8] \n\t"
1689  "1: \n\t"
1690  MMI_ULDC1(%[ftmp1], %[src], 0x00)
1691  MMI_ULDC1(%[ftmp3], %[src], 0x01)
1692  "punpckhbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
1693  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1694  "punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t"
1695  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1696  "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1697  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1698  "psllh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1699  "psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1700  MMI_ULDC1(%[ftmp3], %[src], -0x01)
1701  MMI_ULDC1(%[ftmp5], %[src], 0x02)
1702  "punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t"
1703  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1704  "punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1705  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1706  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1707  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1708  "psubh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1709  "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1710  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
1711  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t"
1712  MMI_ULWC1(%[ftmp3], %[src], -0x02)
1713  MMI_ULWC1(%[ftmp6], %[src], 0x07)
1714  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1715  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1716  "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1717  "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1718  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1719  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1720  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1721  "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1722  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
1723  "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
1724  MMI_LDC1(%[ftmp5], %[src2], 0x00)
1725  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1726  PTR_ADDU "%[src], %[src], %[dstStride] \n\t"
1727  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1728  PTR_ADDU "%[h], %[h], -0x01 \n\t"
1729  MMI_SDC1(%[ftmp1], %[dst], 0x00)
1730  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1731  PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t"
1732  "bgtz %[h], 1b \n\t"
1733  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1734  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1735  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1736  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1737  [ftmp8]"=&f"(ftmp[8]),
1738  [tmp0]"=&r"(tmp[0]),
1741  [src]"+&r"(src), [dst]"+&r"(dst),
1742  [src2]"+&r"(src2), [h]"+&r"(h)
1743  : [src2Stride]"r"((mips_reg)src2Stride),
1744  [dstStride]"r"((mips_reg)dstStride),
1745  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
1746  : "memory"
1747  );
1748 }
1749 
1750 static void put_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
1751  const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
1752 {
1753  double ftmp[7];
1754  uint64_t tmp0;
1757 
1758  do {
1759  __asm__ volatile (
1760  "dli %[tmp0], 0x05 \n\t"
1761  MMI_ULDC1(%[ftmp0], %[src16], 0x00)
1762  "mtc1 %[tmp0], %[ftmp6] \n\t"
1763  MMI_ULDC1(%[ftmp1], %[src16], 0x08)
1764  MMI_ULDC1(%[ftmp2], %[src16], 0x30)
1765  MMI_ULDC1(%[ftmp3], %[src16], 0x38)
1766  "psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1767  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1768  "psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1769  "psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
1770  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1771  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
1772  MMI_LDC1(%[ftmp5], %[src8], 0x00)
1773  MMI_LDXC1(%[ftmp4], %[src8], %[src8Stride], 0x00)
1774  "pavgb %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1775  "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1776  MMI_SDC1(%[ftmp0], %[dst], 0x00)
1777  MMI_SDXC1(%[ftmp2], %[dst], %[dstStride], 0x00)
1778  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1779  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1780  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1781  [ftmp6]"=&f"(ftmp[6]),
1784  [tmp0]"=&r"(tmp0)
1785  : [src8]"r"(src8), [src16]"r"(src16),
1786  [dst]"r"(dst),
1787  [src8Stride]"r"((mips_reg)src8Stride),
1788  [dstStride]"r"((mips_reg)dstStride)
1789  : "memory"
1790  );
1791 
1792  src8 += 2 * src8Stride;
1793  src16 += 48;
1794  dst += 2 * dstStride;
1795  } while (h -= 2);
1796 }
1797 
/*
 * 16x16 version built from four 8x8 calls: left and right halves of the top
 * eight rows, then of the bottom eight rows.  src and dst both move down by
 * 8 * dstStride between the two bands, src2 by 8 * src2Stride.
 */
static void put_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
        const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
{
    int band;

    for (band = 0; band < 2; band++) {
        uint8_t       *d  = dst  + band * (8 * dstStride);
        const uint8_t *s  = src  + band * (8 * dstStride);
        const uint8_t *s2 = src2 + band * (8 * src2Stride);

        put_h264_qpel8_h_lowpass_l2_mmi(d, s, s2, dstStride, src2Stride);
        put_h264_qpel8_h_lowpass_l2_mmi(d + 8, s + 8, s2 + 8, dstStride,
                                        src2Stride);
    }
}
1813 
/*
 * 16-wide version: runs the 8-wide routine on the left half and then on the
 * right half (all three pointers offset by 8 elements), h rows each.
 */
static void put_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
        const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
{
    int x;

    for (x = 0; x < 16; x += 8)
        put_pixels8_l2_shift5_mmi(dst + x, src16 + x, src8 + x,
                                  dstStride, src8Stride, h);
}
1821 
1822 static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1823  int dstStride, int srcStride)
1824 {
1825  INIT_CLIP
1826  int i;
1827  int16_t _tmp[36];
1828  int16_t *tmp = _tmp;
1829  double ftmp[10];
1830  uint64_t tmp0;
1832 
1833  src -= 2*srcStride;
1834 
1835  __asm__ volatile (
1836  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1837  "dli %[tmp0], 0x09 \n\t"
1838  "1: \n\t"
1839  MMI_ULWC1(%[ftmp1], %[src], -0x02)
1840  MMI_ULWC1(%[ftmp2], %[src], -0x01)
1841  MMI_ULWC1(%[ftmp3], %[src], 0x00)
1842  MMI_ULWC1(%[ftmp4], %[src], 0x01)
1843  MMI_ULWC1(%[ftmp5], %[src], 0x02)
1844  MMI_ULWC1(%[ftmp6], %[src], 0x03)
1845  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1846  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1847  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1848  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1849  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1850  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1851  "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1852  "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
1853  "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
1854  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
1855  "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
1856  "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1857  "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
1858  MMI_SDC1(%[ftmp9], %[tmp], 0x00)
1859  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
1860  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1861  PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t"
1862  "bnez %[tmp0], 1b \n\t"
1863  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1864  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1865  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1866  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1867  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1868  [tmp0]"=&r"(tmp0),
1870  [tmp]"+&r"(tmp), [src]"+&r"(src)
1871  : [tmpStride]"r"(8),
1872  [srcStride]"r"((mips_reg)srcStride),
1873  [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f)
1874  : "memory"
1875  );
1876 
1877  tmp -= 28;
1878 
1879  for (i=0; i<4; i++) {
1880  const int16_t tmpB= tmp[-8];
1881  const int16_t tmpA= tmp[-4];
1882  const int16_t tmp0= tmp[ 0];
1883  const int16_t tmp1= tmp[ 4];
1884  const int16_t tmp2= tmp[ 8];
1885  const int16_t tmp3= tmp[12];
1886  const int16_t tmp4= tmp[16];
1887  const int16_t tmp5= tmp[20];
1888  const int16_t tmp6= tmp[24];
1889  op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1890  op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1891  op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1892  op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1893  dst++;
1894  tmp++;
1895  }
1896 }
1897 
1898 static void avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
1899  int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
1900 {
1901  int w = size >> 4;
1902  double ftmp[11];
1903  uint64_t tmp0;
1905 
1906  do {
1907  int h = size;
1908  __asm__ volatile (
1909  "dli %[tmp0], 0x02 \n\t"
1910  "mtc1 %[tmp0], %[ftmp9] \n\t"
1911  "dli %[tmp0], 0x06 \n\t"
1912  "mtc1 %[tmp0], %[ftmp10] \n\t"
1913  "1: \n\t"
1914  MMI_LDC1(%[ftmp0], %[tmp], 0x00)
1915  MMI_LDC1(%[ftmp3], %[tmp], 0x08)
1916  MMI_ULDC1(%[ftmp1], %[tmp], 0x02)
1917  MMI_ULDC1(%[ftmp4], %[tmp], 0x0a)
1918  MMI_LDC1(%[ftmp7], %[tmp], 0x10)
1919  MMI_ULDC1(%[ftmp8], %[tmp], 0x12)
1920  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1921  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1922  "paddh %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1923  "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1924  MMI_ULDC1(%[ftmp2], %[tmp], 0x04)
1925  MMI_ULDC1(%[ftmp5], %[tmp], 0x0c)
1926  MMI_ULDC1(%[ftmp7], %[tmp], 0x06)
1927  MMI_ULDC1(%[ftmp8], %[tmp], 0x0e)
1928  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1929  "paddh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1930  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1931  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1932  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1933  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1934  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1935  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1936  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1937  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1938  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1939  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1940  "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1941  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1942  "psrah %[ftmp0], %[ftmp0], %[ftmp10] \n\t"
1943  "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
1944  "packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
1945  MMI_LDC1(%[ftmp6], %[dst], 0x00)
1946  "pavgb %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1947  MMI_SDC1(%[ftmp0], %[dst], 0x00)
1948  "addi %[h], %[h], -0x01 \n\t"
1949  PTR_ADDI "%[tmp], %[tmp], 0x30 \n\t"
1950  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1951  "bnez %[h], 1b \n\t"
1952  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1953  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1954  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1955  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1956  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1957  [ftmp10]"=&f"(ftmp[10]),
1958  [tmp0]"=&r"(tmp0),
1960  [tmp]"+&r"(tmp), [dst]"+&r"(dst),
1961  [h]"+&r"(h)
1962  : [dstStride]"r"((mips_reg)dstStride)
1963  : "memory"
1964  );
1965 
1966  tmp += 8 - size * 24;
1967  dst += 8 - size * dstStride;
1968  } while (w--);
1969 }
1970 
/*
 * Two-pass hv lowpass ("avg"): the shared put hv1 pass writes 16-bit
 * intermediates from src into tmp, then the avg hv2 pass merges the filtered
 * result into dst.  `size` is forwarded unchanged to both passes.
 */
static void avg_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride, int size)
{
    /* pass 1: src -> tmp (same routine as the put path) */
    put_h264_qpel8or16_hv1_lowpass_mmi(tmp, src,
                                       tmpStride, srcStride, size);
    /* pass 2: tmp -> dst, averaging with what dst already holds */
    avg_h264_qpel8or16_hv2_lowpass_mmi(dst, tmp,
                                       dstStride, tmpStride, size);
}
1978 
/* 8-wide entry point: the shared 8/16 avg hv lowpass with size fixed to 8. */
static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride)
{
    const int block_size = 8;

    avg_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src,
                                      dstStride, tmpStride, srcStride,
                                      block_size);
}
1986 
/* 16-wide entry point: the shared 8/16 avg hv lowpass with size fixed to 16. */
static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride)
{
    const int block_size = 16;

    avg_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src,
                                      dstStride, tmpStride, srcStride,
                                      block_size);
}
1994 
1995 static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
1996  const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
1997 {
1998  double ftmp[10];
1999  uint64_t tmp[2];
2002 
2003  __asm__ volatile (
2004  "dli %[tmp1], 0x02 \n\t"
2005  "ori %[tmp0], $0, 0x8 \n\t"
2006  "mtc1 %[tmp1], %[ftmp7] \n\t"
2007  "dli %[tmp1], 0x05 \n\t"
2008  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2009  "mtc1 %[tmp1], %[ftmp8] \n\t"
2010  "1: \n\t"
2011  MMI_ULDC1(%[ftmp1], %[src], 0x00)
2012  MMI_ULDC1(%[ftmp2], %[src], 0x01)
2013  "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
2014  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
2015  "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2016  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2017  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2018  "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2019  "psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
2020  "psllh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
2021  MMI_ULDC1(%[ftmp2], %[src], -0x01)
2022  MMI_ULDC1(%[ftmp5], %[src], 0x02)
2023  "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2024  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2025  "punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
2026  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
2027  "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2028  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
2029  "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2030  "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
2031  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t"
2032  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
2033  MMI_ULWC1(%[ftmp2], %[src], -0x02)
2034  MMI_ULWC1(%[ftmp6], %[src], 0x07)
2035  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2036  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2037  "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
2038  "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
2039  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
2040  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
2041  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2042  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2043  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
2044  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2045  MMI_LDC1(%[ftmp5], %[src2], 0x00)
2046  "packushb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2047  MMI_LDC1(%[ftmp9], %[dst], 0x00)
2048  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2049  "pavgb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
2050  PTR_ADDU "%[src], %[src], %[dstStride] \n\t"
2051  MMI_SDC1(%[ftmp1], %[dst], 0x00)
2052  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
2053  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
2054  PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t"
2055  "bgtz %[tmp0], 1b \n\t"
2056  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2057  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2058  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2059  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2060  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2061  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
2064  [dst]"+&r"(dst), [src]"+&r"(src),
2065  [src2]"+&r"(src2)
2066  : [dstStride]"r"((mips_reg)dstStride),
2067  [src2Stride]"r"((mips_reg)src2Stride),
2068  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
2069  : "memory"
2070  );
2071 }
2072 
/*
 * 16x16 version built from four 8x8 calls: left and right halves of the top
 * eight rows, then of the bottom eight rows.  src and dst both move down by
 * 8 * dstStride between the two bands, src2 by 8 * src2Stride.
 */
static void avg_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
        const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
{
    int band;

    for (band = 0; band < 2; band++) {
        uint8_t       *d  = dst  + band * (8 * dstStride);
        const uint8_t *s  = src  + band * (8 * dstStride);
        const uint8_t *s2 = src2 + band * (8 * src2Stride);

        avg_h264_qpel8_h_lowpass_l2_mmi(d, s, s2, dstStride, src2Stride);
        avg_h264_qpel8_h_lowpass_l2_mmi(d + 8, s + 8, s2 + 8, dstStride,
                                        src2Stride);
    }
}
2088 
2089 static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
2090  const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
2091 {
2092  double ftmp[8];
2093  uint64_t tmp0;
2096 
2097  do {
2098  __asm__ volatile (
2099  "dli %[tmp0], 0x05 \n\t"
2100  MMI_ULDC1(%[ftmp0], %[src16], 0x00)
2101  "mtc1 %[tmp0], %[ftmp6] \n\t"
2102  MMI_ULDC1(%[ftmp1], %[src16], 0x08)
2103  MMI_ULDC1(%[ftmp2], %[src16], 0x30)
2104  MMI_ULDC1(%[ftmp3], %[src16], 0x38)
2105  "psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
2106  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
2107  "psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2108  "psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
2109  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2110  MMI_LDC1(%[ftmp4], %[src8], 0x00)
2111  MMI_LDXC1(%[ftmp5], %[src8], %[src8Stride], 0x00)
2112  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2113  "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2114  "pavgb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2115  MMI_LDC1(%[ftmp7], %[dst], 0x00)
2116  "pavgb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
2117  MMI_SDC1(%[ftmp0], %[dst], 0x00)
2118  MMI_LDXC1(%[ftmp7], %[dst], %[dstStride], 0x00)
2119  "pavgb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2120  MMI_SDXC1(%[ftmp2], %[dst], %[dstStride], 0x00)
2121  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2122  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2123  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2124  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2127  [tmp0]"=&r"(tmp0)
2128  : [src8]"r"(src8), [src16]"r"(src16),
2129  [dst]"r"(dst),
2130  [src8Stride]"r"((mips_reg)src8Stride),
2131  [dstStride]"r"((mips_reg)dstStride)
2132  : "memory"
2133  );
2134 
2135  src8 += 2 * src8Stride;
2136  src16 += 48;
2137  dst += 2 * dstStride;
2138  } while (b -= 2);
2139 }
2140 
/*
 * 16-wide version: runs the 8-wide routine on the left half and then on the
 * right half (all three pointers offset by 8 elements), b rows each.
 */
static void avg_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
        const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
{
    int x;

    for (x = 0; x < 16; x += 8)
        avg_pixels8_l2_shift5_mmi(dst + x, src16 + x, src8 + x,
                                  dstStride, src8Stride, b);
}
2148 
2149 //DEF_H264_MC_MMI(put_, 4)
/* 4x4 put, no filtering: forwards to ff_put_pixels4_8_mmi with 4 rows. */
void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const int rows = 4;

    ff_put_pixels4_8_mmi(dst, src, stride, rows);
}
2155 
/*
 * 4x4 put: the horizontal lowpass of src is written into a packed 4x4
 * scratch block, which ff_put_pixels4_l2_8_mmi then combines with the source
 * rows into dst.  The scratch block has to be filled before the l2 step
 * reads it.
 */
void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[16];

    put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride);
    ff_put_pixels4_l2_8_mmi(dst, src, half, stride, stride, 4, 4);
}
2163 
/*
 * 4x4 put: horizontal lowpass of src written straight to dst, using
 * `stride` for both buffers.
 */
void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    put_h264_qpel4_h_lowpass_mmi(dst, src, stride, stride);
}
2169 
/*
 * 4x4 put: the horizontal lowpass of src is written into a packed 4x4
 * scratch block, which ff_put_pixels4_l2_8_mmi then combines with the source
 * rows shifted one pixel to the right (src + 1).  The scratch block has to be
 * filled before the l2 step reads it.
 */
void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[16];

    put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride);
    ff_put_pixels4_l2_8_mmi(dst, src+1, half, stride, stride, 4, 4);
}
2177 
/*
 * 4x4 put: copies 9 rows of 4 bytes (starting two rows above src) into a
 * packed buffer, runs the vertical lowpass from the third row of that
 * buffer, and lets ff_put_pixels4_l2_8_mmi combine the unfiltered centre
 * rows with the filtered block into dst.
 */
void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t column[9 * 4];
    uint8_t vhalf[4 * 4];
    uint8_t *const centre = column + 2 * 4;

    copy_block4_mmi(column, src - 2 * stride, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(vhalf, centre, 4, 4);
    ff_put_pixels4_l2_8_mmi(dst, centre, vhalf, stride, 4, 4, 4);
}
2188 
/*
 * 4x4 put: copies 9 rows of 4 bytes (starting two rows above src) into a
 * packed buffer and writes the vertical lowpass of it directly to dst.
 */
void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t column[9 * 4];
    uint8_t *const centre = column + 2 * 4;

    copy_block4_mmi(column, src - 2 * stride, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(dst, centre, stride, 4);
}
2197 
/*
 * 4x4 put: copies 9 rows of 4 bytes (starting two rows above src) into a
 * packed buffer, runs the vertical lowpass from the third row, and lets
 * ff_put_pixels4_l2_8_mmi combine the filtered block with the unfiltered
 * rows starting one row below the centre (centre + 4).
 */
void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t column[9 * 4];
    uint8_t vhalf[4 * 4];
    uint8_t *const centre = column + 2 * 4;

    copy_block4_mmi(column, src - 2 * stride, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(vhalf, centre, 4, 4);
    ff_put_pixels4_l2_8_mmi(dst, centre + 4, vhalf, stride, 4, 4, 4);
}
2208 
/*
 * 4x4 put: combines a horizontal lowpass block (taken at src) with a
 * vertical lowpass block (taken from a 9-row packed copy starting two rows
 * above src) through ff_put_pixels4_l2_8_mmi.  Both scratch blocks are
 * filled before the l2 step reads them.
 */
void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];
    uint8_t * const full_mid= full + 8;
    uint8_t halfH[16];
    uint8_t halfV[16];

    put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
    copy_block4_mmi(full, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
}
2221 
/*
 * 4x4 put: combines a horizontal lowpass block (taken at src) with a
 * vertical lowpass block taken one column to the right (9-row packed copy
 * starting at src - 2*stride + 1) through ff_put_pixels4_l2_8_mmi.  Both
 * scratch blocks are filled before the l2 step reads them.
 */
void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];
    uint8_t * const full_mid= full + 8;
    uint8_t halfH[16];
    uint8_t halfV[16];

    put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
    copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
}
2234 
/*
 * 4x4 put: combines a horizontal lowpass block taken one row down
 * (src + stride) with a vertical lowpass block (9-row packed copy starting
 * two rows above src) through ff_put_pixels4_l2_8_mmi.  Both scratch blocks
 * are filled before the l2 step reads them.
 */
void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];
    uint8_t * const full_mid= full + 8;
    uint8_t halfH[16];
    uint8_t halfV[16];

    put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
    copy_block4_mmi(full, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
}
2247 
/*
 * 4x4 put: combines a horizontal lowpass block taken one row down
 * (src + stride) with a vertical lowpass block taken one column to the right
 * (9-row packed copy starting at src - 2*stride + 1) through
 * ff_put_pixels4_l2_8_mmi.  Both scratch blocks are filled before the l2
 * step reads them.
 */
void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];
    uint8_t * const full_mid= full + 8;
    uint8_t halfH[16];
    uint8_t halfV[16];

    put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
    copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
}
2260 
/*
 * 4x4 put: combined horizontal+vertical lowpass of src written straight to
 * dst, using `stride` for both buffers.
 */
void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    put_h264_qpel4_hv_lowpass_mmi(dst, src, stride, stride);
}
2266 
/*
 * 4x4 put: combines a horizontal lowpass block (taken at src) with an hv
 * lowpass block through ff_put_pixels4_l2_8_mmi.  Both packed 4x4 scratch
 * blocks are filled before the l2 step reads them.
 */
void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t halfH[16];
    uint8_t halfHV[16];

    put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
    put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
    ff_put_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
}
2276 
/*
 * 4x4 put: combines a horizontal lowpass block taken one row down
 * (src + stride) with an hv lowpass block through ff_put_pixels4_l2_8_mmi.
 * Both packed 4x4 scratch blocks are filled before the l2 step reads them.
 */
void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t halfH[16];
    uint8_t halfHV[16];

    put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
    put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
    ff_put_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
}
2286 
/*
 * 4x4 put: combines a vertical lowpass block (from a 9-row packed copy
 * starting two rows above src) with an hv lowpass block through
 * ff_put_pixels4_l2_8_mmi.  Both scratch blocks are filled before the l2
 * step reads them.
 */
void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];
    uint8_t * const full_mid= full + 8;
    uint8_t halfV[16];
    uint8_t halfHV[16];

    copy_block4_mmi(full, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
    ff_put_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
}
2299 
/*
 * 4x4 put: combines a vertical lowpass block taken one column to the right
 * (9-row packed copy starting at src - 2*stride + 1) with an hv lowpass
 * block through ff_put_pixels4_l2_8_mmi.  Both scratch blocks are filled
 * before the l2 step reads them.
 */
void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];
    uint8_t * const full_mid= full + 8;
    uint8_t halfV[16];
    uint8_t halfHV[16];

    copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
    ff_put_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
}
2312 
2313 //DEF_H264_MC_MMI(avg_, 4)
/* 4x4 avg, no filtering: forwards to ff_avg_pixels4_8_mmi with 4 rows. */
void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const int rows = 4;

    ff_avg_pixels4_8_mmi(dst, src, stride, rows);
}
2319 
/*
 * 4x4 avg: the horizontal lowpass of src is written into a packed 4x4
 * scratch block, which ff_avg_pixels4_l2_8_mmi then combines with the source
 * rows and dst.  The scratch block has to be filled before the l2 step reads
 * it.
 */
void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[16];

    put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride);
    ff_avg_pixels4_l2_8_mmi(dst, src, half, stride, stride, 4, 4);
}
2327 
/*
 * 4x4 avg: horizontal lowpass of src merged into dst by the avg lowpass
 * routine, using `stride` for both buffers.
 */
void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    avg_h264_qpel4_h_lowpass_mmi(dst, src, stride, stride);
}
2333 
/*
 * 4x4 avg: the horizontal lowpass of src is written into a packed 4x4
 * scratch block, which ff_avg_pixels4_l2_8_mmi then combines with the source
 * rows shifted one pixel to the right (src + 1) and dst.  The scratch block
 * has to be filled before the l2 step reads it.
 */
void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[16];

    put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride);
    ff_avg_pixels4_l2_8_mmi(dst, src+1, half, stride, stride, 4, 4);
}
2341 
/*
 * 4x4 avg: copies 9 rows of 4 bytes (starting two rows above src) into a
 * packed buffer, runs the vertical lowpass from the third row of that
 * buffer, and lets ff_avg_pixels4_l2_8_mmi combine the unfiltered centre
 * rows and the filtered block with dst.
 */
void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t column[9 * 4];
    uint8_t vhalf[4 * 4];
    uint8_t *const centre = column + 2 * 4;

    copy_block4_mmi(column, src - 2 * stride, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(vhalf, centre, 4, 4);
    ff_avg_pixels4_l2_8_mmi(dst, centre, vhalf, stride, 4, 4, 4);
}
2352 
/*
 * 4x4 avg: copies 9 rows of 4 bytes (starting two rows above src) into a
 * packed buffer and hands it to the avg vertical lowpass, which writes dst.
 */
void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t column[9 * 4];
    uint8_t *const centre = column + 2 * 4;

    copy_block4_mmi(column, src - 2 * stride, 4, stride, 9);
    avg_h264_qpel4_v_lowpass_mmi(dst, centre, stride, 4);
}
2361 
/*
 * 4x4 avg: copies 9 rows of 4 bytes (starting two rows above src) into a
 * packed buffer, runs the vertical lowpass from the third row, and lets
 * ff_avg_pixels4_l2_8_mmi combine the filtered block and the unfiltered rows
 * starting one row below the centre (centre + 4) with dst.
 */
void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t column[9 * 4];
    uint8_t vhalf[4 * 4];
    uint8_t *const centre = column + 2 * 4;

    copy_block4_mmi(column, src - 2 * stride, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(vhalf, centre, 4, 4);
    ff_avg_pixels4_l2_8_mmi(dst, centre + 4, vhalf, stride, 4, 4, 4);
}
2372 
/*
 * 4x4 avg: combines a horizontal lowpass block (taken at src) and a vertical
 * lowpass block (from a 9-row packed copy starting two rows above src) with
 * dst through ff_avg_pixels4_l2_8_mmi.  Both scratch blocks are filled
 * before the l2 step reads them.
 */
void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];
    uint8_t * const full_mid= full + 8;
    uint8_t halfH[16];
    uint8_t halfV[16];

    put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
    copy_block4_mmi(full, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
}
2385 
/*
 * 4x4 avg: combines a horizontal lowpass block (taken at src) and a vertical
 * lowpass block taken one column to the right (9-row packed copy starting at
 * src - 2*stride + 1) with dst through ff_avg_pixels4_l2_8_mmi.  Both
 * scratch blocks are filled before the l2 step reads them.
 */
void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];
    uint8_t * const full_mid= full + 8;
    uint8_t halfH[16];
    uint8_t halfV[16];

    put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
    copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
}
2398 
/*
 * 4x4 avg: combines a horizontal lowpass block taken one row down
 * (src + stride) and a vertical lowpass block (9-row packed copy starting
 * two rows above src) with dst through ff_avg_pixels4_l2_8_mmi.  Both
 * scratch blocks are filled before the l2 step reads them.
 */
void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];
    uint8_t * const full_mid= full + 8;
    uint8_t halfH[16];
    uint8_t halfV[16];

    put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
    copy_block4_mmi(full, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
}
2411 
/*
 * 4x4 avg: combines a horizontal lowpass block taken one row down
 * (src + stride) and a vertical lowpass block taken one column to the right
 * (9-row packed copy starting at src - 2*stride + 1) with dst through
 * ff_avg_pixels4_l2_8_mmi.  Both scratch blocks are filled before the l2
 * step reads them.
 */
void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];
    uint8_t * const full_mid= full + 8;
    uint8_t halfH[16];
    uint8_t halfV[16];

    put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
    copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
}
2424 
/*
 * 4x4 avg: combined horizontal+vertical lowpass of src merged into dst by
 * the avg hv lowpass routine, using `stride` for both buffers.
 */
void ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    avg_h264_qpel4_hv_lowpass_mmi(dst, src, stride, stride);
}
2430 
/*
 * 4x4 avg: combines a horizontal lowpass block (taken at src) and an hv
 * lowpass block with dst through ff_avg_pixels4_l2_8_mmi.  Both packed 4x4
 * scratch blocks are filled before the l2 step reads them.
 */
void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t halfH[16];
    uint8_t halfHV[16];

    put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
    put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
    ff_avg_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
}
2440 
/*
 * avg, 4x4: merges the horizontal lowpass of the rows one line below src
 * (src + stride) with the combined hv lowpass of the block into dst
 * through ff_avg_pixels4_l2_8_mmi().
 */
void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t halfH[16];
    uint8_t halfHV[16];

    /* Both 4x4 scratch blocks must be filled before they are blended. */
    put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
    put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
    ff_avg_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
}
2450 
/*
 * avg, 4x4: merges the vertical lowpass result with the combined hv
 * lowpass result into dst through ff_avg_pixels4_l2_8_mmi().
 *
 * halfV is filtered from a 9-row copy of the source that starts two rows
 * above src; halfHV is filtered directly from src.
 */
void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];                     /* 9 rows x 4 bytes */
    uint8_t * const full_mid = full + 8;  /* skip the two leading rows */
    uint8_t halfV[16];
    uint8_t halfHV[16];

    copy_block4_mmi(full, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    /* halfHV must be written before the final blend reads it. */
    put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
    ff_avg_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
}
2463 
/*
 * avg, 4x4: merges the vertical lowpass result with the combined hv
 * lowpass result into dst through ff_avg_pixels4_l2_8_mmi().
 *
 * halfV is filtered from a 9-row copy that starts two rows above src and
 * one byte to the right; halfHV is filtered directly from src.
 */
void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];                     /* 9 rows x 4 bytes */
    uint8_t * const full_mid = full + 8;  /* skip the two leading rows */
    uint8_t halfV[16];
    uint8_t halfHV[16];

    copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    /* halfHV must be written before the final blend reads it. */
    put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
    ff_avg_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
}
2476 
2477 //DEF_H264_MC_MMI(put_, 8)
/*
 * put, 8x8, no filtering: hands the block straight to
 * ff_put_pixels8_8_mmi() with a height of 8 rows.
 */
void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const int32_t rows = 8;

    ff_put_pixels8_8_mmi(dst, src, stride, rows);
}
2483 
/*
 * put, 8x8: combines the source rows at src with their horizontal
 * lowpass result and stores the outcome to dst through
 * ff_put_pixels8_l2_8_mmi().
 */
void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[64];   /* 8 rows x 8 bytes, row pitch 8 */

    /* half must be written before the blend reads it. */
    put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
    ff_put_pixels8_l2_8_mmi(dst, src, half, stride, stride, 8, 8);
}
2491 
/*
 * put, 8x8: writes the horizontal lowpass of src directly to dst.
 * Source and destination use the same stride.
 */
void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    put_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride);
}
2497 
/*
 * put, 8x8: combines the source rows one byte to the right of src with
 * the horizontal lowpass result of src and stores the outcome to dst
 * through ff_put_pixels8_l2_8_mmi().
 */
void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[64];   /* 8 rows x 8 bytes, row pitch 8 */

    /* half must be written before the blend reads it. */
    put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
    ff_put_pixels8_l2_8_mmi(dst, src+1, half, stride, stride, 8, 8);
}
2505 
/*
 * put, 8x8: copies 13 source rows (starting two rows above src) into a
 * packed buffer, runs the vertical lowpass on it, and combines the
 * filtered block with the packed rows starting at the block's own first
 * row.
 */
void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t vfilt[8 * 8];
    uint8_t *const centre = &rows[2 * 8];   /* row that corresponds to src */

    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(vfilt, centre, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, centre, vfilt, stride, 8, 8, 8);
}
2516 
/*
 * put, 8x8: copies 13 source rows (starting two rows above src) into a
 * packed buffer and writes its vertical lowpass straight to dst.
 */
void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t *const centre = &rows[2 * 8];   /* row that corresponds to src */

    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(dst, centre, stride, 8);
}
2525 
/*
 * put, 8x8: copies 13 source rows (starting two rows above src) into a
 * packed buffer, runs the vertical lowpass on it, and combines the
 * filtered block with the packed rows starting one row below the block's
 * first row.
 */
void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t vfilt[8 * 8];
    uint8_t *const centre = &rows[2 * 8];   /* row that corresponds to src */

    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(vfilt, centre, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, centre + 8, vfilt, stride, 8, 8, 8);
}
2536 
/*
 * put, 8x8: combines a horizontal and a vertical lowpass result and
 * stores the outcome to dst through ff_put_pixels8_l2_8_mmi().
 *
 * halfH is filtered from the rows at src; halfV from a 13-row copy of the
 * source that starts two rows above src.
 */
void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];                     /* 13 rows x 8 bytes */
    uint8_t * const full_mid = full + 16;  /* skip the two leading rows */
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
    copy_block8_mmi(full, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2549 
/*
 * put, 8x8: combines a horizontal and a vertical lowpass result and
 * stores the outcome to dst through ff_put_pixels8_l2_8_mmi().
 *
 * halfH is filtered from the rows at src; halfV from a 13-row copy that
 * starts two rows above src and one byte to the right.
 */
void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];                     /* 13 rows x 8 bytes */
    uint8_t * const full_mid = full + 16;  /* skip the two leading rows */
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
    copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2562 
/*
 * put, 8x8: combines a horizontal and a vertical lowpass result and
 * stores the outcome to dst through ff_put_pixels8_l2_8_mmi().
 *
 * halfH is filtered from the rows one line below src (src + stride);
 * halfV from a 13-row copy that starts two rows above src.
 */
void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];                     /* 13 rows x 8 bytes */
    uint8_t * const full_mid = full + 16;  /* skip the two leading rows */
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
    copy_block8_mmi(full, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2575 
/*
 * put, 8x8: combines a horizontal and a vertical lowpass result and
 * stores the outcome to dst through ff_put_pixels8_l2_8_mmi().
 *
 * halfH is filtered from the rows one line below src (src + stride);
 * halfV from a 13-row copy that starts two rows above src and one byte to
 * the right.
 */
void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];                     /* 13 rows x 8 bytes */
    uint8_t * const full_mid = full + 16;  /* skip the two leading rows */
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
    copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2588 
/*
 * put, 8x8: writes the combined horizontal+vertical lowpass of src
 * directly to dst. temp is the 16-bit intermediate buffer the hv helper
 * works in, passed with a tmp stride of 8.
 */
void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    /* int16_t matches the helper's tmp parameter type. */
    int16_t __attribute__ ((aligned(8))) temp[192];

    put_h264_qpel8_hv_lowpass_mmi(dst, temp, src, stride, 8, stride);
}
2596 
/*
 * put, 8x8: computes the hv lowpass block into scratch memory, then lets
 * put_h264_qpel8_h_lowpass_l2_mmi() combine it with the horizontal
 * lowpass of the rows at src and store the result to dst.
 *
 * Scratch layout: 64 bytes for the 8x8 hv result, followed by 384 bytes
 * used as the hv helper's 16-bit tmp area.
 */
void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv, vtmp, src, 8, 8, stride);
    put_h264_qpel8_h_lowpass_l2_mmi(dst, src, hv, stride, 8);
}
2607 
/*
 * put, 8x8: computes the hv lowpass block into scratch memory, then lets
 * put_h264_qpel8_h_lowpass_l2_mmi() combine it with the horizontal
 * lowpass of the rows one line below src and store the result to dst.
 *
 * Scratch layout: 64 bytes for the 8x8 hv result, followed by 384 bytes
 * used as the hv helper's 16-bit tmp area.
 */
void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv, vtmp, src, 8, 8, stride);
    put_h264_qpel8_h_lowpass_l2_mmi(dst, src + stride, hv, stride, 8);
}
2618 
/*
 * put, 8x8: computes the hv lowpass block into scratch memory, then
 * combines it with the 16-bit intermediate data via
 * put_pixels8_l2_shift5_mmi() and stores the result to dst.
 *
 * Scratch layout: 64 bytes for the 8x8 hv result, followed by 384 bytes
 * of 16-bit intermediate values written by the hv helper.
 */
void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv, vtmp, src, 8, 8, stride);
    /* NOTE(review): the +2 entry offset presumably selects the column
     * aligned with src inside the intermediate rows -- confirm against
     * put_pixels8_l2_shift5_mmi(). */
    put_pixels8_l2_shift5_mmi(dst, vtmp + 2, hv, stride, 8, 8);
}
2629 
/*
 * put, 8x8: computes the hv lowpass block into scratch memory, then
 * combines it with the 16-bit intermediate data via
 * put_pixels8_l2_shift5_mmi() and stores the result to dst.
 *
 * Scratch layout: 64 bytes for the 8x8 hv result, followed by 384 bytes
 * of 16-bit intermediate values written by the hv helper.
 */
void ff_put_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv, vtmp, src, 8, 8, stride);
    /* NOTE(review): the +3 entry offset presumably selects the column one
     * pixel right of src inside the intermediate rows -- confirm against
     * put_pixels8_l2_shift5_mmi(). */
    put_pixels8_l2_shift5_mmi(dst, vtmp + 3, hv, stride, 8, 8);
}
2640 
2641 //DEF_H264_MC_MMI(avg_, 8)
/*
 * avg, 8x8, no filtering: hands the block straight to
 * ff_avg_pixels8_8_mmi() with a height of 8 rows.
 */
void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const int32_t rows = 8;

    ff_avg_pixels8_8_mmi(dst, src, stride, rows);
}
2647 
/*
 * avg, 8x8: combines the source rows at src with their horizontal
 * lowpass result and merges the outcome into dst through
 * ff_avg_pixels8_l2_8_mmi().
 */
void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[64];   /* 8 rows x 8 bytes, row pitch 8 */

    /* half must be written before the blend reads it. */
    put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
    ff_avg_pixels8_l2_8_mmi(dst, src, half, stride, stride, 8, 8);
}
2655 
/*
 * avg, 8x8: runs the horizontal lowpass on src and lets the avg_ helper
 * merge the result into dst. Source and destination use the same stride.
 */
void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    avg_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride);
}
2661 
/*
 * avg, 8x8: combines the source rows one byte to the right of src with
 * the horizontal lowpass result of src and merges the outcome into dst
 * through ff_avg_pixels8_l2_8_mmi().
 */
void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[64];   /* 8 rows x 8 bytes, row pitch 8 */

    /* half must be written before the blend reads it. */
    put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
    ff_avg_pixels8_l2_8_mmi(dst, src+1, half, stride, stride, 8, 8);
}
2669 
/*
 * avg, 8x8: copies 13 source rows (starting two rows above src) into a
 * packed buffer, runs the vertical lowpass on it, and merges the
 * combination of the filtered block and the packed rows starting at the
 * block's own first row into dst.
 */
void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t vfilt[8 * 8];
    uint8_t *const centre = &rows[2 * 8];   /* row that corresponds to src */

    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(vfilt, centre, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, centre, vfilt, stride, 8, 8, 8);
}
2680 
/*
 * avg, 8x8: copies 13 source rows (starting two rows above src) into a
 * packed buffer and lets the avg_ vertical lowpass helper merge the
 * filtered result into dst.
 */
void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t *const centre = &rows[2 * 8];   /* row that corresponds to src */

    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
    avg_h264_qpel8_v_lowpass_mmi(dst, centre, stride, 8);
}
2689 
/*
 * avg, 8x8: copies 13 source rows (starting two rows above src) into a
 * packed buffer, runs the vertical lowpass on it, and merges the
 * combination of the filtered block and the packed rows starting one row
 * below the block's first row into dst.
 */
void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t vfilt[8 * 8];
    uint8_t *const centre = &rows[2 * 8];   /* row that corresponds to src */

    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(vfilt, centre, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, centre + 8, vfilt, stride, 8, 8, 8);
}
2700 
/*
 * avg, 8x8: merges a horizontal and a vertical lowpass result into dst
 * through ff_avg_pixels8_l2_8_mmi().
 *
 * halfH is filtered from the rows at src; halfV from a 13-row copy of the
 * source that starts two rows above src.
 */
void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];                     /* 13 rows x 8 bytes */
    uint8_t * const full_mid = full + 16;  /* skip the two leading rows */
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
    copy_block8_mmi(full, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2713 
/*
 * avg, 8x8: merges a horizontal and a vertical lowpass result into dst
 * through ff_avg_pixels8_l2_8_mmi().
 *
 * halfH is filtered from the rows at src; halfV from a 13-row copy that
 * starts two rows above src and one byte to the right.
 */
void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];                     /* 13 rows x 8 bytes */
    uint8_t * const full_mid = full + 16;  /* skip the two leading rows */
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
    copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2726 
/*
 * avg, 8x8: merges a horizontal and a vertical lowpass result into dst
 * through ff_avg_pixels8_l2_8_mmi().
 *
 * halfH is filtered from the rows one line below src (src + stride);
 * halfV from a 13-row copy that starts two rows above src.
 */
void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];                     /* 13 rows x 8 bytes */
    uint8_t * const full_mid = full + 16;  /* skip the two leading rows */
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
    copy_block8_mmi(full, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2739 
/*
 * avg, 8x8: merges a horizontal and a vertical lowpass result into dst
 * through ff_avg_pixels8_l2_8_mmi().
 *
 * halfH is filtered from the rows one line below src (src + stride);
 * halfV from a 13-row copy that starts two rows above src and one byte to
 * the right.
 */
void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];                     /* 13 rows x 8 bytes */
    uint8_t * const full_mid = full + 16;  /* skip the two leading rows */
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
    copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2752 
/*
 * avg, 8x8: runs the combined horizontal+vertical lowpass on src and lets
 * the avg_ helper merge the result into dst. temp is the 16-bit
 * intermediate buffer the hv helper works in, passed with a tmp stride
 * of 8.
 */
void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    /* int16_t matches the helper's tmp parameter type. */
    int16_t __attribute__ ((aligned(8))) temp[192];

    avg_h264_qpel8_hv_lowpass_mmi(dst, temp, src, stride, 8, stride);
}
2760 
/*
 * avg, 8x8: computes the hv lowpass block into scratch memory, then lets
 * avg_h264_qpel8_h_lowpass_l2_mmi() combine it with the horizontal
 * lowpass of the rows at src and merge the result into dst.
 *
 * Scratch layout: 64 bytes for the 8x8 hv result, followed by 384 bytes
 * used as the hv helper's 16-bit tmp area.
 */
void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv, vtmp, src, 8, 8, stride);
    avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, hv, stride, 8);
}
2771 
/*
 * avg, 8x8: computes the hv lowpass block into scratch memory, then lets
 * avg_h264_qpel8_h_lowpass_l2_mmi() combine it with the horizontal
 * lowpass of the rows one line below src and merge the result into dst.
 *
 * Scratch layout: 64 bytes for the 8x8 hv result, followed by 384 bytes
 * used as the hv helper's 16-bit tmp area.
 */
void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv, vtmp, src, 8, 8, stride);
    avg_h264_qpel8_h_lowpass_l2_mmi(dst, src + stride, hv, stride, 8);
}
2782 
/*
 * avg, 8x8: computes the hv lowpass block into scratch memory, then
 * combines it with the 16-bit intermediate data via
 * avg_pixels8_l2_shift5_mmi() and merges the result into dst.
 *
 * Scratch layout: 64 bytes for the 8x8 hv result, followed by 384 bytes
 * of 16-bit intermediate values written by the hv helper.
 */
void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv, vtmp, src, 8, 8, stride);
    /* NOTE(review): the +2 entry offset presumably selects the column
     * aligned with src inside the intermediate rows -- confirm against
     * avg_pixels8_l2_shift5_mmi(). */
    avg_pixels8_l2_shift5_mmi(dst, vtmp + 2, hv, stride, 8, 8);
}
2793 
/*
 * avg, 8x8: computes the hv lowpass block into scratch memory, then
 * combines it with the 16-bit intermediate data via
 * avg_pixels8_l2_shift5_mmi() and merges the result into dst.
 *
 * Scratch layout: 64 bytes for the 8x8 hv result, followed by 384 bytes
 * of 16-bit intermediate values written by the hv helper.
 */
void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv, vtmp, src, 8, 8, stride);
    /* NOTE(review): the +3 entry offset presumably selects the column one
     * pixel right of src inside the intermediate rows -- confirm against
     * avg_pixels8_l2_shift5_mmi(). */
    avg_pixels8_l2_shift5_mmi(dst, vtmp + 3, hv, stride, 8, 8);
}
2804 
2805 //DEF_H264_MC_MMI(put_, 16)
/*
 * put, 16x16, no filtering: hands the block straight to
 * ff_put_pixels16_8_mmi() with a height of 16 rows.
 */
void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const int32_t rows = 16;

    ff_put_pixels16_8_mmi(dst, src, stride, rows);
}
2811 
/*
 * put, 16x16: combines the source rows at src with their horizontal
 * lowpass result and stores the outcome to dst through
 * ff_put_pixels16_l2_8_mmi().
 */
void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[256];  /* 16 rows x 16 bytes, row pitch 16 */

    /* half must be written before the blend reads it. */
    put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
    ff_put_pixels16_l2_8_mmi(dst, src, half, stride, stride, 16, 16);
}
2819 
/*
 * put, 16x16: writes the horizontal lowpass of src directly to dst.
 * Source and destination use the same stride.
 */
void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    put_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride);
}
2825 
/*
 * put, 16x16: combines the source rows one byte to the right of src with
 * the horizontal lowpass result of src and stores the outcome to dst
 * through ff_put_pixels16_l2_8_mmi().
 */
void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[256];  /* 16 rows x 16 bytes, row pitch 16 */

    /* half must be written before the blend reads it. */
    put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
    ff_put_pixels16_l2_8_mmi(dst, src+1, half, stride, stride, 16, 16);
}
2833 
/*
 * put, 16x16: copies 21 source rows (starting two rows above src) into a
 * packed buffer, runs the vertical lowpass on it, and combines the
 * filtered block with the packed rows starting at the block's own first
 * row.
 */
void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t *const centre = &rows[2 * 16];  /* row that corresponds to src */

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, centre, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, centre, vfilt, stride, 16, 16, 16);
}
2844 
/*
 * put, 16x16: copies 21 source rows (starting two rows above src) into a
 * packed buffer and writes its vertical lowpass straight to dst.
 */
void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t *const centre = &rows[2 * 16];  /* row that corresponds to src */

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(dst, centre, stride, 16);
}
2853 
/*
 * put, 16x16: copies 21 source rows (starting two rows above src) into a
 * packed buffer, runs the vertical lowpass on it, and combines the
 * filtered block with the packed rows starting one row below the block's
 * first row.
 */
void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t *const centre = &rows[2 * 16];  /* row that corresponds to src */

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, centre, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, centre + 16, vfilt, stride, 16, 16, 16);
}
2864 
/*
 * put, 16x16: combines a horizontal and a vertical lowpass result and
 * stores the outcome to dst through ff_put_pixels16_l2_8_mmi().
 *
 * halfH is filtered from the rows at src; halfV from a 21-row copy of the
 * source that starts two rows above src.
 */
void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];                     /* 21 rows x 16 bytes */
    uint8_t * const full_mid = full + 32;  /* skip the two leading rows */
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
    copy_block16_mmi(full, src - stride*2, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
2877 
/*
 * put, 16x16: combines a horizontal and a vertical lowpass result and
 * stores the outcome to dst through ff_put_pixels16_l2_8_mmi().
 *
 * halfH is filtered from the rows at src; halfV from a 21-row copy that
 * starts two rows above src and one byte to the right.
 */
void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];                     /* 21 rows x 16 bytes */
    uint8_t * const full_mid = full + 32;  /* skip the two leading rows */
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
    copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
2890 
/*
 * put, 16x16: combines a horizontal and a vertical lowpass result and
 * stores the outcome to dst through ff_put_pixels16_l2_8_mmi().
 *
 * halfH is filtered from the rows one line below src (src + stride);
 * halfV from a 21-row copy that starts two rows above src.
 */
void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];                     /* 21 rows x 16 bytes */
    uint8_t * const full_mid = full + 32;  /* skip the two leading rows */
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
    copy_block16_mmi(full, src - stride*2, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
2903 
/*
 * put, 16x16: combines a horizontal and a vertical lowpass result and
 * stores the outcome to dst through ff_put_pixels16_l2_8_mmi().
 *
 * halfH is filtered from the rows one line below src (src + stride);
 * halfV from a 21-row copy that starts two rows above src and one byte to
 * the right.
 */
void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];                     /* 21 rows x 16 bytes */
    uint8_t * const full_mid = full + 32;  /* skip the two leading rows */
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
    copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
2916 
/*
 * put, 16x16: writes the combined horizontal+vertical lowpass of src
 * directly to dst. temp is the 16-bit intermediate buffer the hv helper
 * works in, passed with a tmp stride of 16.
 */
void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    /* int16_t matches the helper's tmp parameter type. */
    int16_t __attribute__ ((aligned(8))) temp[384];

    put_h264_qpel16_hv_lowpass_mmi(dst, temp, src, stride, 16, stride);
}
2924 
/*
 * put, 16x16: computes the hv lowpass block into scratch memory, then
 * lets put_h264_qpel16_h_lowpass_l2_mmi() combine it with the horizontal
 * lowpass of the rows at src and store the result to dst.
 *
 * Scratch layout: 256 bytes for the 16x16 hv result, followed by 768
 * bytes used as the hv helper's 16-bit tmp area.
 */
void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[256 + 768];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv, vtmp, src, 16, 16, stride);
    put_h264_qpel16_h_lowpass_l2_mmi(dst, src, hv, stride, 16);
}
2935 
/*
 * put, 16x16: computes the hv lowpass block into scratch memory, then
 * lets put_h264_qpel16_h_lowpass_l2_mmi() combine it with the horizontal
 * lowpass of the rows one line below src and store the result to dst.
 *
 * Scratch layout: 256 bytes for the 16x16 hv result, followed by 768
 * bytes used as the hv helper's 16-bit tmp area.
 */
void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[256 + 768];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv, vtmp, src, 16, 16, stride);
    put_h264_qpel16_h_lowpass_l2_mmi(dst, src + stride, hv, stride, 16);
}
2946 
/*
 * put, 16x16: computes the hv lowpass block into scratch memory, then
 * combines it with the 16-bit intermediate data via
 * put_pixels16_l2_shift5_mmi() and stores the result to dst.
 *
 * Scratch layout: 256 bytes for the 16x16 hv result, followed by 768
 * bytes of 16-bit intermediate values written by the hv helper.
 */
void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[256 + 768];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv, vtmp, src, 16, 16, stride);
    /* NOTE(review): the +2 entry offset presumably selects the column
     * aligned with src inside the intermediate rows -- confirm against
     * put_pixels16_l2_shift5_mmi(). */
    put_pixels16_l2_shift5_mmi(dst, vtmp + 2, hv, stride, 16, 16);
}
2957 
/*
 * put, 16x16: computes the hv lowpass block into scratch memory, then
 * combines it with the 16-bit intermediate data via
 * put_pixels16_l2_shift5_mmi() and stores the result to dst.
 *
 * Scratch layout: 256 bytes for the 16x16 hv result, followed by 768
 * bytes of 16-bit intermediate values written by the hv helper.
 */
void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[256 + 768];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv, vtmp, src, 16, 16, stride);
    /* NOTE(review): the +3 entry offset presumably selects the column one
     * pixel right of src inside the intermediate rows -- confirm against
     * put_pixels16_l2_shift5_mmi(). */
    put_pixels16_l2_shift5_mmi(dst, vtmp + 3, hv, stride, 16, 16);
}
2968 
2969 //DEF_H264_MC_MMI(avg_, 16)
/*
 * avg, 16x16, no filtering: hands the block straight to
 * ff_avg_pixels16_8_mmi() with a height of 16 rows.
 */
void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const int32_t rows = 16;

    ff_avg_pixels16_8_mmi(dst, src, stride, rows);
}
2975 
/*
 * avg, 16x16: combines the source rows at src with their horizontal
 * lowpass result and merges the outcome into dst through
 * ff_avg_pixels16_l2_8_mmi().
 */
void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[256];  /* 16 rows x 16 bytes, row pitch 16 */

    /* half must be written before the blend reads it. */
    put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
    ff_avg_pixels16_l2_8_mmi(dst, src, half, stride, stride, 16, 16);
}
2983 
/*
 * avg, 16x16: runs the horizontal lowpass on src and lets the avg_ helper
 * merge the result into dst. Source and destination use the same stride.
 */
void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    avg_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride);
}
2989 
/*
 * avg, 16x16: combines the source rows one byte to the right of src with
 * the horizontal lowpass result of src and merges the outcome into dst
 * through ff_avg_pixels16_l2_8_mmi().
 */
void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[256];  /* 16 rows x 16 bytes, row pitch 16 */

    /* half must be written before the blend reads it. */
    put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
    ff_avg_pixels16_l2_8_mmi(dst, src+1, half, stride, stride, 16, 16);
}
2997 
/*
 * avg, 16x16: copies 21 source rows (starting two rows above src) into a
 * packed buffer, runs the vertical lowpass on it, and merges the
 * combination of the filtered block and the packed rows starting at the
 * block's own first row into dst.
 */
void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t *const centre = &rows[2 * 16];  /* row that corresponds to src */

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, centre, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, centre, vfilt, stride, 16, 16, 16);
}
3008 
/*
 * avg, 16x16: copies 21 source rows (starting two rows above src) into a
 * packed buffer and lets the avg_ vertical lowpass helper merge the
 * filtered result into dst.
 */
void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t *const centre = &rows[2 * 16];  /* row that corresponds to src */

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    avg_h264_qpel16_v_lowpass_mmi(dst, centre, stride, 16);
}
3017 
/*
 * avg, 16x16: copies 21 source rows (starting two rows above src) into a
 * packed buffer, runs the vertical lowpass on it, and merges the
 * combination of the filtered block and the packed rows starting one row
 * below the block's first row into dst.
 */
void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t *const centre = &rows[2 * 16];  /* row that corresponds to src */

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, centre, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, centre + 16, vfilt, stride, 16, 16, 16);
}
3028 
/*
 * avg, 16x16: merges a horizontal and a vertical lowpass result into dst
 * through ff_avg_pixels16_l2_8_mmi().
 *
 * halfH is filtered from the rows at src; halfV from a 21-row copy of the
 * source that starts two rows above src.
 */
void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];                     /* 21 rows x 16 bytes */
    uint8_t * const full_mid = full + 32;  /* skip the two leading rows */
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
    copy_block16_mmi(full, src - stride*2, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
3041 
/*
 * avg, 16x16: merges a horizontal and a vertical lowpass result into dst
 * through ff_avg_pixels16_l2_8_mmi().
 *
 * halfH is filtered from the rows at src; halfV from a 21-row copy that
 * starts two rows above src and one byte to the right.
 */
void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];                     /* 21 rows x 16 bytes */
    uint8_t * const full_mid = full + 32;  /* skip the two leading rows */
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
    copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
3054 
/*
 * avg, 16x16: merges a horizontal and a vertical lowpass result into dst
 * through ff_avg_pixels16_l2_8_mmi().
 *
 * halfH is filtered from the rows one line below src (src + stride);
 * halfV from a 21-row copy that starts two rows above src.
 */
void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];                     /* 21 rows x 16 bytes */
    uint8_t * const full_mid = full + 32;  /* skip the two leading rows */
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
    copy_block16_mmi(full, src - stride*2, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
3067 
/*
 * avg, 16x16: merges a horizontal and a vertical lowpass result into dst
 * through ff_avg_pixels16_l2_8_mmi().
 *
 * halfH is filtered from the rows one line below src (src + stride);
 * halfV from a 21-row copy that starts two rows above src and one byte to
 * the right.
 */
void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];                     /* 21 rows x 16 bytes */
    uint8_t * const full_mid = full + 32;  /* skip the two leading rows */
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be written before the final blend reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
    copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
3080 
/*
 * avg, 16x16: runs the combined horizontal+vertical lowpass on src and
 * lets the avg_ helper merge the result into dst. temp is the 16-bit
 * intermediate buffer the hv helper works in, passed with a tmp stride
 * of 16.
 */
void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    /* int16_t matches the helper's tmp parameter type. */
    int16_t __attribute__ ((aligned(8))) temp[384];

    avg_h264_qpel16_hv_lowpass_mmi(dst, temp, src, stride, 16, stride);
}
3088 
/*
 * avg, 16x16: computes the hv lowpass block into scratch memory, then
 * lets avg_h264_qpel16_h_lowpass_l2_mmi() combine it with the horizontal
 * lowpass of the rows at src and merge the result into dst.
 *
 * Scratch layout: 256 bytes for the 16x16 hv result, followed by 768
 * bytes used as the hv helper's 16-bit tmp area.
 */
void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[256 + 768];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv, vtmp, src, 16, 16, stride);
    avg_h264_qpel16_h_lowpass_l2_mmi(dst, src, hv, stride, 16);
}
3099 
/*
 * avg, 16x16: computes the hv lowpass block into scratch memory, then
 * lets avg_h264_qpel16_h_lowpass_l2_mmi() combine it with the horizontal
 * lowpass of the rows one line below src and merge the result into dst.
 *
 * Scratch layout: 256 bytes for the 16x16 hv result, followed by 768
 * bytes used as the hv helper's 16-bit tmp area.
 */
void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[256 + 768];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv, vtmp, src, 16, 16, stride);
    avg_h264_qpel16_h_lowpass_l2_mmi(dst, src + stride, hv, stride, 16);
}
3110 
/*
 * avg, 16x16: computes the hv lowpass block into scratch memory, then
 * combines it with the 16-bit intermediate data via
 * avg_pixels16_l2_shift5_mmi() and merges the result into dst.
 *
 * Scratch layout: 256 bytes for the 16x16 hv result, followed by 768
 * bytes of 16-bit intermediate values written by the hv helper.
 */
void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[256 + 768];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv, vtmp, src, 16, 16, stride);
    /* NOTE(review): the +2 entry offset presumably selects the column
     * aligned with src inside the intermediate rows -- confirm against
     * avg_pixels16_l2_shift5_mmi(). */
    avg_pixels16_l2_shift5_mmi(dst, vtmp + 2, hv, stride, 16, 16);
}
3121 
/*
 * avg, 16x16: computes the hv lowpass block into scratch memory, then
 * combines it with the 16-bit intermediate data via
 * avg_pixels16_l2_shift5_mmi() and merges the result into dst.
 *
 * Scratch layout: 256 bytes for the 16x16 hv result, followed by 768
 * bytes of 16-bit intermediate values written by the hv helper.
 */
void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[256 + 768];
    uint8_t *const hv = &scratch[0];
    int16_t *const vtmp = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv, vtmp, src, 16, 16, stride);
    /* NOTE(review): the +3 entry offset presumably selects the column one
     * pixel right of src inside the intermediate rows -- confirm against
     * avg_pixels16_l2_shift5_mmi(). */
    avg_pixels16_l2_shift5_mmi(dst, vtmp + 3, hv, stride, 16, 16);
}
3132 
/* Remove the op2_avg / op2_put helper macros defined earlier in this file
 * so they do not stay defined past the end of the qpel code. */
3133 #undef op2_avg
3134 #undef op2_put
op2_avg
#define op2_avg(a, b)
Definition: h264qpel_mmi.c:107
ff_put_h264_qpel8_mc13_mmi
void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2563
ff_put_h264_qpel16_mc03_mmi
void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2854
ff_put_h264_qpel4_mc23_mmi
void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2277
ff_avg_h264_qpel8_mc22_mmi
void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2753
ff_avg_h264_qpel4_mc02_mmi
void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2353
ff_put_pixels4_l2_8_mmi
void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:303
put_h264_qpel4_v_lowpass_mmi
static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:385
avg_h264_qpel16_hv_lowpass_mmi
static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
Definition: h264qpel_mmi.c:1987
ff_avg_h264_qpel8_mc11_mmi
void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2701
ff_avg_h264_qpel8_mc31_mmi
void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2714
ff_avg_h264_qpel8_mc10_mmi
void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2648
put_h264_qpel8or16_hv1_lowpass_mmi
static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp, const uint8_t *src, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
Definition: h264qpel_mmi.c:1334
ff_avg_h264_qpel8_mc30_mmi
void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2662
ff_avg_h264_qpel16_mc11_mmi
void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3029
ff_put_h264_qpel4_mc33_mmi
void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2248
put_h264_qpel8_h_lowpass_l2_mmi
static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
Definition: h264qpel_mmi.c:1674
ff_put_h264_qpel4_mc20_mmi
void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2164
ff_put_h264_qpel16_mc23_mmi
void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2936
DECLARE_VAR_LOW32
#define DECLARE_VAR_LOW32
Definition: mmiutils.h:37
avg_h264_qpel8_h_lowpass_mmi
static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:302
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
w
uint8_t w
Definition: llviddspenc.c:38
ff_put_h264_qpel8_mc00_mmi
void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2478
avg_h264_qpel8or16_hv_lowpass_mmi
static void avg_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
Definition: h264qpel_mmi.c:1971
ff_put_h264_qpel16_mc12_mmi
void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2947
b
#define b
Definition: input.c:34
half
static uint8_t half(int a, int b)
Definition: mobiclip.c:541
ff_avg_h264_qpel4_mc11_mmi
void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2373
ff_put_h264_qpel4_mc13_mmi
void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2235
mips_reg
#define mips_reg
Definition: asmdefs.h:46
ff_put_h264_qpel8_mc02_mmi
void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2517
ff_avg_h264_qpel16_mc31_mmi
void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3042
ff_avg_pixels8_8_mmi
void ff_avg_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
ff_avg_h264_qpel4_mc10_mmi
void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2320
ff_put_h264_qpel4_mc32_mmi
void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2300
avg_h264_qpel16_h_lowpass_l2_mmi
static void avg_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
Definition: h264qpel_mmi.c:2073
PTR_ADDI
#define PTR_ADDI
Definition: asmdefs.h:51
put_h264_qpel8_hv_lowpass_mmi
static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
Definition: h264qpel_mmi.c:1658
ff_put_h264_qpel8_mc33_mmi
void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2576
ff_avg_pixels4_l2_8_mmi
void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:487
ff_put_h264_qpel4_mc02_mmi
void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2189
ff_put_h264_qpel4_mc12_mmi
void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2287
ff_avg_h264_qpel16_mc32_mmi
void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3122
ff_put_h264_qpel16_mc31_mmi
void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2878
ff_avg_h264_qpel16_mc21_mmi
void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3089
ff_avg_h264_qpel8_mc33_mmi
void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2740
put_h264_qpel4_h_lowpass_mmi
static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:109
avg_h264_qpel16_h_lowpass_mmi
static void avg_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:374
INIT_CLIP
#define INIT_CLIP
Definition: bit_depth_template.c:87
mmiutils.h
ff_avg_h264_qpel4_mc00_mmi
void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2314
ff_put_h264_qpel16_mc01_mmi
void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2834
ff_avg_h264_qpel4_mc32_mmi
void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2464
ff_put_h264_qpel8_mc31_mmi
void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2550
aligned
static int aligned(int val)
Definition: dashdec.c:168
ff_put_pixels8_8_mmi
void ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
ff_avg_h264_qpel16_mc10_mmi
void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2976
avg_h264_qpel8_h_lowpass_l2_mmi
static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
Definition: h264qpel_mmi.c:1995
ff_avg_h264_qpel4_mc12_mmi
void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2451
ff_avg_h264_qpel8_mc32_mmi
void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2794
ff_avg_h264_qpel16_mc23_mmi
void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3100
ff_pw_20
const union av_intfloat64 ff_pw_20
Definition: constants.c:39
put_h264_qpel8or16_hv2_lowpass_mmi
static void put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
Definition: h264qpel_mmi.c:1579
ff_avg_h264_qpel4_mc23_mmi
void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2441
put_h264_qpel8or16_hv_lowpass_mmi
static void put_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
Definition: h264qpel_mmi.c:1650
ff_put_h264_qpel4_mc30_mmi
void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2170
h264dsp_mips.h
ff_put_h264_qpel4_mc01_mmi
void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2178
ff_avg_h264_qpel16_mc12_mmi
void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3111
ff_put_h264_qpel4_mc11_mmi
void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2209
avg_h264_qpel8_v_lowpass_mmi
static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:917
put_h264_qpel4_hv_lowpass_mmi
static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:1258
ff_avg_h264_qpel8_mc13_mmi
void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2727
ff_avg_h264_qpel4_mc30_mmi
void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2334
put_h264_qpel16_hv_lowpass_mmi
static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
Definition: h264qpel_mmi.c:1666
bit_depth_template.c
avg_h264_qpel8or16_hv2_lowpass_mmi
static void avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
Definition: h264qpel_mmi.c:1898
ff_avg_h264_qpel16_mc03_mmi
void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3018
copy_block16_mmi
static void copy_block16_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: h264qpel_mmi.c:77
put_h264_qpel16_h_lowpass_mmi
static void put_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:234
ff_put_h264_qpel8_mc12_mmi
void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2619
ff_avg_h264_qpel4_mc31_mmi
void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2386
put_h264_qpel16_v_lowpass_mmi
static void put_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:792
ff_avg_h264_qpel8_mc01_mmi
void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2670
ff_avg_h264_qpel4_mc01_mmi
void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2342
ff_put_h264_qpel16_mc00_mmi
void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2806
ff_avg_h264_qpel16_mc20_mmi
void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2984
ff_avg_h264_qpel8_mc12_mmi
void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2783
ff_put_h264_qpel8_mc11_mmi
void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2537
hpeldsp_mips.h
ff_avg_pixels16_l2_8_mmi
void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:605
ff_put_h264_qpel8_mc32_mmi
void ff_put_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2630
avg_pixels8_l2_shift5_mmi
static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
Definition: h264qpel_mmi.c:2089
avg_h264_qpel16_v_lowpass_mmi
static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:1247
size
int size
Definition: twinvq_data.h:10344
ff_avg_h264_qpel16_mc13_mmi
void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3055
avg_h264_qpel4_h_lowpass_mmi
static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:245
ff_put_h264_qpel4_mc10_mmi
void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2156
ff_avg_h264_qpel8_mc03_mmi
void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2690
DECLARE_VAR_ALL64
#define DECLARE_VAR_ALL64
Definition: mmiutils.h:39
ff_put_h264_qpel4_mc03_mmi
void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2198
ff_put_h264_qpel16_mc11_mmi
void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2865
put_pixels8_l2_shift5_mmi
static void put_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
Definition: h264qpel_mmi.c:1750
put_h264_qpel8_v_lowpass_mmi
static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:494
ff_avg_h264_qpel16_mc22_mmi
void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3081
avg_h264_qpel8_hv_lowpass_mmi
static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
Definition: h264qpel_mmi.c:1979
ff_avg_h264_qpel16_mc33_mmi
void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3068
ff_avg_h264_qpel8_mc02_mmi
void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2681
ff_put_h264_qpel8_mc10_mmi
void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2484
ff_avg_h264_qpel4_mc20_mmi
void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2328
ff_put_h264_qpel8_mc01_mmi
void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2506
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
src2
const pixel * src2
Definition: h264pred_template.c:422
avg_pixels16_l2_shift5_mmi
static void avg_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
Definition: h264qpel_mmi.c:2141
ff_avg_h264_qpel16_mc00_mmi
void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2970
ff_put_h264_qpel16_mc33_mmi
void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2904
put_pixels16_l2_shift5_mmi
static void put_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
Definition: h264qpel_mmi.c:1814
avg_h264_qpel4_v_lowpass_mmi
static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:803
ff_avg_h264_qpel4_mc22_mmi
void ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2425
put_h264_qpel8_h_lowpass_mmi
static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:164
copy_block8_mmi
static void copy_block8_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: h264qpel_mmi.c:53
av_intfloat64::f
double f
Definition: intfloat.h:34
ff_put_h264_qpel8_mc22_mmi
void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2589
stride
#define stride
Definition: h264pred_template.c:537
ff_avg_pixels8_l2_8_mmi
void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:533
ff_avg_h264_qpel16_mc02_mmi
void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3009
copy_block4_mmi
static void copy_block4_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: h264qpel_mmi.c:29
avg_h264_qpel4_hv_lowpass_mmi
static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:1822
ff_avg_pixels16_8_mmi
void ff_avg_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
ff_put_h264_qpel4_mc00_mmi
void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2150
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:49
put_h264_qpel16_h_lowpass_l2_mmi
static void put_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
Definition: h264qpel_mmi.c:1798
ff_put_h264_qpel4_mc22_mmi
void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2261
ff_avg_h264_qpel16_mc01_mmi
void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2998
ff_put_h264_qpel8_mc21_mmi
void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2597
RESTRICT_ASM_LOW32
#define RESTRICT_ASM_LOW32
Definition: mmiutils.h:38
DECLARE_VAR_ADDRT
#define DECLARE_VAR_ADDRT
Definition: mmiutils.h:41
ff_put_h264_qpel8_mc30_mmi
void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2498
ff_put_pixels4_8_mmi
void ff_put_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
ff_avg_h264_qpel8_mc21_mmi
void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2761
ff_put_h264_qpel16_mc13_mmi
void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2891
ff_put_h264_qpel4_mc21_mmi
void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2267
ff_avg_h264_qpel4_mc03_mmi
void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2362
ff_avg_h264_qpel16_mc30_mmi
void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2990
op2_put
#define op2_put(a, b)
Definition: h264qpel_mmi.c:108
temp
else temp
Definition: vf_mcdeint.c:248
ff_put_h264_qpel8_mc03_mmi
void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2526
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:50
ff_put_h264_qpel8_mc20_mmi
void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2492
ff_put_h264_qpel8_mc23_mmi
void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2608
ff_avg_h264_qpel8_mc20_mmi
void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2656
ff_put_h264_qpel16_mc10_mmi
void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2812
ff_put_h264_qpel16_mc30_mmi
void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2826
ff_avg_h264_qpel4_mc33_mmi
void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2412
ff_put_h264_qpel16_mc21_mmi
void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2925
ff_avg_h264_qpel4_mc21_mmi
void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2431
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
ff_put_pixels16_8_mmi
void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
ff_put_h264_qpel16_mc22_mmi
void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2917
ff_put_h264_qpel16_mc20_mmi
void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2820
h
h
Definition: vp9dsp_template.c:2038
ff_avg_h264_qpel4_mc13_mmi
void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2399
ff_put_h264_qpel4_mc31_mmi
void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2222
ff_pw_5
const union av_intfloat64 ff_pw_5
Definition: constants.c:29
RESTRICT_ASM_ADDRT
#define RESTRICT_ASM_ADDRT
Definition: mmiutils.h:42
RESTRICT_ASM_ALL64
#define RESTRICT_ASM_ALL64
Definition: mmiutils.h:40
ff_avg_h264_qpel8_mc23_mmi
void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2772
ff_put_h264_qpel16_mc02_mmi
void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2845
ff_put_h264_qpel16_mc32_mmi
void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2958
ff_pw_16
const union av_intfloat64 ff_pw_16
Definition: constants.c:36
ff_put_pixels16_l2_8_mmi
void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:408
ff_avg_pixels4_8_mmi
void ff_avg_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
ff_avg_h264_qpel8_mc00_mmi
void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2642
ff_put_pixels8_l2_8_mmi
void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:347