FFmpeg
h264dsp_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized h264dsp
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  * Heiher <r@hev.cc>
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
27 #include "h264dsp_mips.h"
29 
/**
 * Add a 4x4 block of 16-bit values to a 4x4 block of 8-bit pixels, then
 * clear the 16-bit values.
 *
 * For each of the four rows the asm loads four bytes from dst + n * stride,
 * widens them to 16 bits, adds the four int16_t values found at byte offset
 * 8 * n in src, packs the sums back to bytes with unsigned saturation and
 * stores the four bytes to the same dst row. Finally the 32 bytes at src are
 * overwritten with zero.
 *
 * @param dst    first byte of the first destination row
 * @param src    16 int16_t values (32 bytes); zeroed on return
 * @param stride offset between consecutive dst rows, in bytes
 *
 * DECLARE_VAR_LOW32 / RESTRICT_ASM_LOW32 and the MMI_* load/store macros are
 * defined outside this file; presumably the former provide a scratch operand
 * that the latter may need -- confirm against the header that defines them.
 */
30 void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
31 {
32  double ftmp[9];
33  DECLARE_VAR_LOW32;
34 
35  __asm__ volatile (
 /* ftmp0 = 0; used both as the zero extension for the byte unpack and as
  * the (discarded) upper half of the pack. */
36  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* ftmp1..ftmp4 = the four rows of src (4 x int16_t each). */
37  MMI_LDC1(%[ftmp1], %[src], 0x00)
38  MMI_LDC1(%[ftmp2], %[src], 0x08)
39  MMI_LDC1(%[ftmp3], %[src], 0x10)
40  MMI_LDC1(%[ftmp4], %[src], 0x18)
 /* ftmp5..ftmp8 = four pixels from each of the four dst rows. */
41  MMI_ULWC1(%[ftmp5], %[dst0], 0x00)
42  MMI_ULWC1(%[ftmp6], %[dst1], 0x00)
43  MMI_ULWC1(%[ftmp7], %[dst2], 0x00)
44  MMI_ULWC1(%[ftmp8], %[dst3], 0x00)
 /* Interleave the pixel bytes with zero: 8-bit -> 16-bit lanes. */
45  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
46  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
47  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
48  "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
 /* src row + pixel row, per 16-bit lane. */
49  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
50  "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
51  "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
52  "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
 /* Pack back to bytes with unsigned saturation; only the low four bytes
  * of each result are stored below. */
53  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
54  "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
55  "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
56  "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
57  MMI_SWC1(%[ftmp1], %[dst0], 0x00)
58  MMI_SWC1(%[ftmp2], %[dst1], 0x00)
59  MMI_SWC1(%[ftmp3], %[dst2], 0x00)
60  MMI_SWC1(%[ftmp4], %[dst3], 0x00)
61 
62  /* memset(src, 0, 32); */
 /* Two 16-byte stores of the zero register pair. NOTE(review): gssqc1
  * presumably requires src to be 16-byte aligned -- confirm with callers. */
63  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
64  "gssqc1 %[ftmp0], %[ftmp0], 0x00(%[src]) \n\t"
65  "gssqc1 %[ftmp0], %[ftmp0], 0x10(%[src]) \n\t"
66  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
67  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
68  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
69  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
70  RESTRICT_ASM_LOW32
71  [ftmp8]"=&f"(ftmp[8])
 /* The four row addresses are computed in C and passed in as inputs, so
  * the asm never modifies a pointer operand. */
72  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
73  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
74  [src]"r"(src)
75  : "memory"
76  );
77 
78 }
79 
/**
 * 4x4 inverse transform of block, with the result added to the pixels at dst.
 *
 * The asm loads the four 8-byte rows of block, runs an add / subtract /
 * shift-right-by-1 butterfly on them, transposes the 4x4 halfword matrix,
 * adds ff_pw_32 to one intermediate term and runs the same butterfly again.
 * The four result rows are shifted right by 6, added to four rows of four
 * dst pixels (widened to 16 bits), packed back to bytes with unsigned
 * saturation and stored. The 32 bytes at block are zeroed.
 *
 * @param dst    first byte of the first destination row
 * @param block  16 int16_t coefficients (32 bytes); zeroed on return
 * @param stride offset between consecutive dst rows, in bytes
 *
 * ff_pw_32 and the MMI_* / DECLARE_VAR_* / RESTRICT_ASM_* / PTR_ADDU macros
 * are defined outside this file. ff_pw_32 is presumably four 16-bit lanes
 * holding 32, i.e. the rounding term for the final shift by 6 -- confirm.
 */
80 void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
81 {
82  double ftmp[12];
83  uint64_t tmp[1];
84  DECLARE_VAR_LOW32;
85  DECLARE_VAR_ADDRT;
86 
87  __asm__ volatile (
 /* ftmp8 = shift count 1, ftmp9 = shift count 6; ftmp0..ftmp3 = the four
  * rows of block. The loads are interleaved with the constant setup. */
88  "dli %[tmp0], 0x01 \n\t"
89  MMI_LDC1(%[ftmp0], %[block], 0x00)
90  "mtc1 %[tmp0], %[ftmp8] \n\t"
91  MMI_LDC1(%[ftmp1], %[block], 0x08)
92  "dli %[tmp0], 0x06 \n\t"
93  MMI_LDC1(%[ftmp2], %[block], 0x10)
94  "mtc1 %[tmp0], %[ftmp9] \n\t"
 /* First butterfly: (row1 >> 1) - row3, (row3 >> 1) + row1, row0 + row2,
  * row0 - row2, then the sums and differences of those pairs. */
95  "psrah %[ftmp4], %[ftmp1], %[ftmp8] \n\t"
96  MMI_LDC1(%[ftmp3], %[block], 0x18)
97  "psrah %[ftmp5], %[ftmp3], %[ftmp8] \n\t"
98  "psubh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
99  "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
100  "paddh %[ftmp10], %[ftmp2], %[ftmp0] \n\t"
101  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
102  "paddh %[ftmp11], %[ftmp5], %[ftmp10] \n\t"
103  "psubh %[ftmp2], %[ftmp10], %[ftmp5] \n\t"
104  "paddh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
105  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
 /* 4x4 halfword transpose via halfword and word interleaves. */
106  "punpckhhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t"
107  "punpcklhw %[ftmp5], %[ftmp11], %[ftmp10] \n\t"
108  "punpckhhw %[ftmp4], %[ftmp0], %[ftmp2] \n\t"
109  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
110  "punpckhwd %[ftmp2], %[ftmp5], %[ftmp0] \n\t"
111  "punpcklwd %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
112  "punpcklwd %[ftmp10], %[ftmp1], %[ftmp4] \n\t"
113  "punpckhwd %[ftmp0], %[ftmp1], %[ftmp4] \n\t"
 /* Add ff_pw_32 to the term that feeds every output, then run the same
  * butterfly a second time. */
114  "paddh %[ftmp5], %[ftmp5], %[ff_pw_32] \n\t"
115  "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
116  "psrah %[ftmp3], %[ftmp0], %[ftmp8] \n\t"
117  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
118  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
119  "paddh %[ftmp1], %[ftmp10], %[ftmp5] \n\t"
120  "psubh %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
121  "paddh %[ftmp10], %[ftmp3], %[ftmp1] \n\t"
122  "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
123  "paddh %[ftmp11], %[ftmp4], %[ftmp5] \n\t"
 /* ftmp7 = 0: zero for the byte unpack/pack below, and the value used
  * to clear block. */
124  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
125  "psubh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
126  MMI_SDC1(%[ftmp7], %[block], 0x00)
127  MMI_SDC1(%[ftmp7], %[block], 0x08)
128  MMI_SDC1(%[ftmp7], %[block], 0x10)
129  MMI_SDC1(%[ftmp7], %[block], 0x18)
 /* Rows 0 and 1: result >> 6, add to the widened pixels, saturate to
  * bytes, store. */
130  MMI_ULWC1(%[ftmp2], %[dst], 0x00)
131  "psrah %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
132  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
133  "psrah %[ftmp4], %[ftmp11], %[ftmp9] \n\t"
134  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
135  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
136  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
137  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
138  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
139  "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
140  MMI_SWC1(%[ftmp2], %[dst], 0x00)
141  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
 /* dst += 2 * stride. This writes the dst register, which is why dst is
  * declared as a read-write operand below. */
142  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
143  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
 /* Rows 2 and 3, same sequence. */
144  MMI_ULWC1(%[ftmp2], %[dst], 0x00)
145  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
146  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
147  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
148  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
149  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
150  "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
151  "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
152  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
153  MMI_SWC1(%[ftmp2], %[dst], 0x00)
154  "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
155  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
156 
157  /* memset(block, 0, 32) */
 /* block was already cleared by the four MMI_SDC1 stores above; this
  * second clear rewrites the same 32 bytes. */
158  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
159  "gssqc1 %[ftmp0], %[ftmp0], 0x00(%[block]) \n\t"
160  "gssqc1 %[ftmp0], %[ftmp0], 0x10(%[block]) \n\t"
161  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
162  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
163  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
164  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
165  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
166  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
167  RESTRICT_ASM_LOW32
168  RESTRICT_ASM_ADDRT
 /* dst is "+&r": the asm both reads and advances it, and the compiler
  * must not assume the register still holds the incoming value. */
169  [tmp0]"=&r"(tmp[0]), [dst]"+&r"(dst)
170  : [block]"r"(block),
171  [stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32)
172  : "memory"
173  );
174 
175 }
176 
/**
 * 8x8 inverse transform of block, with the result added to the pixels at dst.
 *
 * Outline of the asm, as far as it can be read from the instruction stream:
 *  - 32 is added to block[0] in memory (the rounding term for the final
 *    shift by 6) and $29 is lowered by 0x20 to obtain 32 bytes of scratch.
 *  - A first pass is run on bytes 0..7 of every 16-byte row of block and
 *    then on bytes 8..15; each group of results is transposed as 4x4
 *    halfword tiles and parked in the scratch area, in GPRs (tmp1..tmp6)
 *    and in ftmp10..ftmp15.
 *  - block is cleared (all 128 bytes).
 *  - A second pass is run twice; each run yields eight rows of four values
 *    that are shifted right by 6, added to four dst pixels per row with
 *    unsigned saturation and stored. The first run writes at dst, the second
 *    at dst + 4 (addr0).
 *  - $29 is restored.
 *
 * @param dst    first byte of the first destination row
 * @param block  64 int16_t coefficients (128 bytes), presumably row-major
 *               8x8 -- confirm; zeroed on return
 * @param stride offset between consecutive dst rows, in bytes
 *
 * The MMI_* / DECLARE_VAR_* / RESTRICT_ASM_* / PTR_* macros and mips_reg are
 * defined outside this file.
 *
 * NOTE(review): the asm moves the stack pointer ($29) itself and lists it as
 * a clobber. Recent GCC releases reportedly diagnose stack-pointer clobbers;
 * a local scratch array passed as an operand would avoid touching $29 --
 * confirm against the toolchains that must be supported.
 */
177 void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
178 {
179  double ftmp[16];
180  uint64_t tmp[7];
181  mips_reg addr[1];
182  DECLARE_VAR_LOW32;
183  DECLARE_VAR_ADDRT;
184 
185  __asm__ volatile (
 /* block[0] += 32 (lhu / addiu / sh), interleaved with: reserve 0x20
  * bytes below $29, ftmp8 = shift count 1, and the loads of bytes 0..7
  * of rows 1, 2, 3, 5, 6 and 7. */
186  "lhu %[tmp0], 0x00(%[block]) \n\t"
187  PTR_ADDI "$29, $29, -0x20 \n\t"
188  PTR_ADDIU "%[tmp0], %[tmp0], 0x20 \n\t"
189  MMI_LDC1(%[ftmp1], %[block], 0x10)
190  "sh %[tmp0], 0x00(%[block]) \n\t"
191  MMI_LDC1(%[ftmp2], %[block], 0x20)
192  "dli %[tmp0], 0x01 \n\t"
193  MMI_LDC1(%[ftmp3], %[block], 0x30)
194  "mtc1 %[tmp0], %[ftmp8] \n\t"
195  MMI_LDC1(%[ftmp5], %[block], 0x50)
196  MMI_LDC1(%[ftmp6], %[block], 0x60)
197  MMI_LDC1(%[ftmp7], %[block], 0x70)
 /* First pass, first half of each row: terms built from the rows at
  * offsets 0x10, 0x30, 0x50 and 0x70. */
198  "mov.d %[ftmp0], %[ftmp1] \n\t"
199  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
200  "psrah %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
201  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
202  "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
203  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
204  "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
205  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
206  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
207  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
208  "psubh %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
209  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
210  "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
211  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
212  "psrah %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
213  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
 /* ftmp9 = shift count 2. */
214  "dli %[tmp0], 0x02 \n\t"
215  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
216  "mtc1 %[tmp0], %[ftmp9] \n\t"
217  "mov.d %[ftmp7], %[ftmp1] \n\t"
218  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
219  "psrah %[ftmp3], %[ftmp4], %[ftmp9] \n\t"
220  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
221  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
222  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
223  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
224  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
225  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
 /* Terms built from the rows at offsets 0x20 and 0x60, then 0x00 and
  * 0x40, followed by the combining adds and subtracts. */
226  "mov.d %[ftmp5], %[ftmp6] \n\t"
227  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
228  "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
229  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
230  "psubh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
231  MMI_LDC1(%[ftmp2], %[block], 0x00)
232  MMI_LDC1(%[ftmp5], %[block], 0x40)
233  "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
234  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
235  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
236  "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
237  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
238  "paddh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
239  "psubh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
240  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
241  "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
242  "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
243  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
244  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
245  "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
246  "paddh %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
247  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
248  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
249  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
250  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
251  "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
252  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
 /* ftmp6 is parked in the first 8 bytes of block while ftmp0..ftmp7 are
  * all live; it is reloaded into ftmp0 after the first transpose. */
253  MMI_SDC1(%[ftmp6], %[block], 0x00)
254  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
 /* Transpose two 4x4 halfword tiles; results go to the scratch area
  * ($29 + 0x00 / 0x08 / 0x10 / 0x18) and to tmp1..tmp4. */
255  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
256  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
257  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp1] \n\t"
258  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
259  "punpckhwd %[ftmp1], %[ftmp7], %[ftmp3] \n\t"
260  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
261  "punpckhwd %[ftmp3], %[ftmp6], %[ftmp0] \n\t"
262  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
263  MMI_LDC1(%[ftmp0], %[block], 0x00)
264  MMI_SDC1(%[ftmp7], $29, 0x00)
265  MMI_SDC1(%[ftmp1], $29, 0x10)
266  "dmfc1 %[tmp1], %[ftmp6] \n\t"
267  "dmfc1 %[tmp3], %[ftmp3] \n\t"
268  "punpckhhw %[ftmp3], %[ftmp5], %[ftmp2] \n\t"
269  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
270  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp0] \n\t"
271  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
272  "punpckhwd %[ftmp0], %[ftmp5], %[ftmp4] \n\t"
273  "punpcklwd %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
274  "punpckhwd %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
275  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
276  MMI_SDC1(%[ftmp5], $29, 0x08)
277  MMI_SDC1(%[ftmp0], $29, 0x18)
278  "dmfc1 %[tmp2], %[ftmp3] \n\t"
279  "dmfc1 %[tmp4], %[ftmp4] \n\t"
 /* First pass again, now on bytes 8..15 of each row (offsets 0x18 ..
  * 0x78, then 0x08 and 0x48); same arithmetic with different register
  * assignments. */
280  MMI_LDC1(%[ftmp1], %[block], 0x18)
281  MMI_LDC1(%[ftmp6], %[block], 0x28)
282  MMI_LDC1(%[ftmp2], %[block], 0x38)
283  MMI_LDC1(%[ftmp0], %[block], 0x58)
284  MMI_LDC1(%[ftmp3], %[block], 0x68)
285  MMI_LDC1(%[ftmp4], %[block], 0x78)
286  "mov.d %[ftmp7], %[ftmp1] \n\t"
287  "psrah %[ftmp5], %[ftmp0], %[ftmp8] \n\t"
288  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
289  "paddh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
290  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
291  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
292  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
293  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
294  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
295  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
296  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
297  "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
298  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
299  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
300  "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
301  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
302  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
303  "mov.d %[ftmp4], %[ftmp1] \n\t"
304  "psrah %[ftmp2], %[ftmp5], %[ftmp9] \n\t"
305  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
306  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
307  "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
308  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
309  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
310  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
311  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
312  "mov.d %[ftmp0], %[ftmp3] \n\t"
313  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
314  "psrah %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
315  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
316  "psubh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
317  MMI_LDC1(%[ftmp6], %[block], 0x08)
318  MMI_LDC1(%[ftmp0], %[block], 0x48)
319  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
320  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
321  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
322  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
323  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
324  "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
325  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
326  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
327  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
328  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
329  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
330  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
331  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
332  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
333  "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
334  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
335  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
336  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
337  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
338  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* ftmp3 is parked at block + 8 and reloaded into ftmp7 after the next
  * transpose. */
339  MMI_SDC1(%[ftmp3], %[block], 0x08)
340  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* Transpose the two tiles of this half; results are kept in tmp5, tmp6
  * and ftmp10..ftmp15. */
341  "punpckhhw %[ftmp3], %[ftmp4], %[ftmp7] \n\t"
342  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
343  "punpckhhw %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
344  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
345  "punpckhwd %[ftmp1], %[ftmp4], %[ftmp2] \n\t"
346  "punpcklwd %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
347  "punpckhwd %[ftmp2], %[ftmp3], %[ftmp7] \n\t"
348  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
349  MMI_LDC1(%[ftmp7], %[block], 0x08)
350  "dmfc1 %[tmp5], %[ftmp4] \n\t"
351  "mov.d %[ftmp10], %[ftmp1] \n\t"
352  "mov.d %[ftmp12], %[ftmp3] \n\t"
353  "mov.d %[ftmp14], %[ftmp2] \n\t"
354  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp6] \n\t"
355  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
356  "punpckhhw %[ftmp6], %[ftmp5], %[ftmp7] \n\t"
357  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
358  "punpckhwd %[ftmp7], %[ftmp0], %[ftmp5] \n\t"
359  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
360  "punpckhwd %[ftmp5], %[ftmp2], %[ftmp6] \n\t"
361  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
362  "dmfc1 %[tmp6], %[ftmp0] \n\t"
363  "mov.d %[ftmp11], %[ftmp7] \n\t"
364  "mov.d %[ftmp13], %[ftmp2] \n\t"
365  "mov.d %[ftmp15], %[ftmp5] \n\t"
 /* addr0 = dst + 4, captured before dst is advanced below. */
366  PTR_ADDIU "%[addr0], %[dst], 0x04 \n\t"
 /* Second pass, first run: inputs come from the scratch area
  * ($29 + 0x00 / 0x10), tmp1, tmp3, tmp5 and ftmp10 / ftmp12 / ftmp14. */
367  "mov.d %[ftmp7], %[ftmp10] \n\t"
368  "dmtc1 %[tmp3], %[ftmp6] \n\t"
369  MMI_LDC1(%[ftmp1], $29, 0x10)
370  "dmtc1 %[tmp1], %[ftmp3] \n\t"
371  "mov.d %[ftmp4], %[ftmp1] \n\t"
372  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
373  "psrah %[ftmp0], %[ftmp7], %[ftmp8] \n\t"
374  "paddh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
375  "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
376  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
377  "paddh %[ftmp0], %[ftmp0], %[ftmp14] \n\t"
378  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
379  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
380  "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
381  "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
382  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
383  "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t"
384  "psubh %[ftmp7], %[ftmp7], %[ftmp14] \n\t"
385  "psrah %[ftmp5], %[ftmp14], %[ftmp8] \n\t"
386  "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
387  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
388  "mov.d %[ftmp5], %[ftmp1] \n\t"
389  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
390  "psrah %[ftmp6], %[ftmp0], %[ftmp9] \n\t"
391  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
392  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
393  "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
394  "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
395  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
396  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
397  "mov.d %[ftmp7], %[ftmp12] \n\t"
398  "psrah %[ftmp2], %[ftmp12], %[ftmp8] \n\t"
399  "psrah %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
400  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
401  "psubh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
402  MMI_LDC1(%[ftmp3], $29, 0x00)
403  "dmtc1 %[tmp5], %[ftmp7] \n\t"
404  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
405  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
406  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
407  "psubh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
408  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
409  "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
410  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
411  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
412  "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
413  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
414  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
415  "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
416  "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
417  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
418  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
419  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
420  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
421  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
422  "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
423  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
 /* Three of the eight result rows are parked ($29 + 0x00, $29 + 0x10,
  * tmp1) to free registers for the pixel loads. */
424  MMI_SDC1(%[ftmp3], $29, 0x00)
425  "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
426  MMI_SDC1(%[ftmp0], $29, 0x10)
427  "dmfc1 %[tmp1], %[ftmp2] \n\t"
 /* ftmp2 = 0; clear all 128 bytes of block with sixteen 8-byte stores.
  * Every first-pass input has been consumed by this point. */
428  "xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
429  MMI_SDC1(%[ftmp2], %[block], 0x00)
430  MMI_SDC1(%[ftmp2], %[block], 0x08)
431  MMI_SDC1(%[ftmp2], %[block], 0x10)
432  MMI_SDC1(%[ftmp2], %[block], 0x18)
433  MMI_SDC1(%[ftmp2], %[block], 0x20)
434  MMI_SDC1(%[ftmp2], %[block], 0x28)
435  MMI_SDC1(%[ftmp2], %[block], 0x30)
436  MMI_SDC1(%[ftmp2], %[block], 0x38)
437  MMI_SDC1(%[ftmp2], %[block], 0x40)
438  MMI_SDC1(%[ftmp2], %[block], 0x48)
439  MMI_SDC1(%[ftmp2], %[block], 0x50)
440  MMI_SDC1(%[ftmp2], %[block], 0x58)
441  MMI_SDC1(%[ftmp2], %[block], 0x60)
442  MMI_SDC1(%[ftmp2], %[block], 0x68)
443  MMI_SDC1(%[ftmp2], %[block], 0x70)
444  MMI_SDC1(%[ftmp2], %[block], 0x78)
 /* ftmp10 is reused as shift count 6 (its saved value was copied to
  * ftmp7 at the start of this run). */
445  "dli %[tmp3], 0x06 \n\t"
446  "mtc1 %[tmp3], %[ftmp10] \n\t"
 /* dst rows 0 and 1: result >> 6, add to widened pixels, saturate to
  * bytes, store. */
447  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
448  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
449  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
450  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
451  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
452  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
453  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
454  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
455  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
456  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
457  MMI_SWC1(%[ftmp3], %[dst], 0x00)
458  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
 /* dst += 2 * stride (dst is a read-write operand for this reason);
  * rows 2 and 3. */
459  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
460  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
461  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
462  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
463  "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
464  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
465  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
466  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
467  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
468  "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
469  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
470  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
471  MMI_SWC1(%[ftmp3], %[dst], 0x00)
472  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
 /* Fetch the parked rows back; dst += 2 * stride; rows 4 and 5. */
473  MMI_LDC1(%[ftmp5], $29, 0x00)
474  MMI_LDC1(%[ftmp4], $29, 0x10)
475  "dmtc1 %[tmp1], %[ftmp6] \n\t"
476  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
477  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
478  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
479  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
480  "psrah %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
481  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
482  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
483  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
484  "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
485  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
486  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
487  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
488  MMI_SWC1(%[ftmp3], %[dst], 0x00)
489  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
 /* dst += 2 * stride; rows 6 and 7. */
490  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
491  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
492  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
493  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
494  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
495  "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
496  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
497  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
498  "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
499  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
500  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
501  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
502  MMI_SWC1(%[ftmp3], %[dst], 0x00)
503  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
 /* Second pass, second run: inputs come from the scratch area
  * ($29 + 0x08 / 0x18), tmp2, tmp4, tmp6 and ftmp11 / ftmp13 / ftmp15. */
504  "dmtc1 %[tmp4], %[ftmp1] \n\t"
505  "dmtc1 %[tmp2], %[ftmp6] \n\t"
506  MMI_LDC1(%[ftmp4], $29, 0x18)
507  "mov.d %[ftmp5], %[ftmp4] \n\t"
508  "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
509  "psrah %[ftmp7], %[ftmp11], %[ftmp8] \n\t"
510  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
511  "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
512  "paddh %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
513  "paddh %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
514  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
515  "paddh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
516  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
517  "psubh %[ftmp3], %[ftmp11], %[ftmp1] \n\t"
518  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
519  "paddh %[ftmp5], %[ftmp5], %[ftmp15] \n\t"
520  "psubh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
521  "psrah %[ftmp2], %[ftmp15], %[ftmp8] \n\t"
522  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
523  "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
524  "mov.d %[ftmp2], %[ftmp4] \n\t"
525  "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
526  "psrah %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
527  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
528  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
529  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
530  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
531  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
532  "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
533  "mov.d %[ftmp3], %[ftmp13] \n\t"
534  "psrah %[ftmp0], %[ftmp13], %[ftmp8] \n\t"
535  "psrah %[ftmp7], %[ftmp6], %[ftmp8] \n\t"
536  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
537  "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
538  MMI_LDC1(%[ftmp6], $29, 0x08)
539  "dmtc1 %[tmp6], %[ftmp3] \n\t"
540  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
541  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
542  "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
543  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
544  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
545  "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
546  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
547  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
548  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
549  "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
550  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
551  "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
552  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
553  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
554  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
555  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
556  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
557  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
558  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
559  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
 /* Park three result rows ($29 + 0x08, $29 + 0x18, tmp2); ftmp0 = 0. */
560  MMI_SDC1(%[ftmp6], $29, 0x08)
561  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
562  MMI_SDC1(%[ftmp7], $29, 0x18)
563  "dmfc1 %[tmp2], %[ftmp0] \n\t"
564  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* Rows 0 and 1 at addr0 (= original dst + 4). */
565  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
566  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
567  "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
568  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
569  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
570  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
571  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
572  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
573  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
574  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
575  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
576  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
 /* addr0 += 2 * stride; rows 2 and 3. */
577  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
578  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
579  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
580  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
581  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
582  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
583  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
584  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
585  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
586  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
587  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
588  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
589  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
590  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
 /* Fetch the parked rows back; addr0 += 2 * stride; rows 4 and 5. */
591  MMI_LDC1(%[ftmp2], $29, 0x08)
592  MMI_LDC1(%[ftmp5], $29, 0x18)
593  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
594  "dmtc1 %[tmp2], %[ftmp1] \n\t"
595  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
596  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
597  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
598  "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
599  "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
600  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
601  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
602  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
603  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
604  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
605  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
606  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
607  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
 /* addr0 += 2 * stride; rows 6 and 7. */
608  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
609  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
610  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
611  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
612  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
613  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
614  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
615  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
616  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
617  "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
618  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
619  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
620  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
621  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
 /* Release the 0x20 bytes of scratch taken at the top. */
622  PTR_ADDIU "$29, $29, 0x20 \n\t"
623 
624  /* memset(block, 0, 128) */
 /* block was already cleared by the sixteen MMI_SDC1 stores above; this
  * second clear rewrites the same 128 bytes. */
625  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
626  "gssqc1 %[ftmp0], %[ftmp0], 0x00(%[block]) \n\t"
627  "gssqc1 %[ftmp0], %[ftmp0], 0x10(%[block]) \n\t"
628  "gssqc1 %[ftmp0], %[ftmp0], 0x20(%[block]) \n\t"
629  "gssqc1 %[ftmp0], %[ftmp0], 0x30(%[block]) \n\t"
630  "gssqc1 %[ftmp0], %[ftmp0], 0x40(%[block]) \n\t"
631  "gssqc1 %[ftmp0], %[ftmp0], 0x50(%[block]) \n\t"
632  "gssqc1 %[ftmp0], %[ftmp0], 0x60(%[block]) \n\t"
633  "gssqc1 %[ftmp0], %[ftmp0], 0x70(%[block]) \n\t"
634  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
635  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
636  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
637  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
638  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
639  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
640  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
641  [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
642  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
643  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
644  [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]),
645  [tmp6]"=&r"(tmp[6]),
646  RESTRICT_ASM_LOW32
647  RESTRICT_ASM_ADDRT
 /* dst is "+&r": the asm both reads and advances it, and the compiler
  * must not assume the register still holds the incoming value. */
648  [addr0]"=&r"(addr[0]), [dst]"+&r"(dst)
649  : [block]"r"(block),
650  [stride]"r"((mips_reg)stride)
651  : "$29","memory"
652  );
653 
654 }
655 
/*
 * NOTE(review): the function header that belongs to this body is missing
 * from this listing (listing line 656 is absent). The body reads dst, block
 * and stride, so those are presumably its parameters; the three-argument
 * call to ff_h264_idct_dc_add_8_mmi() in ff_h264_idct_add16_8_mmi() suggests
 * this is that function -- confirm against the original file.
 *
 * What the body does: it derives a single value dc = (block[0] + 32) >> 6,
 * clears block[0], and adds dc to a 4x4 block of pixels. Each of the four
 * rows (dst + n * stride) is loaded as four bytes, widened to 16 bits,
 * added to dc with signed saturation, packed back to bytes with unsigned
 * saturation and stored.
 */
657 {
658  int dc = (block[0] + 32) >> 6;
659  double ftmp[6];
660  DECLARE_VAR_LOW32;
661 
 /* Only the first coefficient is cleared here; the rest of block is not
  * touched by this body. */
662  block[0] = 0;
663 
664  __asm__ volatile (
 /* ftmp5 = dc in the low lane; ftmp0 = 0. pshufh with an all-zero
  * selector then copies the low halfword into all four lanes. */
665  "mtc1 %[dc], %[ftmp5] \n\t"
666  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
667  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
 /* Four pixels from each of the four rows. */
668  MMI_ULWC1(%[ftmp1], %[dst0], 0x00)
669  MMI_ULWC1(%[ftmp2], %[dst1], 0x00)
670  MMI_ULWC1(%[ftmp3], %[dst2], 0x00)
671  MMI_ULWC1(%[ftmp4], %[dst3], 0x00)
 /* Interleave with zero: 8-bit -> 16-bit lanes. */
672  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
673  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
674  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
675  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
 /* pixel + dc, signed saturating, per 16-bit lane. */
676  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
677  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
678  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
679  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
 /* Pack to bytes with unsigned saturation; the low four bytes are
  * stored. */
680  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
681  "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
682  "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
683  "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
684  MMI_SWC1(%[ftmp1], %[dst0], 0x00)
685  MMI_SWC1(%[ftmp2], %[dst1], 0x00)
686  MMI_SWC1(%[ftmp3], %[dst2], 0x00)
687  MMI_SWC1(%[ftmp4], %[dst3], 0x00)
688  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
689  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
690  [ftmp4]"=&f"(ftmp[4]),
691  RESTRICT_ASM_LOW32
692  [ftmp5]"=&f"(ftmp[5])
 /* Row addresses are computed in C, so no pointer operand is modified
  * inside the asm. */
693  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
694  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
695  [dc]"r"(dc)
696  : "memory"
697  );
698 }
699 
/*
 * NOTE(review): the function header that belongs to this body is missing
 * from this listing (listing line 700 is absent). The body reads dst, block
 * and stride, so those are presumably its parameters; it looks like the
 * 8x8 counterpart of the 4x4 DC-only routine that precedes it -- confirm
 * the name and signature against the original file.
 *
 * What the body does: it derives a single value dc = (block[0] + 32) >> 6,
 * clears block[0], and adds dc to an 8x8 block of pixels. Rows are handled
 * in two groups of four (dst0..dst3, then dst4..dst7). Each row is loaded
 * as eight bytes, split into two halves of four 16-bit lanes, added to dc
 * with signed saturation, packed back to eight bytes with unsigned
 * saturation and stored.
 */
701 {
702  int dc = (block[0] + 32) >> 6;
703  double ftmp[10];
704  DECLARE_VAR_ALL64;
705 
 /* Only the first coefficient is cleared here; the rest of block is not
  * touched by this body. */
706  block[0] = 0;
707 
708  __asm__ volatile (
 /* ftmp5 = dc in the low lane; ftmp0 = 0. pshufh with an all-zero
  * selector then copies the low halfword into all four lanes. */
709  "mtc1 %[dc], %[ftmp5] \n\t"
710  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
711  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
 /* Rows 0..3, eight pixels each. NOTE(review): MMI_LDC1 / MMI_SDC1 are
  * defined elsewhere; if they are plain 64-bit accesses, dst rows
  * presumably need 8-byte alignment -- confirm. */
712  MMI_LDC1(%[ftmp1], %[dst0], 0x00)
713  MMI_LDC1(%[ftmp2], %[dst1], 0x00)
714  MMI_LDC1(%[ftmp3], %[dst2], 0x00)
715  MMI_LDC1(%[ftmp4], %[dst3], 0x00)
 /* Widen: high four bytes to ftmp6..ftmp9, low four bytes in place. */
716  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
717  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
718  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
719  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
720  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
721  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
722  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
723  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
 /* pixel + dc, signed saturating, for both halves of every row. */
724  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
725  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
726  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
727  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
728  "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
729  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
730  "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
731  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
 /* Recombine low and high halves into eight saturated bytes per row. */
732  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
733  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
734  "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
735  "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
736  MMI_SDC1(%[ftmp1], %[dst0], 0x00)
737  MMI_SDC1(%[ftmp2], %[dst1], 0x00)
738  MMI_SDC1(%[ftmp3], %[dst2], 0x00)
739  MMI_SDC1(%[ftmp4], %[dst3], 0x00)
740 
 /* Rows 4..7: identical sequence, reusing the same registers. */
741  MMI_LDC1(%[ftmp1], %[dst4], 0x00)
742  MMI_LDC1(%[ftmp2], %[dst5], 0x00)
743  MMI_LDC1(%[ftmp3], %[dst6], 0x00)
744  MMI_LDC1(%[ftmp4], %[dst7], 0x00)
745  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
746  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
747  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
748  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
749  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
750  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
751  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
752  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
753  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
754  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
755  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
756  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
757  "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
758  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
759  "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
760  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
761  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
762  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
763  "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
764  "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
765  MMI_SDC1(%[ftmp1], %[dst4], 0x00)
766  MMI_SDC1(%[ftmp2], %[dst5], 0x00)
767  MMI_SDC1(%[ftmp3], %[dst6], 0x00)
768  MMI_SDC1(%[ftmp4], %[dst7], 0x00)
769  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
770  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
771  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
772  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
773  [ftmp8]"=&f"(ftmp[8]),
774  RESTRICT_ASM_ALL64
775  [ftmp9]"=&f"(ftmp[9])
 /* All eight row addresses are computed in C, so no pointer operand is
  * modified inside the asm. */
776  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
777  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
778  [dst4]"r"(dst+4*stride), [dst5]"r"(dst+5*stride),
779  [dst6]"r"(dst+6*stride), [dst7]"r"(dst+7*stride),
780  [dc]"r"(dc)
781  : "memory"
782  );
783 }
784 
785 void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
786  int16_t *block, int stride, const uint8_t nnzc[15*8])
787 {
788  int i;
789  for(i=0; i<16; i++){
790  int nnz = nnzc[ scan8[i] ];
791  if(nnz){
792  if(nnz==1 && ((int16_t*)block)[i*16])
793  ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
794  stride);
795  else
796  ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16,
797  stride);
798  }
799  }
800 }
801 
802 void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
803  int16_t *block, int stride, const uint8_t nnzc[15*8])
804 {
805  int i;
806  for(i=0; i<16; i++){
807  if(nnzc[ scan8[i] ])
808  ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16, stride);
809  else if(((int16_t*)block)[i*16])
810  ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
811  stride);
812  }
813 }
814 
815 void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
816  int16_t *block, int stride, const uint8_t nnzc[15*8])
817 {
818  int i;
819  for(i=0; i<16; i+=4){
820  int nnz = nnzc[ scan8[i] ];
821  if(nnz){
822  if(nnz==1 && ((int16_t*)block)[i*16])
823  ff_h264_idct8_dc_add_8_mmi(dst + block_offset[i],
824  block + i*16, stride);
825  else
826  ff_h264_idct8_add_8_mmi(dst + block_offset[i], block + i*16,
827  stride);
828  }
829  }
830 }
831 
832 void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
833  int16_t *block, int stride, const uint8_t nnzc[15*8])
834 {
835  int i, j;
836  for(j=1; j<3; j++){
837  for(i=j*16; i<j*16+4; i++){
838  if(nnzc[ scan8[i] ])
839  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
840  block + i*16, stride);
841  else if(((int16_t*)block)[i*16])
842  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
843  block + i*16, stride);
844  }
845  }
846 }
847 
848 void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
849  int16_t *block, int stride, const uint8_t nnzc[15*8])
850 {
851  int i, j;
852 
853  for(j=1; j<3; j++){
854  for(i=j*16; i<j*16+4; i++){
855  if(nnzc[ scan8[i] ])
856  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
857  block + i*16, stride);
858  else if(((int16_t*)block)[i*16])
859  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
860  block + i*16, stride);
861  }
862  }
863 
864  for(j=1; j<3; j++){
865  for(i=j*16+4; i<j*16+8; i++){
866  if(nnzc[ scan8[i+4] ])
867  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i+4],
868  block + i*16, stride);
869  else if(((int16_t*)block)[i*16])
870  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i+4],
871  block + i*16, stride);
872  }
873  }
874 }
875 
877  int qmul)
878 {
879  double ftmp[10];
880  uint64_t tmp[2];
881  DECLARE_VAR_ALL64;
882 
883  __asm__ volatile (
884  ".set noreorder \n\t"
885  "dli %[tmp0], 0x08 \n\t"
886  MMI_LDC1(%[ftmp3], %[input], 0x18)
887  "mtc1 %[tmp0], %[ftmp8] \n\t"
888  MMI_LDC1(%[ftmp2], %[input], 0x10)
889  "dli %[tmp0], 0x20 \n\t"
890  MMI_LDC1(%[ftmp1], %[input], 0x08)
891  "mtc1 %[tmp0], %[ftmp9] \n\t"
892  MMI_LDC1(%[ftmp0], %[input], 0x00)
893  "mov.d %[ftmp4], %[ftmp3] \n\t"
894  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
895  "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
896  "mov.d %[ftmp4], %[ftmp1] \n\t"
897  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
898  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
899  "mov.d %[ftmp4], %[ftmp3] \n\t"
900  "paddh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
901  "psubh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
902  "mov.d %[ftmp4], %[ftmp2] \n\t"
903  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
904  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
905  "mov.d %[ftmp4], %[ftmp3] \n\t"
906  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
907  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
908  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
909  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
910  "punpckhwd %[ftmp2], %[ftmp3], %[ftmp0] \n\t"
911  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
912  "mov.d %[ftmp0], %[ftmp4] \n\t"
913  "punpcklwd %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
914  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
915  "mov.d %[ftmp1], %[ftmp0] \n\t"
916  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
917  "psubh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
918  "mov.d %[ftmp1], %[ftmp2] \n\t"
919  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
920  "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
921  "mov.d %[ftmp1], %[ftmp0] \n\t"
922  "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
923  "psubh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
924  "mov.d %[ftmp1], %[ftmp4] \n\t"
925  "daddi %[tmp0], %[qmul], -0x7fff \n\t"
926  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
927  "bgtz %[tmp0], 1f \n\t"
928  "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
929  "ori %[tmp0], $0, 0x80 \n\t"
930  "dsll %[tmp0], %[tmp0], 0x10 \n\t"
931  "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
932  "daddu %[qmul], %[qmul], %[tmp0] \n\t"
933  "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
934  "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
935  "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
936  "mtc1 %[qmul], %[ftmp7] \n\t"
937  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
938  "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
939  "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
940  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
941  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
942  "psraw %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
943  "psraw %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
944  "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
945  "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
946  "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
947  "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
948  "dmfc1 %[tmp1], %[ftmp0] \n\t"
949  "dsrl %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
950  "mfc1 %[input], %[ftmp0] \n\t"
951  "sh %[tmp1], 0x00(%[output]) \n\t"
952  "sh %[input], 0x80(%[output]) \n\t"
953  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
954  PTR_SRL "%[input], %[input], 0x10 \n\t"
955  "sh %[tmp1], 0x20(%[output]) \n\t"
956  "sh %[input], 0xa0(%[output]) \n\t"
957  "dmfc1 %[tmp1], %[ftmp2] \n\t"
958  "dsrl %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
959  "mfc1 %[input], %[ftmp2] \n\t"
960  "sh %[tmp1], 0x40(%[output]) \n\t"
961  "sh %[input], 0xc0(%[output]) \n\t"
962  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
963  PTR_SRL "%[input], %[input], 0x10 \n\t"
964  "sh %[tmp1], 0x60(%[output]) \n\t"
965  "sh %[input], 0xe0(%[output]) \n\t"
966  "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
967  "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
968  "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
969  "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
970  "mtc1 %[qmul], %[ftmp7] \n\t"
971  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
972  "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
973  "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
974  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
975  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
976  "psraw %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
977  "psraw %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
978  "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
979  "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
980  "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
981  "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
982  "dmfc1 %[tmp1], %[ftmp3] \n\t"
983  "dsrl %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
984  "mfc1 %[input], %[ftmp3] \n\t"
985  "sh %[tmp1], 0x100(%[output]) \n\t"
986  "sh %[input], 0x180(%[output]) \n\t"
987  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
988  PTR_SRL "%[input], %[input], 0x10 \n\t"
989  "sh %[tmp1], 0x120(%[output]) \n\t"
990  "sh %[input], 0x1a0(%[output]) \n\t"
991  "dmfc1 %[tmp1], %[ftmp4] \n\t"
992  "dsrl %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
993  "mfc1 %[input], %[ftmp4] \n\t"
994  "sh %[tmp1], 0x140(%[output]) \n\t"
995  "sh %[input], 0x1c0(%[output]) \n\t"
996  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
997  PTR_SRL "%[input], %[input], 0x10 \n\t"
998  "sh %[tmp1], 0x160(%[output]) \n\t"
999  "j 2f \n\t"
1000  "sh %[input], 0x1e0(%[output]) \n\t"
1001  "1: \n\t"
1002  "ori %[tmp0], $0, 0x1f \n\t"
1003 #if HAVE_LOONGSON3
1004  "clz %[tmp1], %[qmul] \n\t"
1005 #elif HAVE_LOONGSON2
1006 #endif
1007  "ori %[input], $0, 0x07 \n\t"
1008  "dsubu %[tmp1], %[tmp0], %[tmp1] \n\t"
1009  "ori %[tmp0], $0, 0x80 \n\t"
1010  "dsll %[tmp0], %[tmp0], 0x10 \n\t"
1011  "daddu %[qmul], %[qmul], %[tmp0] \n\t"
1012  "dsubu %[tmp0], %[tmp1], %[input] \n\t"
1013  "movn %[tmp1], %[input], %[tmp0] \n\t"
1014  PTR_ADDIU "%[input], %[input], 0x01 \n\t"
1015  "andi %[tmp0], %[tmp1], 0xff \n\t"
1016  "srlv %[qmul], %[qmul], %[tmp0] \n\t"
1017  PTR_SUBU "%[input], %[input], %[tmp1] \n\t"
1018  "mtc1 %[input], %[ftmp6] \n\t"
1019  "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
1020  "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
1021  "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
1022  "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
1023  "mtc1 %[qmul], %[ftmp7] \n\t"
1024  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1025  "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1026  "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1027  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1028  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1029  "psraw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1030  "psraw %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1031  "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1032  "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1033  "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1034  "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1035  "dmfc1 %[tmp1], %[ftmp0] \n\t"
1036  "dsrl %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1037  "sh %[tmp1], 0x00(%[output]) \n\t"
1038  "mfc1 %[input], %[ftmp0] \n\t"
1039  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1040  "sh %[input], 0x80(%[output]) \n\t"
1041  "sh %[tmp1], 0x20(%[output]) \n\t"
1042  PTR_SRL "%[input], %[input], 0x10 \n\t"
1043  "dmfc1 %[tmp1], %[ftmp2] \n\t"
1044  "sh %[input], 0xa0(%[output]) \n\t"
1045  "dsrl %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
1046  "sh %[tmp1], 0x40(%[output]) \n\t"
1047  "mfc1 %[input], %[ftmp2] \n\t"
1048  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1049  "sh %[input], 0xc0(%[output]) \n\t"
1050  "sh %[tmp1], 0x60(%[output]) \n\t"
1051  PTR_SRL "%[input], %[input], 0x10 \n\t"
1052  "sh %[input], 0xe0(%[output]) \n\t"
1053  "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
1054  "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
1055  "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
1056  "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
1057  "mtc1 %[qmul], %[ftmp7] \n\t"
1058  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1059  "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1060  "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1061  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1062  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1063  "psraw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
1064  "psraw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1065  "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1066  "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1067  "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1068  "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1069  "dmfc1 %[tmp1], %[ftmp3] \n\t"
1070  "dsrl %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1071  "mfc1 %[input], %[ftmp3] \n\t"
1072  "sh %[tmp1], 0x100(%[output]) \n\t"
1073  "sh %[input], 0x180(%[output]) \n\t"
1074  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1075  PTR_SRL "%[input], %[input], 0x10 \n\t"
1076  "sh %[tmp1], 0x120(%[output]) \n\t"
1077  "sh %[input], 0x1a0(%[output]) \n\t"
1078  "dmfc1 %[tmp1], %[ftmp4] \n\t"
1079  "dsrl %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1080  "mfc1 %[input], %[ftmp4] \n\t"
1081  "sh %[tmp1], 0x140(%[output]) \n\t"
1082  "sh %[input], 0x1c0(%[output]) \n\t"
1083  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1084  PTR_SRL "%[input], %[input], 0x10 \n\t"
1085  "sh %[tmp1], 0x160(%[output]) \n\t"
1086  "sh %[input], 0x1e0(%[output]) \n\t"
1087  "2: \n\t"
1088  ".set reorder \n\t"
1089  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1090  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1091  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1092  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1093  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1094  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
1095  RESTRICT_ASM_ALL64
1096  [output]"+&r"(output), [input]"+&r"(input),
1097  [qmul]"+&r"(qmul)
1098  : [ff_pw_1]"f"(ff_pw_1)
1099  : "memory"
1100  );
1101 }
1102 
/*
 * In-place transform and scaling of eight values stored at
 * block[0], block[16], ..., block[112].
 *
 * block - coefficient storage; only indices 0,16,32,...,112 are read and
 *         rewritten
 * qmul  - multiplier; each result is (t * qmul + 128) >> 8
 *
 * The product is formed in unsigned arithmetic and converted back to int
 * before the arithmetic shift, so a large t * qmul wraps instead of
 * triggering signed-overflow undefined behaviour.
 */
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    /* Output slot for t[0]..t[7], in the order the results are stored. */
    static const uint8_t out_index[8] = { 0, 32, 64, 96, 16, 48, 80, 112 };
    int temp[8];
    int t[8];
    int i;

    /* Sum/difference of each horizontally adjacent pair. */
    temp[0] = block[0] + block[16];
    temp[1] = block[0] - block[16];
    temp[2] = block[32] + block[48];
    temp[3] = block[32] - block[48];
    temp[4] = block[64] + block[80];
    temp[5] = block[64] - block[80];
    temp[6] = block[96] + block[112];
    temp[7] = block[96] - block[112];

    /* Combine the four pair-sums (t[0..3]) and pair-differences (t[4..7]). */
    t[0] = temp[0] + temp[4] + temp[2] + temp[6];
    t[1] = temp[0] - temp[4] + temp[2] - temp[6];
    t[2] = temp[0] - temp[4] - temp[2] + temp[6];
    t[3] = temp[0] + temp[4] - temp[2] - temp[6];
    t[4] = temp[1] + temp[5] + temp[3] + temp[7];
    t[5] = temp[1] - temp[5] + temp[3] - temp[7];
    t[6] = temp[1] - temp[5] - temp[3] + temp[7];
    t[7] = temp[1] + temp[5] - temp[3] - temp[7];

    for (i = 0; i < 8; i++)
        block[out_index[i]] =
            (int)((unsigned)t[i] * (unsigned)qmul + 128u) >> 8;
}
1135 
/*
 * In-place 2x2 sum/difference transform and scaling of the four values at
 * block[0], block[16], block[32] and block[48].
 *
 * block - coefficient storage; only indices 0, 16, 32 and 48 are touched
 * qmul  - multiplier; each result is (value * qmul) >> 7
 *
 * The product is formed in unsigned arithmetic and converted back to int
 * before the arithmetic shift, so a large product wraps instead of
 * triggering signed-overflow undefined behaviour.
 */
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    const unsigned scale = (unsigned)qmul;
    /* All four inputs are consumed before any output is written. */
    const int top_diff = block[0]  - block[16];
    const int top_sum  = block[0]  + block[16];
    const int bot_diff = block[32] - block[48];
    const int bot_sum  = block[32] + block[48];

    block[0]  = (int)((unsigned)(top_sum  + bot_sum)  * scale) >> 7;
    block[16] = (int)((unsigned)(top_diff + bot_diff) * scale) >> 7;
    block[32] = (int)((unsigned)(top_sum  - bot_sum)  * scale) >> 7;
    block[48] = (int)((unsigned)(top_diff - bot_diff) * scale) >> 7;
}
1149 
1151  int log2_denom, int weight, int offset)
1152 {
1153  int y;
1154  double ftmp[8];
1155  DECLARE_VAR_ALL64;
1156 
1157  offset <<= log2_denom;
1158 
1159  if (log2_denom)
1160  offset += 1 << (log2_denom - 1);
1161 
1162  for (y=0; y<height; y++, block+=stride) {
1163  __asm__ volatile (
1164  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1165  MMI_LDC1(%[ftmp1], %[block0], 0x00)
1166  MMI_LDC1(%[ftmp2], %[block1], 0x00)
1167  "mtc1 %[weight], %[ftmp3] \n\t"
1168  "mtc1 %[offset], %[ftmp4] \n\t"
1169  "mtc1 %[log2_denom], %[ftmp5] \n\t"
1170  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1171  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1172  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
1173  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
1174  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1175  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1176  "pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1177  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1178  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1179  "pmullh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
1180  "paddsh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1181  "paddsh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1182  "paddsh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1183  "paddsh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1184  "psrah %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1185  "psrah %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1186  "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1187  "psrah %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1188  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1189  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1190  MMI_SDC1(%[ftmp1], %[block0], 0x00)
1191  MMI_SDC1(%[ftmp2], %[block1], 0x00)
1192  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1193  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1194  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1195  [ftmp6]"=&f"(ftmp[6]),
1196  RESTRICT_ASM_ALL64
1197  [ftmp7]"=&f"(ftmp[7])
1198  : [block0]"r"(block), [block1]"r"(block+8),
1199  [weight]"r"(weight), [offset]"r"(offset),
1200  [log2_denom]"r"(log2_denom)
1201  : "memory"
1202  );
1203  }
1204 }
1205 
1207  ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1208  int offset)
1209 {
1210  int y;
1211  double ftmp[9];
1212  DECLARE_VAR_ALL64;
1213 
1214  offset = ((offset + 1) | 1) << log2_denom;
1215 
1216  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1217  __asm__ volatile (
1218  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1219  MMI_LDC1(%[ftmp1], %[src0], 0x00)
1220  MMI_LDC1(%[ftmp2], %[dst0], 0x00)
1221  "mtc1 %[weights], %[ftmp3] \n\t"
1222  "mtc1 %[weightd], %[ftmp4] \n\t"
1223  "mtc1 %[offset], %[ftmp5] \n\t"
1224  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1225  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1226  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1227  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1228  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1229  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1230  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1231  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1232  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1233  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1234  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1235  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1236  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1237  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1238  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1239  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1240  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1241  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1242  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1243  MMI_SDC1(%[ftmp1], %[dst0], 0x00)
1244  MMI_LDC1(%[ftmp1], %[src1], 0x00)
1245  MMI_LDC1(%[ftmp2], %[dst1], 0x00)
1246  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1247  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1248  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1249  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1250  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1251  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1252  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1253  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1254  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1255  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1256  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1257  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1258  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1259  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1260  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1261  MMI_SDC1(%[ftmp1], %[dst1], 0x00)
1262  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1263  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1264  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1265  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1266  RESTRICT_ASM_ALL64
1267  [ftmp8]"=&f"(ftmp[8])
1268  : [dst0]"r"(dst), [dst1]"r"(dst+8),
1269  [src0]"r"(src), [src1]"r"(src+8),
1270  [weights]"r"(weights), [weightd]"r"(weightd),
1271  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1272  : "memory"
1273  );
1274  }
1275 }
1276 
1278  int log2_denom, int weight, int offset)
1279 {
1280  int y;
1281  double ftmp[6];
1282  DECLARE_VAR_ALL64;
1283 
1284  offset <<= log2_denom;
1285 
1286  if (log2_denom)
1287  offset += 1 << (log2_denom - 1);
1288 
1289  for (y=0; y<height; y++, block+=stride) {
1290  __asm__ volatile (
1291  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1292  MMI_LDC1(%[ftmp1], %[block], 0x00)
1293  "mtc1 %[weight], %[ftmp2] \n\t"
1294  "mtc1 %[offset], %[ftmp3] \n\t"
1295  "mtc1 %[log2_denom], %[ftmp5] \n\t"
1296  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1297  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1298  "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
1299  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1300  "pmullh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
1301  "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1302  "paddsh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1303  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1304  "psrah %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1305  "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1306  "packushb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1307  MMI_SDC1(%[ftmp1], %[block], 0x00)
1308  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1309  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1310  [ftmp4]"=&f"(ftmp[4]),
1311  RESTRICT_ASM_ALL64
1312  [ftmp5]"=&f"(ftmp[5])
1313  : [block]"r"(block), [weight]"r"(weight),
1314  [offset]"r"(offset), [log2_denom]"r"(log2_denom)
1315  : "memory"
1316  );
1317  }
1318 }
1319 
1321  ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1322  int offset)
1323 {
1324  int y;
1325  double ftmp[9];
1326  DECLARE_VAR_ALL64;
1327 
1328  offset = ((offset + 1) | 1) << log2_denom;
1329 
1330  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1331  __asm__ volatile (
1332  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1333  MMI_LDC1(%[ftmp1], %[src], 0x00)
1334  MMI_LDC1(%[ftmp2], %[dst], 0x00)
1335  "mtc1 %[weights], %[ftmp3] \n\t"
1336  "mtc1 %[weightd], %[ftmp4] \n\t"
1337  "mtc1 %[offset], %[ftmp5] \n\t"
1338  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1339  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1340  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1341  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1342  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1343  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1344  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1345  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1346  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1347  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1348  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1349  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1350  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1351  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1352  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1353  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1354  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1355  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1356  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1357  MMI_SDC1(%[ftmp1], %[dst], 0x00)
1358  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1359  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1360  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1361  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1362  RESTRICT_ASM_ALL64
1363  [ftmp8]"=&f"(ftmp[8])
1364  : [dst]"r"(dst), [src]"r"(src),
1365  [weights]"r"(weights), [weightd]"r"(weightd),
1366  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1367  : "memory"
1368  );
1369  }
1370 }
1371 
1373  int log2_denom, int weight, int offset)
1374 {
1375  int y;
1376  double ftmp[5];
1377  DECLARE_VAR_LOW32;
1378 
1379  offset <<= log2_denom;
1380 
1381  if (log2_denom)
1382  offset += 1 << (log2_denom - 1);
1383 
1384  for (y=0; y<height; y++, block+=stride) {
1385  __asm__ volatile (
1386  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1387  MMI_ULWC1(%[ftmp1], %[block], 0x00)
1388  "mtc1 %[weight], %[ftmp2] \n\t"
1389  "mtc1 %[offset], %[ftmp3] \n\t"
1390  "mtc1 %[log2_denom], %[ftmp4] \n\t"
1391  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1392  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1393  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1394  "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1395  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1396  "psrah %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1397  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1398  MMI_SWC1(%[ftmp1], %[block], 0x00)
1399  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1400  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1401  RESTRICT_ASM_LOW32
1402  [ftmp4]"=&f"(ftmp[4])
1403  : [block]"r"(block), [weight]"r"(weight),
1404  [offset]"r"(offset), [log2_denom]"r"(log2_denom)
1405  : "memory"
1406  );
1407  }
1408 }
1409 
1411  ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1412  int offset)
1413 {
1414  int y;
1415  double ftmp[7];
1416  DECLARE_VAR_LOW32;
1417 
1418  offset = ((offset + 1) | 1) << log2_denom;
1419 
1420  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1421  __asm__ volatile (
1422  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1423  MMI_ULWC1(%[ftmp1], %[src], 0x00)
1424  MMI_ULWC1(%[ftmp2], %[dst], 0x00)
1425  "mtc1 %[weight], %[ftmp3] \n\t"
1426  "mtc1 %[weightd], %[ftmp4] \n\t"
1427  "mtc1 %[offset], %[ftmp5] \n\t"
1428  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1429  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1430  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1431  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1432  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1433  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1434  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1435  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1436  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1437  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1438  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1439  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1440  MMI_SWC1(%[ftmp1], %[dst], 0x00)
1441  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1442  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1443  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1444  RESTRICT_ASM_LOW32
1445  [ftmp6]"=&f"(ftmp[6])
1446  : [dst]"r"(dst), [src]"r"(src),
1447  [weight]"r"(weights), [weightd]"r"(weightd),
1448  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1449  : "memory"
1450  );
1451  }
1452 }
1453 
1454 void ff_deblock_v8_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
1455  int8_t *tc0)
1456 {
1457  double ftmp[12];
1458  mips_reg addr[2];
1459  DECLARE_VAR_LOW32;
1460  DECLARE_VAR_ALL64;
1461  DECLARE_VAR_ADDRT;
1462 
1463  __asm__ volatile (
1464  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1465  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1466  PTR_ADDU "%[addr1], %[stride], %[addr0] \n\t"
1467  "addi %[alpha], %[alpha], -0x01 \n\t"
1468  PTR_SUBU "%[addr1], $0, %[addr1] \n\t"
1469  "addi %[beta], %[beta], -0x01 \n\t"
1470  PTR_ADDU "%[addr1], %[addr1], %[pix] \n\t"
1471  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1472  MMI_LDXC1(%[ftmp1], %[addr1], %[stride], 0x00)
1473  MMI_LDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1474  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1475  "mtc1 %[alpha], %[ftmp5] \n\t"
1476  "mtc1 %[beta], %[ftmp6] \n\t"
1477  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1478  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1479  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1480  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1481  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1482  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1483  "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1484  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1485  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1486  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1487  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1488  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1489  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1490  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1491  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1492  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1493  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1494  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1495  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1496  "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
1497  MMI_ULWC1(%[ftmp5], %[tc0], 0x00)
1498  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1499  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp5] \n\t"
1500  "pcmpgtb %[ftmp5], %[ftmp9], %[ftmp4] \n\t"
1501  MMI_LDC1(%[ftmp4], %[addr1], 0x00)
1502  "and %[ftmp10], %[ftmp5], %[ftmp8] \n\t"
1503  "psubusb %[ftmp8], %[ftmp4], %[ftmp2] \n\t"
1504  "psubusb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1505  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1506  "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1507  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1508  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1509  "and %[ftmp5], %[ftmp10], %[ftmp9] \n\t"
1510  "psubb %[ftmp8], %[ftmp5], %[ftmp7] \n\t"
1511  "and %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1512  "pavgb %[ftmp5], %[ftmp2], %[ftmp3] \n\t"
1513  MMI_LDC1(%[ftmp11], %[addr1], 0x00)
1514  "pavgb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1515  "xor %[ftmp5], %[ftmp5], %[ftmp11] \n\t"
1516  "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1517  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1518  "psubusb %[ftmp5], %[ftmp1], %[ftmp7] \n\t"
1519  "paddusb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1520  "pmaxub %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1521  "pminub %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1522  MMI_SDXC1(%[ftmp4], %[addr1], %[stride], 0x00)
1523  MMI_LDXC1(%[ftmp5], %[pix], %[addr0], 0x00)
1524  "psubusb %[ftmp4], %[ftmp5], %[ftmp3] \n\t"
1525  "psubusb %[ftmp7], %[ftmp3], %[ftmp5] \n\t"
1526  "psubusb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1527  "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1528  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1529  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1530  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1531  "and %[ftmp6], %[ftmp9], %[ftmp7] \n\t"
1532  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1533  "pavgb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1534  MMI_LDXC1(%[ftmp11], %[pix], %[addr0], 0x00)
1535  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1536  "xor %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1537  "and %[ftmp7], %[ftmp7], %[ff_pb_1] \n\t"
1538  "psubusb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1539  "psubusb %[ftmp7], %[ftmp4], %[ftmp6] \n\t"
1540  "paddusb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1541  "pmaxub %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1542  "pminub %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1543  MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1544  "xor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1545  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1546  "and %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1547  "xor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1548  "xor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1549  "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1550  "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1551  "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1552  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1553  "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1554  "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1555  "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1556  "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1557  "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1558  "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1559  "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1560  "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1561  "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1562  MMI_SDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1563  MMI_SDC1(%[ftmp3], %[pix], 0x00)
1564  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1565  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1566  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1567  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1568  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1569  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1570  RESTRICT_ASM_LOW32
1571  RESTRICT_ASM_ALL64
1572  RESTRICT_ASM_ADDRT
1573  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
1574  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1575  [alpha]"r"((mips_reg)alpha), [beta]"r"((mips_reg)beta),
1576  [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1),
1577  [ff_pb_3]"f"(ff_pb_3), [ff_pb_A1]"f"(ff_pb_A1)
1578  : "memory"
1579  );
1580 }
1581 
1583  int beta)
1584 {
 /*
  * Intra (strong) deblocking of an 8-byte-wide edge using Loongson MMI.
  *
  * NOTE(review): the first line of this function's signature is not present
  * in this listing; the callers further down invoke it as
  * deblock_v8_luma_intra_8_mmi(pix, stride, alpha, beta).
  *
  * The asm reads eight rows of 8 bytes, from pix - 4*stride through
  * pix + 3*stride, and rewrites the six rows from pix - 3*stride through
  * pix + 2*stride.  Nothing is loaded or stored when alpha - 1 or beta - 1
  * is negative (both early exits branch to label 1 at the end of the asm).
  *
  * alpha and beta are decremented in place by the asm, which is why they are
  * declared as read-write ("+&r") operands.
  */
 /* NOTE(review): 'stack' is declared const and is never initialized in C,
  * yet the asm below stores to it (MMI_SDC1 at offsets 0x00..0x40) and reads
  * the values back; it acts as scratch spill space.  The const qualifier
  * looks unintended -- confirm. */
1585  DECLARE_ALIGNED(8, const uint64_t, stack[0x0a]);
1586  double ftmp[16];
1587  uint64_t tmp[1];
1588  mips_reg addr[3];
1589  DECLARE_VAR_ALL64;
1590  DECLARE_VAR_ADDRT;
1591 
1592  __asm__ volatile (
 /* ftmp0 = 0, ftmp9 = 1 (used as a shift count by psrlh below) */
1593  "ori %[tmp0], $0, 0x01 \n\t"
1594  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1595  "mtc1 %[tmp0], %[ftmp9] \n\t"
 /* addr0 = 4*stride, addr2 = 2*stride, addr1 (below) = 3*stride */
1596  PTR_SLL "%[addr0], %[stride], 0x02 \n\t"
1597  PTR_ADDU "%[addr2], %[stride], %[stride] \n\t"
1598  PTR_ADDIU "%[alpha], %[alpha], -0x01 \n\t"
 /* NOTE(review): PTR_SLL is applied to floating-point (MMI) operands here,
  * unlike every other PTR_* use in this block, which works on address
  * registers.  Presumably this yields ftmp11 = 1 << 1 = 2 (the second shift
  * count used by psrlh); confirm the macro expands to a suitable
  * instruction on every supported ABI. */
1599  PTR_SLL "%[ftmp11], %[ftmp9], %[ftmp9] \n\t"
 /* early exit when alpha - 1 < 0 */
1600  "bltz %[alpha], 1f \n\t"
1601  PTR_ADDU "%[addr1], %[addr2], %[stride] \n\t"
1602  PTR_ADDIU "%[beta], %[beta], -0x01 \n\t"
 /* early exit when beta - 1 < 0 */
1603  "bltz %[beta], 1f \n\t"
 /* addr0 = pix - 4*stride */
1604  PTR_SUBU "%[addr0], $0, %[addr0] \n\t"
1605  PTR_ADDU "%[addr0], %[addr0], %[pix] \n\t"
 /* ftmp3 = row pix, ftmp1 = row pix - 2*stride,
  * ftmp2 = row pix - stride, ftmp4 = row pix + stride */
1606  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1607  MMI_LDXC1(%[ftmp1], %[addr0], %[addr2], 0x00)
1608  MMI_LDXC1(%[ftmp2], %[addr0], %[addr1], 0x00)
1609  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
 /* spread alpha - 1 (ftmp5) and beta - 1 (ftmp6) across the vector lanes */
1610  "mtc1 %[alpha], %[ftmp5] \n\t"
1611  "mtc1 %[beta], %[ftmp6] \n\t"
1612  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1613  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1614  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
 /* the two saturating subtractions OR-ed together give the per-byte
  * absolute difference of the two rows on either side of the edge */
1615  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1616  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1617  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1618  "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
 /* keep the alpha - 1 vector in stack slot 0x10 for later */
1619  MMI_SDC1(%[ftmp5], %[stack], 0x10)
1620  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1621  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1622  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1623  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1624  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1625  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1626  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1627  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1628  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1629  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1630  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1631  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1632  MMI_LDC1(%[ftmp5], %[stack], 0x10)
 /* ftmp8 becomes the per-byte "filter this column" mask: bytes where all
  * three thresholded differences came out zero; saved in stack slot 0x20 */
1633  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
 /* ff_pb_1 is a memory ("m") operand here, loaded into ftmp10 */
1634  "ldc1 %[ftmp10], %[ff_pb_1] \n\t"
1635  MMI_SDC1(%[ftmp8], %[stack], 0x20)
1636  "pavgb %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1637  "psubusb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1638  "pavgb %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
1639  "psubusb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1640  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1641  "psubusb %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1642  MMI_LDC1(%[ftmp15], %[stack], 0x20)
1643  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1644  "and %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
 /* ftmp15 = row pix - 3*stride */
1645  MMI_LDXC1(%[ftmp15], %[addr0], %[stride], 0x00)
1646  "psubusb %[ftmp8], %[ftmp15], %[ftmp2] \n\t"
1647  "psubusb %[ftmp5], %[ftmp2], %[ftmp15] \n\t"
1648  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1649  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1650  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1651  "and %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
 /* ftmp14 = row pix + 2*stride; mask for the rows above the edge is
  * saved in stack slot 0x30 */
1652  MMI_LDXC1(%[ftmp14], %[pix], %[addr2], 0x00)
1653  MMI_SDC1(%[ftmp5], %[stack], 0x30)
1654  "psubusb %[ftmp8], %[ftmp14], %[ftmp3] \n\t"
1655  "psubusb %[ftmp5], %[ftmp3], %[ftmp14] \n\t"
1656  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1657  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1658  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1659  "and %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
 /* mask for the rows below the edge is saved in stack slot 0x40 */
1660  MMI_SDC1(%[ftmp5], %[stack], 0x40)
 /* ---- first half: compute and store the three rows above pix ---- */
1661  "pavgb %[ftmp5], %[ftmp15], %[ftmp1] \n\t"
1662  "pavgb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1663  "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1664  MMI_SDC1(%[ftmp6], %[stack], 0x10)
1665  "paddb %[ftmp7], %[ftmp15], %[ftmp1] \n\t"
1666  "paddb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1667  "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1668  "mov.d %[ftmp8], %[ftmp7] \n\t"
1669  MMI_SDC1(%[ftmp7], %[stack], 0x00)
1670  "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1671  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1672  "xor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1673  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1674  "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1675  "pavgb %[ftmp6], %[ftmp15], %[ftmp4] \n\t"
1676  "psubb %[ftmp7], %[ftmp15], %[ftmp4] \n\t"
1677  "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1678  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1679  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1680  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1681  MMI_LDC1(%[ftmp13], %[stack], 0x10)
1682  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1683  "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1684  "pavgb %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1685  "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1686  "xor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1687  "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1688  "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1689  "xor %[ftmp8], %[ftmp2], %[ftmp4] \n\t"
1690  "pavgb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1691  "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1692  "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1693  MMI_LDC1(%[ftmp13], %[stack], 0x30)
1694  "pavgb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1695  MMI_LDC1(%[ftmp12], %[stack], 0x20)
 /* xor/and/xor sequences select between candidate results and the
  * original row under control of the saved masks */
1696  "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1697  "xor %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
1698  "and %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1699  "and %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1700  "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1701  "xor %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
 /* store row pix - stride */
1702  MMI_SDXC1(%[ftmp6], %[addr0], %[addr1], 0x00)
 /* ftmp6 = row pix - 4*stride (read only, never written back) */
1703  MMI_LDC1(%[ftmp6], %[addr0], 0x00)
1704  "paddb %[ftmp7], %[ftmp15], %[ftmp6] \n\t"
1705  "pavgb %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1706  MMI_LDC1(%[ftmp12], %[stack], 0x00)
1707  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1708  "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1709  "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1710  "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1711  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1712  "xor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1713  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1714  MMI_LDC1(%[ftmp12], %[stack], 0x30)
1715  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1716  "xor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1717  "xor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1718  "and %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1719  "and %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1720  "xor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1721  "xor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
 /* store rows pix - 2*stride and pix - 3*stride */
1722  MMI_SDXC1(%[ftmp5], %[addr0], %[addr2], 0x00)
1723  MMI_SDXC1(%[ftmp6], %[addr0], %[stride], 0x00)
 /* ---- second half: same computation mirrored for the rows at and
  * below pix, using the mask from stack slot 0x40 ---- */
1724  "pavgb %[ftmp5], %[ftmp14], %[ftmp4] \n\t"
1725  "pavgb %[ftmp6], %[ftmp3], %[ftmp2] \n\t"
1726  "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1727  MMI_SDC1(%[ftmp6], %[stack], 0x10)
1728  "paddb %[ftmp7], %[ftmp14], %[ftmp4] \n\t"
1729  "paddb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1730  "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1731  "mov.d %[ftmp8], %[ftmp7] \n\t"
1732  MMI_SDC1(%[ftmp7], %[stack], 0x00)
1733  "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1734  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1735  "xor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1736  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1737  "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1738  "pavgb %[ftmp6], %[ftmp14], %[ftmp1] \n\t"
1739  "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1740  "psubb %[ftmp7], %[ftmp14], %[ftmp1] \n\t"
1741  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1742  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1743  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1744  MMI_LDC1(%[ftmp12], %[stack], 0x10)
1745  "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1746  "pavgb %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1747  "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1748  "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1749  "xor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1750  "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1751  "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1752  "xor %[ftmp8], %[ftmp3], %[ftmp1] \n\t"
1753  "pavgb %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
1754  "and %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1755  MMI_LDC1(%[ftmp12], %[stack], 0x40)
1756  "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1757  MMI_LDC1(%[ftmp13], %[stack], 0x20)
1758  "pavgb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1759  "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1760  "xor %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1761  "and %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1762  "and %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
1763  "xor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1764  "xor %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
 /* store row pix */
1765  MMI_SDC1(%[ftmp6], %[pix], 0x00)
 /* ftmp6 = row pix + 3*stride (read only, never written back) */
1766  MMI_LDXC1(%[ftmp6], %[pix], %[addr1], 0x00)
1767  "paddb %[ftmp7], %[ftmp14], %[ftmp6] \n\t"
1768  "pavgb %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1769  MMI_LDC1(%[ftmp12], %[stack], 0x00)
1770  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1771  "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1772  "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1773  "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1774  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1775  "xor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1776  "and %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1777  MMI_LDC1(%[ftmp12], %[stack], 0x40)
1778  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1779  "xor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1780  "xor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1781  "and %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1782  "and %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1783  "xor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1784  "xor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
 /* store rows pix + stride and pix + 2*stride */
1785  MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1786  MMI_SDXC1(%[ftmp6], %[pix], %[addr2], 0x00)
 /* common exit, also the target of both early-out branches */
1787  "1: \n\t"
1788  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1789  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1790  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1791  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1792  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1793  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1794  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
1795  [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
1796  [tmp0]"=&r"(tmp[0]),
1797  RESTRICT_ASM_ALL64
1798  RESTRICT_ASM_ADDRT
1799  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1800  [addr2]"=&r"(addr[2]),
 /* read-write: the asm decrements both values */
1801  [alpha]"+&r"(alpha), [beta]"+&r"(beta)
1802  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1803  [stack]"r"(stack), [ff_pb_1]"m"(ff_pb_1)
1804  : "memory"
1805  );
1806 }
1807 
1808 void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1809  int beta, int8_t *tc0)
1810 {
1811  double ftmp[9];
1812  mips_reg addr[1];
1813  DECLARE_VAR_LOW32;
1814  DECLARE_VAR_ALL64;
1815  DECLARE_VAR_ADDRT;
1816 
1817  __asm__ volatile (
1818  "addi %[alpha], %[alpha], -0x01 \n\t"
1819  "addi %[beta], %[beta], -0x01 \n\t"
1820  "or %[addr0], $0, %[pix] \n\t"
1821  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1822  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1823  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1824  MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1825  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1826  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1827 
1828  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1829  "mtc1 %[alpha], %[ftmp5] \n\t"
1830  "mtc1 %[beta], %[ftmp6] \n\t"
1831  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1832  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1833  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1834  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1835  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1836  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1837  "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1838  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1839  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1840  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1841  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1842  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1843  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1844  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1845  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1846  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1847  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1848  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1849  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1850  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1851  MMI_ULWC1(%[ftmp7], %[tc0], 0x00)
1852  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1853  "and %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1854  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1855  "xor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1856  "xor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1857  "and %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1858  "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1859  "xor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1860  "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1861  "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1862  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1863  "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1864  "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1865  "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1866  "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1867  "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1868  "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1869  "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1870  "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1871  "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1872 
1873  MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1874  MMI_SDC1(%[ftmp3], %[pix], 0x00)
1875  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1876  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1877  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1878  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1879  [ftmp8]"=&f"(ftmp[8]),
1880  RESTRICT_ASM_LOW32
1881  RESTRICT_ASM_ALL64
1882  RESTRICT_ASM_ADDRT
1883  [addr0]"=&r"(addr[0])
1884  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1885  [alpha]"r"(alpha), [beta]"r"(beta),
1886  [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1),
1887  [ff_pb_3]"f"(ff_pb_3), [ff_pb_A1]"f"(ff_pb_A1)
1888  : "memory"
1889  );
1890 }
1891 
1893  int beta)
1894 {
 /*
  * Intra deblocking of one 8-byte-wide chroma edge between the rows
  * pix - stride and pix, using Loongson MMI.
  *
  * NOTE(review): the first line of this function's signature is not present
  * in this listing; the body uses the names pix, stride, alpha and beta.
  *
  * Reads the rows pix - 2*stride, pix - stride, pix and pix + stride and
  * rewrites the two rows adjacent to the edge.  There is no tc0 clipping:
  * columns that pass the alpha/beta tests receive the averaged value, the
  * others are written back unchanged.
  */
1895  double ftmp[9];
1896  mips_reg addr[1];
1897  DECLARE_VAR_ALL64;
1898  DECLARE_VAR_ADDRT;
1899 
1900  __asm__ volatile (
 /* NOTE(review): alpha and beta are modified here although they are
  * declared as input-only ("r") operands below; GCC's extended-asm rules
  * do not allow that.  The luma intra routine in this file declares them
  * "+&r" instead -- consider doing the same here. */
1901  "addi %[alpha], %[alpha], -0x01 \n\t"
1902  "addi %[beta], %[beta], -0x01 \n\t"
 /* addr0 = pix - 2*stride */
1903  "or %[addr0], $0, %[pix] \n\t"
1904  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1905  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
 /* ftmp1..ftmp4 = rows pix-2*stride, pix-stride, pix, pix+stride */
1906  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1907  MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1908  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1909  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1910 
 /* spread alpha - 1 (ftmp5) and beta - 1 (ftmp6) across the vector lanes */
1911  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1912  "mtc1 %[alpha], %[ftmp5] \n\t"
1913  "mtc1 %[beta], %[ftmp6] \n\t"
1914  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1915  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1916  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1917  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
 /* ftmp8 = 0xff in every column where all three thresholded absolute
  * differences are zero, 0 elsewhere */
1918  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1919  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1920  "or %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1921  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1922  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1923  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1924  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1925  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1926  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1927  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1928  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1929  "or %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1930  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1931  "or %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1932  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1933  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
 /* keep copies of the two edge rows (ftmp6, ftmp7) for the final blend */
1934  "mov.d %[ftmp6], %[ftmp2] \n\t"
1935  "mov.d %[ftmp7], %[ftmp3] \n\t"
 /* new upper edge row from rows ftmp2, ftmp4 and ftmp1 via two averages;
  * the (a ^ b) & 1 term removes the rounding of the first average */
1936  "xor %[ftmp5], %[ftmp2], %[ftmp4] \n\t"
1937  "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1938  "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1939  "psubusb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1940  "pavgb %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
 /* same for the lower edge row, from rows ftmp3, ftmp1 and ftmp4 */
1941  "xor %[ftmp5], %[ftmp3], %[ftmp1] \n\t"
1942  "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1943  "pavgb %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1944  "psubusb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1945  "pavgb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
 /* blend: result = original + ((filtered - original) & mask) */
1946  "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1947  "psubb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1948  "and %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
1949  "and %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1950  "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1951  "paddb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1952 
 /* write back rows pix - stride and pix */
1953  MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1954  MMI_SDC1(%[ftmp3], %[pix], 0x00)
1955  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1956  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1957  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1958  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1959  [ftmp8]"=&f"(ftmp[8]),
1960  RESTRICT_ASM_ALL64
1961  RESTRICT_ASM_ADDRT
1962  [addr0]"=&r"(addr[0])
1963  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1964  [alpha]"r"(alpha), [beta]"r"(beta),
1965  [ff_pb_1]"f"(ff_pb_1)
1966  : "memory"
1967  );
1968 }
1969 
1970 void ff_deblock_h_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
1971  int8_t *tc0)
1972 {
1973  double ftmp[11];
1974  mips_reg addr[6];
1975  DECLARE_VAR_LOW32;
1976 
1977  __asm__ volatile (
1978  "addi %[alpha], %[alpha], -0x01 \n\t"
1979  "addi %[beta], %[beta], -0x01 \n\t"
1980  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1981  PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
1982  PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
1983  PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
1984  "or %[addr5], $0, %[pix] \n\t"
1985  PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
1986  MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
1987  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
1988  MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
1989  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
1990  MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
1991  MMI_ULWC1(%[ftmp3], %[pix], 0x00)
1992  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1993  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1994  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
1995  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
1996  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1997  MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
1998  PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
1999  MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
2000  PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
2001  MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
2002  PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
2003  MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
2004  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2005  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
2006  "mov.d %[ftmp6], %[ftmp4] \n\t"
2007  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2008  "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
2009  "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
2010  "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
2011  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2012  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2013  "mov.d %[ftmp9], %[ftmp0] \n\t"
2014  "mov.d %[ftmp10], %[ftmp3] \n\t"
2015 
2016  "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2017  "mtc1 %[alpha], %[ftmp4] \n\t"
2018  "mtc1 %[beta], %[ftmp5] \n\t"
2019  "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2020  "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2021  "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2022  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2023  "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2024  "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2025  "or %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2026  "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2027  "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2028  "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2029  "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2030  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2031  "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2032  "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2033  "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2034  "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2035  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2036  "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2037  "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2038  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2039  MMI_ULWC1(%[ftmp6], %[tc0], 0x00)
2040  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2041  "and %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2042  "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2043  "xor %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
2044  "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2045  "and %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
2046  "pavgb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
2047  "xor %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
2048  "pavgb %[ftmp3], %[ftmp3], %[ff_pb_3] \n\t"
2049  "pavgb %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
2050  "pavgb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2051  "paddusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2052  "psubusb %[ftmp6], %[ff_pb_A1], %[ftmp3] \n\t"
2053  "psubusb %[ftmp3], %[ftmp3], %[ff_pb_A1] \n\t"
2054  "pminub %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
2055  "pminub %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
2056  "psubusb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
2057  "psubusb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2058  "paddusb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2059  "paddusb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2060 
2061  "punpckhwd %[ftmp4], %[ftmp9], %[ftmp9] \n\t"
2062  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2063  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2064  "punpcklbh %[ftmp0], %[ftmp9], %[ftmp1] \n\t"
2065  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
2066  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2067  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2068  MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2069  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2070  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2071  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2072  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2073  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2074  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2075  "punpckhwd %[ftmp3], %[ftmp10], %[ftmp10] \n\t"
2076  MMI_USWC1(%[ftmp0], %[pix], 0x00)
2077  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2078  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2079  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2080  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2081  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2082  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2083  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2084  PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2085  PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2086  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2087  MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2088  PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2089  "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2090  MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2091  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2092  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2093  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2094  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2095  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2096  [ftmp10]"=&f"(ftmp[10]),
2097  RESTRICT_ASM_LOW32
2098  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2099  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2100  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2101  [pix]"+&r"(pix)
2102  : [alpha]"r"(alpha), [beta]"r"(beta),
2103  [stride]"r"((mips_reg)stride), [tc0]"r"(tc0),
2104  [ff_pb_1]"f"(ff_pb_1), [ff_pb_3]"f"(ff_pb_3),
2105  [ff_pb_A1]"f"(ff_pb_A1)
2106  : "memory"
2107  );
2108 }
2109 
2111  int beta)
2112 {
 /*
  * Intra deblocking of one chroma edge between the columns pix[-1] and
  * pix[0] over eight rows, using Loongson MMI.
  *
  * NOTE(review): the first line of this function's signature is not present
  * in this listing; the body uses the names pix, stride, alpha and beta.
  *
  * Four bytes starting at pix - 2 are read from each of eight consecutive
  * rows, transposed into one 8-byte vector per column, filtered, transposed
  * back and stored as four bytes per row.  There is no tc0 clipping.
  */
2113  double ftmp[11];
2114  mips_reg addr[6];
2115  DECLARE_VAR_LOW32;
2116 
2117  __asm__ volatile (
 /* NOTE(review): alpha and beta are modified here although they are
  * declared as input-only ("r") operands below; GCC's extended-asm rules
  * do not allow that.  pix, which is also modified, is correctly declared
  * "+&r" -- consider doing the same for alpha and beta. */
2118  "addi %[alpha], %[alpha], -0x01 \n\t"
2119  "addi %[beta], %[beta], -0x01 \n\t"
 /* addr0 = 2*stride, addr1 = 3*stride, addr2 = 4*stride,
  * addr5 = row 0 at column -2, pix = row 3 at column -2 */
2120  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2121  PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
2122  PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
2123  PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
2124  "or %[addr5], $0, %[pix] \n\t"
2125  PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
 /* load rows 0..3 (4 bytes each) and start the transpose */
2126  MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
2127  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2128  MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
2129  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2130  MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
2131  MMI_ULWC1(%[ftmp3], %[pix], 0x00)
2132  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2133  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2134  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2135  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
2136  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* load rows 4..7 and finish the transpose:
  * ftmp0..ftmp3 = columns -2, -1, 0, +1 */
2137  MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
2138  PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
2139  MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
2140  PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
2141  MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
2142  PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
2143  MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
2144  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2145  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
2146  "mov.d %[ftmp6], %[ftmp4] \n\t"
2147  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2148  "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
2149  "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
2150  "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
2151  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2152  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2153 
 /* spread alpha - 1 (ftmp4) and beta - 1 (ftmp5) across the vector lanes */
2154  "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2155  "mtc1 %[alpha], %[ftmp4] \n\t"
2156  "mtc1 %[beta], %[ftmp5] \n\t"
2157  "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2158  "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2159  "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2160  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
 /* ftmp7 = 0xff in every row where all three thresholded absolute
  * differences are zero, 0 elsewhere */
2161  "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2162  "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2163  "or %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2164  "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2165  "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2166  "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2167  "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2168  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2169  "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2170  "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2171  "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2172  "or %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2173  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2174  "or %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2175  "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2176  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
 /* keep copies of the two edge columns (ftmp5, ftmp6) for the blend */
2177  "mov.d %[ftmp5], %[ftmp1] \n\t"
2178  "mov.d %[ftmp6], %[ftmp2] \n\t"
 /* new left edge column from ftmp1, ftmp3 and ftmp0 via two averages;
  * the (a ^ b) & 1 term removes the rounding of the first average */
2179  "xor %[ftmp4], %[ftmp1], %[ftmp3] \n\t"
2180  "and %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2181  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2182  "psubusb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
2183  "pavgb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
 /* same for the right edge column, from ftmp2, ftmp0 and ftmp3 */
2184  "xor %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2185  "and %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2186  "pavgb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2187  "psubusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
2188  "pavgb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
 /* blend: result = original + ((filtered - original) & mask) */
2189  "psubb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2190  "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2191  "and %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
2192  "and %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2193  "paddb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2194  "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2195 
 /* transpose back and store four bytes to each of rows 0..7 */
2196  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2197  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2198  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2199  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2200  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2201  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2202  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2203  MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2204  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2205  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2206  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2207  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2208  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2209  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2210  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2211  MMI_USWC1(%[ftmp0], %[pix], 0x00)
2212  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2213  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2214  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2215  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2216  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2217  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2218  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2219  PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2220  PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2221  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2222  PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2223  MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2224  "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2225  MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2226  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2227  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2228  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2229  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2230  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2231  [ftmp10]"=&f"(ftmp[10]),
2232  RESTRICT_ASM_LOW32
2233  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2234  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2235  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2236  [pix]"+&r"(pix)
2237  : [alpha]"r"(alpha), [beta]"r"(beta),
2238  [stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1)
2239  : "memory"
2240  );
2241 }
2242 
/**
 * Deblock a luma edge as two independent 8-byte-wide halves.
 *
 * The half at pix + 0 uses tc0[0..1] and the half at pix + 8 uses
 * tc0[2..3].  A half is skipped entirely when both of its tc0 entries are
 * negative; otherwise it is handed to ff_deblock_v8_luma_8_mmi().
 *
 * @param pix    pointer passed through to the 8-wide routine (plus 0 or 8)
 * @param stride row stride, passed through unchanged
 * @param alpha  edge threshold, passed through unchanged
 * @param beta   side threshold, passed through unchanged
 * @param tc0    four clipping values, two per half
 */
void ff_deblock_v_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
                             int8_t *tc0)
{
    int half;

    for (half = 0; half < 2; half++) {
        int8_t *tc = tc0 + 2 * half;

        /* the AND of two values is negative only if both are negative */
        if ((tc[0] & tc[1]) < 0)
            continue;

        ff_deblock_v8_luma_8_mmi(pix + 8 * half, stride, alpha, beta, tc);
    }
}
2251 
2253  int beta)
2254 {
 /*
  * Runs the 8-byte-wide intra luma filter twice, once at pix + 0 and once
  * at pix + 8, passing stride, alpha and beta through unchanged.
  *
  * NOTE(review): the first line of this function's signature is not present
  * in this listing; presumably this is the 16-wide entry point built from
  * the two 8-wide halves -- confirm against the header.
  */
2255  deblock_v8_luma_intra_8_mmi(pix + 0, stride, alpha, beta);
2256  deblock_v8_luma_intra_8_mmi(pix + 8, stride, alpha, beta);
2257 }
2258 
2259 void ff_deblock_h_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
2260  int8_t *tc0)
2261 {
2262  DECLARE_ALIGNED(8, const uint64_t, stack[0x0d]);
2263  double ftmp[9];
2264  mips_reg addr[8];
2265  DECLARE_VAR_LOW32;
2266  DECLARE_VAR_ALL64;
2267 
2268  __asm__ volatile (
2269  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2270  PTR_ADDI "%[addr1], %[pix], -0x4 \n\t"
2271  PTR_ADDU "%[addr2], %[stride], %[addr0] \n\t"
2272  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2273  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2274  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2275  MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2276  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2277  MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2278  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2279  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2280  MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2281  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2282  MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2283  PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2284  MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2285  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2286  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2287  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2288  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2289  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2290  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2291  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2292  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2293  MMI_SDC1(%[ftmp1], %[stack], 0x10)
2294  MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2295  PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2296  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2297  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2298  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2299  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2300  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2301  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2302  MMI_LDC1(%[ftmp8], %[stack], 0x10)
2303  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2304  MMI_SDC1(%[ftmp0], %[stack], 0x00)
2305  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2306  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2307  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2308  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2309  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2310  "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2311  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2312  "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2313  "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2314  MMI_SDC1(%[ftmp1], %[stack], 0x10)
2315  MMI_SDC1(%[ftmp3], %[stack], 0x20)
2316  MMI_SDC1(%[ftmp7], %[stack], 0x30)
2317  MMI_SDC1(%[ftmp5], %[stack], 0x40)
2318  MMI_SDC1(%[ftmp6], %[stack], 0x50)
2319  PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2320  PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
2321  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2322  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2323  MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2324  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2325  MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2326  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2327  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2328  MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2329  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2330  MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2331  PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2332  MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2333  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2334  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2335  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2336  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2337  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2338  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2339  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2340  MMI_SDC1(%[ftmp1], %[stack], 0x18)
2341  MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2342  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2343  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2344  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2345  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2346  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2347  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2348  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2349  MMI_LDC1(%[ftmp8], %[stack], 0x18)
2350  MMI_SDC1(%[ftmp0], %[stack], 0x08)
2351  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2352  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2353  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2354  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2355  "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2356  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2357  "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2358  "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2359  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2360  MMI_SDC1(%[ftmp1], %[stack], 0x18)
2361  MMI_SDC1(%[ftmp3], %[stack], 0x28)
2362  MMI_SDC1(%[ftmp7], %[stack], 0x38)
2363  MMI_SDC1(%[ftmp5], %[stack], 0x48)
2364  MMI_SDC1(%[ftmp6], %[stack], 0x58)
2365  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2366  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2367  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2368  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2369  [ftmp8]"=&f"(ftmp[8]),
2370  RESTRICT_ASM_ALL64
2371  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2372  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2373  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2374  [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7])
2375  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2376  [stack]"r"(stack)
2377  : "memory"
2378  );
2379 
2380  ff_deblock_v_luma_8_mmi((uint8_t *) &stack[6], 0x10, alpha, beta, tc0);
2381 
2382  __asm__ volatile (
2383  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2384  PTR_ADDI "%[addr1], %[pix], -0x02 \n\t"
2385  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2386  PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2387  PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2388  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2389  MMI_LDC1(%[ftmp0], %[stack], 0x10)
2390  MMI_LDC1(%[ftmp1], %[stack], 0x20)
2391  MMI_LDC1(%[ftmp2], %[stack], 0x30)
2392  MMI_LDC1(%[ftmp3], %[stack], 0x40)
2393  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2394  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2395  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2396  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2397  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2398  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2399  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2400  MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2401  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2402  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2403  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2404  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2405  MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2406  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2407  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2408  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2409  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2410  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2411  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2412  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2413  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2414  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2415  PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2416  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2417  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2418  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2419  MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2420  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2421  "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2422  PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2423  MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2424  PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
2425  MMI_LDC1(%[ftmp0], %[stack], 0x18)
2426  MMI_LDC1(%[ftmp1], %[stack], 0x28)
2427  MMI_LDC1(%[ftmp2], %[stack], 0x38)
2428  MMI_LDC1(%[ftmp3], %[stack], 0x48)
2429  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2430  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2431  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2432  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2433  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2434  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2435  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2436  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2437  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2438  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2439  MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2440  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2441  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2442  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2443  MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2444  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2445  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2446  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2447  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2448  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2449  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2450  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2451  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2452  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2453  PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2454  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2455  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2456  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2457  MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2458  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2459  "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2460  MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2461  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2462  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2463  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2464  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2465  [ftmp8]"=&f"(ftmp[8]),
2466  RESTRICT_ASM_LOW32
2467  RESTRICT_ASM_ALL64
2468  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2469  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2470  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2471  [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7])
2472  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2473  [stack]"r"(stack)
2474  : "memory"
2475  );
2476 }
2477 
2479  int beta)
2480 {
 /*
  * Applies the intra luma deblocking filter to a region of 16 rows by
  * 8 bytes whose first row starts at pix - 4, by reusing
  * ff_deblock_v_luma_intra_8_mmi():
  *   1. The first asm block reads 8 bytes from each of the 16 rows (two
  *      batches of 8 rows) and stores the shuffled result into ptmp as
  *      eight rows of 16 bytes.
  *   2. ff_deblock_v_luma_intra_8_mmi() is run on that buffer with a
  *      stride of 0x10, pointing at its fifth row (&ptmp[8]).
  *   3. The second asm block shuffles the buffer back and writes 8 bytes
  *      to each of the same 16 rows.
  *
  * pix, stride: rows are addressed as pix - 4 + k * stride, k = 0..15.
  * alpha, beta: passed through unchanged to ff_deblock_v_luma_intra_8_mmi().
  *
  * NOTE(review): the macro definitions are not visible in this part of the
  * file. The comments below assume PTR_ADDU/PTR_ADDI/PTR_SUBU are
  * pointer-width add/subtract, PTR_S/PTR_L are pointer-width store/load,
  * MMI_ULDC1/MMI_LDC1 and MMI_USDC1/MMI_SDC1 are 8-byte loads/stores, and
  * that the punpck{l,h}{bh,hw,wd} sequence is the usual 8x8 byte
  * transpose -- confirm against mmiutils.h / asmdefs.h.
  */
 /*
  * Scratch buffer for the shuffled pixels: 0x11 * 8 = 136 bytes, of which
  * the asm below touches offsets 0x00..0x7f.
  * NOTE(review): declared const although the asm appears to store into it
  * (MMI_SDC1 with [ptmp] as base) -- confirm this is intentional.
  */
2481  DECLARE_ALIGNED(8, const uint64_t, ptmp[0x11]);
 /*
  * Four slots used to carry addr1, addr2, addr0 and addr3 from the first
  * asm block to the second one (see the PTR_S / PTR_L lines).
  * NOTE(review): also declared const while being written via PTR_S.
  */
2482  DECLARE_ALIGNED(8, const uint64_t, pdat[0x04]);
2483  double ftmp[9];
2484  mips_reg addr[7];
2485  DECLARE_VAR_ALL64;
2486 
2487  __asm__ volatile (
 /*
  * Pointer/offset setup:
  *   addr0 = 2 * stride, addr1 = pix - 4 (row 0), addr2 = 3 * stride,
  *   addr3 = 4 * stride, addr4 = addr1 + 3 * stride (row 3).
  * addr5 and addr6 are per-row scratch pointers.
  */
2488  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2489  PTR_ADDI "%[addr1], %[pix], -0x04 \n\t"
2490  PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2491  PTR_ADDU "%[addr3], %[addr0], %[addr0] \n\t"
2492  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2493  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
 /* Load rows 0..6 into ftmp0..ftmp6 (row 7 goes into ftmp8 further down). */
2494  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2495  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2496  MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2497  MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2498  PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2499  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2500  PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2501  MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2502  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2503  MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2504  MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2505  PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
2506  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2507  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2508  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2509  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2510  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2511  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2512  MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2513  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2514  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
 /*
  * All nine ftmp registers are in use, so ptmp + 0x00 and ptmp + 0x20
  * serve as temporary spill slots here; both are reloaded a few
  * instructions later and then overwritten with their final contents.
  */
2515  MMI_SDC1(%[ftmp3], %[ptmp], 0x00)
2516  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2517  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2518  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2519  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2520  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2521  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2522  MMI_SDC1(%[ftmp2], %[ptmp], 0x20)
2523  MMI_LDC1(%[ftmp2], %[ptmp], 0x00)
2524  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2525  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2526  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2527  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2528  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2529  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
 /* Final results of the first batch: first 8 bytes of each 16-byte ptmp row. */
2530  MMI_SDC1(%[ftmp0], %[ptmp], 0x00)
2531  MMI_SDC1(%[ftmp5], %[ptmp], 0x10)
2532  MMI_SDC1(%[ftmp7], %[ptmp], 0x40)
2533  MMI_SDC1(%[ftmp4], %[ptmp], 0x50)
2534  MMI_LDC1(%[ftmp8], %[ptmp], 0x20)
2535  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2536  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2537  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2538  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
 /* addr5 = 8 * stride: used to advance addr1/addr4 to rows 8 and 11. */
2539  PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
2540  MMI_SDC1(%[ftmp3], %[ptmp], 0x20)
2541  MMI_SDC1(%[ftmp0], %[ptmp], 0x30)
2542  MMI_SDC1(%[ftmp6], %[ptmp], 0x60)
2543  MMI_SDC1(%[ftmp5], %[ptmp], 0x70)
 /*
  * Second batch: the same load/shuffle sequence for rows 8..15. Results go
  * to the second 8 bytes of each ptmp row (offsets 0x08, 0x18, ..., 0x78),
  * with ptmp + 0x08 and ptmp + 0x28 used as the temporary spill slots.
  */
2544  PTR_ADDU "%[addr1], %[addr1], %[addr5] \n\t"
2545  PTR_ADDU "%[addr4], %[addr4], %[addr5] \n\t"
2546  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2547  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2548  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2549  MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2550  MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2551  PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2552  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2553  PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2554  MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2555  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2556  MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2557  MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2558  PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
2559  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2560  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2561  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2562  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2563  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2564  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2565  MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2566  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2567  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2568  MMI_SDC1(%[ftmp3], %[ptmp], 0x08)
2569  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2570  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2571  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2572  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2573  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2574  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2575  MMI_SDC1(%[ftmp2], %[ptmp], 0x28)
2576  MMI_LDC1(%[ftmp2], %[ptmp], 0x08)
2577  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2578  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2579  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2580  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2581  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2582  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2583  MMI_SDC1(%[ftmp0], %[ptmp], 0x08)
2584  MMI_SDC1(%[ftmp5], %[ptmp], 0x18)
2585  MMI_SDC1(%[ftmp7], %[ptmp], 0x48)
2586  MMI_SDC1(%[ftmp4], %[ptmp], 0x58)
2587  MMI_LDC1(%[ftmp8], %[ptmp], 0x28)
2588  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2589  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2590  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2591  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2592  MMI_SDC1(%[ftmp3], %[ptmp], 0x28)
2593  MMI_SDC1(%[ftmp0], %[ptmp], 0x38)
2594  MMI_SDC1(%[ftmp6], %[ptmp], 0x68)
2595  MMI_SDC1(%[ftmp5], %[ptmp], 0x78)
 /*
  * Save addr1 (now pointing at row 8), addr2 (3 * stride), addr0
  * (2 * stride) and addr3 (4 * stride) so the second asm block can reload
  * them instead of recomputing them.
  */
2596  PTR_S "%[addr1], 0x00(%[pdat]) \n\t"
2597  PTR_S "%[addr2], 0x08(%[pdat]) \n\t"
2598  PTR_S "%[addr0], 0x10(%[pdat]) \n\t"
2599  PTR_S "%[addr3], 0x18(%[pdat]) \n\t"
2600  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2601  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2602  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2603  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2604  [ftmp8]"=&f"(ftmp[8]),
2605  RESTRICT_ASM_ALL64
2606  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2607  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2608  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2609  [addr6]"=&r"(addr[6])
2610  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2611  [ptmp]"r"(ptmp), [pdat]"r"(pdat)
2612  : "memory"
2613  );
2614 
 /*
  * &ptmp[8] is byte offset 0x40 into ptmp, i.e. the fifth 16-byte row of
  * the scratch buffer; the stride passed is the 0x10-byte row pitch.
  */
2615  ff_deblock_v_luma_intra_8_mmi((uint8_t *) &ptmp[8], 0x10, alpha, beta);
2616 
2617  __asm__ volatile (
 /*
  * Reload the values saved by the first asm block: addr1 = row 8,
  * addr2 = 3 * stride, addr0 = 2 * stride, addr3 = 4 * stride.
  * addr4 = addr1 + 3 * stride (row 11).
  */
2618  PTR_L "%[addr1], 0x00(%[pdat]) \n\t"
2619  PTR_L "%[addr2], 0x08(%[pdat]) \n\t"
2620  PTR_L "%[addr0], 0x10(%[pdat]) \n\t"
2621  PTR_L "%[addr3], 0x18(%[pdat]) \n\t"
2622  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
 /* Rows 8..15 first: read the second 8 bytes of each 16-byte ptmp row. */
2623  MMI_LDC1(%[ftmp0], %[ptmp], 0x08)
2624  MMI_LDC1(%[ftmp1], %[ptmp], 0x18)
2625  MMI_LDC1(%[ftmp2], %[ptmp], 0x28)
2626  MMI_LDC1(%[ftmp3], %[ptmp], 0x38)
2627  MMI_LDC1(%[ftmp4], %[ptmp], 0x48)
2628  MMI_LDC1(%[ftmp5], %[ptmp], 0x58)
2629  MMI_LDC1(%[ftmp6], %[ptmp], 0x68)
2630  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2631  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2632  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2633  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2634  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2635  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2636  MMI_LDC1(%[ftmp8], %[ptmp], 0x78)
2637  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2638  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
 /*
  * The destination rows themselves are used as spill slots here
  * ([addr1] and [addr1 + 2 * stride]); both are reloaded below and then
  * overwritten with their final contents.
  */
2639  MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2640  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2641  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2642  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2643  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2644  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2645  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2646  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2647  MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2648  MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2649  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2650  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2651  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2652  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2653  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2654  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
 /* Final stores for this batch, interleaved with the address arithmetic. */
2655  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2656  MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2657  PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2658  MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2659  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2660  MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2661  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2662  MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2663  MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2664  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2665  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2666  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2667  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2668  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2669  MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2670  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2671  MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2672  PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2673  MMI_USDC1(%[ftmp6], %[addr5], 0x00)
 /*
  * addr5 = 8 * stride; step addr1/addr4 back to rows 0 and 3, then repeat
  * the whole sequence for the first 8 bytes of each ptmp row.
  */
2674  PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
2675  MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2676  PTR_SUBU "%[addr1], %[addr1], %[addr5] \n\t"
2677  PTR_SUBU "%[addr4], %[addr4], %[addr5] \n\t"
2678  MMI_LDC1(%[ftmp0], %[ptmp], 0x00)
2679  MMI_LDC1(%[ftmp1], %[ptmp], 0x10)
2680  MMI_LDC1(%[ftmp2], %[ptmp], 0x20)
2681  MMI_LDC1(%[ftmp3], %[ptmp], 0x30)
2682  MMI_LDC1(%[ftmp4], %[ptmp], 0x40)
2683  MMI_LDC1(%[ftmp5], %[ptmp], 0x50)
2684  MMI_LDC1(%[ftmp6], %[ptmp], 0x60)
2685  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2686  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2687  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2688  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2689  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2690  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2691  MMI_LDC1(%[ftmp8], %[ptmp], 0x70)
2692  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2693  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2694  MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2695  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2696  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2697  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2698  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2699  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2700  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2701  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2702  MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2703  MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2704  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2705  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2706  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2707  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2708  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2709  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2710  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2711  MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2712  PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2713  MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2714  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2715  MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2716  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2717  MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2718  MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2719  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2720  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2721  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2722  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2723  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2724  MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2725  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2726  MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2727  PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2728  MMI_USDC1(%[ftmp6], %[addr5], 0x00)
2729  MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2730  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2731  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2732  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2733  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2734  [ftmp8]"=&f"(ftmp[8]),
2735  RESTRICT_ASM_ALL64
2736  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2737  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2738  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2739  [addr6]"=&r"(addr[6])
2740  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2741  [ptmp]"r"(ptmp), [pdat]"r"(pdat)
2742  : "memory"
2743  );
2744 }
stride
int stride
Definition: mace.c:144
ff_h264_idct8_add_8_mmi
void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:177
PTR_SLL
#define PTR_SLL
Definition: asmdefs.h:55
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:26
ff_pb_3
const uint64_t ff_pb_3
Definition: constants.c:58
b
#define b
Definition: input.c:41
ff_h264_idct8_add4_8_mmi
void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:815
ff_h264_biweight_pixels4_8_mmi
void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1410
mips_reg
#define mips_reg
Definition: asmdefs.h:44
ff_h264_add_pixels4_8_mmi
void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
Definition: h264dsp_mmi.c:30
ff_h264_weight_pixels8_8_mmi
void ff_h264_weight_pixels8_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1277
PTR_ADDI
#define PTR_ADDI
Definition: asmdefs.h:49
deblock_v8_luma_intra_8_mmi
static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
Definition: h264dsp_mmi.c:1582
ff_h264_biweight_pixels16_8_mmi
void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1206
mmiutils.h
src
#define src
Definition: vp8dsp.c:254
ff_h264_chroma_dc_dequant_idct_8_mmi
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
Definition: h264dsp_mmi.c:1136
ff_deblock_v8_luma_8_mmi
void ff_deblock_v8_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1454
ff_pb_A1
const uint64_t ff_pb_A1
Definition: constants.c:60
ff_pw_32
const uint64_t ff_pw_32
Definition: constants.c:43
h264dsp_mips.h
bit_depth_template.c
ff_deblock_h_chroma_8_mmi
void ff_deblock_h_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1970
ff_deblock_v_luma_intra_8_mmi
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2252
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
weight
static int weight(int i, int blen, int offset)
Definition: diracdec.c:1564
ff_pw_1
const uint64_t ff_pw_1
Definition: constants.c:26
ff_deblock_h_luma_8_mmi
void ff_deblock_h_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:2259
ff_h264_idct_add8_8_mmi
void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:832
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
ff_h264_biweight_pixels8_8_mmi
void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1320
height
#define height
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
PTR_SUBU
#define PTR_SUBU
Definition: asmdefs.h:50
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
src0
#define src0
Definition: h264pred.c:138
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:112
src1
#define src1
Definition: h264pred.c:139
ff_h264_idct_add16intra_8_mmi
void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:802
ff_h264_chroma422_dc_dequant_idct_8_mmi
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
Definition: h264dsp_mmi.c:1103
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
ff_h264_idct_add_8_mmi
void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:80
ff_h264_idct_add8_422_8_mmi
void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:848
uint8_t
uint8_t
Definition: audio_convert.c:194
ff_deblock_v_luma_8_mmi
void ff_deblock_v_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:2243
ff_deblock_v_chroma_8_mmi
void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1808
ff_h264_idct_add16_8_mmi
void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:785
ff_h264_weight_pixels4_8_mmi
void ff_h264_weight_pixels4_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1372
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:47
ff_deblock_h_luma_intra_8_mmi
void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2478
ff_deblock_h_chroma_intra_8_mmi
void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2110
scan8
static const uint8_t scan8[16 *3+3]
Definition: h264dec.h:644
temp
else temp
Definition: vf_mcdeint.c:256
PTR_S
#define PTR_S
Definition: asmdefs.h:52
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:48
ff_pb_1
const uint64_t ff_pb_1
Definition: constants.c:57
ff_deblock_v_chroma_intra_8_mmi
void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
Definition: h264dsp_mmi.c:1892
PTR_L
#define PTR_L
Definition: asmdefs.h:51
PTR_SRL
#define PTR_SRL
Definition: asmdefs.h:54
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
ff_h264_weight_pixels16_8_mmi
void ff_h264_weight_pixels16_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1150
ff_h264_idct8_dc_add_8_mmi
void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:700
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
block1
static int16_t block1[64]
Definition: dct.c:116
ff_h264_idct_dc_add_8_mmi
void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:656
ff_h264_luma_dc_dequant_idct_8_mmi
void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input, int qmul)
Definition: h264dsp_mmi.c:876