FFmpeg
vp3dsp_idct_mmi.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 gxw <guxiwei-hf@loongson.cn>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "vp3dsp_mips.h"
22 #include "libavutil/intreadwrite.h"
24 #include "libavutil/common.h"
25 #include "libavcodec/rnd_avg.h"
26 
/*
 * Emit asm text that places an integer constant into the FP/MMI register dst.
 *
 * The expansion loads `value` into the GPR operand %[tmp1] with "li", moves
 * it to `dst` with "dmtc1", and then runs "pshufh dst, dst, %[ftmp10]".
 * The enclosing asm statement must therefore provide operands named
 * [tmp1] and [ftmp10]; every user in this file zeroes %[ftmp10] first.
 * NOTE(review): with a zero selector pshufh presumably replicates the low
 * halfword into all four 16-bit lanes -- confirm against the MMI manual.
 */
27 #define LOAD_CONST(dst, value) \
28  "li %[tmp1], "#value" \n\t" \
29  "dmtc1 %[tmp1], "#dst" \n\t" \
30  "pshufh "#dst", "#dst", %[ftmp10] \n\t"
31 
/*
 * First transform pass, performed in place on `input`.
 *
 * Each loop iteration loads eight 64-bit words (four int16 lanes each) from
 * byte offsets 0x00, 0x10, ..., 0x70 of `input`, runs the butterfly network
 * below on all four lanes at once and stores the eight results back to the
 * same offsets. The loop runs twice and advances `input` by 8 bytes per
 * iteration, so both 4-lane halves of the 128-byte area are processed.
 *
 * NOTE(review): the multipliers 64277, 60547, 54491, 46341, 36410, 25080 and
 * 12785 are presumably the 16-bit fixed-point cosine constants of the VP3
 * IDCT -- confirm against the C reference implementation.
 *
 * Multiply pattern used throughout: constants that fit a signed 16-bit lane
 * are applied with pmulhh directly. For the larger constants the code builds
 * a sign word with pcmpgth (compare of zero against the lane), ORs it with
 * csth_1 to get a multiplier in `mask`, multiplies the lane by `mask`
 * (pmullh), takes the unsigned high product (pmulhuh), multiplies by `mask`
 * again and finally adds the sign word. This presumably emulates a signed
 * multiply-high by an unsigned 16-bit constant -- confirm.
 */
32 static void idct_row_mmi(int16_t *input)
33 {
34  double ftmp[23];
35  uint64_t tmp[2];
36  __asm__ volatile (
 /* ftmp10 is cleared once and used as the zero / shuffle selector below */
37  "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
38  LOAD_CONST(%[csth_1], 1)
 /* tmp0 is the loop counter: two passes */
39  "li %[tmp0], 0x02 \n\t"
40  "1: \n\t"
41  /* Load input */
42  "ldc1 %[ftmp0], 0x00(%[input]) \n\t"
43  "ldc1 %[ftmp1], 0x10(%[input]) \n\t"
44  "ldc1 %[ftmp2], 0x20(%[input]) \n\t"
45  "ldc1 %[ftmp3], 0x30(%[input]) \n\t"
46  "ldc1 %[ftmp4], 0x40(%[input]) \n\t"
47  "ldc1 %[ftmp5], 0x50(%[input]) \n\t"
48  "ldc1 %[ftmp6], 0x60(%[input]) \n\t"
49  "ldc1 %[ftmp7], 0x70(%[input]) \n\t"
 /* A and B are built from ftmp1 and ftmp7 with constants 64277 / 12785 */
50  LOAD_CONST(%[ftmp8], 64277)
51  LOAD_CONST(%[ftmp9], 12785)
52  "pmulhh %[A], %[ftmp9], %[ftmp7] \n\t"
53  "pcmpgth %[C], %[ftmp10], %[ftmp1] \n\t"
54  "or %[mask], %[C], %[csth_1] \n\t"
55  "pmullh %[B], %[ftmp1], %[mask] \n\t"
56  "pmulhuh %[B], %[ftmp8], %[B] \n\t"
57  "pmullh %[B], %[B], %[mask] \n\t"
58  "paddh %[A], %[A], %[B] \n\t"
59  "paddh %[A], %[A], %[C] \n\t"
60  "pcmpgth %[D], %[ftmp10], %[ftmp7] \n\t"
61  "or %[mask], %[D], %[csth_1] \n\t"
62  "pmullh %[ftmp7], %[ftmp7], %[mask] \n\t"
63  "pmulhuh %[B], %[ftmp8], %[ftmp7] \n\t"
64  "pmullh %[B], %[B], %[mask] \n\t"
65  "pmulhh %[C], %[ftmp9], %[ftmp1] \n\t"
66  "psubh %[B], %[C], %[B] \n\t"
67  "psubh %[B], %[B], %[D] \n\t"
68 
 /* C and D are built from ftmp3 and ftmp5 with constants 54491 / 36410 */
69  LOAD_CONST(%[ftmp8], 54491)
70  LOAD_CONST(%[ftmp9], 36410)
71  "pcmpgth %[Ad], %[ftmp10], %[ftmp5] \n\t"
72  "or %[mask], %[Ad], %[csth_1] \n\t"
73  "pmullh %[ftmp1], %[ftmp5], %[mask] \n\t"
74  "pmulhuh %[C], %[ftmp9], %[ftmp1] \n\t"
75  "pmullh %[C], %[C], %[mask] \n\t"
76  "pcmpgth %[Bd], %[ftmp10], %[ftmp3] \n\t"
77  "or %[mask], %[Bd], %[csth_1] \n\t"
78  "pmullh %[D], %[ftmp3], %[mask] \n\t"
79  "pmulhuh %[D], %[ftmp8], %[D] \n\t"
80  "pmullh %[D], %[D], %[mask] \n\t"
81  "paddh %[C], %[C], %[D] \n\t"
82  "paddh %[C], %[C], %[Ad] \n\t"
83  "paddh %[C], %[C], %[Bd] \n\t"
84  "pcmpgth %[Bd], %[ftmp10], %[ftmp3] \n\t"
85  "or %[mask], %[Bd], %[csth_1] \n\t"
86  "pmullh %[ftmp1], %[ftmp3], %[mask] \n\t"
87  "pmulhuh %[D], %[ftmp9], %[ftmp1] \n\t"
88  "pmullh %[D], %[D], %[mask] \n\t"
89  "pcmpgth %[Ed], %[ftmp10], %[ftmp5] \n\t"
90  "or %[mask], %[Ed], %[csth_1] \n\t"
91  "pmullh %[Ad], %[ftmp5], %[mask] \n\t"
92  "pmulhuh %[Ad], %[ftmp8], %[Ad] \n\t"
93  "pmullh %[Ad], %[Ad], %[mask] \n\t"
94  "psubh %[D], %[Ad], %[D] \n\t"
95  "paddh %[D], %[D], %[Ed] \n\t"
96  "psubh %[D], %[D], %[Bd] \n\t"
97 
 /* 46341 scales (A - C), (B - D), (ftmp0 + ftmp4) and (ftmp0 - ftmp4) */
98  LOAD_CONST(%[ftmp8], 46341)
99  "psubh %[Ad], %[A], %[C] \n\t"
100  "pcmpgth %[Bd], %[ftmp10], %[Ad] \n\t"
101  "or %[mask], %[Bd], %[csth_1] \n\t"
102  "pmullh %[Ad], %[Ad], %[mask] \n\t"
103  "pmulhuh %[Ad], %[ftmp8], %[Ad] \n\t"
104  "pmullh %[Ad], %[Ad], %[mask] \n\t"
105  "paddh %[Ad], %[Ad], %[Bd] \n\t"
106  "psubh %[Bd], %[B], %[D] \n\t"
107  "pcmpgth %[Cd], %[ftmp10], %[Bd] \n\t"
108  "or %[mask], %[Cd], %[csth_1] \n\t"
109  "pmullh %[Bd], %[Bd], %[mask] \n\t"
110  "pmulhuh %[Bd], %[ftmp8], %[Bd] \n\t"
111  "pmullh %[Bd], %[Bd], %[mask] \n\t"
112  "paddh %[Bd], %[Bd], %[Cd] \n\t"
113  "paddh %[Cd], %[A], %[C] \n\t"
114  "paddh %[Dd], %[B], %[D] \n\t"
115  "paddh %[A], %[ftmp0], %[ftmp4] \n\t"
116  "pcmpgth %[B], %[ftmp10], %[A] \n\t"
117  "or %[mask], %[B], %[csth_1] \n\t"
118  "pmullh %[A], %[A], %[mask] \n\t"
119  "pmulhuh %[A], %[ftmp8], %[A] \n\t"
120  "pmullh %[A], %[A], %[mask] \n\t"
121  "paddh %[A], %[A], %[B] \n\t"
122  "psubh %[B], %[ftmp0], %[ftmp4] \n\t"
123  "pcmpgth %[C], %[ftmp10], %[B] \n\t"
124  "or %[mask], %[C], %[csth_1] \n\t"
125  "pmullh %[B], %[B], %[mask] \n\t"
126  "pmulhuh %[B], %[ftmp8], %[B] \n\t"
127  "pmullh %[B], %[B], %[mask] \n\t"
128  "paddh %[B], %[B], %[C] \n\t"
129 
 /* C and D are rebuilt from ftmp2 and ftmp6 with constants 60547 / 25080 */
130  LOAD_CONST(%[ftmp8], 60547)
131  LOAD_CONST(%[ftmp9], 25080)
132  "pmulhh %[C], %[ftmp9], %[ftmp6] \n\t"
133  "pcmpgth %[D], %[ftmp10], %[ftmp2] \n\t"
134  "or %[mask], %[D], %[csth_1] \n\t"
135  "pmullh %[Ed], %[ftmp2], %[mask] \n\t"
136  "pmulhuh %[Ed], %[ftmp8], %[Ed] \n\t"
137  "pmullh %[Ed], %[Ed], %[mask] \n\t"
138  "paddh %[C], %[C], %[Ed] \n\t"
139  "paddh %[C], %[C], %[D] \n\t"
140  "pcmpgth %[Ed], %[ftmp10], %[ftmp6] \n\t"
141  "or %[mask], %[Ed], %[csth_1] \n\t"
142  "pmullh %[ftmp6], %[ftmp6], %[mask] \n\t"
143  "pmulhuh %[D], %[ftmp8], %[ftmp6] \n\t"
144  "pmullh %[D], %[D], %[mask] \n\t"
145  "pmulhh %[Gd], %[ftmp9], %[ftmp2] \n\t"
146  "psubh %[D], %[Gd], %[D] \n\t"
147  "psubh %[D], %[D], %[Ed] \n\t"
148  "psubh %[Ed], %[A], %[C] \n\t"
149  "paddh %[Gd], %[A], %[C] \n\t"
150  "paddh %[A], %[B], %[Ad] \n\t"
151  "psubh %[C], %[B], %[Ad] \n\t"
152  "psubh %[B], %[Bd], %[D] \n\t"
153  "paddh %[D], %[Bd], %[D] \n\t"
154  /* Final sequence of operations over-write original inputs */
155  "paddh %[ftmp0], %[Gd], %[Cd] \n\t"
156  "paddh %[ftmp1], %[A], %[D] \n\t"
157  "psubh %[ftmp2], %[A], %[D] \n\t"
158  "paddh %[ftmp3], %[Ed], %[Dd] \n\t"
159  "psubh %[ftmp4], %[Ed], %[Dd] \n\t"
160  "paddh %[ftmp5], %[C], %[B] \n\t"
161  "psubh %[ftmp6], %[C], %[B] \n\t"
162  "psubh %[ftmp7], %[Gd], %[Cd] \n\t"
163  "sdc1 %[ftmp0], 0x00(%[input]) \n\t"
164  "sdc1 %[ftmp1], 0x10(%[input]) \n\t"
165  "sdc1 %[ftmp2], 0x20(%[input]) \n\t"
166  "sdc1 %[ftmp3], 0x30(%[input]) \n\t"
167  "sdc1 %[ftmp4], 0x40(%[input]) \n\t"
168  "sdc1 %[ftmp5], 0x50(%[input]) \n\t"
169  "sdc1 %[ftmp6], 0x60(%[input]) \n\t"
170  "sdc1 %[ftmp7], 0x70(%[input]) \n\t"
 /*
  * Decrement the pass counter and step to the next four lanes.
  * NOTE(review): PTR_ADDU is given an immediate here, whereas the column
  * routines in this file use PTR_ADDIU for the same decrement -- presumably
  * the assembler accepts this form; confirm or align the two.
  */
171  PTR_ADDU "%[tmp0], %[tmp0], -0x01 \n\t"
172  PTR_ADDIU "%[input], %[input], 0x08 \n\t"
173  "bnez %[tmp0], 1b \n\t"
174  : [input]"+&r"(input), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
175  [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
176  [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
177  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
178  [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [mask]"=&f"(ftmp[11]),
179  [A]"=&f"(ftmp[12]), [B]"=&f"(ftmp[13]), [C]"=&f"(ftmp[14]),
180  [D]"=&f"(ftmp[15]), [Ad]"=&f"(ftmp[16]), [Bd]"=&f"(ftmp[17]),
181  [Cd]"=&f"(ftmp[18]), [Dd]"=&f"(ftmp[19]), [Ed]"=&f"(ftmp[20]),
182  [Gd]"=&f"(ftmp[21]), [csth_1]"=&f"(ftmp[22])
183  :
184  : "memory"
185  );
186 }
187 
/*
 * Second transform pass that writes the result to `dst` as bytes
 * (the destination is stored to, never read).
 *
 * dst    - output pixels; each loop iteration stores 4 bytes to each of
 *          eight rows that are `stride` bytes apart, then moves 4 bytes right
 * stride - byte distance between output rows
 * input  - coefficients; each iteration consumes 0x40 bytes (four groups of
 *          eight int16), transposed into lanes by TRANSPOSE_4H
 *
 * Before the asm, temp_value[i] is computed in C from input[i << 3] alone
 * (scaled by 46341, rounded, shifted right by 20, biased by 128 and clipped
 * to 0..255). Inside the asm a per-lane mask is derived from whether
 * ftmp1..ftmp7 are all zero, packed to bytes, ANDed with four temp_value
 * bytes and the result is byte-added to every output row.
 *
 * The butterfly and the sign/multiply pattern match idct_row_mmi, with Gd
 * holding the constant 1 until it is reused as a temporary.
 *
 * NOTE(review): [temp_value] is declared as an input-only operand but is
 * incremented at the bottom of the loop; GCC documents that input-only
 * operands must not be modified. The statement lists 28 operands, two of
 * them read-write, which presumably puts it at GCC's 30-operand limit, so
 * turning temp_value into a read-write operand is not a drop-in change --
 * confirm before touching this.
 */
188 static void idct_column_true_mmi(uint8_t *dst, int stride, int16_t *input)
189 {
190  uint8_t temp_value[8];
191  double ftmp[23];
192  uint64_t tmp[2];
 /* one precomputed byte per group of eight coefficients, from its first one */
193  for (int i = 0; i < 8; ++i)
194  temp_value[i] = av_clip_uint8(128 + ((46341 * input[i << 3] + (8 << 16)) >> 20));
195  __asm__ volatile (
196  "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
 /* two passes, four output columns per pass */
197  "li %[tmp0], 0x02 \n\t"
198  "1: \n\t"
199  "ldc1 %[ftmp0], 0x00(%[input]) \n\t"
200  "ldc1 %[ftmp4], 0x08(%[input]) \n\t"
201  "ldc1 %[ftmp1], 0x10(%[input]) \n\t"
202  "ldc1 %[ftmp5], 0x18(%[input]) \n\t"
203  "ldc1 %[ftmp2], 0x20(%[input]) \n\t"
204  "ldc1 %[ftmp6], 0x28(%[input]) \n\t"
205  "ldc1 %[ftmp3], 0x30(%[input]) \n\t"
206  "ldc1 %[ftmp7], 0x38(%[input]) \n\t"
 /* A..D are only scratch registers for the two 4x4 transposes */
207  TRANSPOSE_4H(%[ftmp0], %[ftmp1], %[ftmp2], %[ftmp3],
208  %[A], %[B], %[C], %[D])
209  TRANSPOSE_4H(%[ftmp4], %[ftmp5], %[ftmp6], %[ftmp7],
210  %[A], %[B], %[C], %[D])
211  LOAD_CONST(%[ftmp8], 64277)
212  LOAD_CONST(%[ftmp9], 12785)
213  LOAD_CONST(%[Gd], 1)
214  "pmulhh %[A], %[ftmp9], %[ftmp7] \n\t"
215  "pcmpgth %[C], %[ftmp10], %[ftmp1] \n\t"
216  "or %[mask], %[C], %[Gd] \n\t"
217  "pmullh %[B], %[ftmp1], %[mask] \n\t"
218  "pmulhuh %[B], %[ftmp8], %[B] \n\t"
219  "pmullh %[B], %[B], %[mask] \n\t"
220  "paddh %[A], %[A], %[B] \n\t"
221  "paddh %[A], %[A], %[C] \n\t"
222  "pcmpgth %[D], %[ftmp10], %[ftmp7] \n\t"
223  "or %[mask], %[D], %[Gd] \n\t"
224  "pmullh %[Ad], %[ftmp7], %[mask] \n\t"
225  "pmulhuh %[B], %[ftmp8], %[Ad] \n\t"
226  "pmullh %[B], %[B], %[mask] \n\t"
227  "pmulhh %[C], %[ftmp9], %[ftmp1] \n\t"
228  "psubh %[B], %[C], %[B] \n\t"
229  "psubh %[B], %[B], %[D] \n\t"
230 
231  LOAD_CONST(%[ftmp8], 54491)
232  LOAD_CONST(%[ftmp9], 36410)
233  "pcmpgth %[Ad], %[ftmp10], %[ftmp5] \n\t"
234  "or %[mask], %[Ad], %[Gd] \n\t"
235  "pmullh %[Cd], %[ftmp5], %[mask] \n\t"
236  "pmulhuh %[C], %[ftmp9], %[Cd] \n\t"
237  "pmullh %[C], %[C], %[mask] \n\t"
238  "pcmpgth %[Bd], %[ftmp10], %[ftmp3] \n\t"
239  "or %[mask], %[Bd], %[Gd] \n\t"
240  "pmullh %[D], %[ftmp3], %[mask] \n\t"
241  "pmulhuh %[D], %[ftmp8], %[D] \n\t"
242  "pmullh %[D], %[D], %[mask] \n\t"
243  "paddh %[C], %[C], %[D] \n\t"
244  "paddh %[C], %[C], %[Ad] \n\t"
245  "paddh %[C], %[C], %[Bd] \n\t"
246  "pcmpgth %[Bd], %[ftmp10], %[ftmp3] \n\t"
247  "or %[mask], %[Bd], %[Gd] \n\t"
248  "pmullh %[Cd], %[ftmp3], %[mask] \n\t"
249  "pmulhuh %[D], %[ftmp9], %[Cd] \n\t"
250  "pmullh %[D], %[D], %[mask] \n\t"
251  "pcmpgth %[Ed], %[ftmp10], %[ftmp5] \n\t"
252  "or %[mask], %[Ed], %[Gd] \n\t"
253  "pmullh %[Ad], %[ftmp5], %[mask] \n\t"
254  "pmulhuh %[Ad], %[ftmp8], %[Ad] \n\t"
255  "pmullh %[Ad], %[Ad], %[mask] \n\t"
256  "psubh %[D], %[Ad], %[D] \n\t"
257  "paddh %[D], %[D], %[Ed] \n\t"
258  "psubh %[D], %[D], %[Bd] \n\t"
259 
260  LOAD_CONST(%[ftmp8], 46341)
261  "psubh %[Ad], %[A], %[C] \n\t"
262  "pcmpgth %[Bd], %[ftmp10], %[Ad] \n\t"
263  "or %[mask], %[Bd], %[Gd] \n\t"
264  "pmullh %[Ad], %[Ad], %[mask] \n\t"
265  "pmulhuh %[Ad], %[ftmp8], %[Ad] \n\t"
266  "pmullh %[Ad], %[Ad], %[mask] \n\t"
267  "paddh %[Ad], %[Ad], %[Bd] \n\t"
268  "psubh %[Bd], %[B], %[D] \n\t"
269  "pcmpgth %[Cd], %[ftmp10], %[Bd] \n\t"
270  "or %[mask], %[Cd], %[Gd] \n\t"
271  "pmullh %[Bd], %[Bd], %[mask] \n\t"
272  "pmulhuh %[Bd], %[ftmp8], %[Bd] \n\t"
273  "pmullh %[Bd], %[Bd], %[mask] \n\t"
274  "paddh %[Bd], %[Bd], %[Cd] \n\t"
275  "paddh %[Cd], %[A], %[C] \n\t"
276  "paddh %[Dd], %[B], %[D] \n\t"
277 
 /*
  * 2056 == 8 + (128 << 4); it is added to both even-part terms before the
  * final shift right by 4 (presumably rounding plus the +128 bias).
  */
278  LOAD_CONST(%[Ed], 2056)
279  "paddh %[A], %[ftmp0], %[ftmp4] \n\t"
280  "pcmpgth %[B], %[ftmp10], %[A] \n\t"
281  "or %[mask], %[B], %[Gd] \n\t"
282  "pmullh %[A], %[A], %[mask] \n\t"
283  "pmulhuh %[A], %[ftmp8], %[A] \n\t"
284  "pmullh %[A], %[A], %[mask] \n\t"
285  "paddh %[A], %[A], %[B] \n\t"
286  "paddh %[A], %[A], %[Ed] \n\t"
287  "psubh %[B], %[ftmp0], %[ftmp4] \n\t"
288  "pcmpgth %[C], %[ftmp10], %[B] \n\t"
289  "or %[mask], %[C], %[Gd] \n\t"
290  "pmullh %[B], %[B], %[mask] \n\t"
291  "pmulhuh %[B], %[ftmp8], %[B] \n\t"
292  "pmullh %[B], %[B], %[mask] \n\t"
293  "paddh %[B], %[B], %[C] \n\t"
294  "paddh %[B], %[B], %[Ed] \n\t"
295 
296  LOAD_CONST(%[ftmp8], 60547)
297  LOAD_CONST(%[ftmp9], 25080)
298  "pmulhh %[C], %[ftmp9], %[ftmp6] \n\t"
299  "pcmpgth %[D], %[ftmp10], %[ftmp2] \n\t"
300  "or %[mask], %[D], %[Gd] \n\t"
301  "pmullh %[Ed], %[ftmp2], %[mask] \n\t"
302  "pmulhuh %[Ed], %[ftmp8], %[Ed] \n\t"
303  "pmullh %[Ed], %[Ed], %[mask] \n\t"
304  "paddh %[C], %[C], %[Ed] \n\t"
305  "paddh %[C], %[C], %[D] \n\t"
306  "pcmpgth %[Ed], %[ftmp10], %[ftmp6] \n\t"
307  "or %[mask], %[Ed], %[Gd] \n\t"
308  "pmullh %[D], %[ftmp6], %[mask] \n\t"
309  "pmulhuh %[D], %[ftmp8], %[D] \n\t"
310  "pmullh %[D], %[D], %[mask] \n\t"
 /* from here on Gd no longer holds the constant 1 */
311  "pmulhh %[Gd], %[ftmp9], %[ftmp2] \n\t"
312  "psubh %[D], %[Gd], %[D] \n\t"
313  "psubh %[D], %[D], %[Ed] \n\t"
314  "psubh %[Ed], %[A], %[C] \n\t"
315  "paddh %[Gd], %[A], %[C] \n\t"
316  "paddh %[A], %[B], %[Ad] \n\t"
317  "psubh %[C], %[B], %[Ad] \n\t"
318  "psubh %[B], %[Bd], %[D] \n\t"
319  "paddh %[D], %[Bd], %[D] \n\t"
 /* mask: OR of ftmp1..ftmp7 compared per lane against zero, then packed */
320  "or %[mask], %[ftmp1], %[ftmp2] \n\t"
321  "or %[mask], %[mask], %[ftmp3] \n\t"
322  "or %[mask], %[mask], %[ftmp4] \n\t"
323  "or %[mask], %[mask], %[ftmp5] \n\t"
324  "or %[mask], %[mask], %[ftmp6] \n\t"
325  "or %[mask], %[mask], %[ftmp7] \n\t"
326  "pcmpeqh %[mask], %[mask], %[ftmp10] \n\t"
327  "packushb %[mask], %[mask], %[ftmp10] \n\t"
 /* ftmp8 = shift count 4 for the final arithmetic shifts */
328  "li %[tmp1], 0x04 \n\t"
329  "dmtc1 %[tmp1], %[ftmp8] \n\t"
330  "paddh %[ftmp0], %[Gd], %[Cd] \n\t"
331  "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
332  "paddh %[ftmp1], %[A], %[D] \n\t"
333  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
334  "psubh %[ftmp2], %[A], %[D] \n\t"
335  "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
336  "paddh %[ftmp3], %[Ed], %[Dd] \n\t"
337  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
338  "psubh %[ftmp4], %[Ed], %[Dd] \n\t"
339  "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
340  "paddh %[ftmp5], %[C], %[B] \n\t"
341  "psrah %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
342  "psubh %[ftmp6], %[C], %[B] \n\t"
343  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
344  "psubh %[ftmp7], %[Gd], %[Cd] \n\t"
345  "psrah %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
 /* clamp below at zero, then pack the halfwords to bytes */
346  "pmaxsh %[ftmp0], %[ftmp0], %[ftmp10] \n\t"
347  "packushb %[ftmp0], %[ftmp0], %[ftmp10] \n\t"
348  "pmaxsh %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
349  "packushb %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
350  "pmaxsh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
351  "packushb %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
352  "pmaxsh %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
353  "packushb %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
354  "pmaxsh %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
355  "packushb %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
356  "pmaxsh %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
357  "packushb %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
358  "pmaxsh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
359  "packushb %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
360  "pmaxsh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
361  "packushb %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
362 
 /* four precomputed bytes, gated by mask, are byte-added to every row */
363  "lwc1 %[Ed], 0x00(%[temp_value]) \n\t"
364  "and %[Ed], %[Ed], %[mask] \n\t"
365  "paddb %[ftmp0], %[ftmp0], %[Ed] \n\t"
366  "paddb %[ftmp1], %[ftmp1], %[Ed] \n\t"
367  "paddb %[ftmp2], %[ftmp2], %[Ed] \n\t"
368  "paddb %[ftmp3], %[ftmp3], %[Ed] \n\t"
369  "paddb %[ftmp4], %[ftmp4], %[Ed] \n\t"
370  "paddb %[ftmp5], %[ftmp5], %[Ed] \n\t"
371  "paddb %[ftmp6], %[ftmp6], %[Ed] \n\t"
372  "paddb %[ftmp7], %[ftmp7], %[Ed] \n\t"
 /* store 4 bytes per row; tmp1 walks down the eight rows */
373  "swc1 %[ftmp0], 0x00(%[dst]) \n\t"
374  PTR_ADDU "%[tmp1], %[dst], %[stride] \n\t"
375  "swc1 %[ftmp1], 0x00(%[tmp1]) \n\t"
376  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
377  "swc1 %[ftmp2], 0x00(%[tmp1]) \n\t"
378  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
379  "swc1 %[ftmp3], 0x00(%[tmp1]) \n\t"
380  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
381  "swc1 %[ftmp4], 0x00(%[tmp1]) \n\t"
382  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
383  "swc1 %[ftmp5], 0x00(%[tmp1]) \n\t"
384  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
385  "swc1 %[ftmp6], 0x00(%[tmp1]) \n\t"
386  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
387  "swc1 %[ftmp7], 0x00(%[tmp1]) \n\t"
 /* advance to the next four columns; see the NOTE above about temp_value */
388  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
389  PTR_ADDIU "%[input], %[input], 0x40 \n\t"
390  PTR_ADDIU "%[temp_value], %[temp_value], 0x04 \n\t"
391  PTR_ADDIU "%[tmp0], %[tmp0], -0x01 \n\t"
392  "bnez %[tmp0], 1b \n\t"
393  : [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
394  [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
395  [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
396  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
397  [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [mask]"=&f"(ftmp[11]),
398  [A]"=&f"(ftmp[12]), [B]"=&f"(ftmp[13]), [C]"=&f"(ftmp[14]),
399  [D]"=&f"(ftmp[15]), [Ad]"=&f"(ftmp[16]), [Bd]"=&f"(ftmp[17]),
400  [Cd]"=&f"(ftmp[18]), [Dd]"=&f"(ftmp[19]), [Ed]"=&f"(ftmp[20]),
401  [Gd]"=&f"(ftmp[21]), [input]"+&r"(input)
402  : [stride]"r"(stride), [temp_value]"r"(temp_value)
403  : "memory"
404  );
405 }
406 
/*
 * Second transform pass that adds the result to the pixels already in `dst`.
 *
 * dst    - pixels to update; each loop iteration reads and rewrites 4 bytes
 *          in each of eight rows `stride` bytes apart, then moves 4 bytes right
 * stride - byte distance between rows
 * input  - coefficients; each iteration consumes 0x40 bytes (four groups of
 *          eight int16), transposed into lanes by TRANSPOSE_4H
 *
 * Before the asm, temp_value[i] is computed in C from input[i << 3] alone
 * (scaled by 46341, rounded, shifted right by 20; no bias, no clipping).
 * Inside the asm a per-lane mask marks lanes where ftmp1..ftmp7 are all
 * zero: those lanes take temp_value, the other lanes keep the full
 * transform result. The selected value is added to the unpacked dst
 * pixels, clamped below at zero and packed back to bytes.
 *
 * The butterfly and the sign/multiply pattern match idct_row_mmi, with Gd
 * holding the constant 1 until it is reused as a temporary.
 *
 * NOTE(review): [temp_value] is declared as an input-only operand but is
 * incremented at the bottom of the loop; GCC documents that input-only
 * operands must not be modified. The statement lists 28 operands, two of
 * them read-write, which presumably puts it at GCC's 30-operand limit, so
 * turning temp_value into a read-write operand is not a drop-in change --
 * confirm before touching this.
 */
407 static void idct_column_false_mmi(uint8_t *dst, int stride, int16_t *input)
408 {
409  int16_t temp_value[8];
410  double ftmp[23];
411  uint64_t tmp[2];
 /* one precomputed value per group of eight coefficients, from its first one */
412  for (int i = 0; i < 8; ++i)
413  temp_value[i] = (46341 * input[i << 3] + (8 << 16)) >> 20;
414  __asm__ volatile (
415  "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t"
 /* two passes, four output columns per pass */
416  "li %[tmp0], 0x02 \n\t"
417  "1: \n\t"
418  "ldc1 %[ftmp0], 0x00(%[input]) \n\t"
419  "ldc1 %[ftmp4], 0x08(%[input]) \n\t"
420  "ldc1 %[ftmp1], 0x10(%[input]) \n\t"
421  "ldc1 %[ftmp5], 0x18(%[input]) \n\t"
422  "ldc1 %[ftmp2], 0x20(%[input]) \n\t"
423  "ldc1 %[ftmp6], 0x28(%[input]) \n\t"
424  "ldc1 %[ftmp3], 0x30(%[input]) \n\t"
425  "ldc1 %[ftmp7], 0x38(%[input]) \n\t"
 /* A..D are only scratch registers for the two 4x4 transposes */
426  TRANSPOSE_4H(%[ftmp0], %[ftmp1], %[ftmp2], %[ftmp3],
427  %[A], %[B], %[C], %[D])
428  TRANSPOSE_4H(%[ftmp4], %[ftmp5], %[ftmp6], %[ftmp7],
429  %[A], %[B], %[C], %[D])
430  LOAD_CONST(%[ftmp8], 64277)
431  LOAD_CONST(%[ftmp9], 12785)
432  LOAD_CONST(%[Gd], 1)
433  "pmulhh %[A], %[ftmp9], %[ftmp7] \n\t"
434  "pcmpgth %[C], %[ftmp10], %[ftmp1] \n\t"
435  "or %[mask], %[C], %[Gd] \n\t"
436  "pmullh %[B], %[ftmp1], %[mask] \n\t"
437  "pmulhuh %[B], %[ftmp8], %[B] \n\t"
438  "pmullh %[B], %[B], %[mask] \n\t"
439  "paddh %[A], %[A], %[B] \n\t"
440  "paddh %[A], %[A], %[C] \n\t"
441  "pcmpgth %[D], %[ftmp10], %[ftmp7] \n\t"
442  "or %[mask], %[D], %[Gd] \n\t"
443  "pmullh %[Ad], %[ftmp7], %[mask] \n\t"
444  "pmulhuh %[B], %[ftmp8], %[Ad] \n\t"
445  "pmullh %[B], %[B], %[mask] \n\t"
446  "pmulhh %[C], %[ftmp9], %[ftmp1] \n\t"
447  "psubh %[B], %[C], %[B] \n\t"
448  "psubh %[B], %[B], %[D] \n\t"
449 
450  LOAD_CONST(%[ftmp8], 54491)
451  LOAD_CONST(%[ftmp9], 36410)
452  "pcmpgth %[Ad], %[ftmp10], %[ftmp5] \n\t"
453  "or %[mask], %[Ad], %[Gd] \n\t"
454  "pmullh %[Cd], %[ftmp5], %[mask] \n\t"
455  "pmulhuh %[C], %[ftmp9], %[Cd] \n\t"
456  "pmullh %[C], %[C], %[mask] \n\t"
457  "pcmpgth %[Bd], %[ftmp10], %[ftmp3] \n\t"
458  "or %[mask], %[Bd], %[Gd] \n\t"
459  "pmullh %[D], %[ftmp3], %[mask] \n\t"
460  "pmulhuh %[D], %[ftmp8], %[D] \n\t"
461  "pmullh %[D], %[D], %[mask] \n\t"
462  "paddh %[C], %[C], %[D] \n\t"
463  "paddh %[C], %[C], %[Ad] \n\t"
464  "paddh %[C], %[C], %[Bd] \n\t"
465  "pcmpgth %[Bd], %[ftmp10], %[ftmp3] \n\t"
466  "or %[mask], %[Bd], %[Gd] \n\t"
467  "pmullh %[Cd], %[ftmp3], %[mask] \n\t"
468  "pmulhuh %[D], %[ftmp9], %[Cd] \n\t"
469  "pmullh %[D], %[D], %[mask] \n\t"
470  "pcmpgth %[Ed], %[ftmp10], %[ftmp5] \n\t"
471  "or %[mask], %[Ed], %[Gd] \n\t"
472  "pmullh %[Ad], %[ftmp5], %[mask] \n\t"
473  "pmulhuh %[Ad], %[ftmp8], %[Ad] \n\t"
474  "pmullh %[Ad], %[Ad], %[mask] \n\t"
475  "psubh %[D], %[Ad], %[D] \n\t"
476  "paddh %[D], %[D], %[Ed] \n\t"
477  "psubh %[D], %[D], %[Bd] \n\t"
478 
479  LOAD_CONST(%[ftmp8], 46341)
480  "psubh %[Ad], %[A], %[C] \n\t"
481  "pcmpgth %[Bd], %[ftmp10], %[Ad] \n\t"
482  "or %[mask], %[Bd], %[Gd] \n\t"
483  "pmullh %[Ad], %[Ad], %[mask] \n\t"
484  "pmulhuh %[Ad], %[ftmp8], %[Ad] \n\t"
485  "pmullh %[Ad], %[Ad], %[mask] \n\t"
486  "paddh %[Ad], %[Ad], %[Bd] \n\t"
487  "psubh %[Bd], %[B], %[D] \n\t"
488  "pcmpgth %[Cd], %[ftmp10], %[Bd] \n\t"
489  "or %[mask], %[Cd], %[Gd] \n\t"
490  "pmullh %[Bd], %[Bd], %[mask] \n\t"
491  "pmulhuh %[Bd], %[ftmp8], %[Bd] \n\t"
492  "pmullh %[Bd], %[Bd], %[mask] \n\t"
493  "paddh %[Bd], %[Bd], %[Cd] \n\t"
494  "paddh %[Cd], %[A], %[C] \n\t"
495  "paddh %[Dd], %[B], %[D] \n\t"
496 
 /* 8 is added to both even-part terms before the final shift right by 4 */
497  LOAD_CONST(%[Ed], 8)
498  "paddh %[A], %[ftmp0], %[ftmp4] \n\t"
499  "pcmpgth %[B], %[ftmp10], %[A] \n\t"
500  "or %[mask], %[B], %[Gd] \n\t"
501  "pmullh %[A], %[A], %[mask] \n\t"
502  "pmulhuh %[A], %[ftmp8], %[A] \n\t"
503  "pmullh %[A], %[A], %[mask] \n\t"
504  "paddh %[A], %[A], %[B] \n\t"
505  "paddh %[A], %[A], %[Ed] \n\t"
506  "psubh %[B], %[ftmp0], %[ftmp4] \n\t"
507  "pcmpgth %[C], %[ftmp10], %[B] \n\t"
508  "or %[mask], %[C], %[Gd] \n\t"
509  "pmullh %[B], %[B], %[mask] \n\t"
510  "pmulhuh %[B], %[ftmp8], %[B] \n\t"
511  "pmullh %[B], %[B], %[mask] \n\t"
512  "paddh %[B], %[B], %[C] \n\t"
513  "paddh %[B], %[B], %[Ed] \n\t"
514 
515  LOAD_CONST(%[ftmp8], 60547)
516  LOAD_CONST(%[ftmp9], 25080)
517  "pmulhh %[C], %[ftmp9], %[ftmp6] \n\t"
518  "pcmpgth %[D], %[ftmp10], %[ftmp2] \n\t"
519  "or %[mask], %[D], %[Gd] \n\t"
520  "pmullh %[Ed], %[ftmp2], %[mask] \n\t"
521  "pmulhuh %[Ed], %[ftmp8], %[Ed] \n\t"
522  "pmullh %[Ed], %[Ed], %[mask] \n\t"
523  "paddh %[C], %[C], %[Ed] \n\t"
524  "paddh %[C], %[C], %[D] \n\t"
525  "pcmpgth %[Ed], %[ftmp10], %[ftmp6] \n\t"
526  "or %[mask], %[Ed], %[Gd] \n\t"
527  "pmullh %[D], %[ftmp6], %[mask] \n\t"
528  "pmulhuh %[D], %[ftmp8], %[D] \n\t"
529  "pmullh %[D], %[D], %[mask] \n\t"
 /* from here on Gd no longer holds the constant 1 */
530  "pmulhh %[Gd], %[ftmp9], %[ftmp2] \n\t"
531  "psubh %[D], %[Gd], %[D] \n\t"
532  "psubh %[D], %[D], %[Ed] \n\t"
533  "psubh %[Ed], %[A], %[C] \n\t"
534  "paddh %[Gd], %[A], %[C] \n\t"
535  "paddh %[A], %[B], %[Ad] \n\t"
536  "psubh %[C], %[B], %[Ad] \n\t"
537  "psubh %[B], %[Bd], %[D] \n\t"
538  "paddh %[D], %[Bd], %[D] \n\t"
 /* mask: OR of ftmp1..ftmp7 compared per lane against zero */
539  "or %[mask], %[ftmp1], %[ftmp2] \n\t"
540  "or %[mask], %[mask], %[ftmp3] \n\t"
541  "or %[mask], %[mask], %[ftmp4] \n\t"
542  "or %[mask], %[mask], %[ftmp5] \n\t"
543  "or %[mask], %[mask], %[ftmp6] \n\t"
544  "or %[mask], %[mask], %[ftmp7] \n\t"
545  "pcmpeqh %[mask], %[mask], %[ftmp10] \n\t"
 /* ftmp8 = shift count 4 for the final arithmetic shifts */
546  "li %[tmp1], 0x04 \n\t"
547  "dmtc1 %[tmp1], %[ftmp8] \n\t"
548  "paddh %[ftmp0], %[Gd], %[Cd] \n\t"
549  "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
550  "paddh %[ftmp1], %[A], %[D] \n\t"
551  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
552  "psubh %[ftmp2], %[A], %[D] \n\t"
553  "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
554  "paddh %[ftmp3], %[Ed], %[Dd] \n\t"
555  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
556  "psubh %[ftmp4], %[Ed], %[Dd] \n\t"
557  "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
558  "paddh %[ftmp5], %[C], %[B] \n\t"
559  "psrah %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
560  "psubh %[ftmp6], %[C], %[B] \n\t"
561  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
562  "psubh %[ftmp7], %[Gd], %[Cd] \n\t"
563  "psrah %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
564 
565  /* Load from dst */
566  "lwc1 %[A], 0x00(%[dst]) \n\t"
567  PTR_ADDU "%[tmp1], %[dst], %[stride] \n\t"
568  "lwc1 %[B], 0x00(%[tmp1]) \n\t"
569  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
570  "lwc1 %[C], 0x00(%[tmp1]) \n\t"
571  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
572  "lwc1 %[D], 0x00(%[tmp1]) \n\t"
573  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
574  "lwc1 %[Ad], 0x00(%[tmp1]) \n\t"
575  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
576  "lwc1 %[Bd], 0x00(%[tmp1]) \n\t"
577  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
578  "lwc1 %[Cd], 0x00(%[tmp1]) \n\t"
579  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
580  "lwc1 %[Dd], 0x00(%[tmp1]) \n\t"
 /* widen the four loaded pixels of every row to halfwords */
581  "punpcklbh %[A], %[A], %[ftmp10] \n\t"
582  "punpcklbh %[B], %[B], %[ftmp10] \n\t"
583  "punpcklbh %[C], %[C], %[ftmp10] \n\t"
584  "punpcklbh %[D], %[D], %[ftmp10] \n\t"
585  "punpcklbh %[Ad], %[Ad], %[ftmp10] \n\t"
586  "punpcklbh %[Bd], %[Bd], %[ftmp10] \n\t"
587  "punpcklbh %[Cd], %[Cd], %[ftmp10] \n\t"
588  "punpcklbh %[Dd], %[Dd], %[ftmp10] \n\t"
 /* keep temp_value only in masked lanes, the transform result in the rest */
589  "ldc1 %[Ed], 0x00(%[temp_value]) \n\t"
590  "and %[Ed], %[Ed], %[mask] \n\t"
591  "nor %[mask], %[mask], %[mask] \n\t"
592  "and %[ftmp0], %[ftmp0], %[mask] \n\t"
593  "and %[ftmp1], %[ftmp1], %[mask] \n\t"
594  "and %[ftmp2], %[ftmp2], %[mask] \n\t"
595  "and %[ftmp3], %[ftmp3], %[mask] \n\t"
596  "and %[ftmp4], %[ftmp4], %[mask] \n\t"
597  "and %[ftmp5], %[ftmp5], %[mask] \n\t"
598  "and %[ftmp6], %[ftmp6], %[mask] \n\t"
599  "and %[ftmp7], %[ftmp7], %[mask] \n\t"
 /* add the existing pixels, then the gated temp_value */
600  "paddh %[ftmp0], %[ftmp0], %[A] \n\t"
601  "paddh %[ftmp1], %[ftmp1], %[B] \n\t"
602  "paddh %[ftmp2], %[ftmp2], %[C] \n\t"
603  "paddh %[ftmp3], %[ftmp3], %[D] \n\t"
604  "paddh %[ftmp4], %[ftmp4], %[Ad] \n\t"
605  "paddh %[ftmp5], %[ftmp5], %[Bd] \n\t"
606  "paddh %[ftmp6], %[ftmp6], %[Cd] \n\t"
607  "paddh %[ftmp7], %[ftmp7], %[Dd] \n\t"
608  "paddh %[ftmp0], %[ftmp0], %[Ed] \n\t"
609  "paddh %[ftmp1], %[ftmp1], %[Ed] \n\t"
610  "paddh %[ftmp2], %[ftmp2], %[Ed] \n\t"
611  "paddh %[ftmp3], %[ftmp3], %[Ed] \n\t"
612  "paddh %[ftmp4], %[ftmp4], %[Ed] \n\t"
613  "paddh %[ftmp5], %[ftmp5], %[Ed] \n\t"
614  "paddh %[ftmp6], %[ftmp6], %[Ed] \n\t"
615  "paddh %[ftmp7], %[ftmp7], %[Ed] \n\t"
 /* clamp below at zero, then pack the halfwords to bytes */
616  "pmaxsh %[ftmp0], %[ftmp0], %[ftmp10] \n\t"
617  "packushb %[ftmp0], %[ftmp0], %[ftmp10] \n\t"
618  "pmaxsh %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
619  "packushb %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
620  "pmaxsh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
621  "packushb %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
622  "pmaxsh %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
623  "packushb %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
624  "pmaxsh %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
625  "packushb %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
626  "pmaxsh %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
627  "packushb %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
628  "pmaxsh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
629  "packushb %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
630  "pmaxsh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
631  "packushb %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
 /* store 4 bytes per row; tmp1 walks down the eight rows */
632  "swc1 %[ftmp0], 0x00(%[dst]) \n\t"
633  PTR_ADDU "%[tmp1], %[dst], %[stride] \n\t"
634  "swc1 %[ftmp1], 0x00(%[tmp1]) \n\t"
635  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
636  "swc1 %[ftmp2], 0x00(%[tmp1]) \n\t"
637  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
638  "swc1 %[ftmp3], 0x00(%[tmp1]) \n\t"
639  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
640  "swc1 %[ftmp4], 0x00(%[tmp1]) \n\t"
641  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
642  "swc1 %[ftmp5], 0x00(%[tmp1]) \n\t"
643  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
644  "swc1 %[ftmp6], 0x00(%[tmp1]) \n\t"
645  PTR_ADDU "%[tmp1], %[tmp1], %[stride] \n\t"
646  "swc1 %[ftmp7], 0x00(%[tmp1]) \n\t"
 /* advance to the next four columns; see the NOTE above about temp_value */
647  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
648  PTR_ADDIU "%[input], %[input], 0x40 \n\t"
649  PTR_ADDIU "%[temp_value], %[temp_value], 0x08 \n\t"
650  PTR_ADDIU "%[tmp0], %[tmp0], -0x01 \n\t"
651  "bnez %[tmp0], 1b \n\t"
652  : [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
653  [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
654  [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
655  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
656  [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [mask]"=&f"(ftmp[11]),
657  [A]"=&f"(ftmp[12]), [B]"=&f"(ftmp[13]), [C]"=&f"(ftmp[14]),
658  [D]"=&f"(ftmp[15]), [Ad]"=&f"(ftmp[16]), [Bd]"=&f"(ftmp[17]),
659  [Cd]"=&f"(ftmp[18]), [Dd]"=&f"(ftmp[19]), [Ed]"=&f"(ftmp[20]),
660  [Gd]"=&f"(ftmp[21]), [input]"+&r"(input)
661  : [stride]"r"(stride), [temp_value]"r"(temp_value)
662  : "memory"
663  );
664 }
665 static void idct_mmi(uint8_t *dst, int stride, int16_t *input, int type)
666 {
668  if (type == 1)
670  else
672 }
673 
/**
 * Run idct_mmi() on one coefficient block with type 1, then clear the block.
 *
 * @param dest      destination pixels handed through to idct_mmi()
 * @param line_size byte distance between destination rows
 * @param block     64 int16 coefficients; zeroed on return
 */
void ff_vp3_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    enum { NB_COEFFS = 64 };

    idct_mmi(dest, line_size, block, 1);
    /* the caller expects the coefficient buffer to be left empty */
    memset(block, 0, NB_COEFFS * sizeof(block[0]));
}
679 
/**
 * Run idct_mmi() on one coefficient block with type 2, then clear the block.
 *
 * @param dest      destination pixels handed through to idct_mmi()
 * @param line_size byte distance between destination rows
 * @param block     64 int16 coefficients; zeroed on return
 */
void ff_vp3_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    enum { NB_COEFFS = 64 };

    idct_mmi(dest, line_size, block, 2);
    /* the caller expects the coefficient buffer to be left empty */
    memset(block, 0, NB_COEFFS * sizeof(block[0]));
}
685 void ff_vp3_idct_dc_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
686 {
687  int dc = (block[0] + 15) >> 5;
688 
689  double ftmp[7];
690  uint64_t tmp;
691  __asm__ volatile (
692  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
693  "mtc1 %[dc], %[ftmp5] \n\t"
694  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
695  "li %[tmp0], 0x08 \n\t"
696  "1: \n\t"
697  "ldc1 %[ftmp1], 0x00(%[dest]) \n\t"
698  "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
699  "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
700  "paddh %[ftmp4], %[ftmp2], %[ftmp5] \n\t"
701  "paddh %[ftmp6], %[ftmp3], %[ftmp5] \n\t"
702  "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
703  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
704  "swc1 %[ftmp4], 0x00(%[dest]) \n\t"
705  "swc1 %[ftmp6], 0x04(%[dest]) \n\t"
706  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
707  PTR_ADDIU "%[tmp0], %[tmp0], -0x01 \n\t"
708  "bnez %[tmp0], 1b \n\t"
709  : [dest]"+&r"(dest), [block]"+&r"(block), [tmp0]"=&r"(tmp),
710  [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
711  [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
712  [ftmp6]"=&f"(ftmp[6])
713  : [line_size]"r"(line_size), [dc]"r"(dc)
714  : "memory"
715  );
716  block[0] = 0;
717 }
718 
720  const uint8_t *src2, ptrdiff_t stride, int h)
721 {
722  if (h == 8) {
723  double ftmp[6];
724  uint64_t tmp[2];
725  __asm__ volatile (
726  "li %[tmp0], 0x08 \n\t"
727  "li %[tmp1], 0xfefefefe \n\t"
728  "dmtc1 %[tmp1], %[ftmp4] \n\t"
729  "punpcklwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
730  "li %[tmp1], 0x01 \n\t"
731  "dmtc1 %[tmp1], %[ftmp5] \n\t"
732  "1: \n\t"
733  "gsldlc1 %[ftmp1], 0x07(%[src1]) \n\t"
734  "gsldrc1 %[ftmp1], 0x00(%[src1]) \n\t"
735  "gsldlc1 %[ftmp2], 0x07(%[src2]) \n\t"
736  "gsldrc1 %[ftmp2], 0x00(%[src2]) \n\t"
737  "xor %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
738  "and %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
739  "psrlw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
740  "and %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
741  "paddw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
742  "sdc1 %[ftmp3], 0x00(%[dst]) \n\t"
743  PTR_ADDU "%[src1], %[src1], %[stride] \n\t"
744  PTR_ADDU "%[src2], %[src2], %[stride] \n\t"
745  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
746  PTR_ADDIU "%[tmp0], %[tmp0], -0x01 \n\t"
747  "bnez %[tmp0], 1b \n\t"
748  : [dst]"+&r"(dst), [src1]"+&r"(src1), [src2]"+&r"(src2),
749  [ftmp1]"=&f"(ftmp[0]), [ftmp2]"=&f"(ftmp[1]), [ftmp3]"=&f"(ftmp[2]),
750  [ftmp4]"=&f"(ftmp[3]), [ftmp5]"=&f"(ftmp[4]), [ftmp6]"=&f"(ftmp[5]),
751  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1])
752  : [stride]"r"(stride)
753  : "memory"
754  );
755  } else {
756  int i;
757 
758  for (i = 0; i < h; i++) {
759  uint32_t a, b;
760 
761  a = AV_RN32(&src1[i * stride]);
762  b = AV_RN32(&src2[i * stride]);
763  AV_WN32A(&dst[i * stride], no_rnd_avg32(a, b));
764  a = AV_RN32(&src1[i * stride + 4]);
765  b = AV_RN32(&src2[i * stride + 4]);
766  AV_WN32A(&dst[i * stride + 4], no_rnd_avg32(a, b));
767  }
768  }
769 }
stride
int stride
Definition: mace.c:144
no_rnd_avg32
static uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
Definition: rnd_avg.h:36
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:26
vp3dsp_mips.h
b
#define b
Definition: input.c:41
AV_WN32A
#define AV_WN32A(p, v)
Definition: intreadwrite.h:538
D
D(D(float, sse)
Definition: rematrix_init.c:28
A
#define A(x)
Definition: vp56_arith.h:28
TRANSPOSE_4H
#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, fr_t0, fr_t1, fr_t2, fr_t3)
brief: Transpose 4X4 half word packaged data.
Definition: mmiutils.h:267
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
mmiutils.h
C
s EdgeDetect Foobar g libavfilter vf_edgedetect c libavfilter vf_foobar c edit libavfilter and add an entry for foobar following the pattern of the other filters edit libavfilter allfilters and add an entry for foobar following the pattern of the other filters configure make j< whatever > ffmpeg ffmpeg i you should get a foobar png with Lena edge detected That s your new playground is ready Some little details about what s going which in turn will define variables for the build system and the C
Definition: writing_filters.txt:58
idct_column_true_mmi
static void idct_column_true_mmi(uint8_t *dst, int stride, int16_t *input)
Definition: vp3dsp_idct_mmi.c:188
mask
static const uint16_t mask[17]
Definition: lzw.c:38
intreadwrite.h
ff_vp3_idct_add_mmi
void ff_vp3_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: vp3dsp_idct_mmi.c:680
idct_column_false_mmi
static void idct_column_false_mmi(uint8_t *dst, int stride, int16_t *input)
Definition: vp3dsp_idct_mmi.c:407
AV_RN32
#define AV_RN32(p)
Definition: intreadwrite.h:364
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
idct_mmi
static void idct_mmi(uint8_t *dst, int stride, int16_t *input, int type)
Definition: vp3dsp_idct_mmi.c:665
src1
#define src1
Definition: h264pred.c:139
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
ff_put_no_rnd_pixels_l2_mmi
void ff_put_no_rnd_pixels_l2_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: vp3dsp_idct_mmi.c:719
common.h
uint8_t
uint8_t
Definition: audio_convert.c:194
rnd_avg.h
ff_vp3_idct_put_mmi
void ff_vp3_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: vp3dsp_idct_mmi.c:674
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:47
B
#define B
Definition: huffyuvdsp.h:32
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:48
LOAD_CONST
#define LOAD_CONST(dst, value)
Definition: vp3dsp_idct_mmi.c:27
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038
ff_vp3_idct_dc_add_mmi
void ff_vp3_idct_dc_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: vp3dsp_idct_mmi.c:685
idct_row_mmi
static void idct_row_mmi(int16_t *input)
Definition: vp3dsp_idct_mmi.c:32