FFmpeg
vp9_mc_mmi.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 gxw <guxiwei-hf@loongson.cn>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
#include "libavcodec/vp9dsp.h"
#include "libavutil/mips/mmiutils.h"
#include "vp9dsp_mips.h"
24 
/*
 * Horizontal 8-tap dot products for four neighbouring output pixels.
 *
 * Expected on entry (set up by the horizontal convolve functions):
 *   ftmp4/ftmp5   - pixels of window 0 widened to 16 bit (low / high half)
 *   ftmp6/ftmp7   - same for window 1 (source shifted by one pixel)
 *   ftmp8/ftmp9   - same for window 2
 *   ftmp10/ftmp11 - same for window 3
 *   filter1       - filter taps 0..3, filter2 - filter taps 4..7
 *   ftmp0         - all zero
 *
 * For every window the two pmaddhw products are added, then the upper
 * 32-bit word is folded onto the lower one, so the low word holds the
 * complete 8-tap sum.  On exit srcl packs the sums of windows 0 and 1 and
 * srch those of windows 2 and 3 as 32-bit words (not yet rounded).
 * ftmp4..ftmp11 are overwritten.
 */
#define GET_DATA_H_MMI                                        \
    "pmaddhw %[ftmp4], %[ftmp4], %[filter1] \n\t"             \
    "pmaddhw %[ftmp5], %[ftmp5], %[filter2] \n\t"             \
    "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"                 \
    "punpckhwd %[ftmp5], %[ftmp4], %[ftmp0] \n\t"             \
    "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"                 \
    "pmaddhw %[ftmp6], %[ftmp6], %[filter1] \n\t"             \
    "pmaddhw %[ftmp7], %[ftmp7], %[filter2] \n\t"             \
    "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t"                 \
    "punpckhwd %[ftmp7], %[ftmp6], %[ftmp0] \n\t"             \
    "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t"                 \
    "punpcklwd %[srcl], %[ftmp4], %[ftmp6] \n\t"              \
    "pmaddhw %[ftmp8], %[ftmp8], %[filter1] \n\t"             \
    "pmaddhw %[ftmp9], %[ftmp9], %[filter2] \n\t"             \
    "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t"                 \
    "punpckhwd %[ftmp9], %[ftmp8], %[ftmp0] \n\t"             \
    "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t"                 \
    "pmaddhw %[ftmp10], %[ftmp10], %[filter1] \n\t"           \
    "pmaddhw %[ftmp11], %[ftmp11], %[filter2] \n\t"           \
    "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t"              \
    "punpckhwd %[ftmp11], %[ftmp10], %[ftmp0] \n\t"           \
    "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t"              \
    "punpcklwd %[srch], %[ftmp8], %[ftmp10] \n\t"
48 
/*
 * Vertical 8-tap dot products for four neighbouring output pixels.
 *
 * Expected on entry (set up by the vertical convolve functions):
 *   ftmp4..ftmp11 - four pixels from each of eight consecutive rows,
 *                   widened to 16 bit
 *   filter10/filter32/filter54/filter76 - tap pairs (0,1), (2,3), (4,5),
 *                   (6,7), each pair duplicated across the register
 *
 * Rows are interleaved pairwise so that one pmaddhw yields
 * row[2k] * tap[2k] + row[2k+1] * tap[2k+1] for two columns at a time; the
 * four partial results are accumulated.  punpcklhw handles columns 0 and 1
 * (result in srcl), punpckhhw columns 2 and 3 (result in srch).  The sums
 * are 32-bit words and not yet rounded.  ftmp12 is used as scratch.
 */
#define GET_DATA_V_MMI                                        \
    "punpcklhw %[srcl], %[ftmp4], %[ftmp5] \n\t"              \
    "pmaddhw %[srcl], %[srcl], %[filter10] \n\t"              \
    "punpcklhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t"            \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t"          \
    "paddw %[srcl], %[srcl], %[ftmp12] \n\t"                  \
    "punpcklhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t"            \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t"          \
    "paddw %[srcl], %[srcl], %[ftmp12] \n\t"                  \
    "punpcklhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t"          \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t"          \
    "paddw %[srcl], %[srcl], %[ftmp12] \n\t"                  \
    "punpckhhw %[srch], %[ftmp4], %[ftmp5] \n\t"              \
    "pmaddhw %[srch], %[srch], %[filter10] \n\t"              \
    "punpckhhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t"            \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t"          \
    "paddw %[srch], %[srch], %[ftmp12] \n\t"                  \
    "punpckhhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t"            \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t"          \
    "paddw %[srch], %[srch], %[ftmp12] \n\t"                  \
    "punpckhhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t"          \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t"          \
    "paddw %[srch], %[srch], %[ftmp12] \n\t"
72 
73 static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride,
74  uint8_t *dst, int32_t dst_stride,
75  const uint16_t *filter_x, int32_t w,
76  int32_t h)
77 {
78  double ftmp[15];
79  uint32_t tmp[2];
81  src -= 3;
82  src_stride -= w;
83  dst_stride -= w;
84  __asm__ volatile (
85  "move %[tmp1], %[width] \n\t"
86  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
87  MMI_ULDC1(%[filter1], %[filter], 0x00)
88  MMI_ULDC1(%[filter2], %[filter], 0x08)
89  "li %[tmp0], 0x07 \n\t"
90  "dmtc1 %[tmp0], %[ftmp13] \n\t"
91  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
92  "1: \n\t"
93  /* Get 8 data per row */
94  MMI_ULDC1(%[ftmp5], %[src], 0x00)
95  MMI_ULDC1(%[ftmp7], %[src], 0x01)
96  MMI_ULDC1(%[ftmp9], %[src], 0x02)
97  MMI_ULDC1(%[ftmp11], %[src], 0x03)
98  "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t"
99  "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
100  "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
101  "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
102  "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t"
103  "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
104  "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t"
105  "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
106  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
107  /* Get raw data */
109  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
110  %[ftmp6], %[tmp0])
111  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
112  %[ftmp6], %[tmp0])
113  "packsswh %[srcl], %[srcl], %[srch] \n\t"
114  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
115  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
116  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
117  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
118  /* Loop count */
119  "bnez %[width], 1b \n\t"
120  "move %[width], %[tmp1] \n\t"
121  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
122  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
123  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
124  "bnez %[height], 1b \n\t"
126  [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
127  [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]),
128  [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]),
129  [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]),
130  [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]),
131  [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]),
132  [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]),
133  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
134  [src]"+&r"(src), [width]"+&r"(w),
135  [dst]"+&r"(dst), [height]"+&r"(h),
136  [ftmp13]"=&f"(ftmp[14])
137  : [filter]"r"(filter_x),
138  [src_stride]"r"((mips_reg)src_stride),
139  [dst_stride]"r"((mips_reg)dst_stride)
140  : "memory"
141  );
142 }
143 
144 static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride,
145  uint8_t *dst, int32_t dst_stride,
146  const int16_t *filter_y, int32_t w,
147  int32_t h)
148 {
149  double ftmp[17];
150  uint32_t tmp[1];
151  ptrdiff_t addr = src_stride;
153  src_stride -= w;
154  dst_stride -= w;
155 
156  __asm__ volatile (
157  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
158  MMI_ULDC1(%[ftmp4], %[filter], 0x00)
159  MMI_ULDC1(%[ftmp5], %[filter], 0x08)
160  "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t"
161  "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t"
162  "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t"
163  "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t"
164  "li %[tmp0], 0x07 \n\t"
165  "dmtc1 %[tmp0], %[ftmp13] \n\t"
166  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
167  "1: \n\t"
168  /* Get 8 data per column */
169  MMI_ULDC1(%[ftmp4], %[src], 0x0)
170  PTR_ADDU "%[tmp0], %[src], %[addr] \n\t"
171  MMI_ULDC1(%[ftmp5], %[tmp0], 0x0)
172  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
173  MMI_ULDC1(%[ftmp6], %[tmp0], 0x0)
174  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
175  MMI_ULDC1(%[ftmp7], %[tmp0], 0x0)
176  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
177  MMI_ULDC1(%[ftmp8], %[tmp0], 0x0)
178  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
179  MMI_ULDC1(%[ftmp9], %[tmp0], 0x0)
180  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
181  MMI_ULDC1(%[ftmp10], %[tmp0], 0x0)
182  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
183  MMI_ULDC1(%[ftmp11], %[tmp0], 0x0)
184  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
185  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
186  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
187  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
188  "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
189  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
190  "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
191  "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
192  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
193  /* Get raw data */
195  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
196  %[ftmp6], %[tmp0])
197  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
198  %[ftmp6], %[tmp0])
199  "packsswh %[srcl], %[srcl], %[srch] \n\t"
200  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
201  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
202  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
203  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
204  /* Loop count */
205  "bnez %[width], 1b \n\t"
206  PTR_SUBU "%[width], %[addr], %[src_stride] \n\t"
207  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
208  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
209  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
210  "bnez %[height], 1b \n\t"
212  [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
213  [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]),
214  [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]),
215  [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]),
216  [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]),
217  [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]),
218  [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]),
219  [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]),
220  [src]"+&r"(src), [dst]"+&r"(dst),
221  [width]"+&r"(w), [height]"+&r"(h),
222  [tmp0]"=&r"(tmp[0]), [ftmp13]"=&f"(ftmp[16])
223  : [filter]"r"(filter_y),
224  [src_stride]"r"((mips_reg)src_stride),
225  [dst_stride]"r"((mips_reg)dst_stride),
226  [addr]"r"((mips_reg)addr)
227  : "memory"
228  );
229 }
230 
231 static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride,
232  uint8_t *dst, int32_t dst_stride,
233  const uint16_t *filter_x, int32_t w,
234  int32_t h)
235 {
236  double ftmp[15];
237  uint32_t tmp[2];
239  src -= 3;
240  src_stride -= w;
241  dst_stride -= w;
242 
243  __asm__ volatile (
244  "move %[tmp1], %[width] \n\t"
245  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
246  MMI_ULDC1(%[filter1], %[filter], 0x00)
247  MMI_ULDC1(%[filter2], %[filter], 0x08)
248  "li %[tmp0], 0x07 \n\t"
249  "dmtc1 %[tmp0], %[ftmp13] \n\t"
250  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
251  "1: \n\t"
252  /* Get 8 data per row */
253  MMI_ULDC1(%[ftmp5], %[src], 0x00)
254  MMI_ULDC1(%[ftmp7], %[src], 0x01)
255  MMI_ULDC1(%[ftmp9], %[src], 0x02)
256  MMI_ULDC1(%[ftmp11], %[src], 0x03)
257  "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t"
258  "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
259  "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
260  "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
261  "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t"
262  "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
263  "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t"
264  "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
265  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
266  /* Get raw data */
268  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
269  %[ftmp6], %[tmp0])
270  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
271  %[ftmp6], %[tmp0])
272  "packsswh %[srcl], %[srcl], %[srch] \n\t"
273  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
274  "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
275  MMI_ULDC1(%[ftmp4], %[dst], 0x0)
276  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
277  "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t"
278  "li %[tmp0], 0x10001 \n\t"
279  "dmtc1 %[tmp0], %[ftmp5] \n\t"
280  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
281  "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
282  "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
283  "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
284  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
285  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
286  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
287  /* Loop count */
288  "bnez %[width], 1b \n\t"
289  "move %[width], %[tmp1] \n\t"
290  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
291  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
292  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
293  "bnez %[height], 1b \n\t"
295  [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
296  [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]),
297  [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]),
298  [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]),
299  [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]),
300  [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]),
301  [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]),
302  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
303  [src]"+&r"(src), [width]"+&r"(w),
304  [dst]"+&r"(dst), [height]"+&r"(h),
305  [ftmp13]"=&f"(ftmp[14])
306  : [filter]"r"(filter_x),
307  [src_stride]"r"((mips_reg)src_stride),
308  [dst_stride]"r"((mips_reg)dst_stride)
309  : "memory"
310  );
311 }
312 
313 static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride,
314  uint8_t *dst, int32_t dst_stride,
315  const int16_t *filter_y, int32_t w,
316  int32_t h)
317 {
318  double ftmp[17];
319  uint32_t tmp[1];
320  ptrdiff_t addr = src_stride;
322  src_stride -= w;
323  dst_stride -= w;
324 
325  __asm__ volatile (
326  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
327  MMI_ULDC1(%[ftmp4], %[filter], 0x00)
328  MMI_ULDC1(%[ftmp5], %[filter], 0x08)
329  "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t"
330  "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t"
331  "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t"
332  "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t"
333  "li %[tmp0], 0x07 \n\t"
334  "dmtc1 %[tmp0], %[ftmp13] \n\t"
335  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
336  "1: \n\t"
337  /* Get 8 data per column */
338  MMI_ULDC1(%[ftmp4], %[src], 0x0)
339  PTR_ADDU "%[tmp0], %[src], %[addr] \n\t"
340  MMI_ULDC1(%[ftmp5], %[tmp0], 0x0)
341  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
342  MMI_ULDC1(%[ftmp6], %[tmp0], 0x0)
343  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
344  MMI_ULDC1(%[ftmp7], %[tmp0], 0x0)
345  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
346  MMI_ULDC1(%[ftmp8], %[tmp0], 0x0)
347  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
348  MMI_ULDC1(%[ftmp9], %[tmp0], 0x0)
349  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
350  MMI_ULDC1(%[ftmp10], %[tmp0], 0x0)
351  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
352  MMI_ULDC1(%[ftmp11], %[tmp0], 0x0)
353  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
354  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
355  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
356  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
357  "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
358  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
359  "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
360  "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
361  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
362  /* Get raw data */
364  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
365  %[ftmp6], %[tmp0])
366  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
367  %[ftmp6], %[tmp0])
368  "packsswh %[srcl], %[srcl], %[srch] \n\t"
369  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
370  "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
371  MMI_ULDC1(%[ftmp4], %[dst], 0x00)
372  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
373  "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t"
374  "li %[tmp0], 0x10001 \n\t"
375  "dmtc1 %[tmp0], %[ftmp5] \n\t"
376  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
377  "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
378  "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
379  "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
380  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
381  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
382  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
383  /* Loop count */
384  "bnez %[width], 1b \n\t"
385  PTR_SUBU "%[width], %[addr], %[src_stride] \n\t"
386  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
387  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
388  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
389  "bnez %[height], 1b \n\t"
391  [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
392  [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]),
393  [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]),
394  [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]),
395  [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]),
396  [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]),
397  [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]),
398  [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]),
399  [src]"+&r"(src), [dst]"+&r"(dst),
400  [width]"+&r"(w), [height]"+&r"(h),
401  [tmp0]"=&r"(tmp[0]), [ftmp13]"=&f"(ftmp[16])
402  : [filter]"r"(filter_y),
403  [src_stride]"r"((mips_reg)src_stride),
404  [dst_stride]"r"((mips_reg)dst_stride),
405  [addr]"r"((mips_reg)addr)
406  : "memory"
407  );
408 }
409 
410 static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride,
411  uint8_t *dst, int32_t dst_stride,
412  int32_t w, int32_t h)
413 {
414  double ftmp[4];
415  uint32_t tmp[2];
417  src_stride -= w;
418  dst_stride -= w;
419 
420  __asm__ volatile (
421  "move %[tmp1], %[width] \n\t"
422  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
423  "li %[tmp0], 0x10001 \n\t"
424  "dmtc1 %[tmp0], %[ftmp3] \n\t"
425  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
426  "1: \n\t"
427  MMI_ULDC1(%[ftmp1], %[src], 0x00)
428  MMI_ULDC1(%[ftmp2], %[dst], 0x00)
429  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
430  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
431  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
432  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
433  "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
434  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
435  "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
436  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
437  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
438  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
439  "bnez %[width], 1b \n\t"
440  "move %[width], %[tmp1] \n\t"
441  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
442  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
443  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
444  "bnez %[height], 1b \n\t"
446  [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
447  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
448  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
449  [src]"+&r"(src), [dst]"+&r"(dst),
450  [width]"+&r"(w), [height]"+&r"(h)
451  : [src_stride]"r"((mips_reg)src_stride),
452  [dst_stride]"r"((mips_reg)dst_stride)
453  : "memory"
454  );
455 }
456 
457 static const int16_t vp9_subpel_filters_mmi[3][15][8] = {
458  [FILTER_8TAP_REGULAR] = {
459  {0, 1, -5, 126, 8, -3, 1, 0},
460  {-1, 3, -10, 122, 18, -6, 2, 0},
461  {-1, 4, -13, 118, 27, -9, 3, -1},
462  {-1, 4, -16, 112, 37, -11, 4, -1},
463  {-1, 5, -18, 105, 48, -14, 4, -1},
464  {-1, 5, -19, 97, 58, -16, 5, -1},
465  {-1, 6, -19, 88, 68, -18, 5, -1},
466  {-1, 6, -19, 78, 78, -19, 6, -1},
467  {-1, 5, -18, 68, 88, -19, 6, -1},
468  {-1, 5, -16, 58, 97, -19, 5, -1},
469  {-1, 4, -14, 48, 105, -18, 5, -1},
470  {-1, 4, -11, 37, 112, -16, 4, -1},
471  {-1, 3, -9, 27, 118, -13, 4, -1},
472  {0, 2, -6, 18, 122, -10, 3, -1},
473  {0, 1, -3, 8, 126, -5, 1, 0},
474  }, [FILTER_8TAP_SHARP] = {
475  {-1, 3, -7, 127, 8, -3, 1, 0},
476  {-2, 5, -13, 125, 17, -6, 3, -1},
477  {-3, 7, -17, 121, 27, -10, 5, -2},
478  {-4, 9, -20, 115, 37, -13, 6, -2},
479  {-4, 10, -23, 108, 48, -16, 8, -3},
480  {-4, 10, -24, 100, 59, -19, 9, -3},
481  {-4, 11, -24, 90, 70, -21, 10, -4},
482  {-4, 11, -23, 80, 80, -23, 11, -4},
483  {-4, 10, -21, 70, 90, -24, 11, -4},
484  {-3, 9, -19, 59, 100, -24, 10, -4},
485  {-3, 8, -16, 48, 108, -23, 10, -4},
486  {-2, 6, -13, 37, 115, -20, 9, -4},
487  {-2, 5, -10, 27, 121, -17, 7, -3},
488  {-1, 3, -6, 17, 125, -13, 5, -2},
489  {0, 1, -3, 8, 127, -7, 3, -1},
490  }, [FILTER_8TAP_SMOOTH] = {
491  {-3, -1, 32, 64, 38, 1, -3, 0},
492  {-2, -2, 29, 63, 41, 2, -3, 0},
493  {-2, -2, 26, 63, 43, 4, -4, 0},
494  {-2, -3, 24, 62, 46, 5, -4, 0},
495  {-2, -3, 21, 60, 49, 7, -4, 0},
496  {-1, -4, 18, 59, 51, 9, -4, 0},
497  {-1, -4, 16, 57, 53, 12, -4, -1},
498  {-1, -4, 14, 55, 55, 14, -4, -1},
499  {-1, -4, 12, 53, 57, 16, -4, -1},
500  {0, -4, 9, 51, 59, 18, -4, -1},
501  {0, -4, 7, 49, 60, 21, -3, -2},
502  {0, -4, 5, 46, 62, 24, -3, -2},
503  {0, -4, 4, 43, 63, 26, -2, -2},
504  {0, -3, 2, 41, 63, 29, -2, -2},
505  {0, -3, 1, 38, 64, 32, -1, -3},
506  }
507 };
508 
/*
 * Define the six exported 8-tap motion-compensation functions for one block
 * width and one filter type:
 *
 *   ff_{put,avg}_8tap_<TYPE>_<SIZE>{h,v,hv}_mmi
 *
 * SIZE     - block width in pixels; the convolve helpers need a multiple
 *            of 4, and the hv scratch rows are 64 bytes wide, so SIZE <= 64
 * TYPE     - filter name used in the function name
 * TYPE_IDX - first index into vp9_subpel_filters_mmi
 *
 * mx / my select the filter row as mx - 1 / my - 1, so the value used by a
 * given variant has to be in 1..15.  The hv variants run the horizontal
 * pass over h + 7 rows (starting 3 rows above the block) into a scratch
 * buffer with a 64-byte stride and then the vertical pass on that buffer;
 * the scratch buffers are sized for h <= 64.
 *
 * The table holds int16_t taps while the horizontal helpers take
 * const uint16_t *; the taps are only loaded as raw 64-bit words, so the
 * pointer is converted explicitly where it is handed to those helpers.
 *
 * The vertical and averaging helpers fetch 8 bytes per step while using
 * only 4, so the scratch buffers carry 8 bytes of padding to keep the last
 * load inside the array.
 *
 * NOTE(review): the instantiations of this macro are expected to follow
 * before the #undef; confirm they are present.
 */
#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX)                           \
void ff_put_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride,    \
                                        const uint8_t *src,                    \
                                        ptrdiff_t srcstride,                   \
                                        int h, int mx, int my)                 \
{                                                                              \
    const uint16_t *filter =                                                   \
        (const uint16_t *)vp9_subpel_filters_mmi[TYPE_IDX][mx-1];              \
                                                                               \
    convolve_horiz_mmi(src, srcstride, dst, dststride, filter, SIZE, h);       \
}                                                                              \
                                                                               \
void ff_put_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride,    \
                                        const uint8_t *src,                    \
                                        ptrdiff_t srcstride,                   \
                                        int h, int mx, int my)                 \
{                                                                              \
    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my-1];            \
                                                                               \
    /* the first filter tap applies 3 rows above the output row */            \
    src -= (3 * srcstride);                                                    \
    convolve_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h);        \
}                                                                              \
                                                                               \
void ff_put_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride,   \
                                         const uint8_t *src,                   \
                                         ptrdiff_t srcstride,                  \
                                         int h, int mx, int my)                \
{                                                                              \
    const uint16_t *hfilter =                                                  \
        (const uint16_t *)vp9_subpel_filters_mmi[TYPE_IDX][mx-1];              \
    const int16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my-1];           \
                                                                               \
    int tmp_h = h + 7;                                                         \
    /* 71 rows of 64 bytes, plus padding for the 8-byte loads */               \
    uint8_t temp[64 * 71 + 8];                                                 \
    src -= (3 * srcstride);                                                    \
    convolve_horiz_mmi(src, srcstride, temp, 64, hfilter, SIZE, tmp_h);        \
    convolve_vert_mmi(temp, 64, dst, dststride, vfilter, SIZE, h);             \
}                                                                              \
                                                                               \
void ff_avg_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride,    \
                                        const uint8_t *src,                    \
                                        ptrdiff_t srcstride,                   \
                                        int h, int mx, int my)                 \
{                                                                              \
    const uint16_t *filter =                                                   \
        (const uint16_t *)vp9_subpel_filters_mmi[TYPE_IDX][mx-1];              \
                                                                               \
    convolve_avg_horiz_mmi(src, srcstride, dst, dststride, filter, SIZE, h);   \
}                                                                              \
                                                                               \
void ff_avg_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride,    \
                                        const uint8_t *src,                    \
                                        ptrdiff_t srcstride,                   \
                                        int h, int mx, int my)                 \
{                                                                              \
    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my-1];            \
                                                                               \
    /* the first filter tap applies 3 rows above the output row */            \
    src -= (3 * srcstride);                                                    \
    convolve_avg_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h);    \
}                                                                              \
                                                                               \
void ff_avg_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride,   \
                                         const uint8_t *src,                   \
                                         ptrdiff_t srcstride,                  \
                                         int h, int mx, int my)                \
{                                                                              \
    const uint16_t *hfilter =                                                  \
        (const uint16_t *)vp9_subpel_filters_mmi[TYPE_IDX][mx-1];              \
    const int16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my-1];           \
                                                                               \
    /* both scratch buffers are padded for the 8-byte loads */                 \
    uint8_t temp1[64 * 64 + 8];                                                \
    uint8_t temp2[64 * 71 + 8];                                                \
    int tmp_h = h + 7;                                                         \
    src -= (3 * srcstride);                                                    \
    convolve_horiz_mmi(src, srcstride, temp2, 64, hfilter, SIZE, tmp_h);       \
    convolve_vert_mmi(temp2, 64, temp1, 64, vfilter, SIZE, h);                 \
    convolve_avg_mmi(temp1, 64, dst, dststride, SIZE, h);                      \
}
583 
589 
595 
601 
602 #undef VP9_8TAP_MIPS_MMI_FUNC
convolve_avg_vert_mmi
static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:313
filter1
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
Definition: dcadsp.c:358
convolve_horiz_mmi
static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const uint16_t *filter_x, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:73
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
w
uint8_t w
Definition: llviddspenc.c:38
convolve_vert_mmi
static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:144
GET_DATA_H_MMI
#define GET_DATA_H_MMI
Definition: vp9_mc_mmi.c:25
filter
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
Definition: filter_design.txt:228
convolve_avg_horiz_mmi
static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const uint16_t *filter_x, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:231
mips_reg
#define mips_reg
Definition: asmdefs.h:46
vp9_subpel_filters_mmi
static const int16_t vp9_subpel_filters_mmi[3][15][8]
Definition: vp9_mc_mmi.c:457
mmiutils.h
FILTER_8TAP_SHARP
@ FILTER_8TAP_SHARP
Definition: vp9.h:67
width
#define width
vp9dsp_mips.h
vp9dsp.h
ROUND_POWER_OF_TWO_MMI
#define ROUND_POWER_OF_TWO_MMI(fr_i0, fr_i1, fr_t0, fr_t1, gr_t0)
brief: (((value) + (1 << ((n) - 1))) >> (n)) fr_i0: src & dst fr_i1: Operand number fr_t0,...
Definition: mmiutils.h:383
FILTER_8TAP_REGULAR
@ FILTER_8TAP_REGULAR
Definition: vp9.h:66
GET_DATA_V_MMI
#define GET_DATA_V_MMI
Definition: vp9_mc_mmi.c:49
height
#define height
PTR_SUBU
#define PTR_SUBU
Definition: asmdefs.h:52
DECLARE_VAR_ALL64
#define DECLARE_VAR_ALL64
Definition: mmiutils.h:39
FILTER_8TAP_SMOOTH
@ FILTER_8TAP_SMOOTH
Definition: vp9.h:65
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:49
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:50
smooth
static float smooth(DeshakeOpenCLContext *deshake_ctx, float *gauss_kernel, int length, float max_val, AVFifo *values)
Definition: vf_deshake_opencl.c:889
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
convolve_avg_mmi
static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:410
int32_t
int32_t
Definition: audioconvert.c:56
VP9_8TAP_MIPS_MMI_FUNC
#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX)
Definition: vp9_mc_mmi.c:509
h
h
Definition: vp9dsp_template.c:2038
RESTRICT_ASM_ALL64
#define RESTRICT_ASM_ALL64
Definition: mmiutils.h:40