h264chroma_mmi.c
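This is the Loongson MMI (MIPS SIMD) implementation of FFmpeg's H.264 chroma motion compensation. It provides four kernels, ff_put_h264_chroma_mc8_mmi, ff_avg_h264_chroma_mc8_mmi, ff_put_h264_chroma_mc4_mmi and ff_avg_h264_chroma_mc4_mmi, each applying the eighth-pel bilinear chroma filter defined by H.264 to an 8- or 4-pixel-wide block. A plain-C sketch of the underlying computation follows the listing.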
/*
 * Loongson SIMD optimized h264chroma
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avassert.h" /* for av_assert2() */
#include "h264chroma_mips.h"

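/*
 * Bilinear chroma interpolation, 8 pixels wide ("put" variant: the result
 * overwrites dst).  For fractional offsets x, y in [0, 8) the output is
 *
 *     dst[j] = (A*src[j] + B*src[j+1] + C*src[j+stride]
 *               + D*src[j+stride+1] + 32) >> 6
 *
 * with A = (8-x)(8-y), B = x(8-y), C = (8-x)y, D = xy.
 */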
void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
                                int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B = x * (8 - y);
    const int C = (8 - x) * y;
    const int D = x * y;
    const int E = B + C;
    int i;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    if (D) {
        /* x != 0 and y != 0: full two-dimensional four-tap filter. */
        for (i = 0; i < h; i++) {
            __asm__ volatile (
                "ldl $2, %2 \r\n"
                "ldr $2, %1 \r\n"
                "ldl $3, %4 \r\n"
                "ldr $3, %3 \r\n"
                "ldl $4, %6 \r\n"
                "ldr $4, %5 \r\n"
                "ldl $5, %8 \r\n"
                "ldr $5, %7 \r\n"
                "daddiu $6, $0, 32 \r\n"
                "mtc1 %9, $f6 \r\n"
                "mtc1 %10, $f8 \r\n"
                "mtc1 %11, $f10 \r\n"
                "mtc1 %12, $f12 \r\n"
                "mtc1 $0, $f20 \r\n"
                "mtc1 $2, $f2 \r\n"
                "mtc1 $3, $f4 \r\n"
                "mtc1 $4, $f16 \r\n"
                "mtc1 $5, $f18 \r\n"
                "mtc1 $6, $f14 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "punpcklbh $f4, $f4, $f20 \r\n"
                "pshufh $f6, $f6, $f20 \r\n"
                "pshufh $f8, $f8, $f20 \r\n"
                "pshufh $f10, $f10, $f20 \r\n"
                "pshufh $f12, $f12, $f20 \r\n"
                "pshufh $f14, $f14, $f20 \r\n"
                "punpcklbh $f16, $f16, $f20 \r\n"
                "punpcklbh $f18, $f18, $f20 \r\n"
                "daddiu $6, $0, 6 \r\n"
                "mtc1 $6, $f22 \r\n"
                "dsrl32 $2, $2, 0 \r\n"
                "dsrl32 $3, $3, 0 \r\n"
                "dsrl32 $4, $4, 0 \r\n"
                "dsrl32 $5, $5, 0 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "pmullh $f4, $f4, $f8 \r\n"
                "pmullh $f16, $f10, $f16 \r\n"
                "pmullh $f18, $f12, $f18 \r\n"
                "paddh $f2, $f2, $f14 \r\n"
                "paddh $f4, $f4, $f16 \r\n"
                "paddh $f2, $f2, $f18 \r\n"
                "paddh $f2, $f2, $f4 \r\n"
                "psrah $f24, $f2, $f22 \r\n"
                "mtc1 $2, $f2 \r\n"
                "mtc1 $3, $f4 \r\n"
                "mtc1 $4, $f16 \r\n"
                "mtc1 $5, $f18 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "punpcklbh $f4, $f4, $f20 \r\n"
                "punpcklbh $f16, $f16, $f20 \r\n"
                "punpcklbh $f18, $f18, $f20 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "pmullh $f4, $f4, $f8 \r\n"
                "pmullh $f16, $f10, $f16 \r\n"
                "pmullh $f18, $f12, $f18 \r\n"
                "paddh $f2, $f2, $f14 \r\n"
                "paddh $f4, $f4, $f16 \r\n"
                "paddh $f2, $f2, $f18 \r\n"
                "paddh $f2, $f2, $f4 \r\n"
                "psrah $f2, $f2, $f22 \r\n"
                "packushb $f2, $f24, $f2 \r\n"
                "sdc1 $f2, %0 \r\n"
                : "=m"(*dst)
                : "m"(*src),"m"(*(src+7)),"m"(*(src+1)),"m"(*(src+8)),
                  "m"(*(src+stride)),"m"(*(src+stride+7)),
                  "m"(*(src+stride+1)),"m"(*(src+stride+8)),
                  "r"(A),"r"(B),"r"(C),"r"(D)
                : "$2","$3","$4","$5","$6"
            );

            dst += stride;
            src += stride;
        }
    } else if (E) {
        /* Exactly one of x, y is non-zero: one-dimensional two-tap filter;
         * step selects the horizontal (1) or vertical (stride) neighbour. */
        const int step = C ? stride : 1;

        for (i = 0; i < h; i++) {
            __asm__ volatile (
                "daddiu $6, $0, 32 \r\n"
                "ldl $2, %2 \r\n"
                "ldr $2, %1 \r\n"
                "ldl $3, %4 \r\n"
                "ldr $3, %3 \r\n"
                "mtc1 $6, $f14 \r\n"
                "mtc1 %5, $f6 \r\n"
                "mtc1 %6, $f8 \r\n"
                "mtc1 $0, $f20 \r\n"
                "mtc1 $2, $f2 \r\n"
                "mtc1 $3, $f4 \r\n"
                "daddiu $6, $0, 6 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "punpcklbh $f4, $f4, $f20 \r\n"
                "pshufh $f6, $f6, $f20 \r\n"
                "pshufh $f8, $f8, $f20 \r\n"
                "pshufh $f14, $f14, $f20 \r\n"
                "mtc1 $6, $f22 \r\n"
                "dsrl32 $2, $2, 0 \r\n"
                "dsrl32 $3, $3, 0 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "pmullh $f4, $f4, $f8 \r\n"
                "paddh $f2, $f2, $f14 \r\n"
                "paddh $f2, $f2, $f4 \r\n"
                "psrah $f24, $f2, $f22 \r\n"
                "mtc1 $2, $f2 \r\n"
                "mtc1 $3, $f4 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "punpcklbh $f4, $f4, $f20 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "pmullh $f4, $f4, $f8 \r\n"
                "paddh $f2, $f2, $f14 \r\n"
                "paddh $f2, $f2, $f4 \r\n"
                "psrah $f2, $f2, $f22 \r\n"
                "packushb $f2, $f24, $f2 \r\n"
                "sdc1 $f2, %0 \r\n"
                : "=m"(*dst)
                : "m"(*(src)),"m"(*(src+7)),
                  "m"(*(src+step)),"m"(*(src+step+7)),
                  "r"(A),"r"(E)
                : "$2","$3","$4","$5","$6"
            );

            dst += stride;
            src += stride;
        }
    } else {
        /* x == y == 0: only the A tap remains and A == 64, so the
         * multiply-add-shift is effectively an 8-byte copy. */
        for (i = 0; i < h; i++) {
            __asm__ volatile (
                "daddiu $6, $0, 32 \r\n"
                "ldl $2, %2 \r\n"
                "ldr $2, %1 \r\n"
                "mtc1 $6, $f14 \r\n"
                "mtc1 %3, $f6 \r\n"
                "mtc1 $0, $f20 \r\n"
                "mtc1 $2, $f2 \r\n"
                "daddiu $6, $0, 6 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "pshufh $f6, $f6, $f20 \r\n"
                "pshufh $f14, $f14, $f20 \r\n"
                "mtc1 $6, $f22 \r\n"
                "dsrl32 $2, $2, 0 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "paddh $f2, $f2, $f14 \r\n"
                "psrah $f24, $f2, $f22 \r\n"
                "mtc1 $2, $f2 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "paddh $f2, $f2, $f14 \r\n"
                "psrah $f2, $f2, $f22 \r\n"
                "packushb $f2, $f24, $f2 \r\n"
                "sdc1 $f2, %0 \r\n"
                : "=m"(*dst)
                : "m"(*src),"m"(*(src+7)),"r"(A)
                : "$2"
            );

            dst += stride;
            src += stride;
        }
    }
}

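/*
 * Same 8-wide bilinear interpolation as above, "avg" variant: the result is
 * averaged with the bytes already in dst (pavgb) before being stored back.
 */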
void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
                                int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B = x * (8 - y);
    const int C = (8 - x) * y;
    const int D = x * y;
    const int E = B + C;
    int i;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    if (D) {
        /* Two-dimensional filter, then average with the existing dst. */
        for (i = 0; i < h; i++) {
            __asm__ volatile (
                "ldl $2, %2 \r\n"
                "ldr $2, %1 \r\n"
                "ldl $3, %4 \r\n"
                "ldr $3, %3 \r\n"
                "ldl $4, %6 \r\n"
                "ldr $4, %5 \r\n"
                "ldl $5, %8 \r\n"
                "ldr $5, %7 \r\n"
                "daddiu $6, $0, 32 \r\n"
                "mtc1 %9, $f6 \r\n"
                "mtc1 %10, $f8 \r\n"
                "mtc1 %11, $f10 \r\n"
                "mtc1 %12, $f12 \r\n"
                "mtc1 $0, $f20 \r\n"
                "mtc1 $2, $f2 \r\n"
                "mtc1 $3, $f4 \r\n"
                "mtc1 $4, $f16 \r\n"
                "mtc1 $5, $f18 \r\n"
                "mtc1 $6, $f14 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "punpcklbh $f4, $f4, $f20 \r\n"
                "pshufh $f6, $f6, $f20 \r\n"
                "pshufh $f8, $f8, $f20 \r\n"
                "pshufh $f10, $f10, $f20 \r\n"
                "pshufh $f12, $f12, $f20 \r\n"
                "pshufh $f14, $f14, $f20 \r\n"
                "punpcklbh $f16, $f16, $f20 \r\n"
                "punpcklbh $f18, $f18, $f20 \r\n"
                "daddiu $6, $0, 6 \r\n"
                "mtc1 $6, $f22 \r\n"
                "dsrl32 $2, $2, 0 \r\n"
                "dsrl32 $3, $3, 0 \r\n"
                "dsrl32 $4, $4, 0 \r\n"
                "dsrl32 $5, $5, 0 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "pmullh $f4, $f4, $f8 \r\n"
                "pmullh $f16, $f10, $f16 \r\n"
                "pmullh $f18, $f12, $f18 \r\n"
                "paddh $f2, $f2, $f14 \r\n"
                "paddh $f4, $f4, $f16 \r\n"
                "paddh $f2, $f2, $f18 \r\n"
                "paddh $f2, $f2, $f4 \r\n"
                "psrah $f24, $f2, $f22 \r\n"
                "mtc1 $2, $f2 \r\n"
                "mtc1 $3, $f4 \r\n"
                "mtc1 $4, $f16 \r\n"
                "mtc1 $5, $f18 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "punpcklbh $f4, $f4, $f20 \r\n"
                "punpcklbh $f16, $f16, $f20 \r\n"
                "punpcklbh $f18, $f18, $f20 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "pmullh $f4, $f4, $f8 \r\n"
                "pmullh $f16, $f10, $f16 \r\n"
                "pmullh $f18, $f12, $f18 \r\n"
                "paddh $f2, $f2, $f14 \r\n"
                "paddh $f4, $f4, $f16 \r\n"
                "paddh $f2, $f2, $f18 \r\n"
                "paddh $f2, $f2, $f4 \r\n"
                "psrah $f2, $f2, $f22 \r\n"
                "packushb $f2, $f24, $f2 \r\n"
                "ldc1 $f4, %0 \r\n"
                "pavgb $f2, $f2, $f4 \r\n"
                "sdc1 $f2, %0 \r\n"
                : "=m"(*dst)
                : "m"(*(src)),"m"(*(src+7)),"m"(*(src+1)),"m"(*(src+8)),
                  "m"(*(src+stride)),"m"(*(src+stride+7)),
                  "m"(*(src+stride+1)),"m"(*(src+stride+8)),
                  "r"(A),"r"(B),"r"(C),"r"(D)
                : "$2","$3","$4","$5","$6"
            );

            dst += stride;
            src += stride;
        }
    } else {
        /* One-dimensional filter (this path also covers x == y == 0, where
         * A == 64 and E == 0), then average with the existing dst. */
        const int step = C ? stride : 1;

        for (i = 0; i < h; i++) {
            __asm__ volatile (
                "daddiu $6, $0, 32 \r\n"
                "ldl $2, %2 \r\n"
                "ldr $2, %1 \r\n"
                "ldl $3, %4 \r\n"
                "ldr $3, %3 \r\n"
                "mtc1 $6, $f14 \r\n"
                "mtc1 %5, $f6 \r\n"
                "mtc1 %6, $f8 \r\n"
                "mtc1 $0, $f20 \r\n"
                "mtc1 $2, $f2 \r\n"
                "mtc1 $3, $f4 \r\n"
                "daddiu $6, $0, 6 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "punpcklbh $f4, $f4, $f20 \r\n"
                "pshufh $f6, $f6, $f20 \r\n"
                "pshufh $f8, $f8, $f20 \r\n"
                "pshufh $f14, $f14, $f20 \r\n"
                "mtc1 $6, $f22 \r\n"
                "dsrl32 $2, $2, 0 \r\n"
                "dsrl32 $3, $3, 0 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "pmullh $f4, $f4, $f8 \r\n"
                "paddh $f2, $f2, $f14 \r\n"
                "paddh $f2, $f2, $f4 \r\n"
                "psrah $f24, $f2, $f22 \r\n"
                "mtc1 $2, $f2 \r\n"
                "mtc1 $3, $f4 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "punpcklbh $f4, $f4, $f20 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "pmullh $f4, $f4, $f8 \r\n"
                "paddh $f2, $f2, $f14 \r\n"
                "paddh $f2, $f2, $f4 \r\n"
                "psrah $f2, $f2, $f22 \r\n"
                "packushb $f2, $f24, $f2 \r\n"
                "ldc1 $f4, %0 \r\n"
                "pavgb $f2, $f2, $f4 \r\n"
                "sdc1 $f2, %0 \r\n"
                : "=m"(*dst)
                : "m"(*(src)),"m"(*(src+7)),
                  "m"(*(src+step)),"m"(*(src+step+7)),"r"(A),"r"(E)
                : "$2","$3","$4","$5","$6"
            );

            dst += stride;
            src += stride;
        }
    }
}

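/*
 * Bilinear chroma interpolation, 4 pixels wide ("put" variant).  Only one
 * 4-byte result is produced per row (swc1); the x == y == 0 case reduces to
 * an unaligned 4-byte copy (lwl/lwr + sw).
 */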
void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
                                int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B = x * (8 - y);
    const int C = (8 - x) * y;
    const int D = x * y;
    const int E = B + C;
    int i;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    if (D) {
        /* Two-dimensional four-tap filter, four output bytes per row. */
        for (i = 0; i < h; i++) {
            __asm__ volatile (
                "ldl $2, %2 \r\n"
                "ldr $2, %1 \r\n"
                "ldl $3, %4 \r\n"
                "ldr $3, %3 \r\n"
                "ldl $4, %6 \r\n"
                "ldr $4, %5 \r\n"
                "ldl $5, %8 \r\n"
                "ldr $5, %7 \r\n"
                "daddiu $6, $0, 32 \r\n"
                "mtc1 %9, $f6 \r\n"
                "mtc1 %10, $f8 \r\n"
                "mtc1 %11, $f10 \r\n"
                "mtc1 %12, $f12 \r\n"
                "mtc1 $0, $f20 \r\n"
                "mtc1 $2, $f2 \r\n"
                "mtc1 $3, $f4 \r\n"
                "mtc1 $4, $f16 \r\n"
                "mtc1 $5, $f18 \r\n"
                "mtc1 $6, $f14 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "punpcklbh $f4, $f4, $f20 \r\n"
                "pshufh $f6, $f6, $f20 \r\n"
                "pshufh $f8, $f8, $f20 \r\n"
                "pshufh $f10, $f10, $f20 \r\n"
                "pshufh $f12, $f12, $f20 \r\n"
                "pshufh $f14, $f14, $f20 \r\n"
                "punpcklbh $f16, $f16, $f20 \r\n"
                "punpcklbh $f18, $f18, $f20 \r\n"
                "daddiu $6, $0, 6 \r\n"
                "mtc1 $6, $f22 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "pmullh $f4, $f4, $f8 \r\n"
                "pmullh $f16, $f10, $f16 \r\n"
                "pmullh $f18, $f12, $f18 \r\n"
                "paddh $f2, $f2, $f14 \r\n"
                "paddh $f4, $f4, $f16 \r\n"
                "paddh $f2, $f2, $f18 \r\n"
                "paddh $f2, $f2, $f4 \r\n"
                "psrah $f2, $f2, $f22 \r\n"
                "packushb $f2, $f2, $f2 \r\n"
                "swc1 $f2, %0 \r\n"
                : "=m"(*dst)
                : "m"(*(src)),"m"(*(src+7)),"m"(*(src+1)),"m"(*(src+8)),
                  "m"(*(src+stride)),"m"(*(src+stride+7)),
                  "m"(*(src+stride+1)),"m"(*(src+stride+8)),
                  "r"(A),"r"(B),"r"(C),"r"(D)
                : "$2","$3","$4","$5","$6"
            );

            dst += stride;
            src += stride;
        }
    } else if (E) {
        /* Exactly one of x, y is non-zero: one-dimensional two-tap filter. */
        const int step = C ? stride : 1;

        for (i = 0; i < h; i++) {
            __asm__ volatile (
                "ldl $2, %2 \r\n"
                "ldr $2, %1 \r\n"
                "ldl $3, %4 \r\n"
                "ldr $3, %3 \r\n"
                "daddiu $4, $0, 32 \r\n"
                "mtc1 %5, $f6 \r\n"
                "mtc1 %6, $f8 \r\n"
                "mtc1 $0, $f20 \r\n"
                "mtc1 $2, $f2 \r\n"
                "mtc1 $3, $f4 \r\n"
                "mtc1 $4, $f10 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "punpcklbh $f4, $f4, $f20 \r\n"
                "pshufh $f6, $f6, $f20 \r\n"
                "pshufh $f8, $f8, $f20 \r\n"
                "pshufh $f10, $f10, $f20 \r\n"
                "daddiu $4, $0, 6 \r\n"
                "mtc1 $4, $f22 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "pmullh $f4, $f4, $f8 \r\n"
                "paddh $f2, $f2, $f10 \r\n"
                "paddh $f2, $f2, $f4 \r\n"
                "psrah $f2, $f2, $f22 \r\n"
                "packushb $f2, $f2, $f20 \r\n"
                "swc1 $f2, %0 \r\n"
                : "=m"(*dst)
                : "m"(*(src)),"m"(*(src+7)),"m"(*(src+step)),
                  "m"(*(src+step+7)),"r"(A),"r"(E)
                : "$2","$3","$4","$5","$6"
            );

            dst += stride;
            src += stride;
        }
    } else {
        /* x == y == 0: plain unaligned 4-byte copy. */
        for (i = 0; i < h; i++) {
            __asm__ volatile (
                "lwl $2, %2 \r\n"
                "lwr $2, %1 \r\n"
                "sw $2, %0 \r\n"
                : "=m"(*dst)
                : "m"(*src),"m"(*(src+3))
                : "$2"
            );

            dst += stride;
            src += stride;
        }
    }
}

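/*
 * Same 4-wide interpolation, "avg" variant: the result is averaged with the
 * existing dst bytes (lwc1 + pavgb) before the 4-byte store.
 */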
void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
                                int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B = x * (8 - y);
    const int C = (8 - x) * y;
    const int D = x * y;
    int i;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    if (D) {
        /* Two-dimensional filter, then average with the existing dst. */
        for (i = 0; i < h; i++) {
            __asm__ volatile (
                "ldl $2, %2 \r\n"
                "ldr $2, %1 \r\n"
                "ldl $3, %4 \r\n"
                "ldr $3, %3 \r\n"
                "ldl $4, %6 \r\n"
                "ldr $4, %5 \r\n"
                "ldl $5, %8 \r\n"
                "ldr $5, %7 \r\n"
                "daddiu $6, $0, 32 \r\n"
                "mtc1 %9, $f6 \r\n"
                "mtc1 %10, $f8 \r\n"
                "mtc1 %11, $f10 \r\n"
                "mtc1 %12, $f12 \r\n"
                "mtc1 $0, $f20 \r\n"
                "mtc1 $2, $f2 \r\n"
                "mtc1 $3, $f4 \r\n"
                "mtc1 $4, $f16 \r\n"
                "mtc1 $5, $f18 \r\n"
                "mtc1 $6, $f14 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "punpcklbh $f4, $f4, $f20 \r\n"
                "pshufh $f6, $f6, $f20 \r\n"
                "pshufh $f8, $f8, $f20 \r\n"
                "pshufh $f10, $f10, $f20 \r\n"
                "pshufh $f12, $f12, $f20 \r\n"
                "pshufh $f14, $f14, $f20 \r\n"
                "punpcklbh $f16, $f16, $f20 \r\n"
                "punpcklbh $f18, $f18, $f20 \r\n"
                "daddiu $6, $0, 6 \r\n"
                "mtc1 $6, $f22 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "pmullh $f4, $f4, $f8 \r\n"
                "pmullh $f16, $f10, $f16 \r\n"
                "pmullh $f18, $f12, $f18 \r\n"
                "paddh $f2, $f2, $f14 \r\n"
                "paddh $f4, $f4, $f16 \r\n"
                "paddh $f2, $f2, $f18 \r\n"
                "paddh $f2, $f2, $f4 \r\n"
                "psrah $f2, $f2, $f22 \r\n"
                "packushb $f2, $f2, $f2 \r\n"
                "lwc1 $f4, %0 \r\n"
                "pavgb $f2, $f2, $f4 \r\n"
                "swc1 $f2, %0 \r\n"
                : "=m"(*dst)
                : "m"(*(src)),"m"(*(src+7)),"m"(*(src+1)),"m"(*(src+8)),
                  "m"(*(src+stride)),"m"(*(src+stride+7)),
                  "m"(*(src+stride+1)),"m"(*(src+stride+8)),
                  "r"(A),"r"(B),"r"(C),"r"(D)
                : "$2","$3","$4","$5","$6"
            );

            dst += stride;
            src += stride;
        }
    } else {
        /* One-dimensional filter (also covers x == y == 0, where A == 64),
         * then average with the existing dst. */
        const int E = B + C;
        const int step = C ? stride : 1;

        for (i = 0; i < h; i++) {
            __asm__ volatile (
                "ldl $2, %2 \r\n"
                "ldr $2, %1 \r\n"
                "ldl $3, %4 \r\n"
                "ldr $3, %3 \r\n"
                "daddiu $4, $0, 32 \r\n"
                "mtc1 %5, $f6 \r\n"
                "mtc1 %6, $f8 \r\n"
                "mtc1 $0, $f20 \r\n"
                "mtc1 $2, $f2 \r\n"
                "mtc1 $3, $f4 \r\n"
                "mtc1 $4, $f10 \r\n"
                "punpcklbh $f2, $f2, $f20 \r\n"
                "punpcklbh $f4, $f4, $f20 \r\n"
                "pshufh $f6, $f6, $f20 \r\n"
                "pshufh $f8, $f8, $f20 \r\n"
                "pshufh $f10, $f10, $f20 \r\n"
                "daddiu $4, $0, 6 \r\n"
                "mtc1 $4, $f22 \r\n"
                "pmullh $f2, $f2, $f6 \r\n"
                "pmullh $f4, $f4, $f8 \r\n"
                "paddh $f2, $f2, $f10 \r\n"
                "paddh $f2, $f2, $f4 \r\n"
                "psrah $f2, $f2, $f22 \r\n"
                "packushb $f2, $f2, $f20 \r\n"
                "lwc1 $f4, %0 \r\n"
                "pavgb $f2, $f2, $f4 \r\n"
                "swc1 $f2, %0 \r\n"
                : "=m"(*dst)
                : "m"(*(src)),"m"(*(src+7)),"m"(*(src+step)),
                  "m"(*(src+step+7)),"r"(A),"r"(E)
                : "$2","$3","$4","$5","$6"
            );

            dst += stride;
            src += stride;
        }
    }
}
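For reference, all four kernels compute the same H.264 chroma bilinear interpolation; the branching on D and E only skips taps whose weight is zero. Below is a minimal scalar sketch of the "put" computation (put_h264_chroma_ref is a hypothetical helper written for this page, not part of the file; the "avg" variants additionally average the result with the bytes already in dst):

#include <stdint.h>

/* Hypothetical scalar reference for the "put" kernels above; w is the
 * block width (8 or 4).  The weights A+B+C+D always sum to 64, so the
 * "+ 32" and ">> 6" implement round-to-nearest division by 64. */
static void put_h264_chroma_ref(uint8_t *dst, const uint8_t *src, int stride,
                                int w, int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B = x * (8 - y);
    const int C = (8 - x) * y;
    const int D = x * y;

    for (int i = 0; i < h; i++) {
        for (int j = 0; j < w; j++)
            dst[j] = (A * src[j]          + B * src[j + 1] +
                      C * src[j + stride] + D * src[j + stride + 1] +
                      32) >> 6;
        dst += stride;
        src += stride;
    }
}

The MMI code reaches the same result per row by widening bytes to 16-bit lanes (punpcklbh), multiplying by the broadcast weights (pshufh + pmullh), accumulating with the rounding constant (paddh), shifting (psrah) and saturating back to bytes (packushb).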