FFmpeg
wmv2dsp_mmi.c
Go to the documentation of this file.
/*
 * WMV2 - DSP functions Loongson MMI-optimized
 *
 * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 
23 #include "libavutil/avassert.h"
24 #include "constants.h"
25 #include "wmv2dsp_mips.h"
27 
/*
 * Fixed-point transform weights, all scaled by 2048 (11 fractional bits).
 * W1..W7 are 2048 * sqrt(2) * cos(k * pi / 16) rounded to an integer; W0 is
 * the bare 2048 scale applied to the b[0] / b[4] pair.
 */
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/**
 * One-dimensional 8-point inverse transform over b[0..7], in place.
 *
 * Every output is rounded (+128) and shifted right by 8, so a lone b[0] = d
 * produces (2048 * d + 128) >> 8 in all eight positions.
 *
 * @param b eight consecutive coefficients; overwritten with the result
 */
static void wmv2_idct_row_mmi(short * b)
{
    int s1, s2;
    int a0, a1, a2, a3, a4, a5, a6, a7;

    /* step 1 */
    a0 = W0 * b[0] + W0 * b[4];
    a1 = W1 * b[1] + W7 * b[7];
    a2 = W2 * b[2] + W6 * b[6];
    a3 = W3 * b[5] - W5 * b[3];
    a4 = W0 * b[0] - W0 * b[4];
    a5 = W5 * b[5] + W3 * b[3];
    a6 = W6 * b[2] - W2 * b[6];
    a7 = W7 * b[1] - W1 * b[7];

    /*
     * step 2: scale the odd-part sums by 181/256 (about 1/sqrt(2)).
     * The sums can reach roughly 3.4e8 for extreme coefficients, so a signed
     * multiply by 181 would overflow int (undefined behaviour).  The product
     * and rounding are therefore done in unsigned arithmetic, which wraps,
     * and converted back to int before the arithmetic shift.
     */
    s1 = (int)(181U * (a1 - a5 + a7 - a3) + 128) >> 8; // 1, 3, 5, 7
    s2 = (int)(181U * (a1 - a5 - a7 + a3) + 128) >> 8;

    /* step 3: butterfly, round and drop 8 fractional bits */
    b[0] = (a0 + a2 + a1 + a5 + 128) >> 8;
    b[1] = (a4 + a6 + s1      + 128) >> 8;
    b[2] = (a4 - a6 + s2      + 128) >> 8;
    b[3] = (a0 - a2 + a7 + a3 + 128) >> 8;
    b[4] = (a0 - a2 - a7 - a3 + 128) >> 8;
    b[5] = (a4 - a6 - s2      + 128) >> 8;
    b[6] = (a4 + a6 - s1      + 128) >> 8;
    b[7] = (a0 + a2 - a1 - a5 + 128) >> 8;
}

/**
 * One-dimensional 8-point inverse transform down one column of an 8x8
 * block, in place.  The column elements are b[0], b[8], ..., b[56].
 *
 * Step 1 drops 3 bits up front and the final step rounds (+8192) and shifts
 * by 14, so a lone b[0] = d produces (((2048 * d) >> 3) + 8192) >> 14 in all
 * eight positions.
 *
 * @param b pointer to the top element of the column inside a row-major
 *          8x8 block; the eight column elements are overwritten
 */
static void wmv2_idct_col_mmi(short * b)
{
    int s1, s2;
    int a0, a1, a2, a3, a4, a5, a6, a7;

    /* step 1, with extended precision */
    a0 = (W0 * b[ 0] + W0 * b[32]    ) >> 3;
    a1 = (W1 * b[ 8] + W7 * b[56] + 4) >> 3;
    a2 = (W2 * b[16] + W6 * b[48] + 4) >> 3;
    a3 = (W3 * b[40] - W5 * b[24] + 4) >> 3;
    a4 = (W0 * b[ 0] - W0 * b[32]    ) >> 3;
    a5 = (W5 * b[40] + W3 * b[24] + 4) >> 3;
    a6 = (W6 * b[16] - W2 * b[48] + 4) >> 3;
    a7 = (W7 * b[ 8] - W1 * b[56] + 4) >> 3;

    /*
     * step 2: same 181/256 scaling as the row pass.  Even after the >> 3
     * above the sums can reach roughly 4.3e7, and 181 times that still
     * exceeds INT_MAX, so the multiply is done unsigned here as well.
     */
    s1 = (int)(181U * (a1 - a5 + a7 - a3) + 128) >> 8;
    s2 = (int)(181U * (a1 - a5 - a7 + a3) + 128) >> 8;

    /* step 3: butterfly, round and drop 14 fractional bits */
    b[ 0] = (a0 + a2 + a1 + a5 + 8192) >> 14;
    b[ 8] = (a4 + a6 + s1      + 8192) >> 14;
    b[16] = (a4 - a6 + s2      + 8192) >> 14;
    b[24] = (a0 - a2 + a7 + a3 + 8192) >> 14;

    b[32] = (a0 - a2 - a7 - a3 + 8192) >> 14;
    b[40] = (a4 - a6 - s2      + 8192) >> 14;
    b[48] = (a4 + a6 - s1      + 8192) >> 14;
    b[56] = (a0 + a2 - a1 - a5 + 8192) >> 14;
}
97 
98 void ff_wmv2_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
99 {
100  int i;
101  double ftmp[11];
102 
103  for (i = 0; i < 64; i += 8)
105  for (i = 0; i < 8; i++)
107 
108  __asm__ volatile (
109  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
110 
111  // low 4 loop
112  MMI_LDC1(%[ftmp1], %[block], 0x00)
113  MMI_LDC1(%[ftmp2], %[block], 0x08)
114  MMI_LDC1(%[ftmp3], %[block], 0x10)
115  MMI_LDC1(%[ftmp4], %[block], 0x18)
116  MMI_LDC1(%[ftmp5], %[block], 0x20)
117  MMI_LDC1(%[ftmp6], %[block], 0x28)
118  MMI_LDC1(%[ftmp7], %[block], 0x30)
119  MMI_LDC1(%[ftmp8], %[block], 0x38)
120 
121  MMI_LDC1(%[ftmp9], %[dest], 0x00)
122  "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
123  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
124  "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
125  "paddh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
126  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
127  MMI_SDC1(%[ftmp1], %[dest], 0x00)
128  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
129 
130  MMI_LDC1(%[ftmp9], %[dest], 0x00)
131  "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
132  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
133  "paddh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
134  "paddh %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
135  "packushb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
136  MMI_SDC1(%[ftmp3], %[dest], 0x00)
137  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
138 
139  MMI_LDC1(%[ftmp9], %[dest], 0x00)
140  "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
141  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
142  "paddh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
143  "paddh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
144  "packushb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
145  MMI_SDC1(%[ftmp5], %[dest], 0x00)
146  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
147 
148  MMI_LDC1(%[ftmp9], %[dest], 0x00)
149  "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
150  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
151  "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
152  "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
153  "packushb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
154  MMI_SDC1(%[ftmp7], %[dest], 0x00)
155 
156  PTR_ADDIU "%[block], %[block], 0x40 \n\t"
157  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
158 
159  // high 4 loop
160  MMI_LDC1(%[ftmp1], %[block], 0x00)
161  MMI_LDC1(%[ftmp2], %[block], 0x08)
162  MMI_LDC1(%[ftmp3], %[block], 0x10)
163  MMI_LDC1(%[ftmp4], %[block], 0x18)
164  MMI_LDC1(%[ftmp5], %[block], 0x20)
165  MMI_LDC1(%[ftmp6], %[block], 0x28)
166  MMI_LDC1(%[ftmp7], %[block], 0x30)
167  MMI_LDC1(%[ftmp8], %[block], 0x38)
168 
169  MMI_LDC1(%[ftmp9], %[dest], 0x00)
170  "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
171  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
172  "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
173  "paddh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
174  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
175  MMI_SDC1(%[ftmp1], %[dest], 0x00)
176  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
177 
178  MMI_LDC1(%[ftmp9], %[dest], 0x00)
179  "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
180  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
181  "paddh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
182  "paddh %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
183  "packushb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
184  MMI_SDC1(%[ftmp3], %[dest], 0x00)
185  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
186 
187  MMI_LDC1(%[ftmp9], %[dest], 0x00)
188  "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
189  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
190  "paddh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
191  "paddh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
192  "packushb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
193  MMI_SDC1(%[ftmp5], %[dest], 0x00)
194  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
195 
196  MMI_LDC1(%[ftmp9], %[dest], 0x00)
197  "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
198  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
199  "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
200  "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
201  "packushb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
202  MMI_SDC1(%[ftmp7], %[dest], 0x00)
203  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
204  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
205  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
206  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
207  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
208  [ftmp10]"=&f"(ftmp[10]),
209  [block]"+&r"(block), [dest]"+&r"(dest)
210  : [line_size]"r"((mips_reg)line_size)
211  : "memory"
212  );
213 }
214 
215 void ff_wmv2_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
216 {
217  int i;
218  double ftmp[8];
219 
220  for (i = 0; i < 64; i += 8)
222  for (i = 0; i < 8; i++)
224 
225  __asm__ volatile (
226  // low 4 loop
227  MMI_LDC1(%[ftmp0], %[block], 0x00)
228  MMI_LDC1(%[ftmp1], %[block], 0x08)
229  MMI_LDC1(%[ftmp2], %[block], 0x10)
230  MMI_LDC1(%[ftmp3], %[block], 0x18)
231  MMI_LDC1(%[ftmp4], %[block], 0x20)
232  MMI_LDC1(%[ftmp5], %[block], 0x28)
233  MMI_LDC1(%[ftmp6], %[block], 0x30)
234  MMI_LDC1(%[ftmp7], %[block], 0x38)
235  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
236  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
237  "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
238  "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
239  MMI_SDC1(%[ftmp0], %[dest], 0x00)
240  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
241  MMI_SDC1(%[ftmp2], %[dest], 0x00)
242  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
243  MMI_SDC1(%[ftmp4], %[dest], 0x00)
244  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
245  MMI_SDC1(%[ftmp6], %[dest], 0x00)
246 
247  PTR_ADDIU "%[block], %[block], 0x40 \n\t"
248  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
249 
250  // high 4 loop
251  MMI_LDC1(%[ftmp0], %[block], 0x00)
252  MMI_LDC1(%[ftmp1], %[block], 0x08)
253  MMI_LDC1(%[ftmp2], %[block], 0x10)
254  MMI_LDC1(%[ftmp3], %[block], 0x18)
255  MMI_LDC1(%[ftmp4], %[block], 0x20)
256  MMI_LDC1(%[ftmp5], %[block], 0x28)
257  MMI_LDC1(%[ftmp6], %[block], 0x30)
258  MMI_LDC1(%[ftmp7], %[block], 0x38)
259  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
260  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
261  "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
262  "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
263  MMI_SDC1(%[ftmp0], %[dest], 0x00)
264  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
265  MMI_SDC1(%[ftmp2], %[dest], 0x00)
266  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
267  MMI_SDC1(%[ftmp4], %[dest], 0x00)
268  PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
269  MMI_SDC1(%[ftmp6], %[dest], 0x00)
270  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
271  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
272  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
273  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
274  [block]"+&r"(block), [dest]"+&r"(dest)
275  : [line_size]"r"((mips_reg)line_size)
276  : "memory"
277  );
278 }
ff_wmv2_idct_add_mmi
void ff_wmv2_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: wmv2dsp_mmi.c:98
wmv2dsp_mips.h
b
#define b
Definition: input.c:41
mips_reg
#define mips_reg
Definition: asmdefs.h:46
wmv2_idct_row_mmi
static void wmv2_idct_row_mmi(short *b)
Definition: wmv2dsp_mmi.c:37
ff_wmv2_idct_put_mmi
void ff_wmv2_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: wmv2dsp_mmi.c:215
W6
#define W6
Definition: wmv2dsp_mmi.c:34
wmv2_idct_col_mmi
static void wmv2_idct_col_mmi(short *b)
Definition: wmv2dsp_mmi.c:67
W7
#define W7
Definition: wmv2dsp_mmi.c:35
constants.h
mmiutils.h
a1
#define a1
Definition: regdef.h:47
avassert.h
W1
#define W1
Definition: wmv2dsp_mmi.c:29
s1
#define s1
Definition: regdef.h:38
a4
#define a4
Definition: regdef.h:50
W3
#define W3
Definition: wmv2dsp_mmi.c:31
W5
#define W5
Definition: wmv2dsp_mmi.c:33
s2
#define s2
Definition: regdef.h:39
a0
#define a0
Definition: regdef.h:46
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
a2
#define a2
Definition: regdef.h:48
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:49
a5
#define a5
Definition: regdef.h:51
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:50
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
W2
#define W2
Definition: wmv2dsp_mmi.c:30
a3
#define a3
Definition: regdef.h:49
W0
#define W0
Definition: wmv2dsp_mmi.c:28