FFmpeg: libavutil/mips/mmiutils.h
/*
 * Loongson SIMD utils
 *
 * Copyright (c) 2016 Loongson Technology Corporation Limited
 * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVUTIL_MIPS_MMIUTILS_H
#define AVUTIL_MIPS_MMIUTILS_H

#include "config.h"
#include "libavutil/mips/asmdefs.h"
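
/*
 * Note (hedged editorial comment): the PTR_ADDU mnemonic macro and the
 * mips_reg type used by the macros below are expected to come from
 * asmdefs.h, which adapts the sys/asm.h-style pointer-size-agnostic
 * assembly defines for use in C inline assembly under the active ABI.
 */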
#if HAVE_LOONGSON2

#define DECLARE_VAR_LOW32       int32_t low32
#define RESTRICT_ASM_LOW32      [low32]"=&r"(low32),
#define DECLARE_VAR_ALL64       int64_t all64
#define RESTRICT_ASM_ALL64      [all64]"=&r"(all64),
#define DECLARE_VAR_ADDRT       mips_reg addrt
#define RESTRICT_ASM_ADDRT      [addrt]"=&r"(addrt),

#define MMI_LWX(reg, addr, stride, bias) \
    PTR_ADDU "%[addrt], "#addr", "#stride" \n\t" \
    "lw "#reg", "#bias"(%[addrt]) \n\t"

#define MMI_SWX(reg, addr, stride, bias) \
    PTR_ADDU "%[addrt], "#addr", "#stride" \n\t" \
    "sw "#reg", "#bias"(%[addrt]) \n\t"

#define MMI_LDX(reg, addr, stride, bias) \
    PTR_ADDU "%[addrt], "#addr", "#stride" \n\t" \
    "ld "#reg", "#bias"(%[addrt]) \n\t"

#define MMI_SDX(reg, addr, stride, bias) \
    PTR_ADDU "%[addrt], "#addr", "#stride" \n\t" \
    "sd "#reg", "#bias"(%[addrt]) \n\t"

#define MMI_LWC1(fp, addr, bias) \
    "lwc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_ULWC1(fp, addr, bias) \
    "ulw %[low32], "#bias"("#addr") \n\t" \
    "mtc1 %[low32], "#fp" \n\t"

#define MMI_LWXC1(fp, addr, stride, bias) \
    PTR_ADDU "%[addrt], "#addr", "#stride" \n\t" \
    MMI_LWC1(fp, %[addrt], bias)

#define MMI_SWC1(fp, addr, bias) \
    "swc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_USWC1(fp, addr, bias) \
    "mfc1 %[low32], "#fp" \n\t" \
    "usw %[low32], "#bias"("#addr") \n\t"

#define MMI_SWXC1(fp, addr, stride, bias) \
    PTR_ADDU "%[addrt], "#addr", "#stride" \n\t" \
    MMI_SWC1(fp, %[addrt], bias)

#define MMI_LDC1(fp, addr, bias) \
    "ldc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_ULDC1(fp, addr, bias) \
    "uld %[all64], "#bias"("#addr") \n\t" \
    "dmtc1 %[all64], "#fp" \n\t"

#define MMI_LDXC1(fp, addr, stride, bias) \
    PTR_ADDU "%[addrt], "#addr", "#stride" \n\t" \
    MMI_LDC1(fp, %[addrt], bias)

#define MMI_SDC1(fp, addr, bias) \
    "sdc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_USDC1(fp, addr, bias) \
    "dmfc1 %[all64], "#fp" \n\t" \
    "usd %[all64], "#bias"("#addr") \n\t"

#define MMI_SDXC1(fp, addr, stride, bias) \
    PTR_ADDU "%[addrt], "#addr", "#stride" \n\t" \
    MMI_SDC1(fp, %[addrt], bias)

#define MMI_LQ(reg1, reg2, addr, bias) \
    "ld "#reg1", "#bias"("#addr") \n\t" \
    "ld "#reg2", 8+"#bias"("#addr") \n\t"

#define MMI_SQ(reg1, reg2, addr, bias) \
    "sd "#reg1", "#bias"("#addr") \n\t" \
    "sd "#reg2", 8+"#bias"("#addr") \n\t"

#define MMI_LQC1(fp1, fp2, addr, bias) \
    "ldc1 "#fp1", "#bias"("#addr") \n\t" \
    "ldc1 "#fp2", 8+"#bias"("#addr") \n\t"

#define MMI_SQC1(fp1, fp2, addr, bias) \
    "sdc1 "#fp1", "#bias"("#addr") \n\t" \
    "sdc1 "#fp2", 8+"#bias"("#addr") \n\t"

#elif HAVE_LOONGSON3 /* !HAVE_LOONGSON2 */

#define DECLARE_VAR_ALL64
#define RESTRICT_ASM_ALL64
#define DECLARE_VAR_ADDRT
#define RESTRICT_ASM_ADDRT

#define MMI_LWX(reg, addr, stride, bias) \
    "gslwx "#reg", "#bias"("#addr", "#stride") \n\t"

#define MMI_SWX(reg, addr, stride, bias) \
    "gsswx "#reg", "#bias"("#addr", "#stride") \n\t"

#define MMI_LDX(reg, addr, stride, bias) \
    "gsldx "#reg", "#bias"("#addr", "#stride") \n\t"

#define MMI_SDX(reg, addr, stride, bias) \
    "gssdx "#reg", "#bias"("#addr", "#stride") \n\t"

#define MMI_LWC1(fp, addr, bias) \
    "lwc1 "#fp", "#bias"("#addr") \n\t"

#if _MIPS_SIM == _ABIO32 /* workaround for 3A2000 gslwlc1 bug */

#define DECLARE_VAR_LOW32       int32_t low32
#define RESTRICT_ASM_LOW32      [low32]"=&r"(low32),

#define MMI_ULWC1(fp, addr, bias) \
    "ulw %[low32], "#bias"("#addr") \n\t" \
    "mtc1 %[low32], "#fp" \n\t"

#else /* _MIPS_SIM != _ABIO32 */

#define DECLARE_VAR_LOW32
#define RESTRICT_ASM_LOW32

#define MMI_ULWC1(fp, addr, bias) \
    "gslwlc1 "#fp", 3+"#bias"("#addr") \n\t" \
    "gslwrc1 "#fp", "#bias"("#addr") \n\t"

#endif /* _MIPS_SIM != _ABIO32 */

#define MMI_LWXC1(fp, addr, stride, bias) \
    "gslwxc1 "#fp", "#bias"("#addr", "#stride") \n\t"

#define MMI_SWC1(fp, addr, bias) \
    "swc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_USWC1(fp, addr, bias) \
    "gsswlc1 "#fp", 3+"#bias"("#addr") \n\t" \
    "gsswrc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_SWXC1(fp, addr, stride, bias) \
    "gsswxc1 "#fp", "#bias"("#addr", "#stride") \n\t"

#define MMI_LDC1(fp, addr, bias) \
    "ldc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_ULDC1(fp, addr, bias) \
    "gsldlc1 "#fp", 7+"#bias"("#addr") \n\t" \
    "gsldrc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_LDXC1(fp, addr, stride, bias) \
    "gsldxc1 "#fp", "#bias"("#addr", "#stride") \n\t"

#define MMI_SDC1(fp, addr, bias) \
    "sdc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_USDC1(fp, addr, bias) \
    "gssdlc1 "#fp", 7+"#bias"("#addr") \n\t" \
    "gssdrc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_SDXC1(fp, addr, stride, bias) \
    "gssdxc1 "#fp", "#bias"("#addr", "#stride") \n\t"

#define MMI_LQ(reg1, reg2, addr, bias) \
    "gslq "#reg1", "#reg2", "#bias"("#addr") \n\t"

#define MMI_SQ(reg1, reg2, addr, bias) \
    "gssq "#reg1", "#reg2", "#bias"("#addr") \n\t"

#define MMI_LQC1(fp1, fp2, addr, bias) \
    "gslqc1 "#fp1", "#fp2", "#bias"("#addr") \n\t"

#define MMI_SQC1(fp1, fp2, addr, bias) \
    "gssqc1 "#fp1", "#fp2", "#bias"("#addr") \n\t"

#endif /* HAVE_LOONGSON2 */
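
/*
 * Usage sketch (illustrative only): the DECLARE_VAR_* / RESTRICT_ASM_*
 * pairs let a single call site build against either branch above. The
 * caller always declares the scratch variables and always lists them as
 * inline-asm outputs; on Loongson-3 most of these macros expand to
 * nothing, while on Loongson-2 they supply the GPR temporaries that
 * MMI_ULWC1/MMI_ULDC1 and the indexed MMI_*X* variants expand into.
 * The function below is a hypothetical example, not an FFmpeg API:
 *
 *     static void example_copy8_mmi(uint8_t *dst, const uint8_t *src)
 *     {
 *         double ftmp[1];
 *         DECLARE_VAR_ALL64;
 *
 *         __asm__ volatile (
 *             MMI_ULDC1(%[ftmp0], %[src], 0x00)
 *             MMI_USDC1(%[ftmp0], %[dst], 0x00)
 *             : RESTRICT_ASM_ALL64
 *               [ftmp0]"=&f"(ftmp[0])
 *             : [src]"r"(src), [dst]"r"(dst)
 *             : "memory"
 *         );
 *     }
 */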

/**
 * Back up the floating point registers that MMI code may clobber
 * ($f24-$f31 under _ABI64, the even-numbered registers $f20-$f30
 * otherwise) into a local buffer.
 */
#define BACKUP_REG \
    double temp_backup_reg[8]; \
    if (_MIPS_SIM == _ABI64) \
        __asm__ volatile ( \
            "gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
            "gssqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
            "gssqc1 $f29, $f28, 0x20(%[temp]) \n\t" \
            "gssqc1 $f31, $f30, 0x30(%[temp]) \n\t" \
            : \
            : [temp]"r"(temp_backup_reg) \
            : "memory" \
        ); \
    else \
        __asm__ volatile ( \
            "gssqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
            "gssqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
            "gssqc1 $f30, $f28, 0x20(%[temp]) \n\t" \
            : \
            : [temp]"r"(temp_backup_reg) \
            : "memory" \
        );

/**
 * Restore the floating point registers saved by BACKUP_REG.
 */
#define RECOVER_REG \
    if (_MIPS_SIM == _ABI64) \
        __asm__ volatile ( \
            "gslqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
            "gslqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
            "gslqc1 $f29, $f28, 0x20(%[temp]) \n\t" \
            "gslqc1 $f31, $f30, 0x30(%[temp]) \n\t" \
            : \
            : [temp]"r"(temp_backup_reg) \
            : "memory" \
        ); \
    else \
        __asm__ volatile ( \
            "gslqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
            "gslqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
            "gslqc1 $f30, $f28, 0x20(%[temp]) \n\t" \
            : \
            : [temp]"r"(temp_backup_reg) \
            : "memory" \
        );
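
/*
 * Usage sketch (illustrative only): functions that use the saved FP
 * registers as scratch bracket their assembly with these two macros.
 * The function name and asm body are hypothetical, and dst is assumed
 * to be 16-byte aligned:
 *
 *     static void example_clear16_mmi(uint8_t *dst)
 *     {
 *         BACKUP_REG
 *         __asm__ volatile (
 *             "pxor $f24, $f24, $f24 \n\t"
 *             MMI_SQC1($f24, $f24, %[dst], 0x00)
 *             :
 *             : [dst]"r"(dst)
 *             : "memory"
 *         );
 *         RECOVER_REG
 *     }
 */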

/**
 * brief: Transpose a 4x4 block of packed halfword (16-bit) data.
 * fr_i0, fr_i1, fr_i2, fr_i3: source and destination registers
 * fr_t0, fr_t1, fr_t2, fr_t3: temporary registers
 */
#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, \
                     fr_t0, fr_t1, fr_t2, fr_t3) \
    "punpcklhw "#fr_t0", "#fr_i0", "#fr_i1" \n\t" \
    "punpckhhw "#fr_t1", "#fr_i0", "#fr_i1" \n\t" \
    "punpcklhw "#fr_t2", "#fr_i2", "#fr_i3" \n\t" \
    "punpckhhw "#fr_t3", "#fr_i2", "#fr_i3" \n\t" \
    "punpcklwd "#fr_i0", "#fr_t0", "#fr_t2" \n\t" \
    "punpckhwd "#fr_i1", "#fr_t0", "#fr_t2" \n\t" \
    "punpcklwd "#fr_i2", "#fr_t1", "#fr_t3" \n\t" \
    "punpckhwd "#fr_i3", "#fr_t1", "#fr_t3" \n\t"

/**
 * brief: Transpose an 8x8 block of packed byte data.
 * fr_i0~i7: source and destination registers
 * fr_t0~t3: temporary registers
 */
#define TRANSPOSE_8B(fr_i0, fr_i1, fr_i2, fr_i3, fr_i4, fr_i5, \
                     fr_i6, fr_i7, fr_t0, fr_t1, fr_t2, fr_t3) \
    "punpcklbh "#fr_t0", "#fr_i0", "#fr_i1" \n\t" \
    "punpckhbh "#fr_t1", "#fr_i0", "#fr_i1" \n\t" \
    "punpcklbh "#fr_t2", "#fr_i2", "#fr_i3" \n\t" \
    "punpckhbh "#fr_t3", "#fr_i2", "#fr_i3" \n\t" \
    "punpcklbh "#fr_i0", "#fr_i4", "#fr_i5" \n\t" \
    "punpckhbh "#fr_i1", "#fr_i4", "#fr_i5" \n\t" \
    "punpcklbh "#fr_i2", "#fr_i6", "#fr_i7" \n\t" \
    "punpckhbh "#fr_i3", "#fr_i6", "#fr_i7" \n\t" \
    "punpcklhw "#fr_i4", "#fr_t0", "#fr_t2" \n\t" \
    "punpckhhw "#fr_i5", "#fr_t0", "#fr_t2" \n\t" \
    "punpcklhw "#fr_i6", "#fr_t1", "#fr_t3" \n\t" \
    "punpckhhw "#fr_i7", "#fr_t1", "#fr_t3" \n\t" \
    "punpcklhw "#fr_t0", "#fr_i0", "#fr_i2" \n\t" \
    "punpckhhw "#fr_t1", "#fr_i0", "#fr_i2" \n\t" \
    "punpcklhw "#fr_t2", "#fr_i1", "#fr_i3" \n\t" \
    "punpckhhw "#fr_t3", "#fr_i1", "#fr_i3" \n\t" \
    "punpcklwd "#fr_i0", "#fr_i4", "#fr_t0" \n\t" \
    "punpckhwd "#fr_i1", "#fr_i4", "#fr_t0" \n\t" \
    "punpcklwd "#fr_i2", "#fr_i5", "#fr_t1" \n\t" \
    "punpckhwd "#fr_i3", "#fr_i5", "#fr_t1" \n\t" \
    "punpcklwd "#fr_i4", "#fr_i6", "#fr_t2" \n\t" \
    "punpckhwd "#fr_i5", "#fr_i6", "#fr_t2" \n\t" \
    "punpcklwd "#fr_i6", "#fr_i7", "#fr_t3" \n\t" \
    "punpckhwd "#fr_i7", "#fr_i7", "#fr_t3" \n\t"

/**
 * brief: Parallel arithmetic shift right of eight packed bytes.
 * fr_i0: src
 * fr_i1: shift amount (the desired per-byte shift + 8)
 * fr_t0, fr_t1: temporary registers
 * fr_d0: dst
 */
#define PSRAB_MMI(fr_i0, fr_i1, fr_t0, fr_t1, fr_d0) \
    "punpcklbh "#fr_t0", "#fr_t0", "#fr_i0" \n\t" \
    "punpckhbh "#fr_t1", "#fr_t1", "#fr_i0" \n\t" \
    "psrah "#fr_t0", "#fr_t0", "#fr_i1" \n\t" \
    "psrah "#fr_t1", "#fr_t1", "#fr_i1" \n\t" \
    "packsshb "#fr_d0", "#fr_t0", "#fr_t1" \n\t"

/**
 * brief: Parallel logical shift right of eight packed bytes.
 * fr_i0: src
 * fr_i1: shift amount (the desired per-byte shift + 8)
 * fr_t0, fr_t1: temporary registers
 * fr_d0: dst
 */
#define PSRLB_MMI(fr_i0, fr_i1, fr_t0, fr_t1, fr_d0) \
    "punpcklbh "#fr_t0", "#fr_t0", "#fr_i0" \n\t" \
    "punpckhbh "#fr_t1", "#fr_t1", "#fr_i0" \n\t" \
    "psrlh "#fr_t0", "#fr_t0", "#fr_i1" \n\t" \
    "psrlh "#fr_t1", "#fr_t1", "#fr_i1" \n\t" \
    "packsshb "#fr_d0", "#fr_t0", "#fr_t1" \n\t"

#define PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
    "psrah "#fp1", "#fp1", "#shift" \n\t" \
    "psrah "#fp2", "#fp2", "#shift" \n\t" \
    "psrah "#fp3", "#fp3", "#shift" \n\t" \
    "psrah "#fp4", "#fp4", "#shift" \n\t"

#define PSRAH_8_MMI(fp1, fp2, fp3, fp4, fp5, fp6, fp7, fp8, shift) \
    PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
    PSRAH_4_MMI(fp5, fp6, fp7, fp8, shift)

#endif /* AVUTIL_MIPS_MMIUTILS_H */