FFmpeg
cabac.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef AVCODEC_X86_CABAC_H
22 #define AVCODEC_X86_CABAC_H
23 
24 #include <stddef.h>
25 
26 #include "libavcodec/cabac.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/macros.h"
29 #include "libavutil/x86/asm.h"
30 #include "config.h"
31 
/*
 * BROKEN_COMPILER is 1 for toolchains on which the inline-asm functions in
 * this header are compiled out (each of them is guarded by !BROKEN_COMPILER):
 *  - clang older than 2.10 when __i386 is defined,
 *  - a non-clang compiler that defines __llvm__ and reports itself as
 *    GCC 4.2.0 or 4.2.1 (presumably llvm-gcc; confirm),
 *  - a compiler that defines both __INTEL_COMPILER and _MSC_VER.
 * It is 0 for every other compiler.
 */
32 #if (defined(__i386) && defined(__clang__) && (__clang_major__<2 || (__clang_major__==2 && __clang_minor__<10)))\
33  || ( !defined(__clang__) && defined(__llvm__) && __GNUC__==4 && __GNUC_MINOR__==2 && __GNUC_PATCHLEVEL__<=1)\
34  || (defined(__INTEL_COMPILER) && defined(_MSC_VER))
35 # define BROKEN_COMPILER 1
36 #else
37 # define BROKEN_COMPILER 0
38 #endif
39 
40 #if HAVE_INLINE_ASM
41 
/*
 * Unless the including file already defined UNCHECKED_BITSTREAM_READER,
 * derive it from the build configuration: reads are unchecked exactly when
 * CONFIG_SAFE_BITSTREAM_READER is 0.
 */
42 #ifndef UNCHECKED_BITSTREAM_READER
43 #define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
44 #endif
45 
/*
 * END_CHECK(end): asm text fragment pasted into BRANCHLESS_GET_CABAC.
 * `end` is a string naming an asm operand.
 *  - Unchecked builds: expands to an empty string (no instructions).
 *  - Checked builds: compares the register FF_REG_c with `end` and jumps
 *    forward to local label 1 when FF_REG_c >= end (signed compare). At its
 *    use site this skips the instruction that advances the stored
 *    bytestream pointer.
 */
46 #if UNCHECKED_BITSTREAM_READER
47 #define END_CHECK(end) ""
48 #else
49 #define END_CHECK(end) \
50  "cmp "end" , %%"FF_REG_c" \n\t"\
51  "jge 1f \n\t"
52 #endif
53 
54 #ifdef BROKEN_RELOCATIONS
/*
 * BROKEN_RELOCATIONS path: the asm below does not reference the table symbol
 * directly. TABLES_ARG appends the local variable `tables` as an extra
 * register input operand, and the macros address the tables as
 * offset(tables, index).
 */
55 #define TABLES_ARG , "r"(tables)
56 
57 #if HAVE_FAST_CMOV
/*
 * BRANCHLESS_GET_CABAC_UPDATE (BROKEN_RELOCATIONS path, cmov flavour).
 *
 * As used by BRANCHLESS_GET_CABAC, on entry:
 *   range = value just read from the lps_off table,
 *   ecx   = old range minus that value,
 *   tmp   = ecx << 17.
 * Without branching it picks the new range, conditionally subtracts tmp
 * from low and conditionally inverts retq. Clobbers rcx and the flags.
 */
58 #define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
59  "cmp "low" , "tmp" \n\t" /* flags of tmp - low */\
60  "cmova %%ecx , "range" \n\t" /* tmp > low (unsigned): range = ecx */\
61  "sbb %%rcx , %%rcx \n\t" /* rcx = all ones if tmp < low (unsigned), else 0 */\
62  "and %%ecx , "tmp" \n\t" /* tmp &= mask */\
63  "xor %%rcx , "retq" \n\t" /* retq ^= mask */\
64  "sub "tmp" , "low" \n\t" /* low -= tmp (tmp is 0 when mask is 0) */
65 #else /* HAVE_FAST_CMOV */
/*
 * Same contract as the cmov flavour, built from sub/sar/and arithmetic.
 * After the xor, ret is sign-extended into retq so that it can be used as a
 * 64-bit index.
 */
66 #define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
67 /* P4 Prescott has crappy cmov,sbb,64-bit shift so avoid them */ \
68  "sub "low" , "tmp" \n\t" /* tmp = tmp - low */\
69  "sar $31 , "tmp" \n\t" /* tmp = mask: all ones if the difference is negative */\
70  "sub %%ecx , "range" \n\t" /* range -= ecx */\
71  "and "tmp" , "range" \n\t" /* keep the difference only when mask is set */\
72  "add %%ecx , "range" \n\t" /* range = ecx, or the original range when mask is set */\
73  "shl $17 , %%ecx \n\t"\
74  "and "tmp" , %%ecx \n\t" /* ecx = (ecx << 17) & mask */\
75  "sub %%ecx , "low" \n\t" /* low -= masked value */\
76  "xor "tmp" , "ret" \n\t" /* ret ^= mask */\
77  "movslq "ret" , "retq" \n\t" /* sign-extend ret to 64 bits */
78 #endif /* HAVE_FAST_CMOV */
79 
/*
 * BRANCHLESS_GET_CABAC (BROKEN_RELOCATIONS path): asm text for one call of
 * the x86 get_cabac implementation. All arguments are strings that are
 * pasted into the asm template:
 *   ret / retq      32-bit / 64-bit name of the result register
 *   statep          memory operand of the state byte (read, then rewritten)
 *   low / lowword   32-bit / 16-bit name of the register holding low
 *   range / rangeq  32-bit / 64-bit name of the register holding range
 *   tmp / tmpbyte   32-bit / 8-bit name of a scratch register
 *   byte            memory operand holding the bytestream pointer
 *   end             memory operand holding the bytestream end pointer
 *   norm_off, lps_off, mlps_off
 *                   byte offsets of the sub-tables inside the table blob
 *   tables          register holding the table base address
 * Clobbers rcx/ecx and the flags; uses local labels 1 and 2.
 */
80 #define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
81  "movzbl "statep" , "ret" \n\t" /* ret = state byte */\
82  "mov "range" , "tmp" \n\t" /* tmp = old range */\
83  "and $0xC0 , "range" \n\t" /* keep bits 6-7 of range */\
84  "lea ("ret", "range", 2), %%ecx \n\t" /* ecx = ret + 2*(range & 0xC0) */\
85  "movzbl "lps_off"("tables", %%rcx), "range" \n\t" /* range = tables[lps_off + rcx] */\
86  "sub "range" , "tmp" \n\t" /* tmp = old range - looked-up value */\
87  "mov "tmp" , %%ecx \n\t"\
88  "shl $17 , "tmp" \n\t"\
89  BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
90  "movzbl "norm_off"("tables", "rangeq"), %%ecx \n\t" /* cl = tables[norm_off + range] */\
91  "shl %%cl , "range" \n\t"\
92  "movzbl "mlps_off"+128("tables", "retq"), "tmp" \n\t" /* tmp = tables[mlps_off + 128 + ret]; ret may have been inverted above */\
93  "shl %%cl , "low" \n\t"\
94  "mov "tmpbyte" , "statep" \n\t" /* store the new state byte */\
95  "test "lowword" , "lowword" \n\t"\
96  "jnz 2f \n\t" /* low 16 bits of low are non-zero: skip the refill */\
97  "mov "byte" , %%"FF_REG_c" \n\t" /* load the bytestream pointer */\
98  END_CHECK(end) /* checked builds: do not advance once the pointer is >= end */\
99  "add"FF_OPSIZE" $2 , "byte" \n\t" /* advance the stored pointer by 2 */\
100  "1: \n\t"\
101  "movzwl (%%"FF_REG_c") , "tmp" \n\t" /* read 16 bits at the pointer loaded above */\
102  "lea -1("low") , %%ecx \n\t"\
103  "xor "low" , %%ecx \n\t"\
104  "shr $15 , %%ecx \n\t" /* ecx = (low ^ (low - 1)) >> 15 */\
105  "bswap "tmp" \n\t"\
106  "shr $15 , "tmp" \n\t" /* tmp = the two bytes in big-endian order, times 2 */\
107  "movzbl "norm_off"("tables", %%rcx), %%ecx \n\t"\
108  "sub $0xFFFF , "tmp" \n\t"\
109  "neg %%ecx \n\t"\
110  "add $7 , %%ecx \n\t" /* ecx = 7 - tables[norm_off + ecx] */\
111  "shl %%cl , "tmp" \n\t"\
112  "add "tmp" , "low" \n\t" /* merge the new bits into low */\
113  "2: \n\t"
114 
115 #else /* BROKEN_RELOCATIONS */
/*
 * Default path: the asm refers to the tables by symbol through
 * MANGLE(ff_h264_cabac_tables). TABLES_ARG expands to whatever
 * NAMED_CONSTRAINTS_ARRAY_ADD (defined outside this file) produces for that
 * symbol. RIP_ARG is defined empty and is not used in the visible part of
 * this header.
 */
116 #define TABLES_ARG NAMED_CONSTRAINTS_ARRAY_ADD(ff_h264_cabac_tables)
117 #define RIP_ARG
118 
119 #if HAVE_FAST_CMOV
/*
 * BRANCHLESS_GET_CABAC_UPDATE (symbol-addressed path, cmov flavour).
 *
 * As used by BRANCHLESS_GET_CABAC, on entry:
 *   range = value just read from the lps_off table,
 *   tmp   = old range minus that value.
 * Unlike the BROKEN_RELOCATIONS flavour, this one saves tmp into ecx and
 * scales tmp by 1 << 17 itself. Without branching it picks the new range,
 * conditionally subtracts tmp from low and conditionally inverts ret.
 * Clobbers ecx and the flags.
 */
120 #define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
121  "mov "tmp" , %%ecx \n\t" /* ecx = unscaled tmp */\
122  "shl $17 , "tmp" \n\t"\
123  "cmp "low" , "tmp" \n\t" /* flags of tmp - low */\
124  "cmova %%ecx , "range" \n\t" /* tmp > low (unsigned): range = ecx */\
125  "sbb %%ecx , %%ecx \n\t" /* ecx = all ones if tmp < low (unsigned), else 0 */\
126  "and %%ecx , "tmp" \n\t" /* tmp &= mask */\
127  "xor %%ecx , "ret" \n\t" /* ret ^= mask */\
128  "sub "tmp" , "low" \n\t" /* low -= tmp (tmp is 0 when mask is 0) */
129 #else /* HAVE_FAST_CMOV */
/*
 * Same contract as the cmov flavour, built from sub/sar/and arithmetic
 * (no cmov, no sbb).
 */
130 #define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
131  "mov "tmp" , %%ecx \n\t" /* ecx = unscaled tmp */\
132  "shl $17 , "tmp" \n\t"\
133  "sub "low" , "tmp" \n\t" /* tmp = (ecx << 17) - low */\
134  "sar $31 , "tmp" \n\t" /*lps_mask*/\
135  "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
136  "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
137  "add %%ecx , "range" \n\t" /*new range*/\
138  "shl $17 , %%ecx \n\t"\
139  "and "tmp" , %%ecx \n\t" /* ecx = (ecx << 17) & lps_mask */\
140  "sub %%ecx , "low" \n\t" /* low -= masked value */\
141  "xor "tmp" , "ret" \n\t" /* ret ^= lps_mask */
142 #endif /* HAVE_FAST_CMOV */
143 
/*
 * BRANCHLESS_GET_CABAC (symbol-addressed path): asm text for one call of
 * the x86 get_cabac implementation. It takes the same parameter list as the
 * BROKEN_RELOCATIONS flavour so both can be invoked identically; in this
 * flavour retq, rangeq and tables are accepted but not used, and the tables
 * are addressed as MANGLE(ff_h264_cabac_tables)+offset(index).
 *   ret             32-bit name of the result register
 *   statep          memory operand of the state byte (read, then rewritten)
 *   low / lowword   32-bit / 16-bit name of the register holding low
 *   range           32-bit name of the register holding range
 *   tmp / tmpbyte   32-bit / 8-bit name of a scratch register
 *   byte            memory operand holding the bytestream pointer
 *   end             memory operand holding the bytestream end pointer
 *   norm_off, lps_off, mlps_off
 *                   byte offsets of the sub-tables inside the table blob
 * Clobbers FF_REG_c/ecx and the flags; uses local labels 1 and 2.
 */
144 #define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
145  "movzbl "statep" , "ret" \n\t" /* ret = state byte */\
146  "mov "range" , "tmp" \n\t" /* tmp = old range */\
147  "and $0xC0 , "range" \n\t" /* keep bits 6-7 of range */\
148  "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t" /* range = table[lps_off + ret + 2*(range & 0xC0)] */\
149  "sub "range" , "tmp" \n\t" /* tmp = old range - looked-up value */\
150  BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \
151  "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t" /* cl = table[norm_off + range] */\
152  "shl %%cl , "range" \n\t"\
153  "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t" /* tmp = table[mlps_off + 128 + ret]; ret may have been inverted above */\
154  "shl %%cl , "low" \n\t"\
155  "mov "tmpbyte" , "statep" \n\t" /* store the new state byte */\
156  "test "lowword" , "lowword" \n\t"\
157  " jnz 2f \n\t" /* low 16 bits of low are non-zero: skip the refill */\
158  "mov "byte" , %%"FF_REG_c" \n\t" /* load the bytestream pointer */\
159  END_CHECK(end) /* checked builds: do not advance once the pointer is >= end */\
160  "add"FF_OPSIZE" $2 , "byte" \n\t" /* advance the stored pointer by 2 */\
161  "1: \n\t"\
162  "movzwl (%%"FF_REG_c") , "tmp" \n\t" /* read 16 bits at the pointer loaded above */\
163  "lea -1("low") , %%ecx \n\t"\
164  "xor "low" , %%ecx \n\t"\
165  "shr $15 , %%ecx \n\t" /* ecx = (low ^ (low - 1)) >> 15 */\
166  "bswap "tmp" \n\t"\
167  "shr $15 , "tmp" \n\t" /* tmp = the two bytes in big-endian order, times 2 */\
168  "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\
169  "sub $0xFFFF , "tmp" \n\t"\
170  "neg %%ecx \n\t"\
171  "add $7 , %%ecx \n\t" /* ecx = 7 - table[norm_off + ecx] */\
172  "shl %%cl , "tmp" \n\t"\
173  "add "tmp" , "low" \n\t" /* merge the new bits into low */\
174  "2: \n\t"
175 
176 #endif /* BROKEN_RELOCATIONS */
177 
178 #if HAVE_7REGS && !BROKEN_COMPILER
179 #define get_cabac_inline get_cabac_inline_x86
/**
 * x86 inline-asm implementation of get_cabac_inline (installed through the
 * macro above). Only built when seven general-purpose registers are
 * available and the compiler is not flagged as broken.
 *
 * Reads *state, c->low, c->range and, when a refill is needed, two bytes at
 * c->bytestream. Writes back the new state byte, c->low and c->range, and
 * may advance c->bytestream by 2.
 *
 * @param c     CABAC decoder context
 * @param state pointer to the context state byte; updated in place
 * @return      the low bit of the value left in the result register
 */
180 static av_always_inline int get_cabac_inline_x86(CABACContext *c,
181  uint8_t *const state)
182 {
183  int bit, tmp;
184 #ifdef BROKEN_RELOCATIONS
185  void *tables;
186 
 /* Materialize the table base address once; the main asm block then
  * receives it as a register operand through TABLES_ARG. */
187  __asm__ volatile(
188  "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
189  : "=&r"(tables)
190  : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
191  );
192 #endif
193 
 /*
  * Operands: %0 bit, %1 c->low, %2 c->range, %3 tmp (constraint "q"
  * because its 8-bit name %b3 is stored to *state), %4 state, %5 c,
  * %6/%7 offsets of bytestream/bytestream_end inside CABACContext,
  * %8 the operand added by TABLES_ARG.
  * The three table offsets are stringified because the macro pastes them
  * into the asm template text.
  */
194  __asm__ volatile(
195  BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1",
196  "%2", "%q2", "%3", "%b3",
197  "%c6(%5)", "%c7(%5)",
198  AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
199  AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
200  AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
201  "%8")
202  : "=&r"(bit), "=&r"(c->low), "=&r"(c->range), "=&q"(tmp)
203  : "r"(state), "r"(c),
204  "i"(offsetof(CABACContext, bytestream)),
205  "i"(offsetof(CABACContext, bytestream_end))
206  TABLES_ARG
207  ,"1"(c->low), "2"(c->range)
208  : "%"FF_REG_c, "memory"
209  );
210  return bit & 1;
211 }
212 #endif /* HAVE_7REGS && !BROKEN_COMPILER */
213 
214 #if !BROKEN_COMPILER
215 #define get_cabac_bypass_sign get_cabac_bypass_sign_x86
/**
 * x86 inline-asm implementation of get_cabac_bypass_sign (installed through
 * the macro above).
 *
 * Doubles c->low, compares it with c->range << 17 and returns val negated
 * when the doubled low is the smaller of the two, val unchanged otherwise.
 * Stores the updated low back to c->low. When the low 16 bits of the new
 * low are zero it also reads two bytes at c->bytestream into low and
 * advances c->bytestream by 2; in checked builds the advance is skipped
 * once the pointer is >= c->bytestream_end.
 *
 * @param c   CABAC decoder context
 * @param val value whose sign is to be decided (lives in ecx, operand %0)
 * @return    val or -val
 */
216 static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
217 {
218  x86_reg tmp;
219  __asm__ volatile(
220  "movl %c6(%2), %k1 \n\t" /* tmp = c->range */
221  "movl %c3(%2), %%eax \n\t" /* eax = c->low */
222  "shl $17, %k1 \n\t" /* tmp = range << 17 */
223  "add %%eax, %%eax \n\t" /* eax = low * 2 */
224  "sub %k1, %%eax \n\t" /* eax -= range << 17 */
225  "cdq \n\t" /* edx = all ones if eax is negative, else 0 */
226  "and %%edx, %k1 \n\t"
227  "add %k1, %%eax \n\t" /* undo the subtraction when it went negative */
228  "xor %%edx, %%ecx \n\t"
229  "sub %%edx, %%ecx \n\t" /* val = (val ^ mask) - mask: negate when mask is set */
230  "test %%ax, %%ax \n\t"
231  "jnz 1f \n\t" /* low 16 bits non-zero: no refill */
232  "mov %c4(%2), %1 \n\t" /* tmp = c->bytestream */
233  "subl $0xFFFF, %%eax \n\t"
234  "movzwl (%1), %%edx \n\t" /* read 16 bits from the stream */
235  "bswap %%edx \n\t"
236  "shrl $15, %%edx \n\t" /* edx = the two bytes in big-endian order, times 2 */
237 #if UNCHECKED_BITSTREAM_READER
238  "add $2, %1 \n\t"
239  "addl %%edx, %%eax \n\t"
240  "mov %1, %c4(%2) \n\t" /* c->bytestream += 2, unconditionally */
241 #else
242  "addl %%edx, %%eax \n\t"
243  "cmp %c5(%2), %1 \n\t"
244  "jge 1f \n\t" /* pointer >= c->bytestream_end: do not advance */
245  "add"FF_OPSIZE" $2, %c4(%2) \n\t"
246 #endif
247  "1: \n\t"
248  "movl %%eax, %c3(%2) \n\t" /* c->low = eax */
249 
250  : "+c"(val), "=&r"(tmp)
251  : "r"(c),
252  "i"(offsetof(CABACContext, low)),
253  "i"(offsetof(CABACContext, bytestream)),
254  "i"(offsetof(CABACContext, bytestream_end)),
255  "i"(offsetof(CABACContext, range))
256  : "%eax", "%edx", "memory"
257  );
258  return val;
259 }
260 
261 #define get_cabac_bypass get_cabac_bypass_x86
/**
 * x86 inline-asm implementation of get_cabac_bypass (installed through the
 * macro above).
 *
 * Doubles c->low and compares it with c->range << 17. Returns 1 when the
 * doubled low is at least that large (the difference is kept as the new
 * low), 0 otherwise. Stores the updated low back to c->low. When the low
 * 16 bits of the new low are zero it also reads two bytes at c->bytestream
 * into low and advances c->bytestream by 2 unless the pointer is
 * >= c->bytestream_end. Unlike get_cabac_bypass_sign_x86, this end check is
 * not conditional on UNCHECKED_BITSTREAM_READER.
 *
 * @param c CABAC decoder context
 * @return  0 or 1
 */
262 static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
263 {
264  x86_reg tmp;
265  int res;
266  __asm__ volatile(
267  "movl %c6(%2), %k1 \n\t" /* tmp = c->range */
268  "movl %c3(%2), %%eax \n\t" /* eax = c->low */
269  "shl $17, %k1 \n\t" /* tmp = range << 17 */
270  "add %%eax, %%eax \n\t" /* eax = low * 2 */
271  "sub %k1, %%eax \n\t" /* eax -= range << 17 */
272  "cdq \n\t" /* edx = all ones if eax is negative, else 0 */
273  "and %%edx, %k1 \n\t"
274  "add %k1, %%eax \n\t" /* undo the subtraction when it went negative */
275  "inc %%edx \n\t" /* res = mask + 1: 0 when negative, 1 otherwise */
276  "test %%ax, %%ax \n\t"
277  "jnz 1f \n\t" /* low 16 bits non-zero: no refill */
278  "mov %c4(%2), %1 \n\t" /* tmp = c->bytestream */
279  "subl $0xFFFF, %%eax \n\t"
280  "movzwl (%1), %%ecx \n\t" /* read 16 bits from the stream */
281  "bswap %%ecx \n\t"
282  "shrl $15, %%ecx \n\t" /* ecx = the two bytes in big-endian order, times 2 */
283  "addl %%ecx, %%eax \n\t"
284  "cmp %c5(%2), %1 \n\t"
285  "jge 1f \n\t" /* pointer >= c->bytestream_end: do not advance */
286  "add"FF_OPSIZE" $2, %c4(%2) \n\t"
287  "1: \n\t"
288  "movl %%eax, %c3(%2) \n\t" /* c->low = eax */
289 
290  : "=&d"(res), "=&r"(tmp)
291  : "r"(c),
292  "i"(offsetof(CABACContext, low)),
293  "i"(offsetof(CABACContext, bytestream)),
294  "i"(offsetof(CABACContext, bytestream_end)),
295  "i"(offsetof(CABACContext, range))
296  : "%eax", "%ecx", "memory"
297  );
298  return res;
299 }
300 #endif /* !BROKEN_COMPILER */
301 
302 #endif /* HAVE_INLINE_ASM */
303 #endif /* AVCODEC_X86_CABAC_H */
cabac.h
state
static struct @321 state
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:27
bit
#define bit(string, value)
Definition: cbs_mpeg2.c:58
macros.h
NAMED_CONSTRAINTS_ARRAY
#define NAMED_CONSTRAINTS_ARRAY(...)
Definition: asm.h:151
tables
Writing a table generator This documentation is preliminary Parts of the API are not good and should be changed Basic concepts A table generator consists of two *_tablegen c and *_tablegen h The h file will provide the variable declarations and initialization code for the tables
Definition: tablegen.txt:10
val
static double val(void *priv, double ch)
Definition: aeval.c:76
ff_h264_cabac_tables
const uint8_t ff_h264_cabac_tables[512+4 *2 *64+4 *64+63]
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
asm.h
H264_LPS_RANGE_OFFSET
#define H264_LPS_RANGE_OFFSET
Definition: cabac.h:34
attributes.h
av_always_inline
#define av_always_inline
Definition: attributes.h:49
AV_STRINGIFY
#define AV_STRINGIFY(s)
Definition: macros.h:36
uint8_t
uint8_t
Definition: audio_convert.c:194
H264_NORM_SHIFT_OFFSET
#define H264_NORM_SHIFT_OFFSET
Definition: cabac.h:33
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
MANGLE
#define MANGLE(a)
Definition: asm.h:127
x86_reg
int x86_reg
Definition: asm.h:72
H264_MLPS_STATE_OFFSET
#define H264_MLPS_STATE_OFFSET
Definition: cabac.h:35
CABACContext
Definition: cabac.h:41