Go to the documentation of this file.
33 #define cpuid(index, eax, ebx, ecx, edx) \
34 ff_cpu_cpuid(index, &eax, &ebx, &ecx, &edx)
36 #define xgetbv(index, eax, edx) \
37 ff_cpu_xgetbv(index, &eax, &edx)
42 #define cpuid(index, eax, ebx, ecx, edx) \
44 "mov %%"FF_REG_b", %%"FF_REG_S" \n\t" \
46 "xchg %%"FF_REG_b", %%"FF_REG_S \
47 : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \
48 : "0" (index), "2"(0))
50 #define xgetbv(index, eax, edx) \
51 __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index))
53 #define get_eflags(x) \
54 __asm__ volatile ("pushfl \n" \
58 #define set_eflags(x) \
59 __asm__ volatile ("push %0 \n" \
67 #define cpuid_test() 1
71 #define cpuid_test ff_cpu_cpuid_test
75 static int cpuid_test(
void)
82 set_eflags(
a ^ 0x200000);
96 int eax, ebx, ecx, edx;
97 int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0;
98 int family = 0, model = 0;
99 union {
int i[3];
char c[12]; } vendor;
100 int xcr0_lo = 0, xcr0_hi = 0;
105 cpuid(0, max_std_level, vendor.i[0], vendor.i[2], vendor.i[1]);
107 if (max_std_level >= 1) {
108 cpuid(1, eax, ebx, ecx, std_caps);
109 family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
110 model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
111 if (std_caps & (1 << 15))
113 if (std_caps & (1 << 23))
115 if (std_caps & (1 << 25))
118 if (std_caps & (1 << 25))
120 if (std_caps & (1 << 26))
124 if (ecx & 0x00000200 )
126 if (ecx & 0x00080000 )
128 if (ecx & 0x00100000 )
130 if (ecx & 0x02000000 )
134 if ((ecx & 0x18000000) == 0x18000000) {
136 xgetbv(0, xcr0_lo, xcr0_hi);
137 if ((xcr0_lo & 0x6) == 0x6) {
139 if (ecx & 0x00001000)
146 if (max_std_level >= 7) {
147 cpuid(7, eax, ebx, ecx, edx);
152 if ((xcr0_lo & 0xe0) == 0xe0) {
160 if (ebx & 0x00000008) {
162 if (ebx & 0x00000100)
167 cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
169 if (max_ext_level >= 0x80000001) {
170 cpuid(0x80000001, eax, ebx, ecx, ext_caps);
171 if (ext_caps & (1U << 31))
173 if (ext_caps & (1 << 30))
175 if (ext_caps & (1 << 23))
177 if (ext_caps & (1 << 22))
180 if (!strncmp(vendor.c,
"AuthenticAMD", 12)) {
208 if (ecx & 0x00000800)
210 if (ecx & 0x00010000)
215 if (!strncmp(vendor.c,
"GenuineIntel", 12)) {
216 if (family == 6 && (model == 9 || model == 13 || model == 14)) {
234 if (family == 6 && model == 28)
240 family == 6 && model < 23)
#define AV_CPU_FLAG_SSSE3SLOW
SSSE3 supported, but usually not faster.
#define AV_CPU_FLAG_SSE3
Prescott SSE3 functions.
#define AV_CPU_FLAG_SSE3SLOW
SSE3 supported, but usually not faster.
#define AV_CPU_FLAG_3DNOW
AMD 3DNOW.
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
#define AV_CPU_FLAG_BMI1
Bit Manipulation Instruction Set 1.
#define AV_CPU_FLAG_SSSE3
Conroe SSSE3 functions.
#define AV_CPU_FLAG_XOP
Bulldozer XOP functions.
#define AV_CPU_FLAG_3DNOWEXT
AMD 3DNowExt.
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
#define AV_CPU_FLAG_AVX512
AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used.
size_t ff_get_cpu_max_align_x86(void)
#define AV_CPU_FLAG_SSE4
Penryn SSE4.1 functions.
#define AV_CPU_FLAG_CMOV
supports cmov instruction
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c[…]
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
#define AV_CPU_FLAG_AVXSLOW
AVX supported, but slow when using YMM registers (e.g. Bulldozer)
#define AV_CPU_FLAG_AVX
AVX functions: requires OS support even if YMM registers aren't used.
#define AV_CPU_FLAG_FMA4
Bulldozer FMA4 functions.
#define AV_CPU_FLAG_AVX2
AVX2 functions: requires OS support even if YMM registers aren't used.
#define AV_CPU_FLAG_SSE2SLOW
SSE2 supported, but usually not faster.
#define AV_CPU_FLAG_FMA3
Haswell FMA3 functions.
The reader does not expect b to be semantically unsigned here, and if the code is changed by maybe adding a cast, a division or other operation, the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: SUINT is the C unsigned type, but it holds a signed int. To use the same example: SUINT a [...]
#define i(width, name, range_min, range_max)
#define AV_CPU_FLAG_SSE42
Nehalem SSE4.2 functions.
#define xf(width, name, var, range_min, range_max, subs,...)
int ff_get_cpu_flags_x86(void)
#define AV_CPU_FLAG_ATOM
Atom processor, some SSSE3 instructions are slower.
#define AV_CPU_FLAG_MMX
standard MMX
#define AV_CPU_FLAG_AESNI
Advanced Encryption Standard functions.
#define AV_CPU_FLAG_SSE
SSE functions.
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
#define AV_CPU_FLAG_BMI2
Bit Manipulation Instruction Set 2.
#define flags(name, subs,...)