Go to the documentation of this file.
34 static void lpc_apply_welch_window_sse2(
const int32_t *
data,
int len,
37 double c = 2.0 / (
len-1.0);
42 "movsd %4, %%xmm7 \n\t"
43 "movapd "MANGLE(pd_1)
", %%xmm6 \n\t"
44 "movapd "MANGLE(pd_2)
", %%xmm5 \n\t"
45 "movlhps %%xmm7, %%xmm7 \n\t"
46 "subpd %%xmm5, %%xmm7 \n\t"
47 "addsd %%xmm6, %%xmm7 \n\t"
50 #define WELCH(MOVPD, offset)\
52 "movapd %%xmm7, %%xmm1 \n\t"\
53 "mulpd %%xmm1, %%xmm1 \n\t"\
54 "movapd %%xmm6, %%xmm0 \n\t"\
55 "subpd %%xmm1, %%xmm0 \n\t"\
56 "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\
57 "cvtpi2pd (%3,%0), %%xmm2 \n\t"\
58 "cvtpi2pd "#offset"*4(%3,%1), %%xmm3 \n\t"\
59 "mulpd %%xmm0, %%xmm2 \n\t"\
60 "mulpd %%xmm1, %%xmm3 \n\t"\
61 "movapd %%xmm2, (%2,%0,2) \n\t"\
62 MOVPD" %%xmm3, "#offset"*8(%2,%1,2) \n\t"\
63 "subpd %%xmm5, %%xmm7 \n\t"\
74 :
"r"(w_data+n2),
"r"(
data+n2),
"m"(
c),
"r"(
len)
77 "%xmm5",
"%xmm6",
"%xmm7")
82 static void lpc_compute_autocorr_sse2(
const double *
data,
int len,
int lag,
90 for(j=0; j<lag; j+=2){
94 "movsd "MANGLE(pd_1)
", %%xmm0 \n\t"
95 "movsd "MANGLE(pd_1)
", %%xmm1 \n\t"
96 "movsd "MANGLE(pd_1)
", %%xmm2 \n\t"
98 "movapd (%2,%0), %%xmm3 \n\t"
99 "movupd -8(%3,%0), %%xmm4 \n\t"
100 "movapd (%3,%0), %%xmm5 \n\t"
101 "mulpd %%xmm3, %%xmm4 \n\t"
102 "mulpd %%xmm3, %%xmm5 \n\t"
103 "mulpd -16(%3,%0), %%xmm3 \n\t"
104 "addpd %%xmm4, %%xmm1 \n\t"
105 "addpd %%xmm5, %%xmm0 \n\t"
106 "addpd %%xmm3, %%xmm2 \n\t"
109 "movhlps %%xmm0, %%xmm3 \n\t"
110 "movhlps %%xmm1, %%xmm4 \n\t"
111 "movhlps %%xmm2, %%xmm5 \n\t"
112 "addsd %%xmm3, %%xmm0 \n\t"
113 "addsd %%xmm4, %%xmm1 \n\t"
114 "addsd %%xmm5, %%xmm2 \n\t"
115 "movsd %%xmm0, (%1) \n\t"
116 "movsd %%xmm1, 8(%1) \n\t"
117 "movsd %%xmm2, 16(%1) \n\t"
125 "movsd "MANGLE(pd_1)
", %%xmm0 \n\t"
126 "movsd "MANGLE(pd_1)
", %%xmm1 \n\t"
128 "movapd (%3,%0), %%xmm3 \n\t"
129 "movupd -8(%4,%0), %%xmm4 \n\t"
130 "mulpd %%xmm3, %%xmm4 \n\t"
131 "mulpd (%4,%0), %%xmm3 \n\t"
132 "addpd %%xmm4, %%xmm1 \n\t"
133 "addpd %%xmm3, %%xmm0 \n\t"
136 "movhlps %%xmm0, %%xmm3 \n\t"
137 "movhlps %%xmm1, %%xmm4 \n\t"
138 "addsd %%xmm3, %%xmm0 \n\t"
139 "addsd %%xmm4, %%xmm1 \n\t"
140 "movsd %%xmm0, %1 \n\t"
141 "movsd %%xmm1, %2 \n\t"
142 :
"+&r"(
i),
"=m"(autoc[j]),
"=m"(autoc[j+1])
158 c->lpc_apply_welch_window = lpc_apply_welch_window_sse2;
159 c->lpc_compute_autocorr = lpc_compute_autocorr_sse2;
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
#define INLINE_SSE2(flags)
#define NAMED_CONSTRAINTS_ARRAY_ADD(...)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined behavior must not occur in a C program; it is not safe even if the output of the undefined operations is unused. The unsafety may seem like nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
#define XMM_CLOBBERS_ONLY(...)
#define i(width, name, range_min, range_max)
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
av_cold void ff_lpc_init_x86(LPCContext *c)
#define INLINE_SSE2_SLOW(flags)
DECLARE_ASM_CONST(16, double, pd_1)[2]