Go to the documentation of this file.
/* Declares the two Hadamard-difference entry points for one CPU flavour.
 * The `cpu` argument is token-pasted onto the names, yielding prototypes for
 * ff_hadamard8_diff_<cpu> and ff_hadamard8_diff16_<cpu>. Both share the
 * argument list (MpegEncContext *s, uint8_t *src1, uint8_t *src2,
 * ptrdiff_t stride, int h) and return int.
 * Only declarations are produced; this macro emits no function bodies. */
80 #define hadamard_func(cpu) \
81 int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1, \
82 uint8_t *src2, ptrdiff_t stride, int h); \
83 int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, uint8_t *src1, \
84 uint8_t *src2, ptrdiff_t stride, int h);
97 score1 =
c->mecc.sse[0](
c, pix1, pix2,
stride,
h);
104 return score1 +
FFABS(score2) *
c->avctx->nsse_weight;
106 return score1 +
FFABS(score2) * 8;
117 return score1 +
FFABS(score2) *
c->avctx->nsse_weight;
119 return score1 +
FFABS(score2) * 8;
127 0x0000000000000000ULL,
128 0x0001000100010001ULL,
129 0x0002000200020002ULL,
/* MMX helper that accumulates, into register mm6, the absolute differences
 * between an 8-byte row read through operand %3 and a 4-sample rounded
 * average built from operands %1 and %2 (each read at byte offsets 0 and 1).
 * The code relies on mm7 being zero and on mm6 holding the running total;
 * the PIX_SADXY wrappers clear both with pxor before calling this helper.
 * NOTE(review): parts of this function (rest of the parameter list, loop
 * label/branch, first operands) are not visible in this listing, so the
 * mapping of %1..%3 to blk1/blk2 should be confirmed against the full file. */
132 static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
/* Load 8 bytes at (%1 + index) and at (%1 + index + 1), widen the bytes to
 * 16-bit words by unpacking against mm7, and add the two rows pairwise:
 * mm0 = low four sums, mm1 = high four sums. */
137 "movq (%1, %%"FF_REG_a
"), %%mm0\n\t"
138 "movq 1(%1, %%"FF_REG_a
"), %%mm2\n\t"
139 "movq %%mm0, %%mm1 \n\t"
140 "movq %%mm2, %%mm3 \n\t"
141 "punpcklbw %%mm7, %%mm0 \n\t"
142 "punpckhbw %%mm7, %%mm1 \n\t"
143 "punpcklbw %%mm7, %%mm2 \n\t"
144 "punpckhbw %%mm7, %%mm3 \n\t"
145 "paddw %%mm2, %%mm0 \n\t"
146 "paddw %%mm3, %%mm1 \n\t"
/* Same widening and pairwise add for the row addressed through %2:
 * mm2 = low four sums, mm3 = high four sums. */
149 "movq (%2, %%"FF_REG_a
"), %%mm2\n\t"
150 "movq 1(%2, %%"FF_REG_a
"), %%mm4\n\t"
151 "movq %%mm2, %%mm3 \n\t"
152 "movq %%mm4, %%mm5 \n\t"
153 "punpcklbw %%mm7, %%mm2 \n\t"
154 "punpckhbw %%mm7, %%mm3 \n\t"
155 "punpcklbw %%mm7, %%mm4 \n\t"
156 "punpckhbw %%mm7, %%mm5 \n\t"
157 "paddw %%mm4, %%mm2 \n\t"
158 "paddw %%mm5, %%mm3 \n\t"
/* Combine both row sums (four samples per output position) and add the
 * constant loaded from operand %5 before the shift below. */
159 "movq %5, %%mm5 \n\t"
160 "paddw %%mm2, %%mm0 \n\t"
161 "paddw %%mm3, %%mm1 \n\t"
162 "paddw %%mm5, %%mm0 \n\t"
163 "paddw %%mm5, %%mm1 \n\t"
/* The comparison row (%3 + index) is loaded twice because the two saturating
 * subtractions below each overwrite one of their operands. */
164 "movq (%3, %%"FF_REG_a
"), %%mm4 \n\t"
165 "movq (%3, %%"FF_REG_a
"), %%mm5 \n\t"
/* Shift right by 2 (divide the 4-sample sum by four) and pack the eight
 * words back into unsigned bytes in mm0. */
166 "psrlw $2, %%mm0 \n\t"
167 "psrlw $2, %%mm1 \n\t"
168 "packuswb %%mm1, %%mm0 \n\t"
/* Byte-wise absolute difference: unsigned saturating subtraction in both
 * directions, then OR (one of the two results is zero for every byte). */
169 "psubusb %%mm0, %%mm4 \n\t"
170 "psubusb %%mm5, %%mm0 \n\t"
171 "por %%mm4, %%mm0 \n\t"
/* Widen the eight absolute differences to words and add them to the four
 * 16-bit partial sums kept in mm6. */
172 "movq %%mm0, %%mm4 \n\t"
173 "punpcklbw %%mm7, %%mm0 \n\t"
174 "punpckhbw %%mm7, %%mm4 \n\t"
175 "paddw %%mm0, %%mm6 \n\t"
176 "paddw %%mm4, %%mm6 \n\t"
/* Keep the %2 row sums in mm0/mm1 so they take the place of the %1 row sums;
 * presumably this feeds the next loop iteration (loop branch not visible
 * here -- confirm against the full file). */
177 "movq %%mm2, %%mm0 \n\t"
178 "movq %%mm3, %%mm1 \n\t"
/* Advance the index register by operand %4. */
179 "add %4, %%"FF_REG_a
" \n\t"
/* Trailing input operands: the stride and the round_tab[2] constant;
 * judging by their use above these are presumably %4 and %5. */
183 "r" (
stride),
"m" (round_tab[2]));
/* Reduces the four 16-bit partial sums held in MMX register mm6 to a single
 * value and moves it to output operand %0. Takes no arguments: the input is
 * whatever the preceding asm left in mm6.
 * NOTE(review): the asm wrapper and the return statement are not visible in
 * this listing, so any masking applied to the result cannot be confirmed
 * from here. */
186 static inline int sum_mmx(
void)
/* Fold the upper 32 bits onto the lower 32 bits (word-wise add). */
190 "movq %%mm6, %%mm0 \n\t"
191 "psrlq $32, %%mm6 \n\t"
192 "paddw %%mm0, %%mm6 \n\t"
/* Fold the remaining two words together; the total ends up in the lowest
 * 16-bit lane of mm6. */
193 "movq %%mm6, %%mm0 \n\t"
194 "psrlq $16, %%mm6 \n\t"
195 "paddw %%mm0, %%mm6 \n\t"
/* Copy the low 32 bits of mm6 to the output operand. */
196 "movd %%mm6, %0 \n\t"
/* Generates the two "xy2" SAD functions for one implementation suffix:
 * sad8_xy2_<suf> and sad16_xy2_<suf>. Both have the signature
 * (MpegEncContext *v, uint8_t *blk2, uint8_t *blk1, ptrdiff_t stride, int h);
 * note that blk2 precedes blk1 in the parameter list while the helper is
 * called as sad8_4_<suf>(blk1, blk2, ...).
 * Each generated function zeroes mm7 and mm6 with pxor, runs sad8_4_<suf>,
 * and returns sum_<suf>().
 *  - sad8_xy2_<suf> asserts h == 8 (av_assert2) and always passes 8 rows.
 *  - sad16_xy2_<suf> runs the helper twice, the second time with both block
 *    pointers advanced by 8 bytes, passing h through unchanged. */
201 #define PIX_SADXY(suf) \
202 static int sad8_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
203 uint8_t *blk1, ptrdiff_t stride, int h) \
205 av_assert2(h == 8); \
207 "pxor %%mm7, %%mm7 \n\t" \
208 "pxor %%mm6, %%mm6 \n\t" \
211 sad8_4_ ## suf(blk1, blk2, stride, 8); \
213 return sum_ ## suf(); \
216 static int sad16_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
217 uint8_t *blk1, ptrdiff_t stride, int h) \
220 "pxor %%mm7, %%mm7 \n\t" \
221 "pxor %%mm6, %%mm6 \n\t" \
224 sad8_4_ ## suf(blk1, blk2, stride, h); \
225 sad8_4_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
227 return sum_ ## suf(); \
240 c->pix_abs[0][3] = sad16_xy2_mmx;
241 c->pix_abs[1][3] = sad8_xy2_mmx;
249 c->nsse[0] = nsse16_mmx;
250 c->nsse[1] = nsse8_mmx;
255 #if !HAVE_ALIGNED_STACK
256 c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
257 c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;
286 #if HAVE_ALIGNED_STACK
287 c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
288 c->hadamard8_diff[1] = ff_hadamard8_diff_sse2;
306 #if HAVE_ALIGNED_STACK
307 c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
308 c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
#define INLINE_MMX(flags)
int ff_vsad16_approx_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_sad8_y2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
#define DECLARE_ASM_CONST(n, t, v)
int ff_sad16_y2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_sse16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
int ff_vsad16_approx_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int flags
AV_CODEC_FLAG_*.
int ff_sum_abs_dctelem_sse2(int16_t *block)
int ff_sad16_y2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_sad16_approx_xy2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_sad8_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
int ff_vsad_intra16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_sum_abs_dctelem_ssse3(int16_t *block)
int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that some (invalid) inputs can trigger overflows (undefined behavior); in these cases, often the output of the computation does not matter ... c
int ff_vsad_intra16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_hf_noise16_mmx(uint8_t *pix1, ptrdiff_t stride, int h)
int ff_sad16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
#define AV_CPU_FLAG_SSE2SLOW
SSE2 supported, but usually not faster.
int ff_sad16_x2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_sad8_approx_xy2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
#define EXTERNAL_SSE2(flags)
int ff_vsad_intra8_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_sad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_sad8_x2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
main external API structure.
int ff_vsad8_approx_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
#define AV_CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
#define hadamard_func(cpu)
int ff_hf_noise8_mmx(uint8_t *pix1, ptrdiff_t stride, int h)
int ff_sad16_x2_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_sad16_approx_xy2_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
The exact code depends on how similar the blocks are and how related they are to the block
#define EXTERNAL_SSSE3(flags)
#define EXTERNAL_MMX(flags)
#define EXTERNAL_MMXEXT(flags)
int ff_sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)