#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/mpegvideo.h"
#include "libavcodec/mathops.h"
#include "dsputil_mmx.h"
#include "dsputil_mmx_qns_template.c"
Go to the source code of this file.
Defines | |
#define | SUM(in0, in1, out0, out1) |
#define | SUM(in0, in1, out0, out1) |
#define | SUM(in0, in1, out0, out1) |
#define | SUM(in0, in1, out0, out1) |
#define | DIFF_PIXELS_1(m, a, t, p1, p2) |
#define | DIFF_PIXELS_8(m0, m1, mm, p1, p2, stride, temp) |
#define | DIFF_PIXELS_4x8(p1, p2, stride, temp) DIFF_PIXELS_8(d, q, %%mm, p1, p2, stride, temp) |
#define | DIFF_PIXELS_8x8(p1, p2, stride, temp) DIFF_PIXELS_8(q, dqa, %%xmm, p1, p2, stride, temp) |
#define | LBUTTERFLY2(a1, b1, a2, b2) |
#define | HADAMARD8(m0, m1, m2, m3, m4, m5, m6, m7) |
#define | HADAMARD48 HADAMARD8(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm6, %%mm7) |
#define | MMABS_MMX(a, z) |
#define | MMABS_MMX2(a, z) |
#define | MMABS_SSSE3(a, z) "pabsw " #a ", " #a " \n\t" |
#define | MMABS_SUM(a, z, sum) |
#define | MMABS_SUM_8x8_NOSPILL |
#define | MMABS_SUM_8x8_SSE2 |
#define | HSUM_MMX(a, t, dst) |
#define | HSUM_MMX2(a, t, dst) |
#define | HSUM_SSE2(a, t, dst) |
#define | HADAMARD8_DIFF_MMX(cpu) |
#define | HADAMARD8_DIFF_SSE2(cpu) |
#define | MMABS(a, z) MMABS_MMX(a,z) |
#define | HSUM(a, t, dst) HSUM_MMX(a,t,dst) |
#define | MMABS(a, z) MMABS_MMX2(a,z) |
#define | MMABS_SUM_8x8 MMABS_SUM_8x8_SSE2 |
#define | HSUM(a, t, dst) HSUM_MMX2(a,t,dst) |
#define | DCT_SAD4(m, mm, o) |
#define | DCT_SAD_MMX |
#define | DCT_SAD_SSE2 |
#define | DCT_SAD_FUNC(cpu) |
#define | DCT_SAD DCT_SAD_MMX |
#define | HSUM(a, t, dst) HSUM_MMX(a,t,dst) |
#define | MMABS(a, z) MMABS_MMX(a,z) |
#define | HSUM(a, t, dst) HSUM_MMX2(a,t,dst) |
#define | MMABS(a, z) MMABS_MMX2(a,z) |
#define | DCT_SAD DCT_SAD_SSE2 |
#define | HSUM(a, t, dst) HSUM_SSE2(a,t,dst) |
#define | PHADDD(a, t) |
#define | PMULHRW(x, y, s, o) |
#define | DEF(x) x ## _mmx |
#define | SET_RND MOVQ_WONE |
#define | SCALE_OFFSET 1 |
#define | DEF(x) x ## _3dnow |
#define | SET_RND(x) |
#define | SCALE_OFFSET 0 |
#define | PMULHRW(x, y, s, o) |
Functions | |
static void | get_pixels_mmx (DCTELEM *block, const uint8_t *pixels, int line_size) |
static void | get_pixels_sse2 (DCTELEM *block, const uint8_t *pixels, int line_size) |
static void | diff_pixels_mmx (DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride) |
static int | pix_sum16_mmx (uint8_t *pix, int line_size) |
static int | pix_norm1_mmx (uint8_t *pix, int line_size) |
static int | sse8_mmx (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
static int | sse16_mmx (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
static int | sse16_sse2 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
static int | hf_noise8_mmx (uint8_t *pix1, int line_size, int h) |
static int | hf_noise16_mmx (uint8_t *pix1, int line_size, int h) |
static int | nsse16_mmx (void *p, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
static int | nsse8_mmx (void *p, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
static int | vsad_intra16_mmx (void *v, uint8_t *pix, uint8_t *dummy, int line_size, int h) |
static int | vsad_intra16_mmx2 (void *v, uint8_t *pix, uint8_t *dummy, int line_size, int h) |
static int | vsad16_mmx (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
static int | vsad16_mmx2 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
static void | diff_bytes_mmx (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w) |
static void | sub_hfyu_median_prediction_mmx2 (uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top) |
static int | ssd_int8_vs_int16_mmx (const int8_t *pix1, const int16_t *pix2, int size) |
void | dsputilenc_init_mmx (DSPContext *c, AVCodecContext *avctx) |
#define DCT_SAD DCT_SAD_SSE2 |
Definition at line 1244 of file dsputilenc_mmx.c.
#define DCT_SAD DCT_SAD_MMX |
Definition at line 1244 of file dsputilenc_mmx.c.
#define DCT_SAD4(m, mm, o)
Value:
    "mov"#m" "#o"+ 0(%1), "#mm"2 \n\t"\
    "mov"#m" "#o"+16(%1), "#mm"3 \n\t"\
    "mov"#m" "#o"+32(%1), "#mm"4 \n\t"\
    "mov"#m" "#o"+48(%1), "#mm"5 \n\t"\
    MMABS_SUM(mm##2, mm##6, mm##0)\
    MMABS_SUM(mm##3, mm##7, mm##1)\
    MMABS_SUM(mm##4, mm##6, mm##0)\
    MMABS_SUM(mm##5, mm##7, mm##1)\
Definition at line 1192 of file dsputilenc_mmx.c.
#define DCT_SAD_FUNC(cpu)
Value:
    static int sum_abs_dctelem_##cpu(DCTELEM *block){\
        int sum;\
        __asm__ volatile(\
            DCT_SAD\
            :"=r"(sum)\
            :"r"(block)\
        );\
        return sum&0xFFFF;\
    }
Definition at line 1220 of file dsputilenc_mmx.c.
#define DCT_SAD_MMX |
#define DCT_SAD_SSE2
Value:
    "pxor %%xmm0, %%xmm0 \n\t"\
    "pxor %%xmm1, %%xmm1 \n\t"\
    DCT_SAD4(dqa, %%xmm, 0)\
    DCT_SAD4(dqa, %%xmm, 64)\
    "paddusw %%xmm1, %%xmm0 \n\t"\
    HSUM(%%xmm0, %%xmm1, %0)
Definition at line 1212 of file dsputilenc_mmx.c.
#define DEF(x) x ## _3dnow
Definition at line 1315 of file dsputilenc_mmx.c.
#define DEF(x) x ## _mmx
Definition at line 1315 of file dsputilenc_mmx.c.
#define DIFF_PIXELS_1(m, a, t, p1, p2)
Value:
    "mov"#m" "#p1", "#a" \n\t"\
    "mov"#m" "#p2", "#t" \n\t"\
    "punpcklbw "#a", "#t" \n\t"\
    "punpcklbw "#a", "#a" \n\t"\
    "psubw "#t", "#a" \n\t"\
Definition at line 941 of file dsputilenc_mmx.c.
Definition at line 969 of file dsputilenc_mmx.c.
#define DIFF_PIXELS_8(m0, m1, mm, p1, p2, stride, temp)
Value:
    {\
        uint8_t *p1b=p1, *p2b=p2;\
        __asm__ volatile(\
            DIFF_PIXELS_1(m0, mm##0, mm##7, (%1), (%2))\
            DIFF_PIXELS_1(m0, mm##1, mm##7, (%1,%3), (%2,%3))\
            DIFF_PIXELS_1(m0, mm##2, mm##7, (%1,%3,2), (%2,%3,2))\
            "add %4, %1 \n\t"\
            "add %4, %2 \n\t"\
            DIFF_PIXELS_1(m0, mm##3, mm##7, (%1), (%2))\
            DIFF_PIXELS_1(m0, mm##4, mm##7, (%1,%3), (%2,%3))\
            DIFF_PIXELS_1(m0, mm##5, mm##7, (%1,%3,2), (%2,%3,2))\
            DIFF_PIXELS_1(m0, mm##6, mm##7, (%1,%4), (%2,%4))\
            "mov"#m1" "#mm"0, %0 \n\t"\
            DIFF_PIXELS_1(m0, mm##7, mm##0, (%1,%3,4), (%2,%3,4))\
            "mov"#m1" %0, "#mm"0 \n\t"\
            : "+m"(temp), "+r"(p1b), "+r"(p2b)\
            : "r"((x86_reg)stride), "r"((x86_reg)stride*3)\
        );\
    }
Definition at line 948 of file dsputilenc_mmx.c.
Definition at line 970 of file dsputilenc_mmx.c.
Definition at line 988 of file dsputilenc_mmx.c.
#define HADAMARD8(m0, m1, m2, m3, m4, m5, m6, m7)
Value:
    LBUTTERFLY2(m0, m1, m2, m3)\
    LBUTTERFLY2(m4, m5, m6, m7)\
    LBUTTERFLY2(m0, m2, m1, m3)\
    LBUTTERFLY2(m4, m6, m5, m7)\
    LBUTTERFLY2(m0, m4, m1, m5)\
    LBUTTERFLY2(m2, m6, m3, m7)\
Definition at line 980 of file dsputilenc_mmx.c.
#define HADAMARD8_DIFF_MMX(cpu)
Definition at line 1064 of file dsputilenc_mmx.c.
#define HADAMARD8_DIFF_SSE2(cpu)
Value:
    static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\
        DECLARE_ALIGNED(16, uint64_t, temp)[4];\
        int sum;\
    \
        assert(h==8);\
    \
        DIFF_PIXELS_8x8(src1, src2, stride, temp[0]);\
    \
        __asm__ volatile(\
            HADAMARD8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)\
            TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%1))\
            HADAMARD8(%%xmm0, %%xmm5, %%xmm7, %%xmm3, %%xmm6, %%xmm4, %%xmm2, %%xmm1)\
            MMABS_SUM_8x8\
            HSUM_SSE2(%%xmm0, %%xmm1, %0)\
            : "=r" (sum)\
            : "r"(temp)\
        );\
        return sum&0xFFFF;\
    }\
    WRAPPER8_16_SQ(hadamard8_diff_##cpu, hadamard8_diff16_##cpu)
Definition at line 1147 of file dsputilenc_mmx.c.
Definition at line 1245 of file dsputilenc_mmx.c.
Definition at line 1245 of file dsputilenc_mmx.c.
Definition at line 1245 of file dsputilenc_mmx.c.
Definition at line 1245 of file dsputilenc_mmx.c.
Definition at line 1245 of file dsputilenc_mmx.c.
#define HSUM_MMX(a, t, dst)
#define HSUM_MMX2(a, t, dst)
#define HSUM_SSE2(a, t, dst)
#define LBUTTERFLY2(a1, b1, a2, b2)
#define MMABS(a, z) MMABS_MMX2(a,z)
Definition at line 1239 of file dsputilenc_mmx.c.
#define MMABS(a, z) MMABS_MMX(a,z)
Definition at line 1239 of file dsputilenc_mmx.c.
#define MMABS(a, z) MMABS_MMX2(a,z)
Definition at line 1239 of file dsputilenc_mmx.c.
#define MMABS(a, z) MMABS_MMX(a,z)
Definition at line 1239 of file dsputilenc_mmx.c.
#define MMABS_MMX(a, z)
Value:
    "pxor " #z ", " #z " \n\t"\
    "pcmpgtw " #a ", " #z " \n\t"\
    "pxor " #z ", " #a " \n\t"\
    "psubw " #z ", " #a " \n\t"
Definition at line 990 of file dsputilenc_mmx.c.
#define MMABS_MMX2(a, z)
Value:
    "pxor " #z ", " #z " \n\t"\
    "psubw " #a ", " #z " \n\t"\
    "pmaxsw " #z ", " #a " \n\t"
Definition at line 996 of file dsputilenc_mmx.c.
#define MMABS_SSSE3(a, z) "pabsw " #a ", " #a " \n\t"
Definition at line 1001 of file dsputilenc_mmx.c.
#define MMABS_SUM(a, z, sum)
#define MMABS_SUM_8x8 MMABS_SUM_8x8_SSE2 |
Definition at line 1176 of file dsputilenc_mmx.c.
#define MMABS_SUM_8x8_NOSPILL
Value:
    MMABS(%%xmm0, %%xmm8)\
    MMABS(%%xmm1, %%xmm9)\
    MMABS_SUM(%%xmm2, %%xmm8, %%xmm0)\
    MMABS_SUM(%%xmm3, %%xmm9, %%xmm1)\
    MMABS_SUM(%%xmm4, %%xmm8, %%xmm0)\
    MMABS_SUM(%%xmm5, %%xmm9, %%xmm1)\
    MMABS_SUM(%%xmm6, %%xmm8, %%xmm0)\
    MMABS_SUM(%%xmm7, %%xmm9, %%xmm1)\
    "paddusw %%xmm1, %%xmm0 \n\t"
Definition at line 1008 of file dsputilenc_mmx.c.
#define MMABS_SUM_8x8_SSE2
Value:
    "movdqa %%xmm7, (%1) \n\t"\
    MMABS(%%xmm0, %%xmm7)\
    MMABS(%%xmm1, %%xmm7)\
    MMABS_SUM(%%xmm2, %%xmm7, %%xmm0)\
    MMABS_SUM(%%xmm3, %%xmm7, %%xmm1)\
    MMABS_SUM(%%xmm4, %%xmm7, %%xmm0)\
    MMABS_SUM(%%xmm5, %%xmm7, %%xmm1)\
    MMABS_SUM(%%xmm6, %%xmm7, %%xmm0)\
    "movdqa (%1), %%xmm2 \n\t"\
    MMABS_SUM(%%xmm2, %%xmm7, %%xmm1)\
    "paddusw %%xmm1, %%xmm0 \n\t"
Definition at line 1022 of file dsputilenc_mmx.c.
#define PHADDD(a, t)
#define PMULHRW(x, y, s, o)
Value:
    "pmulhrw " #s ", "#x " \n\t"\
    "pmulhrw " #s ", "#y " \n\t"
Definition at line 1318 of file dsputilenc_mmx.c.
#define PMULHRW(x, y, s, o)
Value:
    "pmulhw " #s ", "#x " \n\t"\
    "pmulhw " #s ", "#y " \n\t"\
    "paddw " #o ", "#x " \n\t"\
    "paddw " #o ", "#y " \n\t"\
    "psraw $1, "#x " \n\t"\
    "psraw $1, "#y " \n\t"
Definition at line 1318 of file dsputilenc_mmx.c.
Referenced by add_8x8basis_TMPL(), and try_8x8basis_TMPL().
#define SCALE_OFFSET 0 |
Definition at line 1317 of file dsputilenc_mmx.c.
#define SCALE_OFFSET 1 |
Definition at line 1317 of file dsputilenc_mmx.c.
Referenced by add_8x8basis_TMPL(), and try_8x8basis_TMPL().
#define SET_RND(x)
Definition at line 1316 of file dsputilenc_mmx.c.
#define SET_RND MOVQ_WONE |
Definition at line 1316 of file dsputilenc_mmx.c.
#define SUM(in0, in1, out0, out1)
Value:
    "movq (%0)," #out0 "\n"\
    "movq (%1),%%mm2\n"\
    "movq 8(%0)," #out1 "\n"\
    "movq 8(%1),%%mm3\n"\
    "add %3,%0\n"\
    "add %3,%1\n"\
    "psubb %%mm2, " #out0 "\n"\
    "psubb %%mm3, " #out1 "\n"\
    "pxor %%mm7, " #out0 "\n"\
    "pxor %%mm7, " #out1 "\n"\
    "psadbw " #out0 ", " #in0 "\n"\
    "psadbw " #out1 ", " #in1 "\n"\
    "paddw " #in1 ", " #in0 "\n"\
    "paddw " #in0 ", %%mm6\n"
#define SUM(in0, in1, out0, out1)
Value:
    "movq (%0),%%mm2\n"\
    "movq (%1)," #out0 "\n"\
    "movq 8(%0),%%mm3\n"\
    "movq 8(%1)," #out1 "\n"\
    "add %3,%0\n"\
    "add %3,%1\n"\
    "psubb " #out0 ", %%mm2\n"\
    "psubb " #out1 ", %%mm3\n"\
    "pxor %%mm7, %%mm2\n"\
    "pxor %%mm7, %%mm3\n"\
    "movq %%mm2, " #out0 "\n"\
    "movq %%mm3, " #out1 "\n"\
    "psubusb " #in0 ", %%mm2\n"\
    "psubusb " #in1 ", %%mm3\n"\
    "psubusb " #out0 ", " #in0 "\n"\
    "psubusb " #out1 ", " #in1 "\n"\
    "por %%mm2, " #in0 "\n"\
    "por %%mm3, " #in1 "\n"\
    "movq " #in0 ", %%mm2\n"\
    "movq " #in1 ", %%mm3\n"\
    "punpcklbw %%mm7, " #in0 "\n"\
    "punpcklbw %%mm7, " #in1 "\n"\
    "punpckhbw %%mm7, %%mm2\n"\
    "punpckhbw %%mm7, %%mm3\n"\
    "paddw " #in1 ", " #in0 "\n"\
    "paddw %%mm3, %%mm2\n"\
    "paddw %%mm2, " #in0 "\n"\
    "paddw " #in0 ", %%mm6\n"
#define SUM(in0, in1, out0, out1)
Value:
    "movq (%0), " #out0 "\n"\
    "movq 8(%0), " #out1 "\n"\
    "add %2,%0\n"\
    "psadbw " #out0 ", " #in0 "\n"\
    "psadbw " #out1 ", " #in1 "\n"\
    "paddw " #in1 ", " #in0 "\n"\
    "paddw " #in0 ", %%mm6\n"
#define SUM(in0, in1, out0, out1)
Value:
    "movq (%0), %%mm2\n"\
    "movq 8(%0), %%mm3\n"\
    "add %2,%0\n"\
    "movq %%mm2, " #out0 "\n"\
    "movq %%mm3, " #out1 "\n"\
    "psubusb " #in0 ", %%mm2\n"\
    "psubusb " #in1 ", %%mm3\n"\
    "psubusb " #out0 ", " #in0 "\n"\
    "psubusb " #out1 ", " #in1 "\n"\
    "por %%mm2, " #in0 "\n"\
    "por %%mm3, " #in1 "\n"\
    "movq " #in0 ", %%mm2\n"\
    "movq " #in1 ", %%mm3\n"\
    "punpcklbw %%mm7, " #in0 "\n"\
    "punpcklbw %%mm7, " #in1 "\n"\
    "punpckhbw %%mm7, %%mm2\n"\
    "punpckhbw %%mm7, %%mm3\n"\
    "paddw " #in1 ", " #in0 "\n"\
    "paddw %%mm3, %%mm2\n"\
    "paddw %%mm2, " #in0 "\n"\
    "paddw " #in0 ", %%mm6\n"
Referenced by vsad16_mmx(), vsad16_mmx2(), vsad_intra16_mmx(), and vsad_intra16_mmx2().
static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w) [static]
static void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride) [inline, static]
void dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) [static]
static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size) [static]
static int hf_noise16_mmx(uint8_t *pix1, int line_size, int h) [static]
static int hf_noise8_mmx(uint8_t *pix1, int line_size, int h) [static]
static int nsse16_mmx(void *p, uint8_t *pix1, uint8_t *pix2, int line_size, int h) [static]
static int nsse8_mmx(void *p, uint8_t *pix1, uint8_t *pix2, int line_size, int h) [static]
static int pix_norm1_mmx(uint8_t *pix, int line_size) [static]
static int pix_sum16_mmx(uint8_t *pix, int line_size) [static]
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int size) [static]
static int sse16_mmx(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) [static]
Definition at line 265 of file dsputilenc_mmx.c.
Referenced by dsputilenc_init_mmx(), and nsse16_mmx().
static int sse16_sse2(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) [static]
static int sse8_mmx(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) [static]
Definition at line 204 of file dsputilenc_mmx.c.
Referenced by dsputilenc_init_mmx(), and nsse8_mmx().
static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top) [static]
static int vsad16_mmx(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) [static]
static int vsad16_mmx2(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) [static]
static int vsad_intra16_mmx(void *v, uint8_t *pix, uint8_t *dummy, int line_size, int h) [static]
static int vsad_intra16_mmx2(void *v, uint8_t *pix, uint8_t *dummy, int line_size, int h) [static]