30 int stride,
int h,
int ox,
int oy,
31 int dxx,
int dxy,
int dyx,
int dyy,
35 const int ix = ox >> (16 +
shift);
36 const int iy = oy >> (16 +
shift);
37 const int oxs = ox >> 4;
38 const int oys = oy >> 4;
39 const int dxxs = dxx >> 4;
40 const int dxys = dxy >> 4;
41 const int dyxs = dyx >> 4;
42 const int dyys = dyy >> 4;
43 const uint16_t r4[4] = {
r,
r,
r, r };
44 const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys };
45 const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys };
47 #define MAX_STRIDE 4096U
49 uint8_t edge_buf[(MAX_H + 1) * MAX_STRIDE];
52 const int dxw = (dxx - (1 << (16 +
shift))) * (w - 1);
53 const int dyh = (dyy - (1 << (16 +
shift))) * (
h - 1);
54 const int dxh = dxy * (
h - 1);
55 const int dyw = dyx * (w - 1);
56 int need_emu = (unsigned) ix >=
width - w ||
60 ((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) |
61 (oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 +
shift) ||
63 (dxx | dxy | dyx | dyy) & 15 ||
64 (need_emu && (h > MAX_H ||
stride > MAX_STRIDE))) {
66 ff_gmc_c(dst,
src,
stride, h, ox, oy, dxx, dxy, dyx, dyy,
73 ff_emulated_edge_mc_8(edge_buf,
src, stride, stride, w + 1, h + 1, ix, iy,
width,
height);
79 "pxor %%mm7, %%mm7 \n\t"
80 "punpcklwd %%mm6, %%mm6 \n\t"
81 "punpcklwd %%mm6, %%mm6 \n\t"
84 for (x = 0; x < w; x += 4) {
85 uint16_t dx4[4] = { oxs - dxys + dxxs * (x + 0),
86 oxs - dxys + dxxs * (x + 1),
87 oxs - dxys + dxxs * (x + 2),
88 oxs - dxys + dxxs * (x + 3) };
89 uint16_t dy4[4] = { oys - dyys + dyxs * (x + 0),
90 oys - dyys + dyxs * (x + 1),
91 oys - dyys + dyxs * (x + 2),
92 oys - dyys + dyxs * (x + 3) };
94 for (y = 0; y <
h; y++) {
98 "paddw %2, %%mm4 \n\t"
99 "paddw %3, %%mm5 \n\t"
100 "movq %%mm4, %0 \n\t"
101 "movq %%mm5, %1 \n\t"
102 "psrlw $12, %%mm4 \n\t"
103 "psrlw $12, %%mm5 \n\t"
104 :
"+m" (*dx4),
"+m" (*dy4)
105 :
"m" (*dxy4),
"m" (*dyy4));
108 "movq %%mm6, %%mm2 \n\t"
109 "movq %%mm6, %%mm1 \n\t"
110 "psubw %%mm4, %%mm2 \n\t"
111 "psubw %%mm5, %%mm1 \n\t"
112 "movq %%mm2, %%mm0 \n\t"
113 "movq %%mm4, %%mm3 \n\t"
114 "pmullw %%mm1, %%mm0 \n\t"
115 "pmullw %%mm5, %%mm3 \n\t"
116 "pmullw %%mm5, %%mm2 \n\t"
117 "pmullw %%mm4, %%mm1 \n\t"
119 "movd %4, %%mm5 \n\t"
120 "movd %3, %%mm4 \n\t"
121 "punpcklbw %%mm7, %%mm5 \n\t"
122 "punpcklbw %%mm7, %%mm4 \n\t"
123 "pmullw %%mm5, %%mm3 \n\t"
124 "pmullw %%mm4, %%mm2 \n\t"
126 "movd %2, %%mm5 \n\t"
127 "movd %1, %%mm4 \n\t"
128 "punpcklbw %%mm7, %%mm5 \n\t"
129 "punpcklbw %%mm7, %%mm4 \n\t"
130 "pmullw %%mm5, %%mm1 \n\t"
131 "pmullw %%mm4, %%mm0 \n\t"
132 "paddw %5, %%mm1 \n\t"
133 "paddw %%mm3, %%mm2 \n\t"
134 "paddw %%mm1, %%mm0 \n\t"
135 "paddw %%mm2, %%mm0 \n\t"
137 "psrlw %6, %%mm0 \n\t"
138 "packuswb %%mm0, %%mm0 \n\t"
139 "movd %%mm0, %0 \n\t"
141 :
"=m" (dst[x + y *
stride])
142 :
"m" (
src[0]),
"m" (
src[1]),
143 "m" (
src[stride]),
"m" (
src[stride + 1]),
static int shift(int a, int b)
Macro definitions for various function/variable attributes.
void(* gmc)(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
Global motion compensation.
#define INLINE_MMX(flags)
av_cold void ff_mpegvideodsp_init_x86(MpegVideoDSPContext *c)
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
BYTE int const BYTE int int int height
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
GLint GLenum GLboolean GLsizei stride
static const int shift2[6]
Core video DSP helper functions.