33 offset <<= log2_denom;
36 offset += 1 << (log2_denom - 1);
46 "pshufh $f6, $f6, $f20 \r\n"
47 "pshufh $f8, $f8, $f20 \r\n"
48 "punpckhbh $f14, $f2, $f20 \r\n"
49 "punpckhbh $f16, $f4, $f20 \r\n"
50 "punpcklbh $f2, $f2, $f20 \r\n"
51 "punpcklbh $f4, $f4, $f20 \r\n"
52 "pmullh $f14, $f14, $f6 \r\n"
53 "pmullh $f16, $f16, $f6 \r\n"
54 "pmullh $f2, $f2, $f6 \r\n"
55 "pmullh $f4, $f4, $f6 \r\n"
56 "paddsh $f14, $f14, $f8 \r\n"
57 "paddsh $f16, $f16, $f8 \r\n"
58 "paddsh $f2, $f2, $f8 \r\n"
59 "paddsh $f4, $f4, $f8 \r\n"
60 "psrah $f14, $f14, $f10 \r\n"
61 "psrah $f16, $f16, $f10 \r\n"
62 "psrah $f2, $f2, $f10 \r\n"
63 "psrah $f4, $f4, $f10 \r\n"
64 "packushb $f2, $f2, $f14 \r\n"
65 "packushb $f4, $f4, $f16 \r\n"
68 :
"=m"(*block),
"=m"(*(block + 8))
69 :
"r"(
weight),
"r"(offset),
"r"(log2_denom)
75 int stride,
int height,
int log2_denom,
int weightd,
int weights,
80 offset = ((offset + 1) | 1) << log2_denom;
91 "pshufh $f6, $f6, $f20 \r\n"
92 "pshufh $f8, $f8, $f20 \r\n"
93 "pshufh $f10, $f10, $f20 \r\n"
94 "punpckhbh $f14, $f2, $f20 \r\n"
95 "punpckhbh $f16, $f4, $f20 \r\n"
96 "punpcklbh $f2, $f2, $f20 \r\n"
97 "punpcklbh $f4, $f4, $f20 \r\n"
98 "pmullh $f14, $f14, $f6 \r\n"
99 "pmullh $f16, $f16, $f8 \r\n"
100 "pmullh $f2, $f2, $f6 \r\n"
101 "pmullh $f4, $f4, $f8 \r\n"
102 "paddsh $f14, $f14, $f10 \r\n"
103 "paddsh $f2, $f2, $f10 \r\n"
104 "paddsh $f14, $f14, $f16 \r\n"
105 "paddsh $f2, $f2, $f4 \r\n"
106 "psrah $f14, $f14, $f12 \r\n"
107 "psrah $f2, $f2, $f12 \r\n"
108 "packushb $f2, $f2, $f14 \r\n"
112 "punpckhbh $f14, $f2, $f20 \r\n"
113 "punpckhbh $f16, $f4, $f20 \r\n"
114 "punpcklbh $f2, $f2, $f20 \r\n"
115 "punpcklbh $f4, $f4, $f20 \r\n"
116 "pmullh $f14, $f14, $f6 \r\n"
117 "pmullh $f16, $f16, $f8 \r\n"
118 "pmullh $f2, $f2, $f6 \r\n"
119 "pmullh $f4, $f4, $f8 \r\n"
120 "paddsh $f14, $f14, $f10 \r\n"
121 "paddsh $f2, $f2, $f10 \r\n"
122 "paddsh $f14, $f14, $f16 \r\n"
123 "paddsh $f2, $f2, $f4 \r\n"
124 "psrah $f14, $f14, $f12 \r\n"
125 "psrah $f2, $f2, $f12 \r\n"
126 "packushb $f2, $f2, $f14 \r\n"
128 :
"=m"(*dst),
"=m"(*(dst+8))
129 :
"m"(*src),
"m"(*dst),
"m"(*(src+8)),
"m"(*(dst+8)),
130 "r"(weights),
"r"(weightd),
"r"(
offset),
"r"(log2_denom+1)
140 offset <<= log2_denom;
143 offset += 1 << (log2_denom - 1);
151 "dmtc1 $0, $f20 \r\n"
152 "pshufh $f6, $f6, $f20 \r\n"
153 "pshufh $f8, $f8, $f20 \r\n"
154 "punpckhbh $f14, $f2, $f20 \r\n"
155 "punpcklbh $f2, $f2, $f20 \r\n"
156 "pmullh $f14, $f14, $f6 \r\n"
157 "pmullh $f2, $f2, $f6 \r\n"
158 "paddsh $f14, $f14, $f8 \r\n"
159 "paddsh $f2, $f2, $f8 \r\n"
160 "psrah $f14, $f14, $f10 \r\n"
161 "psrah $f2, $f2, $f10 \r\n"
162 "packushb $f2, $f2, $f14 \r\n"
165 :
"r"(
weight),
"r"(offset),
"r"(log2_denom)
171 int stride,
int height,
int log2_denom,
int weightd,
int weights,
176 offset = ((offset + 1) | 1) << log2_denom;
182 "dmtc1 $0, $f20 \r\n"
187 "pshufh $f6, $f6, $f20 \r\n"
188 "pshufh $f8, $f8, $f20 \r\n"
189 "pshufh $f10, $f10, $f20 \r\n"
190 "punpckhbh $f14, $f2, $f20 \r\n"
191 "punpckhbh $f16, $f4, $f20 \r\n"
192 "punpcklbh $f2, $f2, $f20 \r\n"
193 "punpcklbh $f4, $f4, $f20 \r\n"
194 "pmullh $f14, $f14, $f6 \r\n"
195 "pmullh $f16, $f16, $f8 \r\n"
196 "pmullh $f2, $f2, $f6 \r\n"
197 "pmullh $f4, $f4, $f8 \r\n"
198 "paddsh $f14, $f14, $f10 \r\n"
199 "paddsh $f2, $f2, $f10 \r\n"
200 "paddsh $f14, $f14, $f16 \r\n"
201 "paddsh $f2, $f2, $f4 \r\n"
202 "psrah $f14, $f14, $f12 \r\n"
203 "psrah $f2, $f2, $f12 \r\n"
204 "packushb $f2, $f2, $f14 \r\n"
207 :
"m"(*src),
"m"(*dst),
"r"(weights),
208 "r"(weightd),
"r"(
offset),
"r"(log2_denom+1)
218 offset <<= log2_denom;
221 offset += 1 << (log2_denom - 1);
229 "dmtc1 $0, $f20 \r\n"
230 "pshufh $f6, $f6, $f20 \r\n"
231 "pshufh $f8, $f8, $f20 \r\n"
232 "punpcklbh $f2, $f2, $f20 \r\n"
233 "pmullh $f2, $f2, $f6 \r\n"
234 "paddsh $f2, $f2, $f8 \r\n"
235 "psrah $f2, $f2, $f10 \r\n"
236 "packushb $f2, $f2, $f20 \r\n"
239 :
"r"(
weight),
"r"(offset),
"r"(log2_denom)
245 int stride,
int height,
int log2_denom,
int weightd,
int weights,
250 offset = ((offset + 1) | 1) << log2_denom;
256 "dmtc1 $0, $f20 \r\n"
261 "pshufh $f6, $f6, $f20 \r\n"
262 "pshufh $f8, $f8, $f20 \r\n"
263 "pshufh $f10, $f10, $f20 \r\n"
264 "punpcklbh $f2, $f2, $f20 \r\n"
265 "punpcklbh $f4, $f4, $f20 \r\n"
266 "pmullh $f2, $f2, $f6 \r\n"
267 "pmullh $f4, $f4, $f8 \r\n"
268 "paddsh $f2, $f2, $f10 \r\n"
269 "paddsh $f2, $f2, $f4 \r\n"
270 "psrah $f2, $f2, $f12 \r\n"
271 "packushb $f2, $f2, $f20 \r\n"
274 :
"m"(*src),
"m"(*dst),
"r"(weights),
275 "r"(weightd),
"r"(
offset),
"r"(log2_denom+1)
void ff_h264_weight_pixels8_8_mmi(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset)
void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, int weightd, int weights, int offset)
void ff_h264_weight_pixels4_8_mmi(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset)
void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, int weightd, int weights, int offset)
static const uint8_t offset[127][2]
void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, int weightd, int weights, int offset)
BYTE int const BYTE int int int height
static int weight(int i, int blen, int offset)
GLint GLenum GLboolean GLsizei stride
void ff_h264_weight_pixels16_8_mmi(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset)