38 register vector
unsigned char pixelsv1;
39 register vector
unsigned char pixelsv1B;
40 register vector
unsigned char pixelsv1C;
41 register vector
unsigned char pixelsv1D;
44 register ptrdiff_t line_size_2 = line_size << 1;
45 register ptrdiff_t line_size_3 = line_size + line_size_2;
46 register ptrdiff_t line_size_4 = line_size << 2;
53 for (
i = 0;
i <
h;
i += 4) {
54 pixelsv1 = unaligned_load( 0, pixels);
55 pixelsv1B = unaligned_load(line_size, pixels);
56 pixelsv1C = unaligned_load(line_size_2, pixels);
57 pixelsv1D = unaligned_load(line_size_3, pixels);
58 VEC_ST(pixelsv1, 0, (
unsigned char*)
block);
59 VEC_ST(pixelsv1B, line_size, (
unsigned char*)
block);
60 VEC_ST(pixelsv1C, line_size_2, (
unsigned char*)
block);
61 VEC_ST(pixelsv1D, line_size_3, (
unsigned char*)
block);
/*
 * op_avg(a, b): assigns to a the value
 *     (a | b) - (((a ^ b) & 0xFEFEFEFE) >> 1).
 * For operands holding four packed bytes this works out to a per-byte
 * average rounded up; the 0xFE mask stops the shifted bits of one byte from
 * leaking into its neighbour.
 * NOTE(review): the macro is not referenced anywhere in the visible part of
 * this file - confirm whether it is still needed.
 */
68 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
/*
 * NOTE(review): the header of the function that owns the statements below is
 * not visible in this listing, and neither are the declaration of i or any
 * pointer-advance statements.
 */
71 register vector
unsigned char pixelsv, blockv;
/* h iterations; each one handles a single 16-byte vector of block. */
74 for (
i = 0;
i <
h;
i++) {
/* Current destination contents (vec_ld ignores the low four address bits). */
75 blockv = vec_ld(0,
block);
/* Source vector; VEC_LD is a macro defined outside this file. */
76 pixelsv = VEC_LD( 0, pixels);
/* Element-wise average of destination and source, rounded up. */
77 blockv = vec_avg(blockv,pixelsv);
/* Write the averaged vector back over the destination. */
78 vec_st(blockv, 0, (
unsigned char*)
block);
/*
 * avg_pixels8_altivec: on every one of h loop iterations, loads one vector
 * from block and one from pixels, merges them with vec_perm, averages the
 * result with the block vector (vec_avg) and stores it back to block.
 *
 * block     - destination; read with vec_ld and written with vec_st
 * pixels    - source; read through the VEC_LD macro
 * line_size - not referenced in the visible lines
 *             (NOTE(review): presumably the per-row stride applied on lines
 *             missing from this listing - confirm)
 * h         - loop iteration count
 */
85 static void avg_pixels8_altivec(uint8_t *
block,
const uint8_t * pixels, ptrdiff_t line_size,
int h)
87 register vector
unsigned char pixelsv, blockv;
90 for (
i = 0;
i <
h;
i++) {
/*
 * Nonzero when any of the low four address bits of block is set, i.e. block
 * is not 16-byte aligned.
 * NOTE(review): unsigned long is narrower than a pointer on some ABIs;
 * uintptr_t would be the portable type for this cast.
 */
93 int rightside = ((
unsigned long)
block & 0x0000000F);
/* vec_ld ignores the low four address bits, so this reads the enclosing
 * 16-byte aligned vector. */
95 blockv = vec_ld(0,
block);
96 pixelsv = VEC_LD( 0, pixels);
/*
 * NOTE(review): the two vec_perm assignments below are presumably the two
 * arms of an if/else on rightside whose control lines are missing from this
 * listing - confirm. Each rebuilds pixelsv from blockv and pixelsv with a
 * vcprm() selector (macro defined elsewhere); presumably one half of the
 * result keeps block's own data so that averaging leaves it unchanged.
 */
99 pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,
s0,
s1));
101 pixelsv = vec_perm(blockv, pixelsv, vcprm(
s0,
s1,2,3));
/* Element-wise average, rounded up, of the block vector and the merged one. */
104 blockv = vec_avg(blockv, pixelsv);
106 vec_st(blockv, 0,
block);
/*
 * put_pixels8_xy2_altivec: for each of h loop iterations, computes
 *     (previous row sum + 2 + current row sum) >> 2
 * where a "row sum" adds the source vector at a given offset to the source
 * vector one byte further on, and writes the packed result into block.
 *
 * block     - destination; read with vec_ld and written with vec_st
 * pixels    - source; read at offsets 0, 1, line_size and line_size + 1
 * line_size - byte offset from one source row to the next
 * h         - loop iteration count
 *
 * NOTE(review): braces, the declaration of i and any pointer-advance
 * statements are not visible in this listing.
 */
114 static void put_pixels8_xy2_altivec(uint8_t *
block,
const uint8_t *pixels, ptrdiff_t line_size,
int h)
117 register vector
unsigned char pixelsv1, pixelsv2, pixelsavg;
118 register vector
unsigned char blockv;
119 register vector
unsigned short pixelssum1, pixelssum2, temp3;
/* All-zero byte vector: merge partner below and filler for vec_packsu. */
120 register const vector
unsigned char vczero = (
const vector
unsigned char)vec_splat_u8(0);
/* Splat of 2: used both as the rounding bias and as the shift count. */
121 register const vector
unsigned short vctwo = (
const vector
unsigned short)vec_splat_u16(2);
/*
 * Prime pixelssum1 with the first row: the vectors at pixels + 0 and
 * pixels + 1 are merged with vczero (VEC_MERGEH is defined elsewhere;
 * presumably this widens eight bytes to 16-bit lanes - confirm), added
 * together and biased by 2.
 */
123 pixelsv1 = VEC_LD(0, pixels);
124 pixelsv2 = VEC_LD(1, pixels);
125 pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
126 pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
128 pixelssum1 = vec_add((vector
unsigned short)pixelsv1,
129 (vector
unsigned short)pixelsv2);
130 pixelssum1 = vec_add(pixelssum1, vctwo);
132 for (
i = 0;
i <
h ;
i++) {
/*
 * Nonzero when block is not 16-byte aligned.
 * NOTE(review): uintptr_t would be the portable type for this cast.
 */
133 int rightside = ((
unsigned long)
block & 0x0000000F);
/* Existing destination vector; part of it is kept by the vec_perm below. */
134 blockv = vec_ld(0,
block);
/* Row sum for the next source row, built the same way as pixelssum1. */
136 pixelsv1 = unaligned_load(line_size, pixels);
137 pixelsv2 = unaligned_load(line_size+1, pixels);
138 pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
139 pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
140 pixelssum2 = vec_add((vector
unsigned short)pixelsv1,
141 (vector
unsigned short)pixelsv2);
/* (biased previous row sum + current row sum) >> 2 */
142 temp3 = vec_add(pixelssum1, pixelssum2);
143 temp3 = vec_sra(temp3, vctwo);
/* The current row sum, re-biased, becomes the previous sum of the next pass. */
144 pixelssum1 = vec_add(pixelssum2, vctwo);
/* Narrow the 16-bit lanes back to bytes with unsigned saturation; the second
 * operand is all zero. */
145 pixelsavg = vec_packsu(temp3, (vector
unsigned short) vczero);
/*
 * NOTE(review): the two vec_perm assignments below are presumably the two
 * arms of an if/else on rightside whose control lines are missing from this
 * listing - confirm. Each combines the old blockv with pixelsavg through a
 * vcprm() selector (macro defined elsewhere).
 */
148 blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1,
s0,
s1));
150 blockv = vec_perm(blockv, pixelsavg, vcprm(
s0,
s1, 2, 3));
153 vec_st(blockv, 0,
block);
/*
 * put_no_rnd_pixels8_xy2_altivec: same data flow as put_pixels8_xy2_altivec,
 * but the bias added to each row sum is 1 (vcone) instead of 2, so each
 * result is
 *     (previous row sum + 1 + current row sum) >> 2.
 *
 * block     - destination; read with vec_ld and written with vec_st
 * pixels    - source; read at offsets 0, 1, line_size and line_size + 1
 * line_size - byte offset from one source row to the next
 * h         - loop iteration count
 *
 * NOTE(review): braces, the declaration of i and any pointer-advance
 * statements are not visible in this listing.
 */
161 static void put_no_rnd_pixels8_xy2_altivec(uint8_t *
block,
const uint8_t *pixels, ptrdiff_t line_size,
int h)
164 register vector
unsigned char pixelsv1, pixelsv2, pixelsavg;
165 register vector
unsigned char blockv;
166 register vector
unsigned short pixelssum1, pixelssum2, temp3;
/* All-zero byte vector: merge partner below and filler for vec_packsu. */
167 register const vector
unsigned char vczero = (
const vector
unsigned char)vec_splat_u8(0);
/* Splat of 1: the bias added to every row sum. */
168 register const vector
unsigned short vcone = (
const vector
unsigned short)vec_splat_u16(1);
/* Splat of 2: used only as the shift count. */
169 register const vector
unsigned short vctwo = (
const vector
unsigned short)vec_splat_u16(2);
/*
 * Prime pixelssum1 with the first row: vectors at pixels + 0 and pixels + 1,
 * merged with vczero (VEC_MERGEH is defined elsewhere; presumably widening
 * eight bytes to 16-bit lanes - confirm), added together and biased by 1.
 */
171 pixelsv1 = VEC_LD(0, pixels);
172 pixelsv2 = VEC_LD(1, pixels);
173 pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
174 pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
175 pixelssum1 = vec_add((vector
unsigned short)pixelsv1,
176 (vector
unsigned short)pixelsv2);
177 pixelssum1 = vec_add(pixelssum1, vcone);
179 for (
i = 0;
i <
h ;
i++) {
/*
 * Nonzero when block is not 16-byte aligned.
 * NOTE(review): uintptr_t would be the portable type for this cast.
 */
180 int rightside = ((
unsigned long)
block & 0x0000000F);
/* Existing destination vector; part of it is kept by the vec_perm below. */
181 blockv = vec_ld(0,
block);
/* Row sum for the next source row, built the same way as pixelssum1. */
183 pixelsv1 = unaligned_load(line_size, pixels);
184 pixelsv2 = unaligned_load(line_size+1, pixels);
185 pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
186 pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
187 pixelssum2 = vec_add((vector
unsigned short)pixelsv1,
188 (vector
unsigned short)pixelsv2);
/* (biased previous row sum + current row sum) >> 2 */
189 temp3 = vec_add(pixelssum1, pixelssum2);
190 temp3 = vec_sra(temp3, vctwo);
/* The current row sum, biased by 1, becomes the previous sum of the next pass. */
191 pixelssum1 = vec_add(pixelssum2, vcone);
/* Narrow to bytes with unsigned saturation; the second operand is all zero. */
192 pixelsavg = vec_packsu(temp3, (vector
unsigned short) vczero);
/*
 * NOTE(review): the two vec_perm assignments below are presumably the two
 * arms of an if/else on rightside whose control lines are missing from this
 * listing - confirm.
 */
195 blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1,
s0,
s1));
197 blockv = vec_perm(blockv, pixelsavg, vcprm(
s0,
s1, 2, 3));
200 vec_st(blockv, 0,
block);
/*
 * put_pixels16_xy2_altivec: full-vector variant of the xy2 filter. Every
 * source vector is split with both VEC_MERGEH and VEC_MERGEL, so two sets of
 * 16-bit row sums are carried (pixelssum1/2 and pixelssum3/4). For each of h
 * loop iterations the result is
 *     (previous row sum + 2 + current row sum) >> 2
 * for both halves, packed into one byte vector and stored to block.
 *
 * block     - destination; written with vec_st
 * pixels    - source; read at offsets 0, 1, line_size and line_size + 1
 * line_size - byte offset from one source row to the next
 * h         - loop iteration count
 *
 * NOTE(review): braces, the declaration of i and any pointer-advance
 * statements are not visible in this listing.
 */
208 static void put_pixels16_xy2_altivec(uint8_t *
block,
const uint8_t * pixels, ptrdiff_t line_size,
int h)
211 register vector
unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
212 register vector
unsigned char blockv;
213 register vector
unsigned short temp3, temp4,
214 pixelssum1, pixelssum2, pixelssum3, pixelssum4;
/* All-zero byte vector used as the merge partner. */
215 register const vector
unsigned char vczero = (
const vector
unsigned char)vec_splat_u8(0);
/* Splat of 2: used both as the rounding bias and as the shift count. */
216 register const vector
unsigned short vctwo = (
const vector
unsigned short)vec_splat_u16(2);
/*
 * Prime both running sums with the first row. The MERGEL results must be
 * taken before pixelsv1/pixelsv2 are overwritten by their MERGEH results.
 * (VEC_MERGEH / VEC_MERGEL are defined elsewhere; presumably they widen the
 * two halves of the vector to 16-bit lanes - confirm.)
 */
218 pixelsv1 = VEC_LD(0, pixels);
219 pixelsv2 = VEC_LD(1, pixels);
220 pixelsv3 = VEC_MERGEL(vczero, pixelsv1);
221 pixelsv4 = VEC_MERGEL(vczero, pixelsv2);
222 pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
223 pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
224 pixelssum3 = vec_add((vector
unsigned short)pixelsv3,
225 (vector
unsigned short)pixelsv4);
226 pixelssum3 = vec_add(pixelssum3, vctwo);
227 pixelssum1 = vec_add((vector
unsigned short)pixelsv1,
228 (vector
unsigned short)pixelsv2);
229 pixelssum1 = vec_add(pixelssum1, vctwo);
231 for (
i = 0;
i <
h ;
i++) {
/*
 * NOTE(review): blockv is overwritten by vec_packsu further down without
 * being read in any visible line, so this load looks dead (the no_rnd
 * sibling has no such load) - confirm against the complete source.
 */
232 blockv = vec_ld(0,
block);
/* Row sums for the next source row, split the same way as above. */
234 pixelsv1 = unaligned_load(line_size, pixels);
235 pixelsv2 = unaligned_load(line_size+1, pixels);
237 pixelsv3 = VEC_MERGEL(vczero, pixelsv1);
238 pixelsv4 = VEC_MERGEL(vczero, pixelsv2);
239 pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
240 pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
241 pixelssum4 = vec_add((vector
unsigned short)pixelsv3,
242 (vector
unsigned short)pixelsv4);
243 pixelssum2 = vec_add((vector
unsigned short)pixelsv1,
244 (vector
unsigned short)pixelsv2);
/* (biased previous row sum + current row sum) >> 2, for each half. */
245 temp4 = vec_add(pixelssum3, pixelssum4);
246 temp4 = vec_sra(temp4, vctwo);
247 temp3 = vec_add(pixelssum1, pixelssum2);
248 temp3 = vec_sra(temp3, vctwo);
/* Current row sums, re-biased, become the previous sums of the next pass. */
250 pixelssum3 = vec_add(pixelssum4, vctwo);
251 pixelssum1 = vec_add(pixelssum2, vctwo);
/* Narrow both halves back into one byte vector with unsigned saturation. */
253 blockv = vec_packsu(temp3, temp4);
255 vec_st(blockv, 0,
block);
/*
 * put_no_rnd_pixels16_xy2_altivec: same data flow as
 * put_pixels16_xy2_altivec, but the bias added to each row sum is 1 (vcone)
 * instead of 2, so each result is
 *     (previous row sum + 1 + current row sum) >> 2
 * for both halves of the vector.
 *
 * block     - destination; written through the VEC_ST macro
 * pixels    - source; read at offsets 0, 1, line_size and line_size + 1
 * line_size - byte offset from one source row to the next
 * h         - loop iteration count
 *
 * NOTE(review): braces, the declaration of i and any pointer-advance
 * statements are not visible in this listing.
 */
263 static void put_no_rnd_pixels16_xy2_altivec(uint8_t *
block,
const uint8_t * pixels, ptrdiff_t line_size,
int h)
266 register vector
unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
267 register vector
unsigned char blockv;
268 register vector
unsigned short temp3, temp4,
269 pixelssum1, pixelssum2, pixelssum3, pixelssum4;
/* All-zero byte vector used as the merge partner. */
270 register const vector
unsigned char vczero = (
const vector
unsigned char)vec_splat_u8(0);
/* Splat of 1: the bias added to every row sum. */
271 register const vector
unsigned short vcone = (
const vector
unsigned short)vec_splat_u16(1);
/* Splat of 2: used only as the shift count. */
272 register const vector
unsigned short vctwo = (
const vector
unsigned short)vec_splat_u16(2);
/*
 * Prime both running sums with the first row. The MERGEL results must be
 * taken before pixelsv1/pixelsv2 are overwritten by their MERGEH results.
 */
274 pixelsv1 = VEC_LD(0, pixels);
275 pixelsv2 = VEC_LD(1, pixels);
276 pixelsv3 = VEC_MERGEL(vczero, pixelsv1);
277 pixelsv4 = VEC_MERGEL(vczero, pixelsv2);
278 pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
279 pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
280 pixelssum3 = vec_add((vector
unsigned short)pixelsv3,
281 (vector
unsigned short)pixelsv4);
282 pixelssum3 = vec_add(pixelssum3, vcone);
283 pixelssum1 = vec_add((vector
unsigned short)pixelsv1,
284 (vector
unsigned short)pixelsv2);
285 pixelssum1 = vec_add(pixelssum1, vcone);
287 for (
i = 0;
i <
h ;
i++) {
/* Row sums for the next source row, split the same way as above. */
288 pixelsv1 = unaligned_load(line_size, pixels);
289 pixelsv2 = unaligned_load(line_size+1, pixels);
291 pixelsv3 = VEC_MERGEL(vczero, pixelsv1);
292 pixelsv4 = VEC_MERGEL(vczero, pixelsv2);
293 pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
294 pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
295 pixelssum4 = vec_add((vector
unsigned short)pixelsv3,
296 (vector
unsigned short)pixelsv4);
297 pixelssum2 = vec_add((vector
unsigned short)pixelsv1,
298 (vector
unsigned short)pixelsv2);
/* (biased previous row sum + current row sum) >> 2, for each half. */
299 temp4 = vec_add(pixelssum3, pixelssum4);
300 temp4 = vec_sra(temp4, vctwo);
301 temp3 = vec_add(pixelssum1, pixelssum2);
302 temp3 = vec_sra(temp3, vctwo);
/* Current row sums, biased by 1, become the previous sums of the next pass. */
304 pixelssum3 = vec_add(pixelssum4, vcone);
305 pixelssum1 = vec_add(pixelssum2, vcone);
/* Narrow both halves back into one byte vector with unsigned saturation. */
307 blockv = vec_packsu(temp3, temp4);
/*
 * NOTE(review): this store goes through the VEC_ST macro, while
 * put_pixels16_xy2_altivec calls vec_st directly - confirm whether the
 * difference is intentional.
 */
309 VEC_ST(blockv, 0,
block);
/*
 * avg_pixels8_xy2_altivec: computes the same
 *     (previous row sum + vctwo + current row sum) >> 2
 * value as put_pixels8_xy2_altivec, but instead of storing it directly,
 * merges it with the existing block vector into blocktemp and stores
 * vec_avg(blocktemp, blockv).
 *
 * block     - destination; read with vec_ld and written with vec_st
 * pixels    - source; read at offsets 0, 1, line_size and line_size + 1
 * line_size - byte offset from one source row to the next
 * h         - loop iteration count
 *
 * NOTE(review): braces, the declaration of i and any pointer-advance
 * statements are not visible in this listing.
 */
317 static void avg_pixels8_xy2_altivec(uint8_t *
block,
const uint8_t *pixels, ptrdiff_t line_size,
int h)
320 register vector
unsigned char pixelsv1, pixelsv2, pixelsavg;
321 register vector
unsigned char blockv, blocktemp;
322 register vector
unsigned short pixelssum1, pixelssum2, temp3;
/*
 * NOTE(review): the initializer expressions of vczero and vctwo continue on
 * lines missing from this listing; presumably splats of 0 and 2 as in the
 * sibling functions - confirm.
 */
324 register const vector
unsigned char vczero = (
const vector
unsigned char)
326 register const vector
unsigned short vctwo = (
const vector
unsigned short)
/*
 * Prime pixelssum1 with the first row: vectors at pixels + 0 and pixels + 1,
 * merged with vczero via VEC_MERGEH (defined elsewhere), added together and
 * biased by vctwo.
 */
329 pixelsv1 = VEC_LD(0, pixels);
330 pixelsv2 = VEC_LD(1, pixels);
331 pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
332 pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
333 pixelssum1 = vec_add((vector
unsigned short)pixelsv1,
334 (vector
unsigned short)pixelsv2);
335 pixelssum1 = vec_add(pixelssum1, vctwo);
337 for (
i = 0;
i <
h ;
i++) {
/*
 * Nonzero when block is not 16-byte aligned.
 * NOTE(review): uintptr_t would be the portable type for this cast.
 */
338 int rightside = ((
unsigned long)
block & 0x0000000F);
/* Existing destination vector; used by both the vec_perm and the vec_avg. */
339 blockv = vec_ld(0,
block);
/* Row sum for the next source row, built the same way as pixelssum1. */
341 pixelsv1 = unaligned_load(line_size, pixels);
342 pixelsv2 = unaligned_load(line_size+1, pixels);
344 pixelsv1 = VEC_MERGEH(vczero, pixelsv1);
345 pixelsv2 = VEC_MERGEH(vczero, pixelsv2);
346 pixelssum2 = vec_add((vector
unsigned short)pixelsv1,
347 (vector
unsigned short)pixelsv2);
/* (biased previous row sum + current row sum) >> vctwo */
348 temp3 = vec_add(pixelssum1, pixelssum2);
349 temp3 = vec_sra(temp3, vctwo);
/* The current row sum, re-biased, becomes the previous sum of the next pass. */
350 pixelssum1 = vec_add(pixelssum2, vctwo);
/* Narrow to bytes with unsigned saturation; the second operand is vczero. */
351 pixelsavg = vec_packsu(temp3, (vector
unsigned short) vczero);
/*
 * NOTE(review): the two vec_perm assignments below are presumably the two
 * arms of an if/else on rightside whose control lines are missing from this
 * listing - confirm. The result goes to blocktemp so that blockv still holds
 * the untouched destination for the average.
 */
354 blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1,
s0,
s1));
356 blocktemp = vec_perm(blockv, pixelsavg, vcprm(
s0,
s1, 2, 3));
/* Element-wise average, rounded up, of the merged vector and the old
 * destination. */
359 blockv = vec_avg(blocktemp, blockv);
360 vec_st(blockv, 0,
block);
375 c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
376 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
379 c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
380 c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
383 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
384 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;