41 register vector
unsigned char pixelsv1, pixelsv2;
42 register vector
unsigned char pixelsv1B, pixelsv2B;
43 register vector
unsigned char pixelsv1C, pixelsv2C;
44 register vector
unsigned char pixelsv1D, pixelsv2D;
46 register vector
unsigned char perm = vec_lvsl(0, pixels);
48 register ptrdiff_t line_size_2 = line_size << 1;
49 register ptrdiff_t line_size_3 = line_size + line_size_2;
50 register ptrdiff_t line_size_4 = line_size << 2;
57 for (i = 0; i < h; i += 4) {
58 pixelsv1 = vec_ld( 0, pixels);
59 pixelsv2 = vec_ld(15, pixels);
60 pixelsv1B = vec_ld(line_size, pixels);
61 pixelsv2B = vec_ld(15 + line_size, pixels);
62 pixelsv1C = vec_ld(line_size_2, pixels);
63 pixelsv2C = vec_ld(15 + line_size_2, pixels);
64 pixelsv1D = vec_ld(line_size_3, pixels);
65 pixelsv2D = vec_ld(15 + line_size_3, pixels);
66 vec_st(vec_perm(pixelsv1, pixelsv2, perm),
67 0, (
unsigned char*)block);
68 vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
69 line_size, (
unsigned char*)block);
70 vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
71 line_size_2, (
unsigned char*)block);
72 vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
73 line_size_3, (
unsigned char*)block);
/*
 * op_avg(a, b): stores into `a` the byte-wise average of `a` and `b`, rounded
 * up, treating the operands as bytes packed side by side in one word.
 * (a | b) - ((a ^ b) >> 1) equals ceil((a + b) / 2); the 0xFEFEFEFE mask
 * clears the low bit of each of four bytes of (a ^ b) before the shift so no
 * bit slides into the neighbouring byte. `a` must be an lvalue and both
 * arguments are expanded more than once. No use of the macro is visible in
 * this listing.
 *
 * NOTE(review): the leading decimal number on each line looks like a source
 * line number fused into the text by an extraction step, and the numbering
 * has gaps - confirm against the original file. The statements after the
 * macro read like the body of a function whose signature (numbered lines
 * 81-82) is absent; the declaration of `i`, any per-iteration advance of
 * `pixels`/`block`, and the closing braces are not visible either.
 */
80 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
83 register vector
unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
/* Permute control derived from the address of pixels; computed once, outside the loop. */
84 register vector
unsigned char perm = vec_lvsl(0, pixels);
87 for (i = 0; i < h; i++) {
/* Two vector loads at offsets 0 and 16 from pixels, combined below through perm. */
88 pixelsv1 = vec_ld( 0, pixels);
89 pixelsv2 = vec_ld(16,pixels);
/* Current destination contents. */
90 blockv = vec_ld(0, block);
91 pixelsv = vec_perm(pixelsv1, pixelsv2, perm);
/* Element-wise average of destination and source, stored back to block. */
92 blockv = vec_avg(blockv,pixelsv);
93 vec_st(blockv, 0, (
unsigned char*)block);
/*
 * avg_pixels8_altivec: on each of h loop iterations, loads the vector at
 * `block`, assembles a vector from two loads at `pixels`, mixes parts of the
 * destination vector into it, averages the result with the destination
 * vector and stores that back to `block`.
 *
 * block     - destination; read and written as one vector at offset 0
 * pixels    - source; read at offsets 0 and 16
 * line_size - not referenced by any visible statement
 * h         - loop bound
 *
 * Lanes of pixelsv that were copied from blockv average with themselves and
 * therefore leave those destination bytes unchanged.
 *
 * NOTE(review): the leading decimal numbers look like fused listing line
 * numbers and the numbering has gaps. The function braces, the declaration
 * of `i`, and any advance of `pixels`/`block` between iterations are not
 * visible here - confirm against the original file.
 */
100 static void avg_pixels8_altivec(
uint8_t * block,
const uint8_t * pixels, ptrdiff_t line_size,
int h)
102 register vector
unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
105 for (i = 0; i < h; i++) {
/* Non-zero when block is not on a 16-byte boundary (low four address bits). */
108 int rightside = ((
unsigned long)block & 0x0000000F);
110 blockv = vec_ld(0, block);
/* Source vector built from two loads plus an address-derived permute. */
111 pixelsv1 = vec_ld( 0, pixels);
112 pixelsv2 = vec_ld(16, pixels);
113 pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));
/*
 * NOTE(review): numbered lines 115, 117 and 119 are missing. As listed, the
 * second assignment would consume the result of the first; presumably the two
 * assignments are the arms of a branch on `rightside` - confirm. vcprm is
 * defined outside this view; it presumably builds a permute control choosing
 * 32-bit words from blockv (plain indices) and from pixelsv (s-prefixed).
 */
116 pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,
s0,
s1));
118 pixelsv = vec_perm(blockv, pixelsv, vcprm(
s0,
s1,2,3));
121 blockv = vec_avg(blockv, pixelsv);
123 vec_st(blockv, 0, block);
/*
 * put_pixels8_xy2_altivec: as far as the visible statements show, adds each
 * source byte to its right-hand neighbour for the data at `pixels` and for
 * the data at `pixels + line_size`, computes (sum of the four + 2) >> 2 in
 * 16-bit lanes, packs the result to bytes and merges it into the vector
 * loaded from `block` before storing that vector.
 *
 * block     - destination; read and written as one vector at offset 0
 * pixels    - source; read at offsets 0, 16, line_size and line_size + 16
 * line_size - byte offset added to `pixels` for the in-loop loads
 *             (presumably the row stride - confirm)
 * h         - loop bound
 *
 * NOTE(review): the leading decimal numbers look like fused listing line
 * numbers and the numbering has gaps. The function braces, the declaration
 * of `i`, the bodies of the alignment tests, the branch on `rightside`, and
 * any advance of `pixels`/`block` between iterations are not visible here -
 * confirm against the original file.
 */
131 static void put_pixels8_xy2_altivec(
uint8_t *block,
const uint8_t *pixels, ptrdiff_t line_size,
int h)
134 register vector
unsigned char pixelsv1, pixelsv2, pixelsavg;
135 register vector
unsigned char blockv, temp1, temp2;
136 register vector
unsigned short pixelssum1, pixelssum2, temp3;
/* All-zero bytes: interleave partner for widening and filler for the pack. */
137 register const vector
unsigned char vczero = (
const vector
unsigned char)vec_splat_u8(0);
/* 2 in every 16-bit lane: used both as rounding bias and as shift count. */
138 register const vector
unsigned short vctwo = (
const vector
unsigned short)vec_splat_u16(2);
/* First source position: bytes starting at pixels. */
140 temp1 = vec_ld(0, pixels);
141 temp2 = vec_ld(16, pixels);
142 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
/*
 * True when pixels sits at offset 15 of a 16-byte line, i.e. pixels + 1 is
 * 16-byte aligned. NOTE(review): the body of this test (numbered line 144)
 * and the else keyword are missing; the vec_perm below is presumably the
 * else arm that produces the bytes starting at pixels + 1.
 */
143 if ((((
unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
146 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
/* Interleave with zero bytes so each of the first eight pixels fills a 16-bit lane. */
148 pixelsv1 = vec_mergeh(vczero, pixelsv1);
149 pixelsv2 = vec_mergeh(vczero, pixelsv2);
/* Horizontal pair sum, with the rounding bias of 2 folded in ahead of the loop. */
150 pixelssum1 = vec_add((vector
unsigned short)pixelsv1,
151 (vector
unsigned short)pixelsv2);
152 pixelssum1 = vec_add(pixelssum1, vctwo);
154 for (i = 0; i < h ; i++) {
/* Non-zero when block is not on a 16-byte boundary. */
155 int rightside = ((
unsigned long)block & 0x0000000F);
156 blockv = vec_ld(0, block);
/* Same load/realign sequence for the data line_size bytes further on. */
158 temp1 = vec_ld(line_size, pixels);
159 temp2 = vec_ld(line_size + 16, pixels);
160 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
/* NOTE(review): as above, the body of this test and the else keyword are missing. */
161 if (((((
unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
164 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
167 pixelsv1 = vec_mergeh(vczero, pixelsv1);
168 pixelsv2 = vec_mergeh(vczero, pixelsv2);
169 pixelssum2 = vec_add((vector
unsigned short)pixelsv1,
170 (vector
unsigned short)pixelsv2);
/* (biased previous pair sum + current pair sum) >> 2. */
171 temp3 = vec_add(pixelssum1, pixelssum2);
172 temp3 = vec_sra(temp3, vctwo);
/* Current pair sum plus bias is carried over as the next iteration's first term. */
173 pixelssum1 = vec_add(pixelssum2, vctwo);
/* Narrow the eight 16-bit results to bytes; the other eight bytes come from vczero. */
174 pixelsavg = vec_packsu(temp3, (vector
unsigned short) vczero);
/*
 * NOTE(review): numbered lines 176, 178 and 180 are missing; the two
 * assignments below are presumably the arms of a branch on `rightside`.
 * vcprm is defined outside this view.
 */
177 blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1,
s0,
s1));
179 blockv = vec_perm(blockv, pixelsavg, vcprm(
s0,
s1, 2, 3));
182 vec_st(blockv, 0, block);
/*
 * put_no_rnd_pixels8_xy2_altivec: same visible structure as a four-sample
 * average over each byte and its right-hand neighbour at `pixels` and at
 * `pixels + line_size`, but the bias added before the shift is 1 (vcone)
 * instead of 2, giving (sum of the four + 1) >> 2 in 16-bit lanes. The
 * packed result is merged into the vector loaded from `block` and stored.
 *
 * block     - destination; read and written as one vector at offset 0
 * pixels    - source; read at offsets 0, 16, line_size and line_size + 16
 * line_size - byte offset added to `pixels` for the in-loop loads
 *             (presumably the row stride - confirm)
 * h         - loop bound
 *
 * NOTE(review): the leading decimal numbers look like fused listing line
 * numbers and the numbering has gaps. The function braces, the declaration
 * of `i`, the bodies of the alignment tests, the branch on `rightside`, and
 * any advance of `pixels`/`block` between iterations are not visible here -
 * confirm against the original file.
 */
190 static void put_no_rnd_pixels8_xy2_altivec(
uint8_t *block,
const uint8_t *pixels, ptrdiff_t line_size,
int h)
193 register vector
unsigned char pixelsv1, pixelsv2, pixelsavg;
194 register vector
unsigned char blockv, temp1, temp2;
195 register vector
unsigned short pixelssum1, pixelssum2, temp3;
/* All-zero bytes: interleave partner for widening and filler for the pack. */
196 register const vector
unsigned char vczero = (
const vector
unsigned char)vec_splat_u8(0);
/* 1 in every 16-bit lane: the bias added to the pair sums. */
197 register const vector
unsigned short vcone = (
const vector
unsigned short)vec_splat_u16(1);
/* 2 in every 16-bit lane: used only as the shift count here. */
198 register const vector
unsigned short vctwo = (
const vector
unsigned short)vec_splat_u16(2);
/* First source position: bytes starting at pixels. */
200 temp1 = vec_ld(0, pixels);
201 temp2 = vec_ld(16, pixels);
202 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
/*
 * True when pixels + 1 is 16-byte aligned. NOTE(review): the body of this
 * test (numbered line 204) and the else keyword are missing; the vec_perm
 * below is presumably the else arm.
 */
203 if ((((
unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
206 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
/* Interleave with zero bytes so each of the first eight pixels fills a 16-bit lane. */
208 pixelsv1 = vec_mergeh(vczero, pixelsv1);
209 pixelsv2 = vec_mergeh(vczero, pixelsv2);
/* Horizontal pair sum, with the bias of 1 folded in ahead of the loop. */
210 pixelssum1 = vec_add((vector
unsigned short)pixelsv1,
211 (vector
unsigned short)pixelsv2);
212 pixelssum1 = vec_add(pixelssum1, vcone);
214 for (i = 0; i < h ; i++) {
/* Non-zero when block is not on a 16-byte boundary. */
215 int rightside = ((
unsigned long)block & 0x0000000F);
216 blockv = vec_ld(0, block);
/* Same load/realign sequence for the data line_size bytes further on. */
218 temp1 = vec_ld(line_size, pixels);
219 temp2 = vec_ld(line_size + 16, pixels);
220 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
/* NOTE(review): as above, the body of this test and the else keyword are missing. */
221 if (((((
unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
224 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
227 pixelsv1 = vec_mergeh(vczero, pixelsv1);
228 pixelsv2 = vec_mergeh(vczero, pixelsv2);
229 pixelssum2 = vec_add((vector
unsigned short)pixelsv1,
230 (vector
unsigned short)pixelsv2);
/* (biased previous pair sum + current pair sum) >> 2. */
231 temp3 = vec_add(pixelssum1, pixelssum2);
232 temp3 = vec_sra(temp3, vctwo);
/* Current pair sum plus 1 is carried over as the next iteration's first term. */
233 pixelssum1 = vec_add(pixelssum2, vcone);
/* Narrow the eight 16-bit results to bytes; the other eight bytes come from vczero. */
234 pixelsavg = vec_packsu(temp3, (vector
unsigned short) vczero);
/*
 * NOTE(review): numbered lines 236, 238 and 240 are missing; the two
 * assignments below are presumably the arms of a branch on `rightside`.
 * vcprm is defined outside this view.
 */
237 blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1,
s0,
s1));
239 blockv = vec_perm(blockv, pixelsavg, vcprm(
s0,
s1, 2, 3));
242 vec_st(blockv, 0, block);
/*
 * put_pixels16_xy2_altivec: 16-byte-wide form of the four-sample average.
 * The realigned source vector and its one-byte-shifted counterpart are each
 * split into two halves of eight 16-bit lanes (vec_mergeh / vec_mergel with
 * zero); pair sums are formed per half, combined with the pair sums of the
 * data at `pixels + line_size`, biased by 2, shifted right by 2, packed back
 * into one 16-byte vector and stored at `block`.
 *
 * block     - destination; one vector is stored at offset 0
 * pixels    - source; read at offsets 0, 16, line_size and line_size + 16
 * line_size - byte offset added to `pixels` for the in-loop loads
 *             (presumably the row stride - confirm)
 * h         - loop bound
 *
 * NOTE(review): the leading decimal numbers look like fused listing line
 * numbers and the numbering has gaps. The function braces, the declaration
 * of `i`, the bodies of the alignment tests, and any advance of
 * `pixels`/`block` between iterations are not visible here - confirm
 * against the original file.
 */
250 static void put_pixels16_xy2_altivec(
uint8_t * block,
const uint8_t * pixels, ptrdiff_t line_size,
int h)
253 register vector
unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
254 register vector
unsigned char blockv, temp1, temp2;
255 register vector
unsigned short temp3, temp4,
256 pixelssum1, pixelssum2, pixelssum3, pixelssum4;
/* All-zero bytes: interleave partner for widening. */
257 register const vector
unsigned char vczero = (
const vector
unsigned char)vec_splat_u8(0);
/* 2 in every 16-bit lane: used both as rounding bias and as shift count. */
258 register const vector
unsigned short vctwo = (
const vector
unsigned short)vec_splat_u16(2);
/* First source position: bytes starting at pixels. */
260 temp1 = vec_ld(0, pixels);
261 temp2 = vec_ld(16, pixels);
262 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
/*
 * True when pixels + 1 is 16-byte aligned. NOTE(review): the body of this
 * test (numbered line 264) and the else keyword are missing; the vec_perm
 * below is presumably the else arm.
 */
263 if ((((
unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
266 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
/* Widen the second eight bytes first (mergel reads pixelsv1/2 before mergeh overwrites them). */
268 pixelsv3 = vec_mergel(vczero, pixelsv1);
269 pixelsv4 = vec_mergel(vczero, pixelsv2);
270 pixelsv1 = vec_mergeh(vczero, pixelsv1);
271 pixelsv2 = vec_mergeh(vczero, pixelsv2);
/* Biased pair sums: pixelssum3 for the mergel half, pixelssum1 for the mergeh half. */
272 pixelssum3 = vec_add((vector
unsigned short)pixelsv3,
273 (vector
unsigned short)pixelsv4);
274 pixelssum3 = vec_add(pixelssum3, vctwo);
275 pixelssum1 = vec_add((vector
unsigned short)pixelsv1,
276 (vector
unsigned short)pixelsv2);
277 pixelssum1 = vec_add(pixelssum1, vctwo);
279 for (i = 0; i < h ; i++) {
/* The value loaded here is overwritten by the vec_packsu result before any visible use. */
280 blockv = vec_ld(0, block);
/* Same load/realign sequence for the data line_size bytes further on. */
282 temp1 = vec_ld(line_size, pixels);
283 temp2 = vec_ld(line_size + 16, pixels);
284 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
/* NOTE(review): as above, the body of this test and the else keyword are missing. */
285 if (((((
unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
288 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
291 pixelsv3 = vec_mergel(vczero, pixelsv1);
292 pixelsv4 = vec_mergel(vczero, pixelsv2);
293 pixelsv1 = vec_mergeh(vczero, pixelsv1);
294 pixelsv2 = vec_mergeh(vczero, pixelsv2);
296 pixelssum4 = vec_add((vector
unsigned short)pixelsv3,
297 (vector
unsigned short)pixelsv4);
298 pixelssum2 = vec_add((vector
unsigned short)pixelsv1,
299 (vector
unsigned short)pixelsv2);
/* Per half: (biased previous pair sum + current pair sum) >> 2. */
300 temp4 = vec_add(pixelssum3, pixelssum4);
301 temp4 = vec_sra(temp4, vctwo);
302 temp3 = vec_add(pixelssum1, pixelssum2);
303 temp3 = vec_sra(temp3, vctwo);
/* Current pair sums plus bias are carried over as the next iteration's first terms. */
305 pixelssum3 = vec_add(pixelssum4, vctwo);
306 pixelssum1 = vec_add(pixelssum2, vctwo);
/* Narrow both halves back into a single 16-byte vector (temp3 first, then temp4). */
308 blockv = vec_packsu(temp3, temp4);
310 vec_st(blockv, 0, block);
/*
 * put_no_rnd_pixels16_xy2_altivec: 16-byte-wide four-sample average with a
 * bias of 1 (vcone) instead of 2, i.e. (sum of the four + 1) >> 2 per
 * 16-bit lane. The realigned source vector and its one-byte-shifted
 * counterpart are split into two halves of eight 16-bit lanes, pair sums
 * are formed per half and combined with those of the data at
 * `pixels + line_size`, then packed into one 16-byte vector and stored at
 * `block`.
 *
 * block     - destination; one vector is stored at offset 0
 * pixels    - source; read at offsets 0, 16, line_size and line_size + 16
 * line_size - byte offset added to `pixels` for the in-loop loads
 *             (presumably the row stride - confirm)
 * h         - loop bound
 *
 * NOTE(review): the leading decimal numbers look like fused listing line
 * numbers and the numbering has gaps. The function braces, the declaration
 * of `i`, the bodies of the alignment tests, and any advance of
 * `pixels`/`block` between iterations are not visible here - confirm
 * against the original file.
 */
318 static void put_no_rnd_pixels16_xy2_altivec(
uint8_t * block,
const uint8_t * pixels, ptrdiff_t line_size,
int h)
321 register vector
unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
322 register vector
unsigned char blockv, temp1, temp2;
323 register vector
unsigned short temp3, temp4,
324 pixelssum1, pixelssum2, pixelssum3, pixelssum4;
/* All-zero bytes: interleave partner for widening. */
325 register const vector
unsigned char vczero = (
const vector
unsigned char)vec_splat_u8(0);
/* 1 in every 16-bit lane: the bias added to the pair sums. */
326 register const vector
unsigned short vcone = (
const vector
unsigned short)vec_splat_u16(1);
/* 2 in every 16-bit lane: used only as the shift count here. */
327 register const vector
unsigned short vctwo = (
const vector
unsigned short)vec_splat_u16(2);
/* First source position: bytes starting at pixels. */
329 temp1 = vec_ld(0, pixels);
330 temp2 = vec_ld(16, pixels);
331 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
/*
 * True when pixels + 1 is 16-byte aligned. NOTE(review): the body of this
 * test (numbered line 333) and the else keyword are missing; the vec_perm
 * below is presumably the else arm.
 */
332 if ((((
unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
335 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
/* Widen the second eight bytes first (mergel reads pixelsv1/2 before mergeh overwrites them). */
337 pixelsv3 = vec_mergel(vczero, pixelsv1);
338 pixelsv4 = vec_mergel(vczero, pixelsv2);
339 pixelsv1 = vec_mergeh(vczero, pixelsv1);
340 pixelsv2 = vec_mergeh(vczero, pixelsv2);
/* Biased pair sums: pixelssum3 for the mergel half, pixelssum1 for the mergeh half. */
341 pixelssum3 = vec_add((vector
unsigned short)pixelsv3,
342 (vector
unsigned short)pixelsv4);
343 pixelssum3 = vec_add(pixelssum3, vcone);
344 pixelssum1 = vec_add((vector
unsigned short)pixelsv1,
345 (vector
unsigned short)pixelsv2);
346 pixelssum1 = vec_add(pixelssum1, vcone);
348 for (i = 0; i < h ; i++) {
/* The value loaded here is overwritten by the vec_packsu result before any visible use. */
349 blockv = vec_ld(0, block);
/* Same load/realign sequence for the data line_size bytes further on. */
351 temp1 = vec_ld(line_size, pixels);
352 temp2 = vec_ld(line_size + 16, pixels);
353 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
/* NOTE(review): as above, the body of this test and the else keyword are missing. */
354 if (((((
unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
357 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
360 pixelsv3 = vec_mergel(vczero, pixelsv1);
361 pixelsv4 = vec_mergel(vczero, pixelsv2);
362 pixelsv1 = vec_mergeh(vczero, pixelsv1);
363 pixelsv2 = vec_mergeh(vczero, pixelsv2);
365 pixelssum4 = vec_add((vector
unsigned short)pixelsv3,
366 (vector
unsigned short)pixelsv4);
367 pixelssum2 = vec_add((vector
unsigned short)pixelsv1,
368 (vector
unsigned short)pixelsv2);
/* Per half: (biased previous pair sum + current pair sum) >> 2. */
369 temp4 = vec_add(pixelssum3, pixelssum4);
370 temp4 = vec_sra(temp4, vctwo);
371 temp3 = vec_add(pixelssum1, pixelssum2);
372 temp3 = vec_sra(temp3, vctwo);
/* Current pair sums plus 1 are carried over as the next iteration's first terms. */
374 pixelssum3 = vec_add(pixelssum4, vcone);
375 pixelssum1 = vec_add(pixelssum2, vcone);
/* Narrow both halves back into a single 16-byte vector (temp3 first, then temp4). */
377 blockv = vec_packsu(temp3, temp4);
379 vec_st(blockv, 0, block);
/*
 * avg_pixels8_xy2_altivec: as far as the visible statements show, forms the
 * four-sample value (sum of each byte, its right-hand neighbour, and the
 * same two positions at `pixels + line_size`, plus vctwo, shifted right by
 * vctwo) in 16-bit lanes, packs it to bytes, merges it with the vector
 * loaded from `block` into blocktemp, and then stores the element-wise
 * average of blocktemp and the original destination vector.
 *
 * block     - destination; read and written as one vector at offset 0
 * pixels    - source; read at offsets 0, 16, line_size and line_size + 16
 * line_size - byte offset added to `pixels` for the in-loop loads
 *             (presumably the row stride - confirm)
 * h         - loop bound
 *
 * Lanes of blocktemp that were copied from blockv average with themselves
 * and therefore leave those destination bytes unchanged.
 *
 * NOTE(review): the leading decimal numbers look like fused listing line
 * numbers and the numbering has gaps. The function braces, the declaration
 * of `i`, the bodies of the alignment tests, the branch on `rightside`, any
 * advance of `pixels`/`block`, and the end of the function are not visible
 * here - confirm against the original file.
 */
387 static void avg_pixels8_xy2_altivec(
uint8_t *block,
const uint8_t *pixels, ptrdiff_t line_size,
int h)
390 register vector
unsigned char pixelsv1, pixelsv2, pixelsavg;
391 register vector
unsigned char blockv, temp1, temp2, blocktemp;
392 register vector
unsigned short pixelssum1, pixelssum2, temp3;
/*
 * NOTE(review): both initialisers below stop after the cast; the
 * expressions being cast (numbered lines 395 and 397) are missing from the
 * listing. Judging by the names they presumably splat 0 and 2 - confirm.
 */
394 register const vector
unsigned char vczero = (
const vector
unsigned char)
396 register const vector
unsigned short vctwo = (
const vector
unsigned short)
/* First source position: bytes starting at pixels. */
399 temp1 = vec_ld(0, pixels);
400 temp2 = vec_ld(16, pixels);
401 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
/*
 * True when pixels + 1 is 16-byte aligned. NOTE(review): the body of this
 * test (numbered line 403) and the else keyword are missing; the vec_perm
 * below is presumably the else arm.
 */
402 if ((((
unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
405 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
/* Interleave with vczero so each of the first eight pixels fills a 16-bit lane. */
407 pixelsv1 = vec_mergeh(vczero, pixelsv1);
408 pixelsv2 = vec_mergeh(vczero, pixelsv2);
/* Horizontal pair sum, with vctwo folded in ahead of the loop. */
409 pixelssum1 = vec_add((vector
unsigned short)pixelsv1,
410 (vector
unsigned short)pixelsv2);
411 pixelssum1 = vec_add(pixelssum1, vctwo);
413 for (i = 0; i < h ; i++) {
/* Non-zero when block is not on a 16-byte boundary. */
414 int rightside = ((
unsigned long)block & 0x0000000F);
415 blockv = vec_ld(0, block);
/* Same load/realign sequence for the data line_size bytes further on. */
417 temp1 = vec_ld(line_size, pixels);
418 temp2 = vec_ld(line_size + 16, pixels);
419 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
/* NOTE(review): as above, the body of this test and the else keyword are missing. */
420 if (((((
unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
423 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
426 pixelsv1 = vec_mergeh(vczero, pixelsv1);
427 pixelsv2 = vec_mergeh(vczero, pixelsv2);
428 pixelssum2 = vec_add((vector
unsigned short)pixelsv1,
429 (vector
unsigned short)pixelsv2);
/* (biased previous pair sum + current pair sum), shifted right by vctwo. */
430 temp3 = vec_add(pixelssum1, pixelssum2);
431 temp3 = vec_sra(temp3, vctwo);
/* Current pair sum plus vctwo is carried over as the next iteration's first term. */
432 pixelssum1 = vec_add(pixelssum2, vctwo);
/* Narrow the eight 16-bit results to bytes; the other eight bytes come from vczero. */
433 pixelsavg = vec_packsu(temp3, (vector
unsigned short) vczero);
/*
 * NOTE(review): numbered lines 435, 437 and 439 are missing; the two
 * assignments below are presumably the arms of a branch on `rightside`.
 * vcprm is defined outside this view.
 */
436 blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1,
s0,
s1));
438 blocktemp = vec_perm(blockv, pixelsavg, vcprm(
s0,
s1, 2, 3));
/* Average the merged vector with the original destination contents and store. */
441 blockv = vec_avg(blocktemp, blockv);
442 vec_st(blockv, 0, block);