#if HAVE_VSX
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               ptrdiff_t stride)
{
    int i;
    vector unsigned char perm =
        (vector unsigned char) {0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13,
                                0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17};
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);

    for (i = 0; i < 8; i++) {
        /* Read potentially unaligned pixels.
         * We're reading 16 pixels, but only the first 8 are used;
         * the extras are simply ignored. */
        vector unsigned char bytes = vec_vsx_ld(0, pixels);

        // Convert the bytes into shorts.
        vector signed short shorts =
            (vector signed short) vec_perm(bytes, zero, perm);

        // Save the data to the block; the block is assumed to be 16-byte aligned.
        vec_vsx_st(shorts, i * 16, (vector signed short *) block);

        pixels += stride;
    }
}
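/* The perm vector above interleaves each of the first 8 pixel bytes with a
 * byte taken from the zero vector (indices 0x10-0x17 select from the second
 * vec_perm operand), so every unsigned 8-bit pixel is paired with a zero byte
 * and widened to 16 bits. In scalar terms the routine is roughly equivalent to:
 *
 *     for (i = 0; i < 8; i++) {
 *         for (j = 0; j < 8; j++)
 *             block[i * 8 + j] = pixels[j];
 *         pixels += stride;
 *     }
 */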
#else
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               ptrdiff_t stride)
{
    int i;
    const vec_u8 zero = (const vec_u8) vec_splat_u8(0);

    for (i = 0; i < 8; i++) {
        /* Read potentially unaligned pixels; only the first 8 bytes are used. */
        vec_u8 perm  = vec_lvsl(0, pixels);
        vec_u8 pixl  = vec_ld(0, pixels);
        vec_u8 pixr  = vec_ld(7, pixels);
        vec_u8 bytes = vec_perm(pixl, pixr, perm);
        // Widen the bytes to shorts and store to the 16-byte-aligned block.
        vec_st((vec_s16) vec_mergeh(zero, bytes), i * 16, (vec_s16 *) block);
        pixels += stride;
    }
}
#endif /* HAVE_VSX */
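/* Note on the load idiom above: plain AltiVec vec_ld() can only load from
 * 16-byte-aligned addresses, so an unaligned window is assembled from two
 * aligned loads combined with vec_perm(), driven by the shift-left permute
 * vector that vec_lvsl() returns for the unaligned address. The second load
 * offset (7 or 15) only needs to reach the last byte that is actually used. */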
#if HAVE_VSX
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, ptrdiff_t stride)
{
    int i;
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);
    vector signed short shorts1, shorts2;

    for (i = 0; i < 4; i++) {
        /* Read potentially unaligned pixels.
         * We're reading 16 pixels, but only the first 8 are used. */
        vector unsigned char bytes = vec_vsx_ld(0, s1);

        // Convert the bytes into shorts.
        shorts1 = (vector signed short) vec_mergeh(bytes, zero);

        // Do the same for the second block of pixels.
        bytes = vec_vsx_ld(0, s2);
        shorts2 = (vector signed short) vec_mergeh(bytes, zero);

        // Do the subtraction.
        shorts1 = vec_sub(shorts1, shorts2);

        // Save the data to the block; the block is assumed to be 16-byte aligned.
        vec_vsx_st(shorts1, 0, (vector signed short *) block);

        s1    += stride;
        s2    += stride;
        block += 8;

        /* Second row of this iteration: a manual unroll of the code above. */
        bytes = vec_vsx_ld(0, s1);
        shorts1 = (vector signed short) vec_mergeh(bytes, zero);

        bytes = vec_vsx_ld(0, s2);
        shorts2 = (vector signed short) vec_mergeh(bytes, zero);

        shorts1 = vec_sub(shorts1, shorts2);
        vec_vsx_st(shorts1, 0, (vector signed short *) block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
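/* In scalar terms, diff_pixels computes the element-wise difference of two
 * 8x8 blocks of unsigned 8-bit pixels, roughly:
 *
 *     for (i = 0; i < 8; i++) {
 *         for (j = 0; j < 8; j++)
 *             block[i * 8 + j] = s1[j] - s2[j];
 *         s1 += stride;
 *         s2 += stride;
 *     }
 *
 * The vector loop handles two rows per iteration, so four iterations cover
 * all eight rows. */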
#else
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, ptrdiff_t stride)
{
    int i;
    vec_u8 perm, pixl, pixr, bytes;
    const vec_u8 zero = (const vec_u8) vec_splat_u8(0);
    vec_s16 shorts1, shorts2;

    for (i = 0; i < 4; i++) {
        /* Read potentially unaligned pixels from s1; only the first 8 are used. */
        perm  = vec_lvsl(0, s1);
        pixl  = vec_ld(0, s1);
        pixr  = vec_ld(15, s1);
        bytes = vec_perm(pixl, pixr, perm);
        shorts1 = (vec_s16) vec_mergeh(zero, bytes);

        // Do the same for the second block of pixels.
        perm  = vec_lvsl(0, s2);
        pixl  = vec_ld(0, s2);
        pixr  = vec_ld(15, s2);
        bytes = vec_perm(pixl, pixr, perm);
        shorts2 = (vec_s16) vec_mergeh(zero, bytes);

        // Subtract and store; the block is assumed to be 16-byte aligned.
        shorts1 = vec_sub(shorts1, shorts2);
        vec_st(shorts1, 0, (vec_s16 *) block);

        s1    += stride;
        s2    += stride;
        block += 8;

        /* Second row of this iteration: a manual unroll of the code above. */
        perm  = vec_lvsl(0, s1);
        pixl  = vec_ld(0, s1);
        pixr  = vec_ld(15, s1);
        bytes = vec_perm(pixl, pixr, perm);
        shorts1 = (vec_s16) vec_mergeh(zero, bytes);

        perm  = vec_lvsl(0, s2);
        pixl  = vec_ld(0, s2);
        pixr  = vec_ld(15, s2);
        bytes = vec_perm(pixl, pixr, perm);
        shorts2 = (vec_s16) vec_mergeh(zero, bytes);

        shorts1 = vec_sub(shorts1, shorts2);
        vec_st(shorts1, 0, (vec_s16 *) block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
#endif /* HAVE_VSX */
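/* The widening step in both diff_pixels variants merges the pixel bytes with a
 * vector of zeros: each 8-bit pixel is paired with a 0x00 byte, zero-extending
 * it to a 16-bit value before the subtraction. The operand order of
 * vec_mergeh() differs between the VSX and plain-AltiVec paths so that the
 * zero byte ends up in the high half of each 16-bit result in either
 * configuration. */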
#if HAVE_VSX
static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
                           ptrdiff_t stride)
{
    int i;
    for (i = 0; i < 8; i++) {
        // Load one row of 8 pixels, widened from bytes to signed shorts.
        vec_s16 shorts = vsx_ld_u8_s16(0, pixels);

        vec_vsx_st(shorts, i * 16, block);

        pixels += stride;
    }
}
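/* vsx_ld_u8_s16() (a helper defined elsewhere in FFmpeg's PPC support code)
 * folds the load-then-widen sequence spelled out in the AltiVec functions
 * above into a single step: it reads 8 unsigned bytes and returns them
 * zero-extended as a vector of signed 16-bit values. */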
static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
                            const uint8_t *s2, ptrdiff_t stride)
{
    int i;
    vec_s16 shorts1, shorts2;

    for (i = 0; i < 8; i++) {
        // Load one 8-pixel row from each source, widened to shorts.
        shorts1 = vsx_ld_u8_s16(0, s1);
        shorts2 = vsx_ld_u8_s16(0, s2);

        // Subtract and store the row of differences.
        shorts1 = vec_sub(shorts1, shorts2);
        vec_vsx_st(shorts1, 0, block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
#endif /* HAVE_VSX */
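/* Runtime dispatch: the init function below installs these routines into the
 * pixel-block DSP context, replacing the default C implementations.
 * get_pixels is only overridden for 8-bit content (the !high_bit_depth case),
 * while diff_pixels is always overridden. In FFmpeg, the per-architecture init
 * functions are normally also gated on a runtime av_get_cpu_flags() check
 * before the AltiVec/VSX versions are installed. */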
av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
                                     unsigned high_bit_depth)
{
    c->diff_pixels = diff_pixels_altivec;
    if (!high_bit_depth) {
        c->get_pixels = get_pixels_altivec;
    }
#if HAVE_VSX
    c->diff_pixels = diff_pixels_vsx;
    if (!high_bit_depth) {
        c->get_pixels = get_pixels_vsx;
    }
#endif
}