90 const uint8_t *mm_end = end - 3;
93 register unsigned x = *((
const uint32_t *)s);
94 *((uint32_t *)d) = (x & 0x7FFF7FFF) + (x & 0x7FE07FE0);
99 register unsigned short x = *((
const uint16_t *)s);
100 *((uint16_t *)d) = (x & 0x7FFF) + (x & 0x7FE0);
109 const uint8_t *mm_end = end - 3;
112 register uint32_t x = *((
const uint32_t *)s);
113 *((uint32_t *)d) = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F);
118 register uint16_t x = *((
const uint16_t *)s);
119 *((uint16_t *)d) = ((x >> 1) & 0x7FE0) | (x & 0x001F);
125 uint16_t *d = (uint16_t *)dst;
130 register int rgb = *(
const uint32_t *)s;
132 *d++ = ((rgb & 0xFF) >> 3) +
133 ((rgb & 0xFC00) >> 5) +
134 ((rgb & 0xF80000) >> 8);
141 uint16_t *d = (uint16_t *)dst;
146 register int rgb = *(
const uint32_t *)s;
148 *d++ = ((rgb & 0xF8) << 8) +
149 ((rgb & 0xFC00) >> 5) +
150 ((rgb & 0xF80000) >> 19);
156 uint16_t *d = (uint16_t *)dst;
161 register int rgb = *(
const uint32_t *)s;
163 *d++ = ((rgb & 0xFF) >> 3) +
164 ((rgb & 0xF800) >> 6) +
165 ((rgb & 0xF80000) >> 9);
172 uint16_t *d = (uint16_t *)dst;
177 register int rgb = *(
const uint32_t *)s;
179 *d++ = ((rgb & 0xF8) << 7) +
180 ((rgb & 0xF800) >> 6) +
181 ((rgb & 0xF80000) >> 19);
188 uint16_t *d = (uint16_t *)dst;
196 *d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
202 uint16_t *d = (uint16_t *)dst;
210 *d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
217 uint16_t *d = (uint16_t *)dst;
225 *d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
231 uint16_t *d = (uint16_t *)dst;
239 *d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
247 const uint16_t *
s = (
const uint16_t *)src;
248 const uint16_t *
end = s + src_size / 2;
251 register uint16_t bgr = *s++;
252 *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
253 *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
254 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
262 const uint16_t *
s = (
const uint16_t *)src;
263 const uint16_t *
end = s + src_size / 2;
266 register uint16_t bgr = *s++;
267 *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
268 *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
269 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
276 const uint16_t *
s = (
const uint16_t *)src;
277 const uint16_t *
end = s + src_size / 2;
280 register uint16_t bgr = *s++;
283 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
284 *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
285 *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
287 *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
288 *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
289 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
298 const uint16_t *
s = (
const uint16_t *)src;
299 const uint16_t *
end = s + src_size / 2;
302 register uint16_t bgr = *s++;
305 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
306 *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
307 *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
309 *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
310 *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
311 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
320 int idx = 15 - src_size;
324 for (; idx < 15; idx += 4) {
325 register int v = *(
const uint32_t *)&s[idx],
g = v & 0xff00ff00;
327 *(uint32_t *)&d[idx] = (v >> 16) +
g + (v << 16);
335 for (i = 0; i < src_size; i += 3) {
336 register uint8_t x = src[i + 2];
337 dst[i + 1] = src[i + 1];
338 dst[i + 2] = src[i + 0];
346 int lumStride,
int chromStride,
347 int dstStride,
int vertLumPerChroma)
350 const int chromWidth = width >> 1;
352 for (y = 0; y <
height; y++) {
354 uint64_t *ldst = (uint64_t *)dst;
355 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
356 for (i = 0; i < chromWidth; i += 2) {
357 uint64_t k = yc[0] + (uc[0] << 8) +
358 (yc[1] << 16) + ((unsigned) vc[0] << 24);
359 uint64_t l = yc[2] + (uc[1] << 8) +
360 (yc[3] << 16) + ((unsigned) vc[1] << 24);
361 *ldst++ = k + (l << 32);
369 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
371 for (i = 0; i < chromWidth; i++) {
373 *idst++ = (yc[0] << 24) + (uc[0] << 16) +
374 (yc[1] << 8) + (vc[0] << 0);
376 *idst++ = yc[0] + (uc[0] << 8) +
377 (yc[1] << 16) + (vc[0] << 24);
384 if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
400 int chromStride,
int dstStride)
404 chromStride, dstStride, 2);
410 int lumStride,
int chromStride,
411 int dstStride,
int vertLumPerChroma)
414 const int chromWidth = width >> 1;
416 for (y = 0; y <
height; y++) {
418 uint64_t *ldst = (uint64_t *)dst;
419 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
420 for (i = 0; i < chromWidth; i += 2) {
421 uint64_t k = uc[0] + (yc[0] << 8) +
422 (vc[0] << 16) + ((unsigned) yc[1] << 24);
423 uint64_t l = uc[1] + (yc[2] << 8) +
424 (vc[1] << 16) + ((unsigned) yc[3] << 24);
425 *ldst++ = k + (l << 32);
433 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
435 for (i = 0; i < chromWidth; i++) {
437 *idst++ = (uc[0] << 24) + (yc[0] << 16) +
438 (vc[0] << 8) + (yc[1] << 0);
440 *idst++ = uc[0] + (yc[0] << 8) +
441 (vc[0] << 16) + (yc[1] << 24);
448 if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
464 int chromStride,
int dstStride)
468 chromStride, dstStride, 2);
477 int chromStride,
int dstStride)
480 chromStride, dstStride, 1);
489 int chromStride,
int dstStride)
492 chromStride, dstStride, 1);
502 int chromStride,
int srcStride)
505 const int chromWidth = width >> 1;
507 for (y = 0; y <
height; y += 2) {
509 for (i = 0; i < chromWidth; i++) {
510 ydst[2 * i + 0] = src[4 * i + 0];
511 udst[i] = src[4 * i + 1];
512 ydst[2 * i + 1] = src[4 * i + 2];
513 vdst[i] = src[4 * i + 3];
518 for (i = 0; i < chromWidth; i++) {
519 ydst[2 * i + 0] = src[4 * i + 0];
520 ydst[2 * i + 1] = src[4 * i + 2];
530 int srcHeight,
int srcStride,
int dstStride)
537 for (x = 0; x < srcWidth - 1; x++) {
538 dst[2 * x + 1] = (3 * src[x] + src[x + 1]) >> 2;
539 dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
541 dst[2 * srcWidth - 1] = src[srcWidth - 1];
545 for (y = 1; y < srcHeight; y++) {
546 const int mmxSize = 1;
548 dst[0] = (src[0] * 3 + src[srcStride]) >> 2;
549 dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2;
551 for (x = mmxSize - 1; x < srcWidth - 1; x++) {
552 dst[2 * x + 1] = (src[x + 0] * 3 + src[x + srcStride + 1]) >> 2;
553 dst[2 * x + dstStride + 2] = (src[x + 0] + 3 * src[x + srcStride + 1]) >> 2;
554 dst[2 * x + dstStride + 1] = (src[x + 1] + 3 * src[x + srcStride]) >> 2;
555 dst[2 * x + 2] = (src[x + 1] * 3 + src[x + srcStride]) >> 2;
557 dst[srcWidth * 2 - 1] = (src[srcWidth - 1] * 3 + src[srcWidth - 1 + srcStride]) >> 2;
558 dst[srcWidth * 2 - 1 + dstStride] = (src[srcWidth - 1] + 3 * src[srcWidth - 1 + srcStride]) >> 2;
560 dst += dstStride * 2;
567 for (x = 0; x < srcWidth - 1; x++) {
568 dst[2 * x + 1] = (src[x] * 3 + src[x + 1]) >> 2;
569 dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
571 dst[2 * srcWidth - 1] = src[srcWidth - 1];
583 int chromStride,
int srcStride)
586 const int chromWidth = width >> 1;
588 for (y = 0; y <
height; y += 2) {
590 for (i = 0; i < chromWidth; i++) {
591 udst[i] = src[4 * i + 0];
592 ydst[2 * i + 0] = src[4 * i + 1];
593 vdst[i] = src[4 * i + 2];
594 ydst[2 * i + 1] = src[4 * i + 3];
599 for (i = 0; i < chromWidth; i++) {
600 ydst[2 * i + 0] = src[4 * i + 1];
601 ydst[2 * i + 1] = src[4 * i + 3];
625 const int chromWidth = width >> 1;
627 for (y = 0; y <
height; y += 2) {
629 for (i = 0; i < chromWidth; i++) {
630 unsigned int b = src[6 * i + 0];
631 unsigned int g = src[6 * i + 1];
632 unsigned int r = src[6 * i + 2];
655 for (i = 0; i < chromWidth; i++) {
656 unsigned int b = src[6 * i + 0];
657 unsigned int g = src[6 * i + 1];
658 unsigned int r = src[6 * i + 2];
680 int src1Stride,
int src2Stride,
int dstStride)
684 for (h = 0; h <
height; h++) {
686 for (w = 0; w <
width; w++) {
687 dest[2 * w + 0] = src1[w];
688 dest[2 * w + 1] = src2[w];
698 int dst1Stride,
int dst2Stride)
702 for (h = 0; h <
height; h++) {
704 for (w = 0; w <
width; w++) {
705 dst1[w] = src[2 * w + 0];
706 dst2[w] = src[2 * w + 1];
717 int srcStride1,
int srcStride2,
718 int dstStride1,
int dstStride2)
724 for (y = 0; y < h; y++) {
725 const uint8_t *
s1 = src1 + srcStride1 * (y >> 1);
727 for (x = 0; x < w; x++)
728 d[2 * x] = d[2 * x + 1] = s1[x];
730 for (y = 0; y < h; y++) {
731 const uint8_t *
s2 = src2 + srcStride2 * (y >> 1);
733 for (x = 0; x < w; x++)
734 d[2 * x] = d[2 * x + 1] = s2[x];
741 int srcStride1,
int srcStride2,
742 int srcStride3,
int dstStride)
748 for (y = 0; y < h; y++) {
749 const uint8_t *yp = src1 + srcStride1 *
y;
750 const uint8_t *up = src2 + srcStride2 * (y >> 2);
751 const uint8_t *vp = src3 + srcStride3 * (y >> 2);
753 for (x = 0; x < w; x++) {
754 const int x2 = x << 2;
755 d[8 * x + 0] = yp[x2];
756 d[8 * x + 1] = up[x];
757 d[8 * x + 2] = yp[x2 + 1];
758 d[8 * x + 3] = vp[x];
759 d[8 * x + 4] = yp[x2 + 2];
760 d[8 * x + 5] = up[x];
761 d[8 * x + 6] = yp[x2 + 3];
762 d[8 * x + 7] = vp[x];
786 dst0[
count] = src[4 * count + 0];
787 dst1[
count] = src[4 * count + 2];
801 dst0[
count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
802 dst1[
count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
816 dst0[
count] = src[4 * count + 0];
817 dst1[
count] = src[4 * count + 2];
833 dst0[
count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
834 dst1[
count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
841 int lumStride,
int chromStride,
int srcStride)
846 for (y = 0; y <
height; y++) {
861 int lumStride,
int chromStride,
int srcStride)
866 for (y = 0; y <
height; y++) {
879 int lumStride,
int chromStride,
int srcStride)
884 for (y = 0; y <
height; y++) {
899 int lumStride,
int chromStride,
int srcStride)
904 for (y = 0; y <
height; y++) {