40 { 36, 68, 60, 92, 34, 66, 58, 90, },
41 { 100, 4, 124, 28, 98, 2, 122, 26, },
42 { 52, 84, 44, 76, 50, 82, 42, 74, },
43 { 116, 20, 108, 12, 114, 18, 106, 10, },
44 { 32, 64, 56, 88, 38, 70, 62, 94, },
45 { 96, 0, 120, 24, 102, 6, 126, 30, },
46 { 48, 80, 40, 72, 54, 86, 46, 78, },
47 { 112, 16, 104, 8, 118, 22, 110, 14, },
48 { 36, 68, 60, 92, 34, 66, 58, 90, },
52 64, 64, 64, 64, 64, 64, 64, 64
68 const int32_t *filterPos,
int filterSize)
73 const uint16_t *
src = (
const uint16_t *) _src;
83 for (
i = 0;
i < dstW;
i++) {
85 int srcPos = filterPos[
i];
88 for (j = 0; j < filterSize; j++) {
98 const int32_t *filterPos,
int filterSize)
102 const uint16_t *
src = (
const uint16_t *) _src;
103 int sh =
desc->comp[0].depth - 1;
111 for (
i = 0;
i < dstW;
i++) {
113 int srcPos = filterPos[
i];
116 for (j = 0; j < filterSize; j++) {
120 dst[
i] =
FFMIN(
val >> sh, (1 << 15) - 1);
127 const int32_t *filterPos,
int filterSize)
130 for (
i = 0;
i < dstW;
i++) {
132 int srcPos = filterPos[
i];
134 for (j = 0; j < filterSize; j++) {
143 const int32_t *filterPos,
int filterSize)
147 for (
i = 0;
i < dstW;
i++) {
149 int srcPos = filterPos[
i];
151 for (j = 0; j < filterSize; j++) {
164 dstU[
i] = (
FFMIN(dstU[
i], 30775) * 4663 - 9289992) >> 12;
165 dstV[
i] = (
FFMIN(dstV[
i], 30775) * 4663 - 9289992) >> 12;
173 dstU[
i] = (dstU[
i] * 1799 + 4081085) >> 11;
174 dstV[
i] = (dstV[
i] * 1799 + 4081085) >> 11;
182 dst[
i] = (
FFMIN(dst[
i], 30189) * 19077 - 39057361) >> 14;
189 dst[
i] = (dst[
i] * 14071 + 33561947) >> 14;
198 dstU[
i] = (
FFMIN(dstU[
i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12;
199 dstV[
i] = (
FFMIN(dstV[
i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12;
209 dstU[
i] = (dstU[
i] * 1799 + (4081085 << 4)) >> 11;
210 dstV[
i] = (dstV[
i] * 1799 + (4081085 << 4)) >> 11;
219 dst[
i] = ((
int)(
FFMIN(dst[
i], 30189 << 4) * 4769
U - (39057361 << 2))) >> 12;
228 dst[
i] = (dst[
i]*(14071/4) + (33561947<<4)/4)>>12;
232 #define DEBUG_SWSCALE_BUFFERS 0
233 #define DEBUG_BUFFERS(...) \
234 if (DEBUG_SWSCALE_BUFFERS) \
235 av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
238 int srcStride[],
int srcSliceY,
239 int srcSliceH,
uint8_t *dst[],
int dstStride[])
243 const int dstW =
c->dstW;
244 const int dstH =
c->dstH;
247 const int flags =
c->flags;
248 int32_t *vLumFilterPos =
c->vLumFilterPos;
249 int32_t *vChrFilterPos =
c->vChrFilterPos;
251 const int vLumFilterSize =
c->vLumFilterSize;
252 const int vChrFilterSize =
c->vChrFilterSize;
261 const int chrSrcSliceY = srcSliceY >>
c->chrSrcVSubSample;
262 const int chrSrcSliceH =
AV_CEIL_RSHIFT(srcSliceH,
c->chrSrcVSubSample);
263 int should_dither =
isNBPS(
c->srcFormat) ||
269 int lumBufIndex =
c->lumBufIndex;
270 int chrBufIndex =
c->chrBufIndex;
271 int lastInLumBuf =
c->lastInLumBuf;
272 int lastInChrBuf =
c->lastInChrBuf;
276 int lumEnd =
c->descIndex[0];
277 int chrStart = lumEnd;
278 int chrEnd =
c->descIndex[1];
280 int vEnd =
c->numDesc;
281 SwsSlice *src_slice = &
c->slice[lumStart];
282 SwsSlice *hout_slice = &
c->slice[
c->numSlice-2];
283 SwsSlice *vout_slice = &
c->slice[
c->numSlice-1];
287 int needAlpha =
c->needAlpha;
301 srcStride[3] = srcStride[0];
303 srcStride[1] *= 1 <<
c->vChrDrop;
304 srcStride[2] *= 1 <<
c->vChrDrop;
306 DEBUG_BUFFERS(
"swscale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
307 src[0], srcStride[0],
src[1], srcStride[1],
308 src[2], srcStride[2],
src[3], srcStride[3],
309 dst[0], dstStride[0], dst[1], dstStride[1],
310 dst[2], dstStride[2], dst[3], dstStride[3]);
311 DEBUG_BUFFERS(
"srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
312 srcSliceY, srcSliceH, dstY, dstH);
314 vLumFilterSize, vChrFilterSize);
316 if (dstStride[0]&15 || dstStride[1]&15 ||
317 dstStride[2]&15 || dstStride[3]&15) {
318 static int warnedAlready = 0;
321 "Warning: dstStride is not aligned!\n"
322 " ->cannot do aligned memory accesses anymore\n");
327 if ( (uintptr_t)dst[0]&15 || (uintptr_t)dst[1]&15 || (uintptr_t)dst[2]&15
328 || (uintptr_t)
src[0]&15 || (uintptr_t)
src[1]&15 || (uintptr_t)
src[2]&15
329 || dstStride[0]&15 || dstStride[1]&15 || dstStride[2]&15 || dstStride[3]&15
330 || srcStride[0]&15 || srcStride[1]&15 || srcStride[2]&15 || srcStride[3]&15
332 static int warnedAlready=0;
343 if (srcSliceY == 0) {
351 if (!should_dither) {
357 yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX,
c->use_mmx_vfilter);
360 srcSliceY, srcSliceH, chrSrcSliceY, chrSrcSliceH, 1);
363 dstY, dstH, dstY >>
c->chrDstVSubSample,
365 if (srcSliceY == 0) {
375 hout_slice->
width = dstW;
378 for (; dstY < dstH; dstY++) {
379 const int chrDstY = dstY >>
c->chrDstVSubSample;
380 int use_mmx_vfilter=
c->use_mmx_vfilter;
383 const int firstLumSrcY =
FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
384 const int firstLumSrcY2 =
FFMAX(1 - vLumFilterSize, vLumFilterPos[
FFMIN(dstY | ((1 <<
c->chrDstVSubSample) - 1), dstH - 1)]);
386 const int firstChrSrcY =
FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
389 int lastLumSrcY =
FFMIN(
c->srcH, firstLumSrcY + vLumFilterSize) - 1;
390 int lastLumSrcY2 =
FFMIN(
c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
391 int lastChrSrcY =
FFMIN(
c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
395 int posY, cPosY, firstPosY, lastPosY, firstCPosY, lastCPosY;
398 if (firstLumSrcY > lastInLumBuf) {
400 hasLumHoles = lastInLumBuf != firstLumSrcY - 1;
408 lastInLumBuf = firstLumSrcY - 1;
410 if (firstChrSrcY > lastInChrBuf) {
412 hasChrHoles = lastInChrBuf != firstChrSrcY - 1;
420 lastInChrBuf = firstChrSrcY - 1;
424 DEBUG_BUFFERS(
"\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
425 firstLumSrcY, lastLumSrcY, lastInLumBuf);
426 DEBUG_BUFFERS(
"\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
427 firstChrSrcY, lastChrSrcY, lastInChrBuf);
430 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
431 lastChrSrcY <
AV_CEIL_RSHIFT(srcSliceY + srcSliceH,
c->chrSrcVSubSample);
434 lastLumSrcY = srcSliceY + srcSliceH - 1;
435 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
436 DEBUG_BUFFERS(
"buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
437 lastLumSrcY, lastChrSrcY);
445 if (posY <= lastLumSrcY && !hasLumHoles) {
446 firstPosY =
FFMAX(firstLumSrcY, posY);
450 lastPosY = lastLumSrcY;
454 if (cPosY <= lastChrSrcY && !hasChrHoles) {
455 firstCPosY =
FFMAX(firstChrSrcY, cPosY);
459 lastCPosY = lastChrSrcY;
464 if (posY < lastLumSrcY + 1) {
465 for (
i = lumStart;
i < lumEnd; ++
i)
469 lumBufIndex += lastLumSrcY - lastInLumBuf;
470 lastInLumBuf = lastLumSrcY;
472 if (cPosY < lastChrSrcY + 1) {
473 for (
i = chrStart;
i < chrEnd; ++
i)
477 chrBufIndex += lastChrSrcY - lastInChrBuf;
478 lastInChrBuf = lastChrSrcY;
481 if (lumBufIndex >= vLumFilterSize)
482 lumBufIndex -= vLumFilterSize;
483 if (chrBufIndex >= vChrFilterSize)
484 chrBufIndex -= vChrFilterSize;
490 lastInLumBuf, lastInChrBuf);
496 if (dstY >= dstH - 2) {
500 &yuv2packed1, &yuv2packed2, &yuv2packedX, &yuv2anyX);
503 yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, use_mmx_vfilter);
507 for (
i = vStart;
i < vEnd; ++
i)
513 int height = dstY - lastDstY;
518 1,
desc->comp[3].depth,
524 #if HAVE_MMXEXT_INLINE
526 __asm__
volatile (
"sfence" :::
"memory");
532 c->lumBufIndex = lumBufIndex;
533 c->chrBufIndex = chrBufIndex;
534 c->lastInLumBuf = lastInLumBuf;
535 c->lastInChrBuf = lastInChrBuf;
537 return dstY - lastDstY;
542 c->lumConvertRange =
NULL;
543 c->chrConvertRange =
NULL;
544 if (
c->srcRange !=
c->dstRange && !
isAnyRGB(
c->dstFormat)) {
545 if (
c->dstBpc <= 14) {
570 &
c->yuv2nv12cX, &
c->yuv2packed1,
571 &
c->yuv2packed2, &
c->yuv2packedX, &
c->yuv2anyX);
576 if (
c->srcBpc == 8) {
577 if (
c->dstBpc <= 14) {
595 c->needs_hcscale = 1;
627 const int linesizes[4])
634 for (
i = 0;
i < 4;
i++) {
649 for (yp=0; yp<
h; yp++) {
650 for (xp=0; xp+2<
stride; xp+=3) {
651 int x, y, z,
r,
g,
b;
663 x =
c->xyzgamma[x>>4];
664 y =
c->xyzgamma[y>>4];
665 z =
c->xyzgamma[z>>4];
668 r =
c->xyz2rgb_matrix[0][0] * x +
669 c->xyz2rgb_matrix[0][1] * y +
670 c->xyz2rgb_matrix[0][2] * z >> 12;
671 g =
c->xyz2rgb_matrix[1][0] * x +
672 c->xyz2rgb_matrix[1][1] * y +
673 c->xyz2rgb_matrix[1][2] * z >> 12;
674 b =
c->xyz2rgb_matrix[2][0] * x +
675 c->xyz2rgb_matrix[2][1] * y +
676 c->xyz2rgb_matrix[2][2] * z >> 12;
679 r = av_clip_uintp2(
r, 12);
680 g = av_clip_uintp2(
g, 12);
681 b = av_clip_uintp2(
b, 12);
685 AV_WB16(dst + xp + 0,
c->rgbgamma[
r] << 4);
686 AV_WB16(dst + xp + 1,
c->rgbgamma[
g] << 4);
687 AV_WB16(dst + xp + 2,
c->rgbgamma[
b] << 4);
689 AV_WL16(dst + xp + 0,
c->rgbgamma[
r] << 4);
690 AV_WL16(dst + xp + 1,
c->rgbgamma[
g] << 4);
691 AV_WL16(dst + xp + 2,
c->rgbgamma[
b] << 4);
705 for (yp=0; yp<
h; yp++) {
706 for (xp=0; xp+2<
stride; xp+=3) {
707 int x, y, z,
r,
g,
b;
719 r =
c->rgbgammainv[
r>>4];
720 g =
c->rgbgammainv[
g>>4];
721 b =
c->rgbgammainv[
b>>4];
724 x =
c->rgb2xyz_matrix[0][0] *
r +
725 c->rgb2xyz_matrix[0][1] *
g +
726 c->rgb2xyz_matrix[0][2] *
b >> 12;
727 y =
c->rgb2xyz_matrix[1][0] *
r +
728 c->rgb2xyz_matrix[1][1] *
g +
729 c->rgb2xyz_matrix[1][2] *
b >> 12;
730 z =
c->rgb2xyz_matrix[2][0] *
r +
731 c->rgb2xyz_matrix[2][1] *
g +
732 c->rgb2xyz_matrix[2][2] *
b >> 12;
735 x = av_clip_uintp2(x, 12);
736 y = av_clip_uintp2(y, 12);
737 z = av_clip_uintp2(z, 12);
741 AV_WB16(dst + xp + 0,
c->xyzgammainv[x] << 4);
742 AV_WB16(dst + xp + 1,
c->xyzgammainv[y] << 4);
743 AV_WB16(dst + xp + 2,
c->xyzgammainv[z] << 4);
745 AV_WL16(dst + xp + 0,
c->xyzgammainv[x] << 4);
746 AV_WL16(dst + xp + 1,
c->xyzgammainv[y] << 4);
747 AV_WL16(dst + xp + 2,
c->xyzgammainv[z] << 4);
760 const uint8_t *
const srcSlice[],
761 const int srcStride[],
int srcSliceY,
762 int srcSliceH,
uint8_t *
const dst[],
763 const int dstStride[])
769 int macro_height =
isBayer(
c->srcFormat) ? 2 : (1 <<
c->chrSrcVSubSample);
773 int srcSliceY_internal = srcSliceY;
775 if (!srcStride || !dstStride || !dst || !srcSlice) {
776 av_log(
c,
AV_LOG_ERROR,
"One of the input parameters to sws_scale() is NULL, please check the calling code\n");
780 for (
i=0;
i<4;
i++) {
781 srcStride2[
i] = srcStride[
i];
782 dstStride2[
i] = dstStride[
i];
785 if ((srcSliceY & (macro_height-1)) ||
786 ((srcSliceH& (macro_height-1)) && srcSliceY + srcSliceH !=
c->srcH) ||
787 srcSliceY + srcSliceH >
c->srcH) {
792 if (
c->gamma_flag &&
c->cascaded_context[0]) {
796 srcSlice, srcStride, srcSliceY, srcSliceH,
797 c->cascaded_tmp,
c->cascaded_tmpStride);
802 if (
c->cascaded_context[2])
803 ret =
sws_scale(
c->cascaded_context[1], (
const uint8_t *
const *)
c->cascaded_tmp,
c->cascaded_tmpStride, srcSliceY, srcSliceH,
c->cascaded1_tmp,
c->cascaded1_tmpStride);
805 ret =
sws_scale(
c->cascaded_context[1], (
const uint8_t *
const *)
c->cascaded_tmp,
c->cascaded_tmpStride, srcSliceY, srcSliceH, dst, dstStride);
810 if (
c->cascaded_context[2]) {
812 (
const uint8_t *
const *)
c->cascaded1_tmp,
c->cascaded1_tmpStride,
c->cascaded_context[1]->dstY -
ret,
c->cascaded_context[1]->dstY,
818 if (
c->cascaded_context[0] && srcSliceY == 0 && srcSliceH ==
c->cascaded_context[0]->srcH) {
820 srcSlice, srcStride, srcSliceY, srcSliceH,
821 c->cascaded_tmp,
c->cascaded_tmpStride);
825 (
const uint8_t *
const * )
c->cascaded_tmp,
c->cascaded_tmpStride, 0,
c->cascaded_context[0]->dstH,
830 memcpy(src2, srcSlice,
sizeof(src2));
831 memcpy(dst2, dst,
sizeof(dst2));
846 if (
c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH !=
c->srcH) {
850 if (
c->sliceDir == 0) {
851 if (srcSliceY == 0)
c->sliceDir = 1;
else c->sliceDir = -1;
855 for (
i = 0;
i < 256;
i++) {
856 int r,
g,
b, y,
u, v,
a = 0xff;
858 uint32_t p = ((
const uint32_t *)(srcSlice[1]))[
i];
859 a = (p >> 24) & 0xFF;
860 r = (p >> 16) & 0xFF;
865 g = ((
i >> 2) & 7) * 36;
869 g = ((
i >> 3) & 7) * 36;
872 r = (
i >> 3 ) * 255;
873 g = ((
i >> 1) & 3) * 85;
879 b = (
i >> 3 ) * 255;
880 g = ((
i >> 1) & 3) * 85;
883 #define RGB2YUV_SHIFT 15
884 #define BY ( (int) (0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
885 #define BV (-(int) (0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
886 #define BU ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
887 #define GY ( (int) (0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
888 #define GV (-(int) (0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
889 #define GU (-(int) (0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
890 #define RY ( (int) (0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
891 #define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
892 #define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
897 c->pal_yuv[
i]= y + (
u<<8) + (v<<16) + ((unsigned)
a<<24);
899 switch (
c->dstFormat) {
904 c->pal_rgb[
i]=
r + (
g<<8) + (
b<<16) + ((unsigned)
a<<24);
910 c->pal_rgb[
i]=
a + (
r<<8) + (
g<<16) + ((unsigned)
b<<24);
916 c->pal_rgb[
i]=
a + (
b<<8) + (
g<<16) + ((unsigned)
r<<24);
923 c->pal_rgb[
i]=
b + (
g<<8) + (
r<<16) + ((unsigned)
a<<24);
928 if (
c->src0Alpha && !
c->dst0Alpha &&
isALPHA(
c->dstFormat)) {
935 base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp;
936 for (y=0; y<srcSliceH; y++){
937 memcpy(
base + srcStride[0]*y, src2[0] + srcStride[0]*y, 4*
c->srcW);
938 for (x=
c->src0Alpha-1; x<4*c->srcW; x+=4) {
939 base[ srcStride[0]*y + x] = 0xFF;
945 if (
c->srcXYZ && !(
c->dstXYZ &&
c->srcW==
c->dstW &&
c->srcH==
c->dstH)) {
951 base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp;
953 xyz12Torgb48(
c, (uint16_t*)
base, (
const uint16_t*)src2[0], srcStride[0]/2, srcSliceH);
958 for (
i = 0;
i < 4;
i++)
959 memset(
c->dither_error[
i], 0,
sizeof(
c->dither_error[0][0]) * (
c->dstW+2));
961 if (
c->sliceDir != 1) {
963 for (
i=0;
i<4;
i++) {
968 src2[0] += (srcSliceH - 1) * srcStride[0];
970 src2[1] += ((srcSliceH >>
c->chrSrcVSubSample) - 1) * srcStride[1];
971 src2[2] += ((srcSliceH >>
c->chrSrcVSubSample) - 1) * srcStride[2];
972 src2[3] += (srcSliceH - 1) * srcStride[3];
973 dst2[0] += (
c->dstH - 1) * dstStride[0];
974 dst2[1] += ((
c->dstH >>
c->chrDstVSubSample) - 1) * dstStride[1];
975 dst2[2] += ((
c->dstH >>
c->chrDstVSubSample) - 1) * dstStride[2];
976 dst2[3] += (
c->dstH - 1) * dstStride[3];
978 srcSliceY_internal =
c->srcH-srcSliceY-srcSliceH;
984 if (srcSliceY_internal + srcSliceH ==
c->srcH)
986 ret =
c->swscale(
c, src2, srcStride2, srcSliceY_internal, srcSliceH, dst2, dstStride2);
989 if (
c->dstXYZ && !(
c->srcXYZ &&
c->srcW==
c->dstW &&
c->srcH==
c->dstH)) {
990 int dstY =
c->dstY ?
c->dstY : srcSliceY + srcSliceH;
991 uint16_t *dst16 = (uint16_t*)(dst2[0] + (dstY -
ret) * dstStride2[0]);