00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <assert.h>
00022 #include <inttypes.h>
00023 #include <math.h>
00024 #include <stdio.h>
00025 #include <string.h>
00026
00027 #include "libavutil/avassert.h"
00028 #include "libavutil/avutil.h"
00029 #include "libavutil/bswap.h"
00030 #include "libavutil/cpu.h"
00031 #include "libavutil/intreadwrite.h"
00032 #include "libavutil/mathematics.h"
00033 #include "libavutil/pixdesc.h"
00034 #include "config.h"
00035 #include "rgb2rgb.h"
00036 #include "swscale_internal.h"
00037 #include "swscale.h"
00038
00039 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
00040 { 36, 68, 60, 92, 34, 66, 58, 90, },
00041 { 100, 4, 124, 28, 98, 2, 122, 26, },
00042 { 52, 84, 44, 76, 50, 82, 42, 74, },
00043 { 116, 20, 108, 12, 114, 18, 106, 10, },
00044 { 32, 64, 56, 88, 38, 70, 62, 94, },
00045 { 96, 0, 120, 24, 102, 6, 126, 30, },
00046 { 48, 80, 40, 72, 54, 86, 46, 78, },
00047 { 112, 16, 104, 8, 118, 22, 110, 14, },
00048 };
00049
00050 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = {
00051 64, 64, 64, 64, 64, 64, 64, 64
00052 };
00053
00054 static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
00055 int height, int y, uint8_t val)
00056 {
00057 int i;
00058 uint8_t *ptr = plane + stride * y;
00059 for (i = 0; i < height; i++) {
00060 memset(ptr, val, width);
00061 ptr += stride;
00062 }
00063 }
00064
00065 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
00066 const uint8_t *_src, const int16_t *filter,
00067 const int32_t *filterPos, int filterSize)
00068 {
00069 int i;
00070 int32_t *dst = (int32_t *) _dst;
00071 const uint16_t *src = (const uint16_t *) _src;
00072 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
00073 int sh = bits - 4;
00074
00075 if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
00076 sh= 9;
00077
00078 for (i = 0; i < dstW; i++) {
00079 int j;
00080 int srcPos = filterPos[i];
00081 int val = 0;
00082
00083 for (j = 0; j < filterSize; j++) {
00084 val += src[srcPos + j] * filter[filterSize * i + j];
00085 }
00086
00087 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
00088 }
00089 }
00090
00091 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
00092 const uint8_t *_src, const int16_t *filter,
00093 const int32_t *filterPos, int filterSize)
00094 {
00095 int i;
00096 const uint16_t *src = (const uint16_t *) _src;
00097 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
00098
00099 if(sh<15)
00100 sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
00101
00102 for (i = 0; i < dstW; i++) {
00103 int j;
00104 int srcPos = filterPos[i];
00105 int val = 0;
00106
00107 for (j = 0; j < filterSize; j++) {
00108 val += src[srcPos + j] * filter[filterSize * i + j];
00109 }
00110
00111 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
00112 }
00113 }
00114
00115
00116 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW,
00117 const uint8_t *src, const int16_t *filter,
00118 const int32_t *filterPos, int filterSize)
00119 {
00120 int i;
00121 for (i = 0; i < dstW; i++) {
00122 int j;
00123 int srcPos = filterPos[i];
00124 int val = 0;
00125 for (j = 0; j < filterSize; j++) {
00126 val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
00127 }
00128 dst[i] = FFMIN(val >> 7, (1 << 15) - 1);
00129 }
00130 }
00131
00132 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW,
00133 const uint8_t *src, const int16_t *filter,
00134 const int32_t *filterPos, int filterSize)
00135 {
00136 int i;
00137 int32_t *dst = (int32_t *) _dst;
00138 for (i = 0; i < dstW; i++) {
00139 int j;
00140 int srcPos = filterPos[i];
00141 int val = 0;
00142 for (j = 0; j < filterSize; j++) {
00143 val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
00144 }
00145 dst[i] = FFMIN(val >> 3, (1 << 19) - 1);
00146 }
00147 }
00148
00149
00150
/**
 * In-place range expansion of two lines of 15-bit chroma intermediates.
 *
 * Each sample is clamped from above to 30775 and then mapped with
 * (x * 4663 - 9289992) >> 12.
 *
 * @param dstU  first chroma line, modified in place
 * @param dstV  second chroma line, modified in place
 * @param width number of samples in each line
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int16_t *u = dstU;
    int16_t *v = dstV;
    int left;

    for (left = width; left > 0; left--, u++, v++) {
        int clamped;

        clamped = *u > 30775 ? 30775 : *u;
        *u      = (clamped * 4663 - 9289992) >> 12;

        clamped = *v > 30775 ? 30775 : *v;
        *v      = (clamped * 4663 - 9289992) >> 12;
    }
}
00159
/**
 * In-place range compression of two lines of 15-bit chroma intermediates.
 *
 * Each sample is mapped with (x * 1799 + 4081085) >> 11.
 *
 * @param dstU  first chroma line, modified in place
 * @param dstV  second chroma line, modified in place
 * @param width number of samples in each line
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int16_t *u = dstU;
    int16_t *v = dstV;
    int left;

    for (left = width; left > 0; left--, u++, v++) {
        *u = (*u * 1799 + 4081085) >> 11;
        *v = (*v * 1799 + 4081085) >> 11;
    }
}
00168
/**
 * In-place range expansion of one line of 15-bit luma intermediates.
 *
 * Each sample is clamped from above to 30189 and then mapped with
 * (x * 19077 - 39057361) >> 14.
 *
 * @param dst   luma line, modified in place
 * @param width number of samples
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int16_t *p = dst;
    int left;

    for (left = width; left > 0; left--, p++) {
        const int clamped = *p > 30189 ? 30189 : *p;

        *p = (clamped * 19077 - 39057361) >> 14;
    }
}
00175
/**
 * In-place range compression of one line of 15-bit luma intermediates.
 *
 * Each sample is mapped with (x * 14071 + 33561947) >> 14.
 *
 * @param dst   luma line, modified in place
 * @param width number of samples
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *p = dst;
    int left;

    for (left = width; left > 0; left--, p++)
        *p = (*p * 14071 + 33561947) >> 14;
}
00182
/**
 * In-place range expansion of two lines of 19-bit chroma intermediates.
 *
 * Each sample is clamped from above to 30775 << 4 and then mapped with
 * (x * 4663 - (9289992 << 4)) >> 12.
 *
 * The product is evaluated in 64 bits: at the clamp value,
 * 492400 * 4663 = 2296061200, which does not fit in a signed 32-bit int,
 * so a 32-bit multiply would be undefined behaviour. After the offset is
 * subtracted, the value fits in 32 bits again for clamped input.
 *
 * @param _dstU first chroma line, accessed as int32_t, modified in place
 * @param _dstV second chroma line, accessed as int32_t, modified in place
 * @param width number of samples in each line
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    const int32_t in_max = 30775 << 4;
    const int32_t offset = 9289992 << 4;

    for (i = 0; i < width; i++) {
        int64_t u = dstU[i] > in_max ? in_max : dstU[i];
        int64_t v;

        dstU[i] = (int32_t) ((u * 4663 - offset) >> 12);

        v       = dstV[i] > in_max ? in_max : dstV[i];
        dstV[i] = (int32_t) ((v * 4663 - offset) >> 12);
    }
}
00193
/**
 * In-place range compression of two lines of 19-bit chroma intermediates.
 *
 * Each sample is mapped with (x * 1799 + (4081085 << 4)) >> 11.
 *
 * @param _dstU first chroma line, accessed as int32_t, modified in place
 * @param _dstV second chroma line, accessed as int32_t, modified in place
 * @param width number of samples in each line
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    int left;

    for (left = width; left > 0; left--, u++, v++) {
        *u = (*u * 1799 + (4081085 << 4)) >> 11;
        *v = (*v * 1799 + (4081085 << 4)) >> 11;
    }
}
00204
/**
 * In-place range expansion of one line of 19-bit luma intermediates.
 *
 * Each sample is clamped from above to 30189 << 4 and then mapped with
 * (x * 4769 - (39057361 << 2)) >> 12.
 *
 * The product is evaluated in 64 bits: at the clamp value,
 * 483024 * 4769 = 2303541456, which does not fit in a signed 32-bit int,
 * so a 32-bit multiply would be undefined behaviour. After the offset is
 * subtracted, the value fits in 32 bits again for clamped input.
 *
 * @param _dst  luma line, accessed as int32_t, modified in place
 * @param width number of samples
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    const int32_t in_max = 30189 << 4;
    const int32_t offset = 39057361 << 2;

    for (i = 0; i < width; i++) {
        const int64_t y = dst[i] > in_max ? in_max : dst[i];

        dst[i] = (int32_t) ((y * 4769 - offset) >> 12);
    }
}
00212
/**
 * In-place range compression of one line of 19-bit luma intermediates.
 *
 * Each sample is mapped with
 * (x * (14071 / 4) + (33561947 << 4) / 4) >> 12,
 * where both divisions are integer divisions on constants.
 *
 * @param _dst  luma line, accessed as int32_t, modified in place
 * @param width number of samples
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    enum {
        LUM_MUL = 14071 / 4,
        LUM_ADD = (33561947 << 4) / 4
    };
    int32_t *p = (int32_t *) _dst;
    int left;

    for (left = width; left > 0; left--, p++)
        *p = (*p * LUM_MUL + LUM_ADD) >> 12;
}
00220
00221 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
00222 const uint8_t *src, int srcW, int xInc)
00223 {
00224 int i;
00225 unsigned int xpos = 0;
00226 for (i = 0; i < dstWidth; i++) {
00227 register unsigned int xx = xpos >> 16;
00228 register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
00229 dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
00230 xpos += xInc;
00231 }
00232 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
00233 dst[i] = src[srcW-1]*128;
00234 }
00235
00236
00237 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
00238 const uint8_t *src_in[4],
00239 int srcW, int xInc,
00240 const int16_t *hLumFilter,
00241 const int32_t *hLumFilterPos,
00242 int hLumFilterSize,
00243 uint8_t *formatConvBuffer,
00244 uint32_t *pal, int isAlpha)
00245 {
00246 void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) =
00247 isAlpha ? c->alpToYV12 : c->lumToYV12;
00248 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
00249 const uint8_t *src = src_in[isAlpha ? 3 : 0];
00250
00251 if (toYV12) {
00252 toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
00253 src = formatConvBuffer;
00254 } else if (c->readLumPlanar && !isAlpha) {
00255 c->readLumPlanar(formatConvBuffer, src_in, srcW);
00256 src = formatConvBuffer;
00257 }
00258
00259 if (!c->hyscale_fast) {
00260 c->hyScale(c, dst, dstWidth, src, hLumFilter,
00261 hLumFilterPos, hLumFilterSize);
00262 } else {
00263 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
00264 }
00265
00266 if (convertRange)
00267 convertRange(dst, dstWidth);
00268 }
00269
00270 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
00271 int dstWidth, const uint8_t *src1,
00272 const uint8_t *src2, int srcW, int xInc)
00273 {
00274 int i;
00275 unsigned int xpos = 0;
00276 for (i = 0; i < dstWidth; i++) {
00277 register unsigned int xx = xpos >> 16;
00278 register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
00279 dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
00280 dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
00281 xpos += xInc;
00282 }
00283 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
00284 dst1[i] = src1[srcW-1]*128;
00285 dst2[i] = src2[srcW-1]*128;
00286 }
00287 }
00288
00289 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
00290 int16_t *dst2, int dstWidth,
00291 const uint8_t *src_in[4],
00292 int srcW, int xInc,
00293 const int16_t *hChrFilter,
00294 const int32_t *hChrFilterPos,
00295 int hChrFilterSize,
00296 uint8_t *formatConvBuffer, uint32_t *pal)
00297 {
00298 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
00299 if (c->chrToYV12) {
00300 uint8_t *buf2 = formatConvBuffer +
00301 FFALIGN(srcW*2+78, 16);
00302 c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
00303 src1= formatConvBuffer;
00304 src2= buf2;
00305 } else if (c->readChrPlanar) {
00306 uint8_t *buf2 = formatConvBuffer +
00307 FFALIGN(srcW*2+78, 16);
00308 c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
00309 src1 = formatConvBuffer;
00310 src2 = buf2;
00311 }
00312
00313 if (!c->hcscale_fast) {
00314 c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
00315 c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
00316 } else {
00317 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
00318 }
00319
00320 if (c->chrConvertRange)
00321 c->chrConvertRange(dst1, dst2, dstWidth);
00322 }
00323
/* Set to 1 to log buffer bookkeeping from swScale() at AV_LOG_DEBUG level. */
#define DEBUG_SWSCALE_BUFFERS 0
/*
 * Log through av_log() on the enclosing scope's `c` when
 * DEBUG_SWSCALE_BUFFERS is nonzero. Wrapped in do { } while (0) so the
 * macro is a single statement and cannot capture a following `else`.
 */
#define DEBUG_BUFFERS(...)                        \
    do {                                          \
        if (DEBUG_SWSCALE_BUFFERS)                \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__); \
    } while (0)
00328
00329 static int swScale(SwsContext *c, const uint8_t *src[],
00330 int srcStride[], int srcSliceY,
00331 int srcSliceH, uint8_t *dst[], int dstStride[])
00332 {
00333
00334
00335 const int srcW = c->srcW;
00336 const int dstW = c->dstW;
00337 const int dstH = c->dstH;
00338 const int chrDstW = c->chrDstW;
00339 const int chrSrcW = c->chrSrcW;
00340 const int lumXInc = c->lumXInc;
00341 const int chrXInc = c->chrXInc;
00342 const enum PixelFormat dstFormat = c->dstFormat;
00343 const int flags = c->flags;
00344 int32_t *vLumFilterPos = c->vLumFilterPos;
00345 int32_t *vChrFilterPos = c->vChrFilterPos;
00346 int32_t *hLumFilterPos = c->hLumFilterPos;
00347 int32_t *hChrFilterPos = c->hChrFilterPos;
00348 int16_t *hLumFilter = c->hLumFilter;
00349 int16_t *hChrFilter = c->hChrFilter;
00350 int32_t *lumMmxFilter = c->lumMmxFilter;
00351 int32_t *chrMmxFilter = c->chrMmxFilter;
00352 const int vLumFilterSize = c->vLumFilterSize;
00353 const int vChrFilterSize = c->vChrFilterSize;
00354 const int hLumFilterSize = c->hLumFilterSize;
00355 const int hChrFilterSize = c->hChrFilterSize;
00356 int16_t **lumPixBuf = c->lumPixBuf;
00357 int16_t **chrUPixBuf = c->chrUPixBuf;
00358 int16_t **chrVPixBuf = c->chrVPixBuf;
00359 int16_t **alpPixBuf = c->alpPixBuf;
00360 const int vLumBufSize = c->vLumBufSize;
00361 const int vChrBufSize = c->vChrBufSize;
00362 uint8_t *formatConvBuffer = c->formatConvBuffer;
00363 uint32_t *pal = c->pal_yuv;
00364 yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
00365 yuv2planarX_fn yuv2planeX = c->yuv2planeX;
00366 yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
00367 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
00368 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
00369 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
00370 const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample;
00371 const int chrSrcSliceH = -((-srcSliceH) >> c->chrSrcVSubSample);
00372 int should_dither = is9_OR_10BPS(c->srcFormat) ||
00373 is16BPS(c->srcFormat);
00374 int lastDstY;
00375
00376
00377 int dstY = c->dstY;
00378 int lumBufIndex = c->lumBufIndex;
00379 int chrBufIndex = c->chrBufIndex;
00380 int lastInLumBuf = c->lastInLumBuf;
00381 int lastInChrBuf = c->lastInChrBuf;
00382
00383 if (isPacked(c->srcFormat)) {
00384 src[0] =
00385 src[1] =
00386 src[2] =
00387 src[3] = src[0];
00388 srcStride[0] =
00389 srcStride[1] =
00390 srcStride[2] =
00391 srcStride[3] = srcStride[0];
00392 }
00393 srcStride[1] <<= c->vChrDrop;
00394 srcStride[2] <<= c->vChrDrop;
00395
00396 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
00397 src[0], srcStride[0], src[1], srcStride[1],
00398 src[2], srcStride[2], src[3], srcStride[3],
00399 dst[0], dstStride[0], dst[1], dstStride[1],
00400 dst[2], dstStride[2], dst[3], dstStride[3]);
00401 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
00402 srcSliceY, srcSliceH, dstY, dstH);
00403 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
00404 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
00405
00406 if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 ||
00407 dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
00408 static int warnedAlready = 0;
00409 if (flags & SWS_PRINT_INFO && !warnedAlready) {
00410 av_log(c, AV_LOG_WARNING,
00411 "Warning: dstStride is not aligned!\n"
00412 " ->cannot do aligned memory accesses anymore\n");
00413 warnedAlready = 1;
00414 }
00415 }
00416
00417 if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
00418 || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
00419 || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
00420 ) {
00421 static int warnedAlready=0;
00422 int cpu_flags = av_get_cpu_flags();
00423 if (HAVE_MMXEXT && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
00424 av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
00425 warnedAlready=1;
00426 }
00427 }
00428
00429
00430
00431
00432 if (srcSliceY == 0) {
00433 lumBufIndex = -1;
00434 chrBufIndex = -1;
00435 dstY = 0;
00436 lastInLumBuf = -1;
00437 lastInChrBuf = -1;
00438 }
00439
00440 if (!should_dither) {
00441 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
00442 }
00443 lastDstY = dstY;
00444
00445 for (; dstY < dstH; dstY++) {
00446 const int chrDstY = dstY >> c->chrDstVSubSample;
00447 uint8_t *dest[4] = {
00448 dst[0] + dstStride[0] * dstY,
00449 dst[1] + dstStride[1] * chrDstY,
00450 dst[2] + dstStride[2] * chrDstY,
00451 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
00452 };
00453 int use_mmx_vfilter= c->use_mmx_vfilter;
00454
00455
00456 const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
00457 const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
00458
00459 const int firstChrSrcY = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
00460
00461
00462 int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
00463 int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
00464 int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
00465 int enough_lines;
00466
00467
00468 if (firstLumSrcY > lastInLumBuf)
00469 lastInLumBuf = firstLumSrcY - 1;
00470 if (firstChrSrcY > lastInChrBuf)
00471 lastInChrBuf = firstChrSrcY - 1;
00472 av_assert0(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
00473 av_assert0(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
00474
00475 DEBUG_BUFFERS("dstY: %d\n", dstY);
00476 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
00477 firstLumSrcY, lastLumSrcY, lastInLumBuf);
00478 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
00479 firstChrSrcY, lastChrSrcY, lastInChrBuf);
00480
00481
00482 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
00483 lastChrSrcY < -((-srcSliceY - srcSliceH) >> c->chrSrcVSubSample);
00484
00485 if (!enough_lines) {
00486 lastLumSrcY = srcSliceY + srcSliceH - 1;
00487 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
00488 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
00489 lastLumSrcY, lastChrSrcY);
00490 }
00491
00492
00493 while (lastInLumBuf < lastLumSrcY) {
00494 const uint8_t *src1[4] = {
00495 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
00496 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
00497 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
00498 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
00499 };
00500 lumBufIndex++;
00501 av_assert0(lumBufIndex < 2 * vLumBufSize);
00502 av_assert0(lastInLumBuf + 1 - srcSliceY < srcSliceH);
00503 av_assert0(lastInLumBuf + 1 - srcSliceY >= 0);
00504 hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc,
00505 hLumFilter, hLumFilterPos, hLumFilterSize,
00506 formatConvBuffer, pal, 0);
00507 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
00508 hyscale(c, alpPixBuf[lumBufIndex], dstW, src1, srcW,
00509 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
00510 formatConvBuffer, pal, 1);
00511 lastInLumBuf++;
00512 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
00513 lumBufIndex, lastInLumBuf);
00514 }
00515 while (lastInChrBuf < lastChrSrcY) {
00516 const uint8_t *src1[4] = {
00517 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
00518 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
00519 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
00520 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
00521 };
00522 chrBufIndex++;
00523 av_assert0(chrBufIndex < 2 * vChrBufSize);
00524 av_assert0(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
00525 av_assert0(lastInChrBuf + 1 - chrSrcSliceY >= 0);
00526
00527
00528 if (c->needs_hcscale)
00529 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
00530 chrDstW, src1, chrSrcW, chrXInc,
00531 hChrFilter, hChrFilterPos, hChrFilterSize,
00532 formatConvBuffer, pal);
00533 lastInChrBuf++;
00534 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
00535 chrBufIndex, lastInChrBuf);
00536 }
00537
00538 if (lumBufIndex >= vLumBufSize)
00539 lumBufIndex -= vLumBufSize;
00540 if (chrBufIndex >= vChrBufSize)
00541 chrBufIndex -= vChrBufSize;
00542 if (!enough_lines)
00543 break;
00544
00545 #if HAVE_MMX_INLINE
00546 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex,
00547 lastInLumBuf, lastInChrBuf);
00548 #endif
00549 if (should_dither) {
00550 c->chrDither8 = dither_8x8_128[chrDstY & 7];
00551 c->lumDither8 = dither_8x8_128[dstY & 7];
00552 }
00553 if (dstY >= dstH - 2) {
00554
00555
00556 ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
00557 &yuv2packed1, &yuv2packed2, &yuv2packedX);
00558 use_mmx_vfilter= 0;
00559 }
00560
00561 {
00562 const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
00563 const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
00564 const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
00565 const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ?
00566 (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
00567 int16_t *vLumFilter = c->vLumFilter;
00568 int16_t *vChrFilter = c->vChrFilter;
00569
00570 if (isPlanarYUV(dstFormat) ||
00571 (isGray(dstFormat) && !isALPHA(dstFormat))) {
00572 const int chrSkipMask = (1 << c->chrDstVSubSample) - 1;
00573
00574 vLumFilter += dstY * vLumFilterSize;
00575 vChrFilter += chrDstY * vChrFilterSize;
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586 if(use_mmx_vfilter){
00587 vLumFilter= c->lumMmxFilter;
00588 vChrFilter= c->chrMmxFilter;
00589 }
00590
00591 if (vLumFilterSize == 1) {
00592 yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
00593 } else {
00594 yuv2planeX(vLumFilter, vLumFilterSize,
00595 lumSrcPtr, dest[0],
00596 dstW, c->lumDither8, 0);
00597 }
00598
00599 if (!((dstY & chrSkipMask) || isGray(dstFormat))) {
00600 if (yuv2nv12cX) {
00601 yuv2nv12cX(c, vChrFilter,
00602 vChrFilterSize, chrUSrcPtr, chrVSrcPtr,
00603 dest[1], chrDstW);
00604 } else if (vChrFilterSize == 1) {
00605 yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
00606 yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
00607 } else {
00608 yuv2planeX(vChrFilter,
00609 vChrFilterSize, chrUSrcPtr, dest[1],
00610 chrDstW, c->chrDither8, 0);
00611 yuv2planeX(vChrFilter,
00612 vChrFilterSize, chrVSrcPtr, dest[2],
00613 chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
00614 }
00615 }
00616
00617 if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
00618 if(use_mmx_vfilter){
00619 vLumFilter= c->alpMmxFilter;
00620 }
00621 if (vLumFilterSize == 1) {
00622 yuv2plane1(alpSrcPtr[0], dest[3], dstW,
00623 c->lumDither8, 0);
00624 } else {
00625 yuv2planeX(vLumFilter,
00626 vLumFilterSize, alpSrcPtr, dest[3],
00627 dstW, c->lumDither8, 0);
00628 }
00629 }
00630 } else {
00631 av_assert1(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize * 2);
00632 av_assert1(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize * 2);
00633 if (c->yuv2packed1 && vLumFilterSize == 1 &&
00634 vChrFilterSize <= 2) {
00635 int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
00636 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
00637 alpPixBuf ? *alpSrcPtr : NULL,
00638 dest[0], dstW, chrAlpha, dstY);
00639 } else if (c->yuv2packed2 && vLumFilterSize == 2 &&
00640 vChrFilterSize == 2) {
00641 int lumAlpha = vLumFilter[2 * dstY + 1];
00642 int chrAlpha = vChrFilter[2 * dstY + 1];
00643 lumMmxFilter[2] =
00644 lumMmxFilter[3] = vLumFilter[2 * dstY] * 0x10001;
00645 chrMmxFilter[2] =
00646 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
00647 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
00648 alpPixBuf ? alpSrcPtr : NULL,
00649 dest[0], dstW, lumAlpha, chrAlpha, dstY);
00650 } else {
00651 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
00652 lumSrcPtr, vLumFilterSize,
00653 vChrFilter + dstY * vChrFilterSize,
00654 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
00655 alpSrcPtr, dest[0], dstW, dstY);
00656 }
00657 }
00658 }
00659 }
00660
00661 if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
00662 fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255);
00663
00664 #if HAVE_MMXEXT_INLINE
00665 if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT)
00666 __asm__ volatile ("sfence" ::: "memory");
00667 #endif
00668 emms_c();
00669
00670
00671 c->dstY = dstY;
00672 c->lumBufIndex = lumBufIndex;
00673 c->chrBufIndex = chrBufIndex;
00674 c->lastInLumBuf = lastInLumBuf;
00675 c->lastInChrBuf = lastInChrBuf;
00676
00677 return dstY - lastDstY;
00678 }
00679
00680 static av_cold void sws_init_swScale_c(SwsContext *c)
00681 {
00682 enum PixelFormat srcFormat = c->srcFormat;
00683
00684 ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
00685 &c->yuv2nv12cX, &c->yuv2packed1,
00686 &c->yuv2packed2, &c->yuv2packedX);
00687
00688 ff_sws_init_input_funcs(c);
00689
00690
00691 if (c->srcBpc == 8) {
00692 if (c->dstBpc <= 14) {
00693 c->hyScale = c->hcScale = hScale8To15_c;
00694 if (c->flags & SWS_FAST_BILINEAR) {
00695 c->hyscale_fast = hyscale_fast_c;
00696 c->hcscale_fast = hcscale_fast_c;
00697 }
00698 } else {
00699 c->hyScale = c->hcScale = hScale8To19_c;
00700 }
00701 } else {
00702 c->hyScale = c->hcScale = c->dstBpc > 14 ? hScale16To19_c
00703 : hScale16To15_c;
00704 }
00705
00706 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
00707 if (c->dstBpc <= 14) {
00708 if (c->srcRange) {
00709 c->lumConvertRange = lumRangeFromJpeg_c;
00710 c->chrConvertRange = chrRangeFromJpeg_c;
00711 } else {
00712 c->lumConvertRange = lumRangeToJpeg_c;
00713 c->chrConvertRange = chrRangeToJpeg_c;
00714 }
00715 } else {
00716 if (c->srcRange) {
00717 c->lumConvertRange = lumRangeFromJpeg16_c;
00718 c->chrConvertRange = chrRangeFromJpeg16_c;
00719 } else {
00720 c->lumConvertRange = lumRangeToJpeg16_c;
00721 c->chrConvertRange = chrRangeToJpeg16_c;
00722 }
00723 }
00724 }
00725
00726 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
00727 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
00728 c->needs_hcscale = 1;
00729 }
00730
00731 SwsFunc ff_getSwsFunc(SwsContext *c)
00732 {
00733 sws_init_swScale_c(c);
00734
00735 if (HAVE_MMX)
00736 ff_sws_init_swScale_mmx(c);
00737 if (HAVE_ALTIVEC)
00738 ff_sws_init_swScale_altivec(c);
00739
00740 return swScale;
00741 }