FFmpeg: libswscale/swscale.c Source File

00001 /*
00002  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with FFmpeg; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 #include <assert.h>
00022 #include <inttypes.h>
00023 #include <math.h>
00024 #include <stdio.h>
00025 #include <string.h>
00026 
00027 #include "libavutil/avassert.h"
00028 #include "libavutil/avutil.h"
00029 #include "libavutil/bswap.h"
00030 #include "libavutil/cpu.h"
00031 #include "libavutil/intreadwrite.h"
00032 #include "libavutil/mathematics.h"
00033 #include "libavutil/pixdesc.h"
00034 #include "config.h"
00035 #include "rgb2rgb.h"
00036 #include "swscale_internal.h"
00037 #include "swscale.h"
00038 
00039 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
00040     {  36, 68,  60, 92,  34, 66,  58, 90, },
00041     { 100,  4, 124, 28,  98,  2, 122, 26, },
00042     {  52, 84,  44, 76,  50, 82,  42, 74, },
00043     { 116, 20, 108, 12, 114, 18, 106, 10, },
00044     {  32, 64,  56, 88,  38, 70,  62, 94, },
00045     {  96,  0, 120, 24, 102,  6, 126, 30, },
00046     {  48, 80,  40, 72,  54, 86,  46, 78, },
00047     { 112, 16, 104,  8, 118, 22, 110, 14, },
00048 };
00049 
00050 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = {
00051     64, 64, 64, 64, 64, 64, 64, 64
00052 };
00053 
00054 static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
00055                                        int height, int y, uint8_t val)
00056 {
00057     int i;
00058     uint8_t *ptr = plane + stride * y;
00059     for (i = 0; i < height; i++) {
00060         memset(ptr, val, width);
00061         ptr += stride;
00062     }
00063 }
00064 
00065 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
00066                            const uint8_t *_src, const int16_t *filter,
00067                            const int32_t *filterPos, int filterSize)
00068 {
00069     int i;
00070     int32_t *dst        = (int32_t *) _dst;
00071     const uint16_t *src = (const uint16_t *) _src;
00072     int bits            = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
00073     int sh              = bits - 4;
00074 
00075     if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
00076         sh= 9;
00077 
00078     for (i = 0; i < dstW; i++) {
00079         int j;
00080         int srcPos = filterPos[i];
00081         int val    = 0;
00082 
00083         for (j = 0; j < filterSize; j++) {
00084             val += src[srcPos + j] * filter[filterSize * i + j];
00085         }
00086         // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
00087         dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
00088     }
00089 }
00090 
00091 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
00092                            const uint8_t *_src, const int16_t *filter,
00093                            const int32_t *filterPos, int filterSize)
00094 {
00095     int i;
00096     const uint16_t *src = (const uint16_t *) _src;
00097     int sh              = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
00098 
00099     if(sh<15)
00100         sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
00101 
00102     for (i = 0; i < dstW; i++) {
00103         int j;
00104         int srcPos = filterPos[i];
00105         int val    = 0;
00106 
00107         for (j = 0; j < filterSize; j++) {
00108             val += src[srcPos + j] * filter[filterSize * i + j];
00109         }
00110         // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
00111         dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
00112     }
00113 }
00114 
00115 // bilinear / bicubic scaling
00116 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW,
00117                           const uint8_t *src, const int16_t *filter,
00118                           const int32_t *filterPos, int filterSize)
00119 {
00120     int i;
00121     for (i = 0; i < dstW; i++) {
00122         int j;
00123         int srcPos = filterPos[i];
00124         int val    = 0;
00125         for (j = 0; j < filterSize; j++) {
00126             val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
00127         }
00128         dst[i] = FFMIN(val >> 7, (1 << 15) - 1); // the cubic equation does overflow ...
00129     }
00130 }
00131 
00132 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW,
00133                           const uint8_t *src, const int16_t *filter,
00134                           const int32_t *filterPos, int filterSize)
00135 {
00136     int i;
00137     int32_t *dst = (int32_t *) _dst;
00138     for (i = 0; i < dstW; i++) {
00139         int j;
00140         int srcPos = filterPos[i];
00141         int val    = 0;
00142         for (j = 0; j < filterSize; j++) {
00143             val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
00144         }
00145         dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ...
00146     }
00147 }
00148 
// FIXME all pal and rgb srcFormats could do this conversion as well
// FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Apply the "to JPEG range" transform in place to two chroma lines of 15-bit
 * intermediates. Inputs are clamped to 30775 before being rescaled.
 *
 * @param dstU  first chroma line, width entries, modified in place
 * @param dstV  second chroma line, width entries, modified in place
 * @param width number of samples per line
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        const int u = FFMIN(dstU[x], 30775);
        dstU[x]     = (u * 4663 - 9289992) >> 12; // -264

        {
            const int v = FFMIN(dstV[x], 30775);
            dstV[x]     = (v * 4663 - 9289992) >> 12; // -264
        }
    }
}
00159 
/**
 * Apply the "from JPEG range" transform in place to two chroma lines of
 * 15-bit intermediates.
 *
 * @param dstU  first chroma line, width entries, modified in place
 * @param dstV  second chroma line, width entries, modified in place
 * @param width number of samples per line
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int x = 0;

    while (x < width) {
        const int u = dstU[x] * 1799 + 4081085;
        dstU[x]     = u >> 11; // 1469

        {
            const int v = dstV[x] * 1799 + 4081085;
            dstV[x]     = v >> 11; // 1469
        }
        x++;
    }
}
00168 
/**
 * Apply the "to JPEG range" transform in place to one luma line of 15-bit
 * intermediates. Inputs are clamped to 30189 before being rescaled.
 *
 * @param dst   luma line, width entries, modified in place
 * @param width number of samples
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        const int y = FFMIN(dst[x], 30189);
        dst[x]      = (y * 19077 - 39057361) >> 14;
    }
}
00175 
/**
 * Apply the "from JPEG range" transform in place to one luma line of 15-bit
 * intermediates.
 *
 * @param dst   luma line, width entries, modified in place
 * @param width number of samples
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int x = 0;

    while (x < width) {
        const int scaled = dst[x] * 14071 + 33561947;
        dst[x]           = scaled >> 14;
        x++;
    }
}
00182 
/**
 * Apply the "to JPEG range" transform in place to two chroma lines of 19-bit
 * intermediates stored as int32_t. Inputs are clamped to (30775 << 4) before
 * being rescaled.
 *
 * At the clamp the product (30775 << 4) * 4663 = 2296061200 does not fit in
 * a signed 32-bit int, so the multiply/subtract is done in unsigned
 * arithmetic (well-defined wrap-around) and only the final difference, which
 * does fit, is converted back to a signed value before the shift.
 *
 * @param _dstU first chroma line, accessed as int32_t[width], modified in place
 * @param _dstV second chroma line, accessed as int32_t[width], modified in place
 * @param width number of samples per line
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    const int32_t limit = 30775 << 4;
    int32_t *dstU       = (int32_t *) _dstU;
    int32_t *dstV       = (int32_t *) _dstV;
    int i;

    for (i = 0; i < width; i++) {
        const int32_t u = dstU[i] > limit ? limit : dstU[i];
        const int32_t v = dstV[i] > limit ? limit : dstV[i];

        dstU[i] = (int32_t)((uint32_t)u * 4663U - (9289992U << 4)) >> 12; // -264
        dstV[i] = (int32_t)((uint32_t)v * 4663U - (9289992U << 4)) >> 12; // -264
    }
}
00193 
/**
 * Apply the "from JPEG range" transform in place to two chroma lines of
 * 19-bit intermediates stored as int32_t.
 *
 * @param _dstU first chroma line, accessed as int32_t[width], modified in place
 * @param _dstV second chroma line, accessed as int32_t[width], modified in place
 * @param width number of samples per line
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    int x      = 0;

    while (x < width) {
        u[x] = (u[x] * 1799 + (4081085 << 4)) >> 11; // 1469
        v[x] = (v[x] * 1799 + (4081085 << 4)) >> 11; // 1469
        x++;
    }
}
00204 
/**
 * Apply the "to JPEG range" transform in place to one luma line of 19-bit
 * intermediates stored as int32_t. Inputs are clamped to (30189 << 4) before
 * being rescaled.
 *
 * At the clamp the product (30189 << 4) * 4769 = 2303541456 does not fit in
 * a signed 32-bit int, so the multiply/subtract is done in unsigned
 * arithmetic (well-defined wrap-around) and only the final difference, which
 * does fit, is converted back to a signed value before the shift.
 *
 * @param _dst  luma line, accessed as int32_t[width], modified in place
 * @param width number of samples
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    const int32_t limit = 30189 << 4;
    int32_t *dst        = (int32_t *) _dst;
    int i;

    for (i = 0; i < width; i++) {
        const int32_t y = dst[i] > limit ? limit : dst[i];

        dst[i] = (int32_t)((uint32_t)y * 4769U - (39057361U << 2)) >> 12;
    }
}
00212 
/**
 * Apply the "from JPEG range" transform in place to one luma line of 19-bit
 * intermediates stored as int32_t.
 *
 * @param _dst  luma line, accessed as int32_t[width], modified in place
 * @param width number of samples
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *y = (int32_t *) _dst;
    int x      = 0;

    while (x < width) {
        /* Both constants are pre-divided by 4 (integer division). */
        const int scaled = y[x] * (14071 / 4) + (33561947 << 4) / 4;
        y[x]             = scaled >> 12;
        x++;
    }
}
00220 
00221 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
00222                            const uint8_t *src, int srcW, int xInc)
00223 {
00224     int i;
00225     unsigned int xpos = 0;
00226     for (i = 0; i < dstWidth; i++) {
00227         register unsigned int xx     = xpos >> 16;
00228         register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
00229         dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
00230         xpos  += xInc;
00231     }
00232     for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
00233         dst[i] = src[srcW-1]*128;
00234 }
00235 
00236 // *** horizontal scale Y line to temp buffer
00237 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
00238                                      const uint8_t *src_in[4],
00239                                      int srcW, int xInc,
00240                                      const int16_t *hLumFilter,
00241                                      const int32_t *hLumFilterPos,
00242                                      int hLumFilterSize,
00243                                      uint8_t *formatConvBuffer,
00244                                      uint32_t *pal, int isAlpha)
00245 {
00246     void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) =
00247         isAlpha ? c->alpToYV12 : c->lumToYV12;
00248     void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
00249     const uint8_t *src = src_in[isAlpha ? 3 : 0];
00250 
00251     if (toYV12) {
00252         toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
00253         src = formatConvBuffer;
00254     } else if (c->readLumPlanar && !isAlpha) {
00255         c->readLumPlanar(formatConvBuffer, src_in, srcW);
00256         src = formatConvBuffer;
00257     }
00258 
00259     if (!c->hyscale_fast) {
00260         c->hyScale(c, dst, dstWidth, src, hLumFilter,
00261                    hLumFilterPos, hLumFilterSize);
00262     } else { // fast bilinear upscale / crap downscale
00263         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
00264     }
00265 
00266     if (convertRange)
00267         convertRange(dst, dstWidth);
00268 }
00269 
00270 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
00271                            int dstWidth, const uint8_t *src1,
00272                            const uint8_t *src2, int srcW, int xInc)
00273 {
00274     int i;
00275     unsigned int xpos = 0;
00276     for (i = 0; i < dstWidth; i++) {
00277         register unsigned int xx     = xpos >> 16;
00278         register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
00279         dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
00280         dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
00281         xpos   += xInc;
00282     }
00283     for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
00284         dst1[i] = src1[srcW-1]*128;
00285         dst2[i] = src2[srcW-1]*128;
00286     }
00287 }
00288 
00289 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
00290                                      int16_t *dst2, int dstWidth,
00291                                      const uint8_t *src_in[4],
00292                                      int srcW, int xInc,
00293                                      const int16_t *hChrFilter,
00294                                      const int32_t *hChrFilterPos,
00295                                      int hChrFilterSize,
00296                                      uint8_t *formatConvBuffer, uint32_t *pal)
00297 {
00298     const uint8_t *src1 = src_in[1], *src2 = src_in[2];
00299     if (c->chrToYV12) {
00300         uint8_t *buf2 = formatConvBuffer +
00301                         FFALIGN(srcW*2+78, 16);
00302         c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
00303         src1= formatConvBuffer;
00304         src2= buf2;
00305     } else if (c->readChrPlanar) {
00306         uint8_t *buf2 = formatConvBuffer +
00307                         FFALIGN(srcW*2+78, 16);
00308         c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
00309         src1 = formatConvBuffer;
00310         src2 = buf2;
00311     }
00312 
00313     if (!c->hcscale_fast) {
00314         c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
00315         c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
00316     } else { // fast bilinear upscale / crap downscale
00317         c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
00318     }
00319 
00320     if (c->chrConvertRange)
00321         c->chrConvertRange(dst1, dst2, dstWidth);
00322 }
00323 
/* Set to 1 to log the line-buffer bookkeeping of swScale(). */
#define DEBUG_SWSCALE_BUFFERS 0
/*
 * Log with av_log() at AV_LOG_DEBUG when DEBUG_SWSCALE_BUFFERS is non-zero.
 * Expects a variable named `c` in scope as the logging context. Wrapped in
 * do { } while (0) so that it behaves as a single statement and cannot
 * capture an `else` that follows the call site.
 */
#define DEBUG_BUFFERS(...)                          \
    do {                                            \
        if (DEBUG_SWSCALE_BUFFERS)                  \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);   \
    } while (0)
00328 
00329 static int swScale(SwsContext *c, const uint8_t *src[],
00330                    int srcStride[], int srcSliceY,
00331                    int srcSliceH, uint8_t *dst[], int dstStride[])
00332 {
00333     /* load a few things into local vars to make the code more readable?
00334      * and faster */
00335     const int srcW                   = c->srcW;
00336     const int dstW                   = c->dstW;
00337     const int dstH                   = c->dstH;
00338     const int chrDstW                = c->chrDstW;
00339     const int chrSrcW                = c->chrSrcW;
00340     const int lumXInc                = c->lumXInc;
00341     const int chrXInc                = c->chrXInc;
00342     const enum PixelFormat dstFormat = c->dstFormat;
00343     const int flags                  = c->flags;
00344     int32_t *vLumFilterPos           = c->vLumFilterPos;
00345     int32_t *vChrFilterPos           = c->vChrFilterPos;
00346     int32_t *hLumFilterPos           = c->hLumFilterPos;
00347     int32_t *hChrFilterPos           = c->hChrFilterPos;
00348     int16_t *hLumFilter              = c->hLumFilter;
00349     int16_t *hChrFilter              = c->hChrFilter;
00350     int32_t *lumMmxFilter            = c->lumMmxFilter;
00351     int32_t *chrMmxFilter            = c->chrMmxFilter;
00352     const int vLumFilterSize         = c->vLumFilterSize;
00353     const int vChrFilterSize         = c->vChrFilterSize;
00354     const int hLumFilterSize         = c->hLumFilterSize;
00355     const int hChrFilterSize         = c->hChrFilterSize;
00356     int16_t **lumPixBuf              = c->lumPixBuf;
00357     int16_t **chrUPixBuf             = c->chrUPixBuf;
00358     int16_t **chrVPixBuf             = c->chrVPixBuf;
00359     int16_t **alpPixBuf              = c->alpPixBuf;
00360     const int vLumBufSize            = c->vLumBufSize;
00361     const int vChrBufSize            = c->vChrBufSize;
00362     uint8_t *formatConvBuffer        = c->formatConvBuffer;
00363     uint32_t *pal                    = c->pal_yuv;
00364     yuv2planar1_fn yuv2plane1        = c->yuv2plane1;
00365     yuv2planarX_fn yuv2planeX        = c->yuv2planeX;
00366     yuv2interleavedX_fn yuv2nv12cX   = c->yuv2nv12cX;
00367     yuv2packed1_fn yuv2packed1       = c->yuv2packed1;
00368     yuv2packed2_fn yuv2packed2       = c->yuv2packed2;
00369     yuv2packedX_fn yuv2packedX       = c->yuv2packedX;
00370     const int chrSrcSliceY           =     srcSliceY  >> c->chrSrcVSubSample;
00371     const int chrSrcSliceH           = -((-srcSliceH) >> c->chrSrcVSubSample);
00372     int should_dither                = is9_OR_10BPS(c->srcFormat) ||
00373                                        is16BPS(c->srcFormat);
00374     int lastDstY;
00375 
00376     /* vars which will change and which we need to store back in the context */
00377     int dstY         = c->dstY;
00378     int lumBufIndex  = c->lumBufIndex;
00379     int chrBufIndex  = c->chrBufIndex;
00380     int lastInLumBuf = c->lastInLumBuf;
00381     int lastInChrBuf = c->lastInChrBuf;
00382 
00383     if (isPacked(c->srcFormat)) {
00384         src[0] =
00385         src[1] =
00386         src[2] =
00387         src[3] = src[0];
00388         srcStride[0] =
00389         srcStride[1] =
00390         srcStride[2] =
00391         srcStride[3] = srcStride[0];
00392     }
00393     srcStride[1] <<= c->vChrDrop;
00394     srcStride[2] <<= c->vChrDrop;
00395 
00396     DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
00397                   src[0], srcStride[0], src[1], srcStride[1],
00398                   src[2], srcStride[2], src[3], srcStride[3],
00399                   dst[0], dstStride[0], dst[1], dstStride[1],
00400                   dst[2], dstStride[2], dst[3], dstStride[3]);
00401     DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
00402                   srcSliceY, srcSliceH, dstY, dstH);
00403     DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
00404                   vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
00405 
00406     if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 ||
00407         dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
00408         static int warnedAlready = 0; // FIXME maybe move this into the context
00409         if (flags & SWS_PRINT_INFO && !warnedAlready) {
00410             av_log(c, AV_LOG_WARNING,
00411                    "Warning: dstStride is not aligned!\n"
00412                    "         ->cannot do aligned memory accesses anymore\n");
00413             warnedAlready = 1;
00414         }
00415     }
00416 
00417     if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
00418         || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
00419         || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
00420     ) {
00421         static int warnedAlready=0;
00422         int cpu_flags = av_get_cpu_flags();
00423         if (HAVE_MMXEXT && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
00424             av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
00425             warnedAlready=1;
00426         }
00427     }
00428 
00429     /* Note the user might start scaling the picture in the middle so this
00430      * will not get executed. This is not really intended but works
00431      * currently, so people might do it. */
00432     if (srcSliceY == 0) {
00433         lumBufIndex  = -1;
00434         chrBufIndex  = -1;
00435         dstY         = 0;
00436         lastInLumBuf = -1;
00437         lastInChrBuf = -1;
00438     }
00439 
00440     if (!should_dither) {
00441         c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
00442     }
00443     lastDstY = dstY;
00444 
00445     for (; dstY < dstH; dstY++) {
00446         const int chrDstY = dstY >> c->chrDstVSubSample;
00447         uint8_t *dest[4]  = {
00448             dst[0] + dstStride[0] * dstY,
00449             dst[1] + dstStride[1] * chrDstY,
00450             dst[2] + dstStride[2] * chrDstY,
00451             (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
00452         };
00453         int use_mmx_vfilter= c->use_mmx_vfilter;
00454 
00455         // First line needed as input
00456         const int firstLumSrcY  = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
00457         const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
00458         // First line needed as input
00459         const int firstChrSrcY  = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
00460 
00461         // Last line needed as input
00462         int lastLumSrcY  = FFMIN(c->srcH,    firstLumSrcY  + vLumFilterSize) - 1;
00463         int lastLumSrcY2 = FFMIN(c->srcH,    firstLumSrcY2 + vLumFilterSize) - 1;
00464         int lastChrSrcY  = FFMIN(c->chrSrcH, firstChrSrcY  + vChrFilterSize) - 1;
00465         int enough_lines;
00466 
00467         // handle holes (FAST_BILINEAR & weird filters)
00468         if (firstLumSrcY > lastInLumBuf)
00469             lastInLumBuf = firstLumSrcY - 1;
00470         if (firstChrSrcY > lastInChrBuf)
00471             lastInChrBuf = firstChrSrcY - 1;
00472         av_assert0(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
00473         av_assert0(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
00474 
00475         DEBUG_BUFFERS("dstY: %d\n", dstY);
00476         DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
00477                       firstLumSrcY, lastLumSrcY, lastInLumBuf);
00478         DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
00479                       firstChrSrcY, lastChrSrcY, lastInChrBuf);
00480 
00481         // Do we have enough lines in this slice to output the dstY line
00482         enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
00483                        lastChrSrcY < -((-srcSliceY - srcSliceH) >> c->chrSrcVSubSample);
00484 
00485         if (!enough_lines) {
00486             lastLumSrcY = srcSliceY + srcSliceH - 1;
00487             lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
00488             DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
00489                           lastLumSrcY, lastChrSrcY);
00490         }
00491 
00492         // Do horizontal scaling
00493         while (lastInLumBuf < lastLumSrcY) {
00494             const uint8_t *src1[4] = {
00495                 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
00496                 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
00497                 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
00498                 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
00499             };
00500             lumBufIndex++;
00501             av_assert0(lumBufIndex < 2 * vLumBufSize);
00502             av_assert0(lastInLumBuf + 1 - srcSliceY < srcSliceH);
00503             av_assert0(lastInLumBuf + 1 - srcSliceY >= 0);
00504             hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc,
00505                     hLumFilter, hLumFilterPos, hLumFilterSize,
00506                     formatConvBuffer, pal, 0);
00507             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
00508                 hyscale(c, alpPixBuf[lumBufIndex], dstW, src1, srcW,
00509                         lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
00510                         formatConvBuffer, pal, 1);
00511             lastInLumBuf++;
00512             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
00513                           lumBufIndex, lastInLumBuf);
00514         }
00515         while (lastInChrBuf < lastChrSrcY) {
00516             const uint8_t *src1[4] = {
00517                 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
00518                 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
00519                 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
00520                 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
00521             };
00522             chrBufIndex++;
00523             av_assert0(chrBufIndex < 2 * vChrBufSize);
00524             av_assert0(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
00525             av_assert0(lastInChrBuf + 1 - chrSrcSliceY >= 0);
00526             // FIXME replace parameters through context struct (some at least)
00527 
00528             if (c->needs_hcscale)
00529                 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
00530                         chrDstW, src1, chrSrcW, chrXInc,
00531                         hChrFilter, hChrFilterPos, hChrFilterSize,
00532                         formatConvBuffer, pal);
00533             lastInChrBuf++;
00534             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
00535                           chrBufIndex, lastInChrBuf);
00536         }
00537         // wrap buf index around to stay inside the ring buffer
00538         if (lumBufIndex >= vLumBufSize)
00539             lumBufIndex -= vLumBufSize;
00540         if (chrBufIndex >= vChrBufSize)
00541             chrBufIndex -= vChrBufSize;
00542         if (!enough_lines)
00543             break;  // we can't output a dstY line so let's try with the next slice
00544 
00545 #if HAVE_MMX_INLINE
00546         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex,
00547                               lastInLumBuf, lastInChrBuf);
00548 #endif
00549         if (should_dither) {
00550             c->chrDither8 = dither_8x8_128[chrDstY & 7];
00551             c->lumDither8 = dither_8x8_128[dstY    & 7];
00552         }
00553         if (dstY >= dstH - 2) {
00554             /* hmm looks like we can't use MMX here without overwriting
00555              * this array's tail */
00556             ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
00557                                      &yuv2packed1, &yuv2packed2, &yuv2packedX);
00558             use_mmx_vfilter= 0;
00559         }
00560 
00561         {
00562             const int16_t **lumSrcPtr  = (const int16_t **)(void*) lumPixBuf  + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
00563             const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
00564             const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
00565             const int16_t **alpSrcPtr  = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ?
00566                                          (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
00567             int16_t *vLumFilter = c->vLumFilter;
00568             int16_t *vChrFilter = c->vChrFilter;
00569 
00570             if (isPlanarYUV(dstFormat) ||
00571                 (isGray(dstFormat) && !isALPHA(dstFormat))) { // YV12 like
00572                 const int chrSkipMask = (1 << c->chrDstVSubSample) - 1;
00573 
00574                 vLumFilter +=    dstY * vLumFilterSize;
00575                 vChrFilter += chrDstY * vChrFilterSize;
00576 
00577 //                 av_assert0(use_mmx_vfilter != (
00578 //                                yuv2planeX == yuv2planeX_10BE_c
00579 //                             || yuv2planeX == yuv2planeX_10LE_c
00580 //                             || yuv2planeX == yuv2planeX_9BE_c
00581 //                             || yuv2planeX == yuv2planeX_9LE_c
00582 //                             || yuv2planeX == yuv2planeX_16BE_c
00583 //                             || yuv2planeX == yuv2planeX_16LE_c
00584 //                             || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
00585 
00586                 if(use_mmx_vfilter){
00587                     vLumFilter= c->lumMmxFilter;
00588                     vChrFilter= c->chrMmxFilter;
00589                 }
00590 
00591                 if (vLumFilterSize == 1) {
00592                     yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
00593                 } else {
00594                     yuv2planeX(vLumFilter, vLumFilterSize,
00595                                lumSrcPtr, dest[0],
00596                                dstW, c->lumDither8, 0);
00597                 }
00598 
00599                 if (!((dstY & chrSkipMask) || isGray(dstFormat))) {
00600                     if (yuv2nv12cX) {
00601                         yuv2nv12cX(c, vChrFilter,
00602                                    vChrFilterSize, chrUSrcPtr, chrVSrcPtr,
00603                                    dest[1], chrDstW);
00604                     } else if (vChrFilterSize == 1) {
00605                         yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
00606                         yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
00607                     } else {
00608                         yuv2planeX(vChrFilter,
00609                                    vChrFilterSize, chrUSrcPtr, dest[1],
00610                                    chrDstW, c->chrDither8, 0);
00611                         yuv2planeX(vChrFilter,
00612                                    vChrFilterSize, chrVSrcPtr, dest[2],
00613                                    chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
00614                     }
00615                 }
00616 
00617                 if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
00618                     if(use_mmx_vfilter){
00619                         vLumFilter= c->alpMmxFilter;
00620                     }
00621                     if (vLumFilterSize == 1) {
00622                         yuv2plane1(alpSrcPtr[0], dest[3], dstW,
00623                                    c->lumDither8, 0);
00624                     } else {
00625                         yuv2planeX(vLumFilter,
00626                                    vLumFilterSize, alpSrcPtr, dest[3],
00627                                    dstW, c->lumDither8, 0);
00628                     }
00629                 }
00630             } else {
00631                 av_assert1(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize * 2);
00632                 av_assert1(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize * 2);
00633                 if (c->yuv2packed1 && vLumFilterSize == 1 &&
00634                     vChrFilterSize <= 2) { // unscaled RGB
00635                     int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
00636                     yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
00637                                 alpPixBuf ? *alpSrcPtr : NULL,
00638                                 dest[0], dstW, chrAlpha, dstY);
00639                 } else if (c->yuv2packed2 && vLumFilterSize == 2 &&
00640                            vChrFilterSize == 2) { // bilinear upscale RGB
00641                     int lumAlpha = vLumFilter[2 * dstY + 1];
00642                     int chrAlpha = vChrFilter[2 * dstY + 1];
00643                     lumMmxFilter[2] =
00644                     lumMmxFilter[3] = vLumFilter[2 * dstY]    * 0x10001;
00645                     chrMmxFilter[2] =
00646                     chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
00647                     yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
00648                                 alpPixBuf ? alpSrcPtr : NULL,
00649                                 dest[0], dstW, lumAlpha, chrAlpha, dstY);
00650                 } else { // general RGB
00651                     yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
00652                                 lumSrcPtr, vLumFilterSize,
00653                                 vChrFilter + dstY * vChrFilterSize,
00654                                 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
00655                                 alpSrcPtr, dest[0], dstW, dstY);
00656                 }
00657             }
00658         }
00659     }
00660 
00661     if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
00662         fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255);
00663 
00664 #if HAVE_MMXEXT_INLINE
00665     if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT)
00666         __asm__ volatile ("sfence" ::: "memory");
00667 #endif
00668     emms_c();
00669 
00670     /* store changed local vars back in the context */
00671     c->dstY         = dstY;
00672     c->lumBufIndex  = lumBufIndex;
00673     c->chrBufIndex  = chrBufIndex;
00674     c->lastInLumBuf = lastInLumBuf;
00675     c->lastInChrBuf = lastInChrBuf;
00676 
00677     return dstY - lastDstY;
00678 }
00679 
00680 static av_cold void sws_init_swScale_c(SwsContext *c)
00681 {
00682     enum PixelFormat srcFormat = c->srcFormat;
00683 
00684     ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
00685                              &c->yuv2nv12cX, &c->yuv2packed1,
00686                              &c->yuv2packed2, &c->yuv2packedX);
00687 
00688     ff_sws_init_input_funcs(c);
00689 
00690 
00691     if (c->srcBpc == 8) {
00692         if (c->dstBpc <= 14) {
00693             c->hyScale = c->hcScale = hScale8To15_c;
00694             if (c->flags & SWS_FAST_BILINEAR) {
00695                 c->hyscale_fast = hyscale_fast_c;
00696                 c->hcscale_fast = hcscale_fast_c;
00697             }
00698         } else {
00699             c->hyScale = c->hcScale = hScale8To19_c;
00700         }
00701     } else {
00702         c->hyScale = c->hcScale = c->dstBpc > 14 ? hScale16To19_c
00703                                                  : hScale16To15_c;
00704     }
00705 
00706     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
00707         if (c->dstBpc <= 14) {
00708             if (c->srcRange) {
00709                 c->lumConvertRange = lumRangeFromJpeg_c;
00710                 c->chrConvertRange = chrRangeFromJpeg_c;
00711             } else {
00712                 c->lumConvertRange = lumRangeToJpeg_c;
00713                 c->chrConvertRange = chrRangeToJpeg_c;
00714             }
00715         } else {
00716             if (c->srcRange) {
00717                 c->lumConvertRange = lumRangeFromJpeg16_c;
00718                 c->chrConvertRange = chrRangeFromJpeg16_c;
00719             } else {
00720                 c->lumConvertRange = lumRangeToJpeg16_c;
00721                 c->chrConvertRange = chrRangeToJpeg16_c;
00722             }
00723         }
00724     }
00725 
00726     if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
00727           srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
00728         c->needs_hcscale = 1;
00729 }
00730 
00731 SwsFunc ff_getSwsFunc(SwsContext *c)
00732 {
00733     sws_init_swScale_c(c);
00734 
00735     if (HAVE_MMX)
00736         ff_sws_init_swScale_mmx(c);
00737     if (HAVE_ALTIVEC)
00738         ff_sws_init_swScale_altivec(c);
00739 
00740     return swScale;
00741 }