FFmpeg: libswscale/swscale_template.c Source File

00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with FFmpeg; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
00022                               const int16_t **lumSrc, int lumFilterSize,
00023                               const int16_t *chrFilter, const int16_t **chrUSrc,
00024                               const int16_t **chrVSrc,
00025                               int chrFilterSize, const int16_t **alpSrc,
00026                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
00027                               uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
00028 {
00029     yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
00030                 chrFilter, chrUSrc, chrVSrc, chrFilterSize,
00031                 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
00032 }
00033 
00034 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
00035                                const int16_t **lumSrc, int lumFilterSize,
00036                                const int16_t *chrFilter, const int16_t **chrUSrc,
00037                                const int16_t **chrVSrc,
00038                                int chrFilterSize, uint8_t *dest, uint8_t *uDest,
00039                                int dstW, int chrDstW, enum PixelFormat dstFormat, const uint8_t *dither, const uint8_t *chrDither)
00040 {
00041     yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
00042                  chrFilter, chrUSrc, chrVSrc, chrFilterSize,
00043                  dest, uDest, dstW, chrDstW, dstFormat, dither, chrDither);
00044 }
00045 
00046 static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
00047                               const int16_t *chrUSrc, const int16_t *chrVSrc,
00048                               const int16_t *alpSrc,
00049                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
00050                               uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
00051 {
00052     int i;
00053 
00054     for (i=0; i<dstW; i++) {
00055         int val= (lumSrc[i]+lumDither[i&7])>>7;
00056         dest[i]= av_clip_uint8(val);
00057     }
00058 
00059     if (uDest)
00060         for (i=0; i<chrDstW; i++) {
00061             int u=(chrUSrc[i]+chrDither[i&7])>>7;
00062             int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7;
00063             uDest[i]= av_clip_uint8(u);
00064             vDest[i]= av_clip_uint8(v);
00065         }
00066 
00067     if (CONFIG_SWSCALE_ALPHA && aDest)
00068         for (i=0; i<dstW; i++) {
00069             int val= (alpSrc[i]+lumDither[i&7])>>7;
00070             aDest[i]= av_clip_uint8(val);
00071         }
00072 }
00073 
00074 
00078 static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
00079                                  const int16_t **lumSrc, int lumFilterSize,
00080                                  const int16_t *chrFilter, const int16_t **chrUSrc,
00081                                  const int16_t **chrVSrc,
00082                                  int chrFilterSize, const int16_t **alpSrc,
00083                                  uint8_t *dest, int dstW, int dstY)
00084 {
00085         yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
00086                        chrFilter, chrUSrc, chrVSrc, chrFilterSize,
00087                        alpSrc, dest, dstW, dstY);
00088 }
00089 
/**
 * Plain-C variant of the yuv2packed2 output hook.
 *
 * Derives the complementary weights yalpha1 = 4095 - yalpha and
 * uvalpha1 = 4095 - uvalpha, then expands YSCALE_YUV_2_ANYRGB_C with the
 * "2" family of helper macros.
 *
 * NOTE(review): the YSCALE_* macros are defined outside this view; they
 * presumably pick up the parameters and the locals i, yalpha1 and uvalpha1
 * by name, so none of these identifiers can be renamed without checking
 * the macro bodies -- confirm before refactoring.
 */
00093 static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
00094                                  const uint16_t *buf1, const uint16_t *ubuf0,
00095                                  const uint16_t *ubuf1, const uint16_t *vbuf0,
00096                                  const uint16_t *vbuf1, const uint16_t *abuf0,
00097                                  const uint16_t *abuf1, uint8_t *dest, int dstW,
00098                                  int yalpha, int uvalpha, int y)
00099 {
          /* complementary blend weights on a 0..4095 scale */
00100     int  yalpha1=4095- yalpha;
00101     int uvalpha1=4095-uvalpha;
00102     int i;
00103 
          /* the whole per-format output loop comes from this macro expansion */
00104     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
00105 }
00106 
/**
 * Plain-C variant of the yuv2packed1 output hook.
 *
 * Only one luma line (buf0) is passed in. The locals buf1, yalpha and
 * yalpha1 are set to buf0, 4096 and 0 respectively; the macro set is then
 * chosen by uvalpha: the RGB1 / PACKED1 macros when uvalpha < 2048, the
 * RGB1B / PACKED1B macros otherwise.
 *
 * NOTE(review): the YSCALE_* macros are defined outside this view; they
 * presumably consume the parameters (including dstFormat, flags and y) and
 * the locals declared here by name -- confirm before renaming anything.
 */
00110 static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
00111                                  const uint16_t *ubuf0, const uint16_t *ubuf1,
00112                                  const uint16_t *vbuf0, const uint16_t *vbuf1,
00113                                  const uint16_t *abuf0, uint8_t *dest, int dstW,
00114                                  int uvalpha, enum PixelFormat dstFormat,
00115                                  int flags, int y)
00116 {
00117     const int yalpha1=0;
00118     int i;
00119 
00120     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
00121     const int yalpha= 4096; //FIXME ...
00122 
          /* pick the macro family from the chroma weight */
00123     if (uvalpha < 2048) {
00124         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
00125     } else {
00126         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
00127     }
00128 }
00129 
00130 //FIXME yuy2* can read up to 7 samples too much
00131 
/**
 * Copy every even-indexed byte of src (offsets 0, 2, 4, ...) to dst.
 *
 * @param dst    output, receives width bytes
 * @param src    input, read at offsets 0 .. 2*(width-1)
 * @param width  number of samples to produce; nothing is written if <= 0
 * @param unused ignored
 */
static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                             uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dst[x] = src[x << 1];
        x++;
    }
}
00139 
/**
 * Split chroma out of 4-byte groups: byte 1 of each group goes to dstU,
 * byte 3 goes to dstV. Only src1 is read.
 *
 * @param dstU   U output, width bytes
 * @param dstV   V output, width bytes
 * @param src1   input, read at offsets 4*x + 1 and 4*x + 3
 * @param src2   must equal src1 (checked by assert after the copy)
 * @param width  number of chroma samples to produce
 * @param unused ignored
 */
static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                              const uint8_t *src2, int width, uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        const int group = 4 * x;

        dstU[x] = src1[group + 1];
        dstV[x] = src1[group + 3];
        x++;
    }
    assert(src1 == src2);
}
00150 
/**
 * Take the second byte of every 2-byte unit: src1[2*x + 1] goes to dstU,
 * src2[2*x + 1] goes to dstV.
 *
 * @param dstU   U output, width bytes
 * @param dstV   V output, width bytes
 * @param src1   U input, read at odd offsets
 * @param src2   V input, read at odd offsets
 * @param width  number of samples to produce
 * @param unused ignored
 */
static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                            const uint8_t *src2, int width, uint32_t *unused)
{
    int x = 0;

    // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
    // we need to skip each second pixel. Same for BEToUV.
    while (x < width) {
        const int hi = 2 * x + 1;

        dstU[x] = src1[hi];
        dstV[x] = src2[hi];
        x++;
    }
}
00162 
/* This is almost identical to yuy2ToY_c and exists only because calling
 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/**
 * Copy every odd-indexed byte of src (offsets 1, 3, 5, ...) to dst.
 *
 * @param dst    output, receives width bytes
 * @param src    input, read at offsets 2*x + 1
 * @param width  number of samples to produce
 * @param unused ignored
 */
static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                             uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dst[x] = src[(x << 1) + 1];
        x++;
    }
}
00172 
/**
 * Split chroma out of 4-byte groups: byte 0 of each group goes to dstU,
 * byte 2 goes to dstV. Only src1 is read.
 *
 * @param dstU   U output, width bytes
 * @param dstV   V output, width bytes
 * @param src1   input, read at offsets 4*x and 4*x + 2
 * @param src2   must equal src1 (checked by assert after the copy)
 * @param width  number of chroma samples to produce
 * @param unused ignored
 */
static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                              const uint8_t *src2, int width, uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        const int group = 4 * x;

        dstU[x] = src1[group + 0];
        dstV[x] = src1[group + 2];
        x++;
    }
    assert(src1 == src2);
}
00183 
/**
 * Take the first byte of every 2-byte unit: src1[2*x] goes to dstU,
 * src2[2*x] goes to dstV.
 *
 * @param dstU   U output, width bytes
 * @param dstV   V output, width bytes
 * @param src1   U input, read at even offsets
 * @param src2   V input, read at even offsets
 * @param width  number of samples to produce
 * @param unused ignored
 */
static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                            const uint8_t *src2, int width, uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        const int lo = 2 * x;

        dstU[x] = src1[lo];
        dstV[x] = src2[lo];
        x++;
    }
}
00193 
/**
 * De-interleave byte pairs: the first byte of each pair goes to dst1,
 * the second to dst2.
 *
 * @param dst1   receives src[2*x]
 * @param dst2   receives src[2*x + 1]
 * @param src    interleaved input, 2*width bytes are read
 * @param width  number of pairs to split
 */
static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                              const uint8_t *src, int width)
{
    int x = 0;

    while (x < width) {
        const int pair = 2 * x;

        dst1[x] = src[pair + 0];
        dst2[x] = src[pair + 1];
        x++;
    }
}
00203 
// FIXME Maybe dither instead.
#ifndef YUV_NBPS
/*
 * YUV_NBPS(depth, endianness, rfunc) defines two static inline converters,
 * <endianness><depth>ToUV_c and <endianness><depth>ToY_c. Each reads 16-bit
 * samples through rfunc() and stores them as 8-bit values after shifting
 * right by (depth - 8), i.e. the low bits are dropped without rounding.
 */
#define YUV_NBPS(depth, endianness, rfunc) \
static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                          const uint16_t *srcU, const uint16_t *srcV, \
                                          int width, uint32_t *unused) \
{ \
    int x = 0; \
    while (x < width) { \
        dstU[x] = rfunc(&srcU[x]) >> (depth - 8); \
        dstV[x] = rfunc(&srcV[x]) >> (depth - 8); \
        x++; \
    } \
} \
\
static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint16_t *srcY, int width, uint32_t *unused) \
{ \
    int x = 0; \
    while (x < width) { \
        dstY[x] = rfunc(&srcY[x]) >> (depth - 8); \
        x++; \
    } \
}

YUV_NBPS( 9, LE, AV_RL16)
YUV_NBPS( 9, BE, AV_RB16)
YUV_NBPS(10, LE, AV_RL16)
YUV_NBPS(10, BE, AV_RB16)
#endif // YUV_NBPS
00230 
/**
 * De-interleave src1 with nvXXtoUV_c(): the first byte of each pair goes
 * to dstU, the second to dstV. src2 and unused are ignored.
 */
static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
                              int width, uint32_t *unused)
{
    uint8_t *const firstOfPair  = dstU;
    uint8_t *const secondOfPair = dstV;

    nvXXtoUV_c(firstOfPair, secondOfPair, src1, width);
}
00237 
/**
 * De-interleave src1 with nvXXtoUV_c(), destinations swapped relative to
 * nv12ToUV_c(): the first byte of each pair goes to dstV, the second to
 * dstU. src2 and unused are ignored.
 */
static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
                              int width, uint32_t *unused)
{
    uint8_t *const firstOfPair  = dstV;
    uint8_t *const secondOfPair = dstU;

    nvXXtoUV_c(firstOfPair, secondOfPair, src1, width);
}
00244 
00245 // bilinear / bicubic scaling
/**
 * Horizontal FIR filter over one line of 8-bit samples.
 *
 * For every output index x, filterSize consecutive input bytes starting at
 * src[filterPos[x]] are multiplied with the coefficients
 * filter[filterSize*x .. filterSize*x + filterSize - 1] and summed. The sum
 * is shifted right by 7 and capped at (1<<15)-1; there is no lower clamp.
 *
 * @param dst        output line, dstW samples
 * @param dstW       number of output samples
 * @param src        input line
 * @param srcW       not used by this implementation
 * @param xInc       not used by this implementation
 * @param filter     coefficients, filterSize per output sample
 * @param filterPos  first input index for each output sample
 * @param filterSize number of taps per output sample
 */
static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                            int srcW, int xInc,
                            const int16_t *filter, const int16_t *filterPos,
                            int filterSize)
{
    int x;

    for (x = 0; x < dstW; x++) {
        const int firstPixel = filterPos[x];
        const int firstCoeff = filterSize * x;
        int acc = 0;
        int tap;

        for (tap = 0; tap < filterSize; tap++)
            acc += ((int)src[firstPixel + tap]) * filter[firstCoeff + tap];

        dst[x] = FFMIN(acc >> 7, (1 << 15) - 1); // the cubic equation does overflow ...
    }
}
00264 
00265 
/* Compile-time switch: set to 1 to have DEBUG_BUFFERS() emit av_log() traces. */
#define DEBUG_SWSCALE_BUFFERS 0

/*
 * Statement-like trace macro; expects a variable named `c` in scope at the
 * call site, which is passed to av_log() as its first argument.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a single
 * statement. A bare `if (...) av_log(...)` would capture an `else` that
 * follows the macro call:
 *
 *     if (cond) DEBUG_BUFFERS("..."); else other();
 *
 * would bind `else` to the macro's internal `if` instead of `if (cond)`.
 */
#define DEBUG_BUFFERS(...) \
    do { \
        if (DEBUG_SWSCALE_BUFFERS) \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__); \
    } while (0)

#if HAVE_MMX
/* Forward declaration only: the definition is not in this part of the file. */
static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
                                  int lastInLumBuf, int lastInChrBuf);
#endif
00273 
/**
 * Scale one slice of input and emit as many destination lines as that
 * slice makes possible.
 *
 * For each destination line the function first makes sure the source lines
 * it needs have been horizontally scaled into the ring buffers (lumPixBuf,
 * chrUPixBuf/chrVPixBuf, alpPixBuf) via hyscale_c()/hcscale_c(), then calls
 * one vertical output routine chosen from dstFormat, the vertical filter
 * sizes and the SWS_FULL_CHR_H_INT flag.
 *
 * Progress state (dstY, lumBufIndex, chrBufIndex, lastInLumBuf,
 * lastInChrBuf) is read from the context at entry and written back before
 * returning. It is reset when srcSliceY == 0.
 *
 * Side effects on the caller's arrays, as visible below: for packed source
 * formats src[1..3] and srcStride[1..3] are overwritten with entry 0, and
 * srcStride[1] / srcStride[2] are always left-shifted by c->vChrDrop.
 *
 * @param c          scaler context
 * @param src        source plane pointers for this slice
 * @param srcStride  source strides (modified in place, see above)
 * @param srcSliceY  index of the first source line contained in the slice
 * @param srcSliceH  number of source lines in the slice
 * @param dst        destination plane pointers
 * @param dstStride  destination strides
 * @return number of destination lines produced by this call
 */
00274 static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
00275                      int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
00276 {
00277     /* load a few things into local vars to make the code more readable? and faster */
00278     const int srcW= c->srcW;
00279     const int dstW= c->dstW;
00280     const int dstH= c->dstH;
00281     const int chrDstW= c->chrDstW;
00282     const int chrSrcW= c->chrSrcW;
00283     const int lumXInc= c->lumXInc;
00284     const int chrXInc= c->chrXInc;
00285     const enum PixelFormat dstFormat= c->dstFormat;
00286     const int flags= c->flags;
00287     int16_t *vLumFilterPos= c->vLumFilterPos;
00288     int16_t *vChrFilterPos= c->vChrFilterPos;
00289     int16_t *hLumFilterPos= c->hLumFilterPos;
00290     int16_t *hChrFilterPos= c->hChrFilterPos;
00291     int16_t *vLumFilter= c->vLumFilter;
00292     int16_t *vChrFilter= c->vChrFilter;
00293     int16_t *hLumFilter= c->hLumFilter;
00294     int16_t *hChrFilter= c->hChrFilter;
00295     int32_t *lumMmxFilter= c->lumMmxFilter;
00296     int32_t *chrMmxFilter= c->chrMmxFilter;
00297     int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
00298     const int vLumFilterSize= c->vLumFilterSize;
00299     const int vChrFilterSize= c->vChrFilterSize;
00300     const int hLumFilterSize= c->hLumFilterSize;
00301     const int hChrFilterSize= c->hChrFilterSize;
00302     int16_t **lumPixBuf= c->lumPixBuf;
00303     int16_t **chrUPixBuf= c->chrUPixBuf;
00304     int16_t **chrVPixBuf= c->chrVPixBuf;
00305     int16_t **alpPixBuf= c->alpPixBuf;
00306     const int vLumBufSize= c->vLumBufSize;
00307     const int vChrBufSize= c->vChrBufSize;
00308     uint8_t *formatConvBuffer= c->formatConvBuffer;
00309     const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
          /* -((-x) >> s) rounds up instead of down (assuming an arithmetic right shift) */
00310     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
00311     int lastDstY;
00312     uint32_t *pal=c->pal_yuv;
00313 
00314     /* vars which will change and which we need to store back in the context */
00315     int dstY= c->dstY;
00316     int lumBufIndex= c->lumBufIndex;
00317     int chrBufIndex= c->chrBufIndex;
00318     int lastInLumBuf= c->lastInLumBuf;
00319     int lastInChrBuf= c->lastInChrBuf;
00320 
          /* packed input: make all four plane pointers and strides equal to plane 0 */
00321     if (isPacked(c->srcFormat)) {
00322         src[0]=
00323         src[1]=
00324         src[2]=
00325         src[3]= src[0];
00326         srcStride[0]=
00327         srcStride[1]=
00328         srcStride[2]=
00329         srcStride[3]= srcStride[0];
00330     }
          /* NOTE: this modifies the caller's srcStride array in place */
00331     srcStride[1]<<= c->vChrDrop;
00332     srcStride[2]<<= c->vChrDrop;
00333 
00334     DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
00335                   src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
00336                   dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
00337     DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
00338                    srcSliceY,    srcSliceH,    dstY,    dstH);
00339     DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
00340                    vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
00341 
          /* warn once if any destination stride is not a multiple of 8 */
00342     if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
              /* function-level static: one flag shared by every context, accessed without synchronization */
00343         static int warnedAlready=0; //FIXME move this into the context perhaps
00344         if (flags & SWS_PRINT_INFO && !warnedAlready) {
00345             av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
00346                    "         ->cannot do aligned memory accesses anymore\n");
00347             warnedAlready=1;
00348         }
00349     }
00350 
00351     /* Note the user might start scaling the picture in the middle so this
00352        will not get executed. This is not really intended but works
00353        currently, so people might do it. */
00354     if (srcSliceY ==0) {
00355         lumBufIndex=-1;
00356         chrBufIndex=-1;
00357         dstY=0;
00358         lastInLumBuf= -1;
00359         lastInChrBuf= -1;
00360     }
00361 
          /* remember where this call started; the return value is dstY - lastDstY */
00362     lastDstY= dstY;
00363 
          /* one iteration per destination line; left early via break when the slice runs out of input */
00364     for (;dstY < dstH; dstY++) {
00365         unsigned char *dest =dst[0]+dstStride[0]*dstY;
00366         const int chrDstY= dstY>>c->chrDstVSubSample;
00367         unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
00368         unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
00369         unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
              /* a dithers[7] row is selected only for >8-bit sources; otherwise flat64 is used */
00370         const uint8_t *lumDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][dstY   &7] : flat64;
00371         const uint8_t *chrDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][chrDstY&7] : flat64;
00372 
00373         const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
              /* same lookup for the last luma line sharing this chroma line (clamped to dstH-1) */
00374         const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
00375         const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
00376         int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
00377         int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
00378         int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
00379         int enough_lines;
00380 
00381         //handle holes (FAST_BILINEAR & weird filters)
00382         if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
00383         if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
00384         assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
00385         assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
00386 
00387         DEBUG_BUFFERS("dstY: %d\n", dstY);
00388         DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
00389                          firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
00390         DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
00391                          firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
00392 
00393         // Do we have enough lines in this slice to output the dstY line
00394         enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
00395 
              /* not enough input: buffer everything this slice still provides, then break further down */
00396         if (!enough_lines) {
00397             lastLumSrcY = srcSliceY + srcSliceH - 1;
00398             lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
00399             DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
00400                                             lastLumSrcY, lastChrSrcY);
00401         }
00402 
00403         //Do horizontal scaling
00404         while(lastInLumBuf < lastLumSrcY) {
00405             const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
00406             const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
00407             lumBufIndex++;
00408             assert(lumBufIndex < 2*vLumBufSize);
00409             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
00410             assert(lastInLumBuf + 1 - srcSliceY >= 0);
00411             hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
00412                       hLumFilter, hLumFilterPos, hLumFilterSize,
00413                       formatConvBuffer,
00414                       pal, 0);
                  /* alpha uses the luma scaler and filter, reading from plane 3 */
00415             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
00416                 hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
00417                           lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
00418                           formatConvBuffer,
00419                           pal, 1);
00420             lastInLumBuf++;
00421             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
00422                                lumBufIndex,    lastInLumBuf);
00423         }
00424         while(lastInChrBuf < lastChrSrcY) {
00425             const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
00426             const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
00427             chrBufIndex++;
00428             assert(chrBufIndex < 2*vChrBufSize);
00429             assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
00430             assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
00431             //FIXME replace parameters through context struct (some at least)
00432 
                  /* the buffer index advances even when chroma scaling is skipped */
00433             if (c->needs_hcscale)
00434                 hcscale_c(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
00435                           chrDstW, src1, src2, chrSrcW, chrXInc,
00436                           hChrFilter, hChrFilterPos, hChrFilterSize,
00437                           formatConvBuffer, pal);
00438             lastInChrBuf++;
00439             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
00440                                chrBufIndex,    lastInChrBuf);
00441         }
00442         //wrap buf index around to stay inside the ring buffer
00443         if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
00444         if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
00445         if (!enough_lines)
00446             break; //we can't output a dstY line so let's try with the next slice
00447 
00448 #if HAVE_MMX
00449         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
00450 #endif
              /* all but the last two destination lines dispatch through the function pointers in c;
                 the else branch below calls the *inC routines directly */
00451         if (dstY < dstH-2) {
                  /* ring-buffer entries for the first needed line; NOTE(review): the + v*BufSize offset
                     presumably relies on the arrays holding 2*v*BufSize entries, as the asserts suggest -- confirm */
00452             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
00453             const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
00454             const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
00455             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
00456             if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
00457                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
00458                 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
00459                 c->yuv2nv12X(c,
00460                              vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
00461                              vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
00462                              dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither);
00463             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
00464                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
00465                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
00466                 if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
00467                     yuv2yuvX16inC(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
00468                                   vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
00469                                   chrVSrcPtr, vChrFilterSize,
00470                                   alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
00471                                   (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
00472                                   dstFormat);
00473                 } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
00474                     const int16_t *lumBuf = lumSrcPtr[0];
00475                     const int16_t *chrUBuf= chrUSrcPtr[0];
00476                     const int16_t *chrVBuf= chrVSrcPtr[0];
00477                     const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
00478                     c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
00479                                 uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
00480                 } else { //General YV12
00481                     c->yuv2yuvX(c,
00482                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
00483                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
00484                                 chrVSrcPtr, vChrFilterSize,
00485                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
00486                 }
00487             } else {
00488                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
00489                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
00490                 if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                          /* NOTE(review): the chroma filter is indexed with dstY here but with chrDstY in the
                             planar branches; presumably equal for these output formats -- confirm */
00491                     int chrAlpha= vChrFilter[2*dstY+1];
00492                     if(flags & SWS_FULL_CHR_H_INT) {
00493                         yuv2rgbXinC_full(c, //FIXME write a packed1_full function
00494                                          vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
00495                                          vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
00496                                          chrVSrcPtr, vChrFilterSize,
00497                                          alpSrcPtr, dest, dstW, dstY);
00498                     } else {
00499                         c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
00500                                        *chrVSrcPtr, *(chrVSrcPtr+1),
00501                                        alpPixBuf ? *alpSrcPtr : NULL,
00502                                        dest, dstW, chrAlpha, dstFormat, flags, dstY);
00503                     }
00504                 } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
00505                     int lumAlpha= vLumFilter[2*dstY+1];
00506                     int chrAlpha= vChrFilter[2*dstY+1];
                          /* first coefficient of each 2-tap filter, duplicated into both 16-bit halves */
00507                     lumMmxFilter[2]=
00508                     lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
00509                     chrMmxFilter[2]=
00510                     chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
00511                     if(flags & SWS_FULL_CHR_H_INT) {
00512                         yuv2rgbXinC_full(c, //FIXME write a packed2_full function
00513                                          vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
00514                                          vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
00515                                          alpSrcPtr, dest, dstW, dstY);
00516                     } else {
00517                         c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
00518                                        *chrVSrcPtr, *(chrVSrcPtr+1),
00519                                        alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
00520                                        dest, dstW, lumAlpha, chrAlpha, dstY);
00521                     }
00522                 } else { //general RGB
00523                     if(flags & SWS_FULL_CHR_H_INT) {
00524                         yuv2rgbXinC_full(c,
00525                                          vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
00526                                          vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
00527                                          alpSrcPtr, dest, dstW, dstY);
00528                     } else {
00529                         c->yuv2packedX(c,
00530                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
00531                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
00532                                        alpSrcPtr, dest, dstW, dstY);
00533                     }
00534                 }
00535             }
00536         } else { // hmm looks like we can't use MMX here without overwriting this array's tail
00537             const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
00538             const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
00539             const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
00540             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
00541             if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
00542                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
00543                 if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
00544                 yuv2nv12XinC(
00545                              vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
00546                              vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
00547                              dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither);
00548             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
00549                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
00550                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
00551                 if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
00552                     yuv2yuvX16inC(
00553                                   vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
00554                                   vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
00555                                   alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
00556                                   dstFormat);
00557                 } else {
00558                     yuv2yuvXinC(
00559                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
00560                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
00561                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
00562                 }
00563             } else {
00564                 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
00565                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
00566                 if(flags & SWS_FULL_CHR_H_INT) {
00567                     yuv2rgbXinC_full(c,
00568                                      vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
00569                                      vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
00570                                      alpSrcPtr, dest, dstW, dstY);
00571                 } else {
00572                     yuv2packedXinC(c,
00573                                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
00574                                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
00575                                    alpSrcPtr, dest, dstW, dstY);
00576                 }
00577             }
00578         }
00579     }
00580 
          /* YUVA420P output without alpha buffers: fill the alpha lines written by this call with 255 */
00581     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
00582         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
00583 
00584 #if HAVE_MMX2
00585     if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
00586         __asm__ volatile("sfence":::"memory");
00587 #endif
00588     emms_c();
00589 
00590     /* store changed local vars back in the context */
00591     c->dstY= dstY;
00592     c->lumBufIndex= lumBufIndex;
00593     c->chrBufIndex= chrBufIndex;
00594     c->lastInLumBuf= lastInLumBuf;
00595     c->lastInChrBuf= lastInChrBuf;
00596 
00597     return dstY - lastDstY;
00598 }
00599 
00600 static void sws_init_swScale_c(SwsContext *c)
00601 {
00602     enum PixelFormat srcFormat = c->srcFormat;
00603 
00604     c->yuv2nv12X    = yuv2nv12X_c;
00605     c->yuv2yuv1     = yuv2yuv1_c;
00606     c->yuv2yuvX     = yuv2yuvX_c;
00607     c->yuv2packed1  = yuv2packed1_c;
00608     c->yuv2packed2  = yuv2packed2_c;
00609     c->yuv2packedX  = yuv2packedX_c;
00610 
00611     c->hScale       = hScale_c;
00612 
00613     if (c->flags & SWS_FAST_BILINEAR)
00614     {
00615         c->hyscale_fast = hyscale_fast_c;
00616         c->hcscale_fast = hcscale_fast_c;
00617     }
00618 
00619     c->chrToYV12 = NULL;
00620     switch(srcFormat) {
00621         case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
00622         case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
00623         case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
00624         case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
00625         case PIX_FMT_RGB8     :
00626         case PIX_FMT_BGR8     :
00627         case PIX_FMT_PAL8     :
00628         case PIX_FMT_BGR4_BYTE:
00629         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
00630         case PIX_FMT_GRAY16BE :
00631         case PIX_FMT_YUV420P9BE:
00632         case PIX_FMT_YUV422P10BE:
00633         case PIX_FMT_YUV420P10BE:
00634         case PIX_FMT_YUV420P16BE:
00635         case PIX_FMT_YUV422P16BE:
00636         case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? hScale16_c : hScale16X_c; break;
00637         case PIX_FMT_GRAY16LE :
00638         case PIX_FMT_YUV420P9LE:
00639         case PIX_FMT_YUV422P10LE:
00640         case PIX_FMT_YUV420P10LE:
00641         case PIX_FMT_YUV420P16LE:
00642         case PIX_FMT_YUV422P16LE:
00643         case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? hScale16X_c : hScale16_c; break;
00644     }
00645     if (c->chrSrcHSubSample) {
00646         switch(srcFormat) {
00647         case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half; break;
00648         case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half; break;
00649         case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half; break;
00650         case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half; break;
00651         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half;  break;
00652         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
00653         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
00654         case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
00655         case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
00656         case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half;  break;
00657         case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
00658         case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_half_c; break;
00659         case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
00660         case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
00661         }
00662     } else {
00663         switch(srcFormat) {
00664         case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV; break;
00665         case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV; break;
00666         case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV; break;
00667         case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV; break;
00668         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV;  break;
00669         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
00670         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
00671         case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
00672         case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
00673         case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV;  break;
00674         case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
00675         case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_c; break;
00676         case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
00677         case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
00678         }
00679     }
00680 
00681     c->lumToYV12 = NULL;
00682     c->alpToYV12 = NULL;
00683     switch (srcFormat) {
00684     case PIX_FMT_YUYV422  :
00685     case PIX_FMT_GRAY8A   :
00686                             c->lumToYV12 = yuy2ToY_c; break;
00687     case PIX_FMT_UYVY422  :
00688                             c->lumToYV12 = uyvyToY_c; break;
00689     case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c; break;
00690     case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY; break;
00691     case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY; break;
00692     case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c; break;
00693     case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY; break;
00694     case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY; break;
00695     case PIX_FMT_RGB8     :
00696     case PIX_FMT_BGR8     :
00697     case PIX_FMT_PAL8     :
00698     case PIX_FMT_BGR4_BYTE:
00699     case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
00700     case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
00701     case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
00702     case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY;  break;
00703     case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
00704     case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY;  break;
00705     case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
00706     case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY; break;
00707     case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY; break;
00708     case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY; break;
00709     case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY; break;
00710     }
00711     if (c->alpPixBuf) {
00712         switch (srcFormat) {
00713         case PIX_FMT_RGB32  :
00714         case PIX_FMT_RGB32_1:
00715         case PIX_FMT_BGR32  :
00716         case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
00717         case PIX_FMT_GRAY8A : c->alpToYV12 = yuy2ToY_c; break;
00718         case PIX_FMT_PAL8   : c->alpToYV12 = palToA; break;
00719         }
00720     }
00721 
00722     if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8)
00723         c->hScale16= hScale16_c;
00724 
00725     switch (srcFormat) {
00726     case PIX_FMT_GRAY8A :
00727         c->alpSrcOffset = 1;
00728         break;
00729     case PIX_FMT_RGB32  :
00730     case PIX_FMT_BGR32  :
00731         c->alpSrcOffset = 3;
00732         break;
00733     }
00734 
00735     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
00736         if (c->srcRange) {
00737             c->lumConvertRange = lumRangeFromJpeg_c;
00738             c->chrConvertRange = chrRangeFromJpeg_c;
00739         } else {
00740             c->lumConvertRange = lumRangeToJpeg_c;
00741             c->chrConvertRange = chrRangeToJpeg_c;
00742         }
00743     }
00744 
00745     if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
00746           srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
00747         c->needs_hcscale = 1;
00748 }