FFmpeg: libswscale/swscale.c Source File

00001 /*
00002  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with FFmpeg; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 /*
00022   supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
00023   supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
00024   {BGR,RGB}{1,4,8,15,16} support dithering
00025 
00026   unscaled special converters (YV12=I420=IYUV, Y800=Y8)
00027   YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
00028   x -> x
00029   YUV9 -> YV12
00030   YUV9/YV12 -> Y800
00031   Y800 -> YUV9/YV12
00032   BGR24 -> BGR32 & RGB24 -> RGB32
00033   BGR32 -> BGR24 & RGB32 -> RGB24
00034   BGR15 -> BGR16
00035 */
00036 
00037 /*
00038 tested special converters (most are tested actually, but I did not write it down ...)
00039  YV12 -> BGR12/BGR16
00040  YV12 -> YV12
00041  BGR15 -> BGR16
00042  BGR16 -> BGR16
00043  YVU9 -> YV12
00044 
00045 untested special converters
00046   YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
00047   YV12/I420 -> YV12/I420
00048   YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
00049   BGR24 -> BGR32 & RGB24 -> RGB32
00050   BGR32 -> BGR24 & RGB32 -> RGB24
00051   BGR24 -> YV12
00052 */
00053 
00054 #include <inttypes.h>
00055 #include <string.h>
00056 #include <math.h>
00057 #include <stdio.h>
00058 #include "config.h"
00059 #include <assert.h>
00060 #include "swscale.h"
00061 #include "swscale_internal.h"
00062 #include "rgb2rgb.h"
00063 #include "libavutil/avassert.h"
00064 #include "libavutil/intreadwrite.h"
00065 #include "libavutil/cpu.h"
00066 #include "libavutil/avutil.h"
00067 #include "libavutil/mathematics.h"
00068 #include "libavutil/bswap.h"
00069 #include "libavutil/pixdesc.h"
00070 
00071 
/*
 * Fixed-point RGB -> YUV coefficients.
 *
 * Every constant is weight * range / 255, scaled by 2^RGB2YUV_SHIFT and
 * rounded by adding 0.5 before truncation to int. The luma (xY) terms use
 * a range of 219, the chroma (xU / xV) terms a range of 224. Negative
 * coefficients are formed by negating the rounded magnitude.
 */
#define RGB2YUV_SHIFT 15
#define RGB2YUV_COEFF(weight, range) \
    ((int)((weight)*(range)/255*(1<<RGB2YUV_SHIFT)+0.5))

/* contributions to Y */
#define RY ( RGB2YUV_COEFF(0.299, 219))
#define GY ( RGB2YUV_COEFF(0.587, 219))
#define BY ( RGB2YUV_COEFF(0.114, 219))
/* contributions to U */
#define RU (-RGB2YUV_COEFF(0.169, 224))
#define GU (-RGB2YUV_COEFF(0.331, 224))
#define BU ( RGB2YUV_COEFF(0.500, 224))
/* contributions to V */
#define RV ( RGB2YUV_COEFF(0.500, 224))
#define GV (-RGB2YUV_COEFF(0.419, 224))
#define BV (-RGB2YUV_COEFF(0.081, 224))
00082 
00083 /*
00084 NOTES
00085 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
00086 
00087 TODO
00088 more intelligent misalignment avoidance for the horizontal scaler
00089 write special vertical cubic upscale version
00090 optimize C code (YV12 / minmax)
00091 add support for packed pixel YUV input & output
00092 add support for Y8 output
00093 optimize BGR24 & BGR32
00094 add BGR4 output support
00095 write special BGR->BGR scaler
00096 */
00097 
00098 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
00099 {  1,   3,   1,   3,   1,   3,   1,   3, },
00100 {  2,   0,   2,   0,   2,   0,   2,   0, },
00101 };
00102 
00103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
00104 {  6,   2,   6,   2,   6,   2,   6,   2, },
00105 {  0,   4,   0,   4,   0,   4,   0,   4, },
00106 };
00107 
00108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
00109 {  8,   4,  11,   7,   8,   4,  11,   7, },
00110 {  2,  14,   1,  13,   2,  14,   1,  13, },
00111 { 10,   6,   9,   5,  10,   6,   9,   5, },
00112 {  0,  12,   3,  15,   0,  12,   3,  15, },
00113 };
00114 
00115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
00116 { 17,   9,  23,  15,  16,   8,  22,  14, },
00117 {  5,  29,   3,  27,   4,  28,   2,  26, },
00118 { 21,  13,  19,  11,  20,  12,  18,  10, },
00119 {  0,  24,   6,  30,   1,  25,   7,  31, },
00120 { 16,   8,  22,  14,  17,   9,  23,  15, },
00121 {  4,  28,   2,  26,   5,  29,   3,  27, },
00122 { 20,  12,  18,  10,  21,  13,  19,  11, },
00123 {  1,  25,   7,  31,   0,  24,   6,  30, },
00124 };
00125 
00126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
00127 {  0,  55,  14,  68,   3,  58,  17,  72, },
00128 { 37,  18,  50,  32,  40,  22,  54,  35, },
00129 {  9,  64,   5,  59,  13,  67,   8,  63, },
00130 { 46,  27,  41,  23,  49,  31,  44,  26, },
00131 {  2,  57,  16,  71,   1,  56,  15,  70, },
00132 { 39,  21,  52,  34,  38,  19,  51,  33, },
00133 { 11,  66,   7,  62,  10,  65,   6,  60, },
00134 { 48,  30,  43,  25,  47,  29,  42,  24, },
00135 };
00136 
00137 #if 1
00138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00139 {117,  62, 158, 103, 113,  58, 155, 100, },
00140 { 34, 199,  21, 186,  31, 196,  17, 182, },
00141 {144,  89, 131,  76, 141,  86, 127,  72, },
00142 {  0, 165,  41, 206,  10, 175,  52, 217, },
00143 {110,  55, 151,  96, 120,  65, 162, 107, },
00144 { 28, 193,  14, 179,  38, 203,  24, 189, },
00145 {138,  83, 124,  69, 148,  93, 134,  79, },
00146 {  7, 172,  48, 213,   3, 168,  45, 210, },
00147 };
00148 #elif 1
00149 // tries to correct a gamma of 1.5
00150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00151 {  0, 143,  18, 200,   2, 156,  25, 215, },
00152 { 78,  28, 125,  64,  89,  36, 138,  74, },
00153 { 10, 180,   3, 161,  16, 195,   8, 175, },
00154 {109,  51,  93,  38, 121,  60, 105,  47, },
00155 {  1, 152,  23, 210,   0, 147,  20, 205, },
00156 { 85,  33, 134,  71,  81,  30, 130,  67, },
00157 { 14, 190,   6, 171,  12, 185,   5, 166, },
00158 {117,  57, 101,  44, 113,  54,  97,  41, },
00159 };
00160 #elif 1
00161 // tries to correct a gamma of 2.0
00162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00163 {  0, 124,   8, 193,   0, 140,  12, 213, },
00164 { 55,  14, 104,  42,  66,  19, 119,  52, },
00165 {  3, 168,   1, 145,   6, 187,   3, 162, },
00166 { 86,  31,  70,  21,  99,  39,  82,  28, },
00167 {  0, 134,  11, 206,   0, 129,   9, 200, },
00168 { 62,  17, 114,  48,  58,  16, 109,  45, },
00169 {  5, 181,   2, 157,   4, 175,   1, 151, },
00170 { 95,  36,  78,  26,  90,  34,  74,  24, },
00171 };
00172 #else
00173 // tries to correct a gamma of 2.5
00174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00175 {  0, 107,   3, 187,   0, 125,   6, 212, },
00176 { 39,   7,  86,  28,  49,  11, 102,  36, },
00177 {  1, 158,   0, 131,   3, 180,   1, 151, },
00178 { 68,  19,  52,  12,  81,  25,  64,  17, },
00179 {  0, 119,   5, 203,   0, 113,   4, 195, },
00180 { 45,   9,  96,  33,  42,   8,  91,  30, },
00181 {  2, 172,   1, 144,   2, 165,   0, 137, },
00182 { 77,  23,  60,  15,  72,  21,  56,  14, },
00183 };
00184 #endif
00185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
00186 {  36, 68, 60, 92, 34, 66, 58, 90,},
00187 { 100,  4,124, 28, 98,  2,122, 26,},
00188 {  52, 84, 44, 76, 50, 82, 42, 74,},
00189 { 116, 20,108, 12,114, 18,106, 10,},
00190 {  32, 64, 56, 88, 38, 70, 62, 94,},
00191 {  96,  0,120, 24,102,  6,126, 30,},
00192 {  48, 80, 40, 72, 54, 86, 46, 78,},
00193 { 112, 16,104,  8,118, 22,110, 14,},
00194 };
00195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
00196 {  64, 64, 64, 64, 64, 64, 64, 64 };
00197 
00198 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
00199 {
00200   {   0,  1,  0,  1,  0,  1,  0,  1,},
00201   {   1,  0,  1,  0,  1,  0,  1,  0,},
00202   {   0,  1,  0,  1,  0,  1,  0,  1,},
00203   {   1,  0,  1,  0,  1,  0,  1,  0,},
00204   {   0,  1,  0,  1,  0,  1,  0,  1,},
00205   {   1,  0,  1,  0,  1,  0,  1,  0,},
00206   {   0,  1,  0,  1,  0,  1,  0,  1,},
00207   {   1,  0,  1,  0,  1,  0,  1,  0,},
00208 },{
00209   {   1,  2,  1,  2,  1,  2,  1,  2,},
00210   {   3,  0,  3,  0,  3,  0,  3,  0,},
00211   {   1,  2,  1,  2,  1,  2,  1,  2,},
00212   {   3,  0,  3,  0,  3,  0,  3,  0,},
00213   {   1,  2,  1,  2,  1,  2,  1,  2,},
00214   {   3,  0,  3,  0,  3,  0,  3,  0,},
00215   {   1,  2,  1,  2,  1,  2,  1,  2,},
00216   {   3,  0,  3,  0,  3,  0,  3,  0,},
00217 },{
00218   {   2,  4,  3,  5,  2,  4,  3,  5,},
00219   {   6,  0,  7,  1,  6,  0,  7,  1,},
00220   {   3,  5,  2,  4,  3,  5,  2,  4,},
00221   {   7,  1,  6,  0,  7,  1,  6,  0,},
00222   {   2,  4,  3,  5,  2,  4,  3,  5,},
00223   {   6,  0,  7,  1,  6,  0,  7,  1,},
00224   {   3,  5,  2,  4,  3,  5,  2,  4,},
00225   {   7,  1,  6,  0,  7,  1,  6,  0,},
00226 },{
00227   {   4,  8,  7, 11,  4,  8,  7, 11,},
00228   {  12,  0, 15,  3, 12,  0, 15,  3,},
00229   {   6, 10,  5,  9,  6, 10,  5,  9,},
00230   {  14,  2, 13,  1, 14,  2, 13,  1,},
00231   {   4,  8,  7, 11,  4,  8,  7, 11,},
00232   {  12,  0, 15,  3, 12,  0, 15,  3,},
00233   {   6, 10,  5,  9,  6, 10,  5,  9,},
00234   {  14,  2, 13,  1, 14,  2, 13,  1,},
00235 },{
00236   {   9, 17, 15, 23,  8, 16, 14, 22,},
00237   {  25,  1, 31,  7, 24,  0, 30,  6,},
00238   {  13, 21, 11, 19, 12, 20, 10, 18,},
00239   {  29,  5, 27,  3, 28,  4, 26,  2,},
00240   {   8, 16, 14, 22,  9, 17, 15, 23,},
00241   {  24,  0, 30,  6, 25,  1, 31,  7,},
00242   {  12, 20, 10, 18, 13, 21, 11, 19,},
00243   {  28,  4, 26,  2, 29,  5, 27,  3,},
00244 },{
00245   {  18, 34, 30, 46, 17, 33, 29, 45,},
00246   {  50,  2, 62, 14, 49,  1, 61, 13,},
00247   {  26, 42, 22, 38, 25, 41, 21, 37,},
00248   {  58, 10, 54,  6, 57,  9, 53,  5,},
00249   {  16, 32, 28, 44, 19, 35, 31, 47,},
00250   {  48,  0, 60, 12, 51,  3, 63, 15,},
00251   {  24, 40, 20, 36, 27, 43, 23, 39,},
00252   {  56,  8, 52,  4, 59, 11, 55,  7,},
00253 },{
00254   {  18, 34, 30, 46, 17, 33, 29, 45,},
00255   {  50,  2, 62, 14, 49,  1, 61, 13,},
00256   {  26, 42, 22, 38, 25, 41, 21, 37,},
00257   {  58, 10, 54,  6, 57,  9, 53,  5,},
00258   {  16, 32, 28, 44, 19, 35, 31, 47,},
00259   {  48,  0, 60, 12, 51,  3, 63, 15,},
00260   {  24, 40, 20, 36, 27, 43, 23, 39,},
00261   {  56,  8, 52,  4, 59, 11, 55,  7,},
00262 },{
00263   {  36, 68, 60, 92, 34, 66, 58, 90,},
00264   { 100,  4,124, 28, 98,  2,122, 26,},
00265   {  52, 84, 44, 76, 50, 82, 42, 74,},
00266   { 116, 20,108, 12,114, 18,106, 10,},
00267   {  32, 64, 56, 88, 38, 70, 62, 94,},
00268   {  96,  0,120, 24,102,  6,126, 30,},
00269   {  48, 80, 40, 72, 54, 86, 46, 78,},
00270   { 112, 16,104,  8,118, 22,110, 14,},
00271 }};
00272 
/* File-local row of eight bytes, each holding the constant 64. */
static const uint8_t flat64[8] = { 64, 64, 64, 64, 64, 64, 64, 64 };
00274 
/*
 * 15 x 16 table of 16-bit scale values.
 * In every row r the entry at column r + 1 equals 2^(r + 2) - 1.
 */
const uint16_t dither_scale[15][16] = {
    { 2, 3, 3, 5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5 },
    { 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25 },
    { 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113 },
    { 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481 },
    { 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985 },
    { 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033 },
    { 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129, 16257, 16257 },
    { 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161, 16321, 32641 },
    { 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177, 16353, 32705 },
    { 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185, 16369, 32737 },
    { 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189, 16377, 32753 },
    { 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191, 16381, 32761 },
    { 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14, 16383, 16383, 32765 },
    { 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15, 32767, 32767 },
    { 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16, 65535 },
};
00292 
/*
 * Store one 16-bit sample at pos in the byte order selected by the
 * enclosing function's big_endian variable.
 *
 * val is shifted right by the enclosing function's shift variable, clipped
 * with av_clip_<signedness>16() and offset by bias before being written.
 *
 * The body is wrapped in do { } while (0) so the macro is a single
 * statement (safe in an unbraced if/else), and val / bias are parenthesized
 * so that compound argument expressions are evaluated as a unit.
 */
#define output_pixel(pos, val, bias, signedness) \
    do { \
        if (big_endian) { \
            AV_WB16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
        } else { \
            AV_WL16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
        } \
    } while (0)
00299 
00300 static av_always_inline void
00301 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
00302                          int big_endian, int output_bits)
00303 {
00304     int i;
00305     int shift = 3;
00306     av_assert0(output_bits == 16);
00307 
00308     for (i = 0; i < dstW; i++) {
00309         int val = src[i] + (1 << (shift - 1));
00310         output_pixel(&dest[i], val, 0, uint);
00311     }
00312 }
00313 
00314 static av_always_inline void
00315 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
00316                          const int32_t **src, uint16_t *dest, int dstW,
00317                          int big_endian, int output_bits)
00318 {
00319     int i;
00320     int shift = 15;
00321     av_assert0(output_bits == 16);
00322 
00323     for (i = 0; i < dstW; i++) {
00324         int val = 1 << (shift - 1);
00325         int j;
00326 
00327         /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
00328          * filters (or anything with negative coeffs, the range can be slightly
00329          * wider in both directions. To account for this overflow, we subtract
00330          * a constant so it always fits in the signed range (assuming a
00331          * reasonable filterSize), and re-add that at the end. */
00332         val -= 0x40000000;
00333         for (j = 0; j < filterSize; j++)
00334             val += src[j][i] * filter[j];
00335 
00336         output_pixel(&dest[i], val, 0x8000, int);
00337     }
00338 }
00339 
00340 #undef output_pixel
00341 
/*
 * Store one sample of output_bits significant bits as a 16-bit word at pos,
 * in the byte order selected by the enclosing function's big_endian variable.
 *
 * val is shifted right by the enclosing function's shift variable and
 * clipped to [0, 2^output_bits - 1] with av_clip_uintp2().
 *
 * The body is wrapped in do { } while (0) so the macro is a single
 * statement (safe in an unbraced if/else), and val is parenthesized so that
 * compound argument expressions are shifted as a unit.
 */
#define output_pixel(pos, val) \
    do { \
        if (big_endian) { \
            AV_WB16(pos, av_clip_uintp2((val) >> shift, output_bits)); \
        } else { \
            AV_WL16(pos, av_clip_uintp2((val) >> shift, output_bits)); \
        } \
    } while (0)
00348 
00349 static av_always_inline void
00350 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
00351                          int big_endian, int output_bits)
00352 {
00353     int i;
00354     int shift = 15 - output_bits;
00355 
00356     for (i = 0; i < dstW; i++) {
00357         int val = src[i] + (1 << (shift - 1));
00358         output_pixel(&dest[i], val);
00359     }
00360 }
00361 
00362 static av_always_inline void
00363 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
00364                          const int16_t **src, uint16_t *dest, int dstW,
00365                          int big_endian, int output_bits)
00366 {
00367     int i;
00368     int shift = 11 + 16 - output_bits;
00369 
00370     for (i = 0; i < dstW; i++) {
00371         int val = 1 << (shift - 1);
00372         int j;
00373 
00374         for (j = 0; j < filterSize; j++)
00375             val += src[j][i] * filter[j];
00376 
00377         output_pixel(&dest[i], val);
00378     }
00379 }
00380 
00381 #undef output_pixel
00382 
/*
 * Generate the yuv2plane1_<bits><BE_LE>_c() and yuv2planeX_<bits><BE_LE>_c()
 * entry points for one bit depth / byte order combination.
 *
 * bits           output bit depth, forwarded to the template as output_bits
 * BE_LE          suffix used in the generated function names (BE or LE)
 * is_be          forwarded to the template as big_endian
 * template_size  selects the _10_ or _16_ template implementation
 * typeX_t        sample type the int16_t source pointers are cast to
 *
 * The generated functions accept dither and offset arguments but do not use
 * them. Each expansion is a complete pair of function definitions, so the
 * instantiations below are written without a trailing ';' (a lone ';' at
 * file scope is an empty declaration, which ISO C does not permit).
 */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}\
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}
yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
00405 
/*
 * Vertically filter filterSize lines of 16-bit samples into one line of
 * 8-bit output.
 *
 * For each column the sum starts from the dither byte at index
 * (column + offset) & 7, shifted left by 12; the weighted samples are added,
 * and the result is shifted right by 19 and clipped to 0..255.
 *
 * filter  filterSize coefficients
 * src     filterSize input lines, each with at least dstW samples
 * dest    dstW output bytes
 * dither  8 dither bytes
 * offset  added to the column before the dither lookup
 */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int x;

    for (x = 0; x < dstW; x++) {
        int sum = dither[(x + offset) & 7] << 12;
        int tap;

        for (tap = 0; tap < filterSize; tap++)
            sum += src[tap][x] * filter[tap];

        dest[x] = av_clip_uint8(sum >> 19);
    }
}
00420 
/*
 * Write one line of 8-bit output from a single line of 16-bit samples.
 *
 * Each sample has the dither byte at index (column + offset) & 7 added, is
 * shifted right by 7 and clipped to 0..255.
 *
 * src     dstW input samples
 * dest    dstW output bytes
 * dither  8 dither bytes
 * offset  added to the column before the dither lookup
 */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int x;

    for (x = 0; x < dstW; x++) {
        const int dithered = src[x] + dither[(x + offset) & 7];
        dest[x] = av_clip_uint8(dithered >> 7);
    }
}
00430 
00431 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
00432                         const int16_t **chrUSrc, const int16_t **chrVSrc,
00433                         uint8_t *dest, int chrDstW)
00434 {
00435     enum PixelFormat dstFormat = c->dstFormat;
00436     const uint8_t *chrDither = c->chrDither8;
00437     int i;
00438 
00439     if (dstFormat == PIX_FMT_NV12)
00440         for (i=0; i<chrDstW; i++) {
00441             int u = chrDither[i & 7] << 12;
00442             int v = chrDither[(i + 3) & 7] << 12;
00443             int j;
00444             for (j=0; j<chrFilterSize; j++) {
00445                 u += chrUSrc[j][i] * chrFilter[j];
00446                 v += chrVSrc[j][i] * chrFilter[j];
00447             }
00448 
00449             dest[2*i]= av_clip_uint8(u>>19);
00450             dest[2*i+1]= av_clip_uint8(v>>19);
00451         }
00452     else
00453         for (i=0; i<chrDstW; i++) {
00454             int u = chrDither[i & 7] << 12;
00455             int v = chrDither[(i + 3) & 7] << 12;
00456             int j;
00457             for (j=0; j<chrFilterSize; j++) {
00458                 u += chrUSrc[j][i] * chrFilter[j];
00459                 v += chrVSrc[j][i] * chrFilter[j];
00460             }
00461 
00462             dest[2*i]= av_clip_uint8(v>>19);
00463             dest[2*i+1]= av_clip_uint8(u>>19);
00464         }
00465 }
00466 
/*
 * Store the 16-bit value val at pos: big-endian when the enclosing
 * function's target is PIX_FMT_GRAY16BE, little-endian otherwise.
 *
 * The body is wrapped in do { } while (0) so the macro is a single
 * statement (safe in an unbraced if/else).
 */
#define output_pixel(pos, val) \
    do { \
        if (target == PIX_FMT_GRAY16BE) { \
            AV_WB16(pos, val); \
        } else { \
            AV_WL16(pos, val); \
        } \
    } while (0)
00473 
00474 static av_always_inline void
00475 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
00476                         const int32_t **lumSrc, int lumFilterSize,
00477                         const int16_t *chrFilter, const int32_t **chrUSrc,
00478                         const int32_t **chrVSrc, int chrFilterSize,
00479                         const int32_t **alpSrc, uint16_t *dest, int dstW,
00480                         int y, enum PixelFormat target)
00481 {
00482     int i;
00483 
00484     for (i = 0; i < (dstW >> 1); i++) {
00485         int j;
00486         int Y1 = 1 << 14;
00487         int Y2 = 1 << 14;
00488 
00489         for (j = 0; j < lumFilterSize; j++) {
00490             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00491             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00492         }
00493         Y1 >>= 15;
00494         Y2 >>= 15;
00495         if ((Y1 | Y2) & 0x10000) {
00496             Y1 = av_clip_uint16(Y1);
00497             Y2 = av_clip_uint16(Y2);
00498         }
00499         output_pixel(&dest[i * 2 + 0], Y1);
00500         output_pixel(&dest[i * 2 + 1], Y2);
00501     }
00502 }
00503 
00504 static av_always_inline void
00505 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
00506                         const int32_t *ubuf[2], const int32_t *vbuf[2],
00507                         const int32_t *abuf[2], uint16_t *dest, int dstW,
00508                         int yalpha, int uvalpha, int y,
00509                         enum PixelFormat target)
00510 {
00511     int  yalpha1 = 4095 - yalpha;
00512     int i;
00513     const int32_t *buf0 = buf[0], *buf1 = buf[1];
00514 
00515     for (i = 0; i < (dstW >> 1); i++) {
00516         int Y1 = (buf0[i * 2    ] * yalpha1 + buf1[i * 2    ] * yalpha) >> 15;
00517         int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
00518 
00519         output_pixel(&dest[i * 2 + 0], Y1);
00520         output_pixel(&dest[i * 2 + 1], Y2);
00521     }
00522 }
00523 
00524 static av_always_inline void
00525 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
00526                         const int32_t *ubuf[2], const int32_t *vbuf[2],
00527                         const int32_t *abuf0, uint16_t *dest, int dstW,
00528                         int uvalpha, int y, enum PixelFormat target)
00529 {
00530     int i;
00531 
00532     for (i = 0; i < (dstW >> 1); i++) {
00533         int Y1 = (buf0[i * 2    ]+4)>>3;
00534         int Y2 = (buf0[i * 2 + 1]+4)>>3;
00535 
00536         output_pixel(&dest[i * 2 + 0], Y1);
00537         output_pixel(&dest[i * 2 + 1], Y2);
00538     }
00539 }
00540 
00541 #undef output_pixel
00542 
00543 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
00544 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
00545                         const int16_t **_lumSrc, int lumFilterSize, \
00546                         const int16_t *chrFilter, const int16_t **_chrUSrc, \
00547                         const int16_t **_chrVSrc, int chrFilterSize, \
00548                         const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
00549                         int y) \
00550 { \
00551     const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
00552                   **chrUSrc = (const int32_t **) _chrUSrc, \
00553                   **chrVSrc = (const int32_t **) _chrVSrc, \
00554                   **alpSrc  = (const int32_t **) _alpSrc; \
00555     uint16_t *dest = (uint16_t *) _dest; \
00556     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
00557                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
00558                           alpSrc, dest, dstW, y, fmt); \
00559 } \
00560  \
00561 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
00562                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
00563                         const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
00564                         int yalpha, int uvalpha, int y) \
00565 { \
00566     const int32_t **buf  = (const int32_t **) _buf, \
00567                   **ubuf = (const int32_t **) _ubuf, \
00568                   **vbuf = (const int32_t **) _vbuf, \
00569                   **abuf = (const int32_t **) _abuf; \
00570     uint16_t *dest = (uint16_t *) _dest; \
00571     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
00572                           dest, dstW, yalpha, uvalpha, y, fmt); \
00573 } \
00574  \
00575 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
00576                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
00577                         const int16_t *_abuf0, uint8_t *_dest, int dstW, \
00578                         int uvalpha, int y) \
00579 { \
00580     const int32_t *buf0  = (const int32_t *)  _buf0, \
00581                  **ubuf  = (const int32_t **) _ubuf, \
00582                  **vbuf  = (const int32_t **) _vbuf, \
00583                   *abuf0 = (const int32_t *)  _abuf0; \
00584     uint16_t *dest = (uint16_t *) _dest; \
00585     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
00586                                   dstW, uvalpha, y, fmt); \
00587 }
00588 
00589 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
00590 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
00591 
/*
 * Store one accumulated byte of 1-bit pixels into the lvalue pos: acc as-is
 * when the enclosing function's target is PIX_FMT_MONOBLACK, bitwise
 * inverted otherwise.
 *
 * pos is evaluated exactly once, so an argument with a side effect such as
 * *dest++ advances once per use. The body is wrapped in do { } while (0) so
 * the macro is a single statement (safe in an unbraced if/else), and the
 * arguments are parenthesized.
 */
#define output_pixel(pos, acc) \
    do { \
        if (target == PIX_FMT_MONOBLACK) { \
            (pos) = (acc); \
        } else { \
            (pos) = ~(acc); \
        } \
    } while (0)
00598 
00599 static av_always_inline void
00600 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
00601                       const int16_t **lumSrc, int lumFilterSize,
00602                       const int16_t *chrFilter, const int16_t **chrUSrc,
00603                       const int16_t **chrVSrc, int chrFilterSize,
00604                       const int16_t **alpSrc, uint8_t *dest, int dstW,
00605                       int y, enum PixelFormat target)
00606 {
00607     const uint8_t * const d128=dither_8x8_220[y&7];
00608     uint8_t *g = c->table_gU[128] + c->table_gV[128];
00609     int i;
00610     unsigned acc = 0;
00611 
00612     for (i = 0; i < dstW - 1; i += 2) {
00613         int j;
00614         int Y1 = 1 << 18;
00615         int Y2 = 1 << 18;
00616 
00617         for (j = 0; j < lumFilterSize; j++) {
00618             Y1 += lumSrc[j][i]   * lumFilter[j];
00619             Y2 += lumSrc[j][i+1] * lumFilter[j];
00620         }
00621         Y1 >>= 19;
00622         Y2 >>= 19;
00623         if ((Y1 | Y2) & 0x100) {
00624             Y1 = av_clip_uint8(Y1);
00625             Y2 = av_clip_uint8(Y2);
00626         }
00627         acc += acc + g[Y1 + d128[(i + 0) & 7]];
00628         acc += acc + g[Y2 + d128[(i + 1) & 7]];
00629         if ((i & 7) == 6) {
00630             output_pixel(*dest++, acc);
00631         }
00632     }
00633 }
00634 
00635 static av_always_inline void
00636 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
00637                       const int16_t *ubuf[2], const int16_t *vbuf[2],
00638                       const int16_t *abuf[2], uint8_t *dest, int dstW,
00639                       int yalpha, int uvalpha, int y,
00640                       enum PixelFormat target)
00641 {
00642     const int16_t *buf0  = buf[0],  *buf1  = buf[1];
00643     const uint8_t * const d128 = dither_8x8_220[y & 7];
00644     uint8_t *g = c->table_gU[128] + c->table_gV[128];
00645     int  yalpha1 = 4095 - yalpha;
00646     int i;
00647 
00648     for (i = 0; i < dstW - 7; i += 8) {
00649         int acc =    g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
00650         acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
00651         acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
00652         acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
00653         acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
00654         acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
00655         acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
00656         acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
00657         output_pixel(*dest++, acc);
00658     }
00659 }
00660 
00661 static av_always_inline void
00662 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
00663                       const int16_t *ubuf[2], const int16_t *vbuf[2],
00664                       const int16_t *abuf0, uint8_t *dest, int dstW,
00665                       int uvalpha, int y, enum PixelFormat target)
00666 {
00667     const uint8_t * const d128 = dither_8x8_220[y & 7];
00668     uint8_t *g = c->table_gU[128] + c->table_gV[128];
00669     int i;
00670 
00671     for (i = 0; i < dstW - 7; i += 8) {
00672         int acc =    g[(buf0[i    ] >> 7) + d128[0]];
00673         acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
00674         acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
00675         acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
00676         acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
00677         acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
00678         acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
00679         acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
00680         output_pixel(*dest++, acc);
00681     }
00682 }
00683 
00684 #undef output_pixel
00685 
00686 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
00687 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
00688                                 const int16_t **lumSrc, int lumFilterSize, \
00689                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
00690                                 const int16_t **chrVSrc, int chrFilterSize, \
00691                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
00692                                 int y) \
00693 { \
00694     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
00695                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
00696                                   alpSrc, dest, dstW, y, fmt); \
00697 } \
00698  \
00699 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
00700                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
00701                                 const int16_t *abuf[2], uint8_t *dest, int dstW, \
00702                                 int yalpha, int uvalpha, int y) \
00703 { \
00704     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
00705                                   dest, dstW, yalpha, uvalpha, y, fmt); \
00706 } \
00707  \
00708 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
00709                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
00710                                 const int16_t *abuf0, uint8_t *dest, int dstW, \
00711                                 int uvalpha, int y) \
00712 { \
00713     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
00714                                   abuf0, dest, dstW, uvalpha, \
00715                                   y, fmt); \
00716 }
00717 
00718 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
00719 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
00720 
/*
 * Store one packed 4:2:2 group (two luma samples sharing one U and one V)
 * into the enclosing function's dest array, starting at byte index pos.
 *
 * When the enclosing function's target is PIX_FMT_YUYV422 the byte order is
 * Y1 U Y2 V; for any other target it is U Y1 V Y2.
 *
 * The body is wrapped in do { } while (0) so the macro is a single
 * statement (safe in an unbraced if/else), and all arguments are
 * parenthesized so compound expressions are used as a unit.
 */
#define output_pixels(pos, Y1, U, Y2, V) \
    do { \
        if (target == PIX_FMT_YUYV422) { \
            dest[(pos) + 0] = (Y1); \
            dest[(pos) + 1] = (U);  \
            dest[(pos) + 2] = (Y2); \
            dest[(pos) + 3] = (V);  \
        } else { \
            dest[(pos) + 0] = (U);  \
            dest[(pos) + 1] = (Y1); \
            dest[(pos) + 2] = (V);  \
            dest[(pos) + 3] = (Y2); \
        } \
    } while (0)
00733 
00734 static av_always_inline void
00735 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
00736                      const int16_t **lumSrc, int lumFilterSize,
00737                      const int16_t *chrFilter, const int16_t **chrUSrc,
00738                      const int16_t **chrVSrc, int chrFilterSize,
00739                      const int16_t **alpSrc, uint8_t *dest, int dstW,
00740                      int y, enum PixelFormat target)
00741 {
00742     int i;
00743 
00744     for (i = 0; i < (dstW >> 1); i++) {
00745         int j;
00746         int Y1 = 1 << 18;
00747         int Y2 = 1 << 18;
00748         int U  = 1 << 18;
00749         int V  = 1 << 18;
00750 
00751         for (j = 0; j < lumFilterSize; j++) {
00752             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00753             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00754         }
00755         for (j = 0; j < chrFilterSize; j++) {
00756             U += chrUSrc[j][i] * chrFilter[j];
00757             V += chrVSrc[j][i] * chrFilter[j];
00758         }
00759         Y1 >>= 19;
00760         Y2 >>= 19;
00761         U  >>= 19;
00762         V  >>= 19;
00763         if ((Y1 | Y2 | U | V) & 0x100) {
00764             Y1 = av_clip_uint8(Y1);
00765             Y2 = av_clip_uint8(Y2);
00766             U  = av_clip_uint8(U);
00767             V  = av_clip_uint8(V);
00768         }
00769         output_pixels(4*i, Y1, U, Y2, V);
00770     }
00771 }
00772 
00773 static av_always_inline void
00774 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
00775                      const int16_t *ubuf[2], const int16_t *vbuf[2],
00776                      const int16_t *abuf[2], uint8_t *dest, int dstW,
00777                      int yalpha, int uvalpha, int y,
00778                      enum PixelFormat target)
00779 {
00780     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
00781                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
00782                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
00783     int  yalpha1 = 4095 - yalpha;
00784     int uvalpha1 = 4095 - uvalpha;
00785     int i;
00786 
00787     for (i = 0; i < (dstW >> 1); i++) {
00788         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
00789         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
00790         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
00791         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
00792 
00793         output_pixels(i * 4, Y1, U, Y2, V);
00794     }
00795 }
00796 
00797 static av_always_inline void
00798 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
00799                      const int16_t *ubuf[2], const int16_t *vbuf[2],
00800                      const int16_t *abuf0, uint8_t *dest, int dstW,
00801                      int uvalpha, int y, enum PixelFormat target)
00802 {
00803     const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
00804                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
00805     int i;
00806 
00807     if (uvalpha < 2048) {
00808         for (i = 0; i < (dstW >> 1); i++) {
00809             int Y1 = buf0[i * 2]     >> 7;
00810             int Y2 = buf0[i * 2 + 1] >> 7;
00811             int U  = ubuf1[i]        >> 7;
00812             int V  = vbuf1[i]        >> 7;
00813 
00814             output_pixels(i * 4, Y1, U, Y2, V);
00815         }
00816     } else {
00817         for (i = 0; i < (dstW >> 1); i++) {
00818             int Y1 =  buf0[i * 2]          >> 7;
00819             int Y2 =  buf0[i * 2 + 1]      >> 7;
00820             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
00821             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
00822 
00823             output_pixels(i * 4, Y1, U, Y2, V);
00824         }
00825     }
00826 }
00827 
00828 #undef output_pixels
00829 
00830 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
00831 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
00832 
/*
 * Component-order selectors: R_B yields the local R for the RGB48 targets
 * and B for every other target; B_R is the opposite. "target", "R" and "B"
 * must be in scope at the point of use.
 */
#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)

/*
 * Store the 16-bit value val at pos, big-endian when isBE(target) is true
 * and little-endian otherwise.
 * Wrapped in do { } while (0) so that the macro behaves like a single
 * statement (safe inside an unbraced if/else).
 */
#define output_pixel(pos, val) \
    do { \
        if (isBE(target)) { \
            AV_WB16(pos, val); \
        } else { \
            AV_WL16(pos, val); \
        } \
    } while (0)
00841 
00842 static av_always_inline void
00843 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
00844                        const int32_t **lumSrc, int lumFilterSize,
00845                        const int16_t *chrFilter, const int32_t **chrUSrc,
00846                        const int32_t **chrVSrc, int chrFilterSize,
00847                        const int32_t **alpSrc, uint16_t *dest, int dstW,
00848                        int y, enum PixelFormat target)
00849 {
00850     int i;
00851 
00852     for (i = 0; i < (dstW >> 1); i++) {
00853         int j;
00854         int Y1 = 0;
00855         int Y2 = 0;
00856         int U  = -128 << 23; // 19
00857         int V  = -128 << 23;
00858         int R, G, B;
00859 
00860         for (j = 0; j < lumFilterSize; j++) {
00861             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00862             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00863         }
00864         for (j = 0; j < chrFilterSize; j++) {
00865             U += chrUSrc[j][i] * chrFilter[j];
00866             V += chrVSrc[j][i] * chrFilter[j];
00867         }
00868 
00869         // 8bit: 12+15=27; 16-bit: 12+19=31
00870         Y1 >>= 14; // 10
00871         Y2 >>= 14;
00872         U  >>= 14;
00873         V  >>= 14;
00874 
00875         // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
00876         Y1 -= c->yuv2rgb_y_offset;
00877         Y2 -= c->yuv2rgb_y_offset;
00878         Y1 *= c->yuv2rgb_y_coeff;
00879         Y2 *= c->yuv2rgb_y_coeff;
00880         Y1 += 1 << 13; // 21
00881         Y2 += 1 << 13;
00882         // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
00883 
00884         R = V * c->yuv2rgb_v2r_coeff;
00885         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
00886         B =                            U * c->yuv2rgb_u2b_coeff;
00887 
00888         // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
00889         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
00890         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
00891         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
00892         output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
00893         output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
00894         output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
00895         dest += 6;
00896     }
00897 }
00898 
00899 static av_always_inline void
00900 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
00901                        const int32_t *ubuf[2], const int32_t *vbuf[2],
00902                        const int32_t *abuf[2], uint16_t *dest, int dstW,
00903                        int yalpha, int uvalpha, int y,
00904                        enum PixelFormat target)
00905 {
00906     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
00907                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
00908                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
00909     int  yalpha1 = 4095 - yalpha;
00910     int uvalpha1 = 4095 - uvalpha;
00911     int i;
00912 
00913     for (i = 0; i < (dstW >> 1); i++) {
00914         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
00915         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
00916         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14;
00917         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;
00918         int R, G, B;
00919 
00920         Y1 -= c->yuv2rgb_y_offset;
00921         Y2 -= c->yuv2rgb_y_offset;
00922         Y1 *= c->yuv2rgb_y_coeff;
00923         Y2 *= c->yuv2rgb_y_coeff;
00924         Y1 += 1 << 13;
00925         Y2 += 1 << 13;
00926 
00927         R = V * c->yuv2rgb_v2r_coeff;
00928         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
00929         B =                            U * c->yuv2rgb_u2b_coeff;
00930 
00931         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
00932         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
00933         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
00934         output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
00935         output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
00936         output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
00937         dest += 6;
00938     }
00939 }
00940 
00941 static av_always_inline void
00942 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
00943                        const int32_t *ubuf[2], const int32_t *vbuf[2],
00944                        const int32_t *abuf0, uint16_t *dest, int dstW,
00945                        int uvalpha, int y, enum PixelFormat target)
00946 {
00947     const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
00948                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
00949     int i;
00950 
00951     if (uvalpha < 2048) {
00952         for (i = 0; i < (dstW >> 1); i++) {
00953             int Y1 = (buf0[i * 2]    ) >> 2;
00954             int Y2 = (buf0[i * 2 + 1]) >> 2;
00955             int U  = (ubuf0[i] + (-128 << 11)) >> 2;
00956             int V  = (vbuf0[i] + (-128 << 11)) >> 2;
00957             int R, G, B;
00958 
00959             Y1 -= c->yuv2rgb_y_offset;
00960             Y2 -= c->yuv2rgb_y_offset;
00961             Y1 *= c->yuv2rgb_y_coeff;
00962             Y2 *= c->yuv2rgb_y_coeff;
00963             Y1 += 1 << 13;
00964             Y2 += 1 << 13;
00965 
00966             R = V * c->yuv2rgb_v2r_coeff;
00967             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
00968             B =                            U * c->yuv2rgb_u2b_coeff;
00969 
00970             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
00971             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
00972             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
00973             output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
00974             output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
00975             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
00976             dest += 6;
00977         }
00978     } else {
00979         for (i = 0; i < (dstW >> 1); i++) {
00980             int Y1 = (buf0[i * 2]    ) >> 2;
00981             int Y2 = (buf0[i * 2 + 1]) >> 2;
00982             int U  = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
00983             int V  = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
00984             int R, G, B;
00985 
00986             Y1 -= c->yuv2rgb_y_offset;
00987             Y2 -= c->yuv2rgb_y_offset;
00988             Y1 *= c->yuv2rgb_y_coeff;
00989             Y2 *= c->yuv2rgb_y_coeff;
00990             Y1 += 1 << 13;
00991             Y2 += 1 << 13;
00992 
00993             R = V * c->yuv2rgb_v2r_coeff;
00994             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
00995             B =                            U * c->yuv2rgb_u2b_coeff;
00996 
00997             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
00998             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
00999             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
01000             output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
01001             output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
01002             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
01003             dest += 6;
01004         }
01005     }
01006 }
01007 
#undef output_pixel
/* The component-order selectors used by the rgb48 templates are spelled
 * R_B / B_R (upper case); undefine exactly those names so that they do not
 * stay defined for the remainder of the file. */
#undef R_B
#undef B_R

/* Instantiate the rgb48 entry points for both component orders and both
 * byte orders; all four share the yuv2rgb48_*_c_template() functions. */
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
01016 
01017 static av_always_inline void
01018 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
01019               int U, int V, int A1, int A2,
01020               const void *_r, const void *_g, const void *_b, int y,
01021               enum PixelFormat target, int hasAlpha)
01022 {
01023     if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
01024         target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
01025         uint32_t *dest = (uint32_t *) _dest;
01026         const uint32_t *r = (const uint32_t *) _r;
01027         const uint32_t *g = (const uint32_t *) _g;
01028         const uint32_t *b = (const uint32_t *) _b;
01029 
01030 #if CONFIG_SMALL
01031         int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
01032 
01033         dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
01034         dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
01035 #else
01036         if (hasAlpha) {
01037             int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
01038 
01039             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
01040             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
01041         } else {
01042             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
01043             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
01044         }
01045 #endif
01046     } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
01047         uint8_t *dest = (uint8_t *) _dest;
01048         const uint8_t *r = (const uint8_t *) _r;
01049         const uint8_t *g = (const uint8_t *) _g;
01050         const uint8_t *b = (const uint8_t *) _b;
01051 
01052 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
01053 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
01054 
01055         dest[i * 6 + 0] = r_b[Y1];
01056         dest[i * 6 + 1] =   g[Y1];
01057         dest[i * 6 + 2] = b_r[Y1];
01058         dest[i * 6 + 3] = r_b[Y2];
01059         dest[i * 6 + 4] =   g[Y2];
01060         dest[i * 6 + 5] = b_r[Y2];
01061 #undef r_b
01062 #undef b_r
01063     } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
01064                target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
01065                target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
01066         uint16_t *dest = (uint16_t *) _dest;
01067         const uint16_t *r = (const uint16_t *) _r;
01068         const uint16_t *g = (const uint16_t *) _g;
01069         const uint16_t *b = (const uint16_t *) _b;
01070         int dr1, dg1, db1, dr2, dg2, db2;
01071 
01072         if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
01073             dr1 = dither_2x2_8[ y & 1     ][0];
01074             dg1 = dither_2x2_4[ y & 1     ][0];
01075             db1 = dither_2x2_8[(y & 1) ^ 1][0];
01076             dr2 = dither_2x2_8[ y & 1     ][1];
01077             dg2 = dither_2x2_4[ y & 1     ][1];
01078             db2 = dither_2x2_8[(y & 1) ^ 1][1];
01079         } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
01080             dr1 = dither_2x2_8[ y & 1     ][0];
01081             dg1 = dither_2x2_8[ y & 1     ][1];
01082             db1 = dither_2x2_8[(y & 1) ^ 1][0];
01083             dr2 = dither_2x2_8[ y & 1     ][1];
01084             dg2 = dither_2x2_8[ y & 1     ][0];
01085             db2 = dither_2x2_8[(y & 1) ^ 1][1];
01086         } else {
01087             dr1 = dither_4x4_16[ y & 3     ][0];
01088             dg1 = dither_4x4_16[ y & 3     ][1];
01089             db1 = dither_4x4_16[(y & 3) ^ 3][0];
01090             dr2 = dither_4x4_16[ y & 3     ][1];
01091             dg2 = dither_4x4_16[ y & 3     ][0];
01092             db2 = dither_4x4_16[(y & 3) ^ 3][1];
01093         }
01094 
01095         dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
01096         dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
01097     } else /* 8/4-bit */ {
01098         uint8_t *dest = (uint8_t *) _dest;
01099         const uint8_t *r = (const uint8_t *) _r;
01100         const uint8_t *g = (const uint8_t *) _g;
01101         const uint8_t *b = (const uint8_t *) _b;
01102         int dr1, dg1, db1, dr2, dg2, db2;
01103 
01104         if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
01105             const uint8_t * const d64 = dither_8x8_73[y & 7];
01106             const uint8_t * const d32 = dither_8x8_32[y & 7];
01107             dr1 = dg1 = d32[(i * 2 + 0) & 7];
01108             db1 =       d64[(i * 2 + 0) & 7];
01109             dr2 = dg2 = d32[(i * 2 + 1) & 7];
01110             db2 =       d64[(i * 2 + 1) & 7];
01111         } else {
01112             const uint8_t * const d64  = dither_8x8_73 [y & 7];
01113             const uint8_t * const d128 = dither_8x8_220[y & 7];
01114             dr1 = db1 = d128[(i * 2 + 0) & 7];
01115             dg1 =        d64[(i * 2 + 0) & 7];
01116             dr2 = db2 = d128[(i * 2 + 1) & 7];
01117             dg2 =        d64[(i * 2 + 1) & 7];
01118         }
01119 
01120         if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
01121             dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
01122                     ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
01123         } else {
01124             dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
01125             dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
01126         }
01127     }
01128 }
01129 
01130 static av_always_inline void
01131 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
01132                      const int16_t **lumSrc, int lumFilterSize,
01133                      const int16_t *chrFilter, const int16_t **chrUSrc,
01134                      const int16_t **chrVSrc, int chrFilterSize,
01135                      const int16_t **alpSrc, uint8_t *dest, int dstW,
01136                      int y, enum PixelFormat target, int hasAlpha)
01137 {
01138     int i;
01139 
01140     for (i = 0; i < (dstW >> 1); i++) {
01141         int j;
01142         int Y1 = 1 << 18;
01143         int Y2 = 1 << 18;
01144         int U  = 1 << 18;
01145         int V  = 1 << 18;
01146         int av_unused A1, A2;
01147         const void *r, *g, *b;
01148 
01149         for (j = 0; j < lumFilterSize; j++) {
01150             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
01151             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
01152         }
01153         for (j = 0; j < chrFilterSize; j++) {
01154             U += chrUSrc[j][i] * chrFilter[j];
01155             V += chrVSrc[j][i] * chrFilter[j];
01156         }
01157         Y1 >>= 19;
01158         Y2 >>= 19;
01159         U  >>= 19;
01160         V  >>= 19;
01161         if ((Y1 | Y2 | U | V) & 0x100) {
01162             Y1 = av_clip_uint8(Y1);
01163             Y2 = av_clip_uint8(Y2);
01164             U  = av_clip_uint8(U);
01165             V  = av_clip_uint8(V);
01166         }
01167         if (hasAlpha) {
01168             A1 = 1 << 18;
01169             A2 = 1 << 18;
01170             for (j = 0; j < lumFilterSize; j++) {
01171                 A1 += alpSrc[j][i * 2    ] * lumFilter[j];
01172                 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
01173             }
01174             A1 >>= 19;
01175             A2 >>= 19;
01176             if ((A1 | A2) & 0x100) {
01177                 A1 = av_clip_uint8(A1);
01178                 A2 = av_clip_uint8(A2);
01179             }
01180         }
01181 
01182         /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
01183         r =  c->table_rV[V];
01184         g = (c->table_gU[U] + c->table_gV[V]);
01185         b =  c->table_bU[U];
01186 
01187         yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
01188                       r, g, b, y, target, hasAlpha);
01189     }
01190 }
01191 
01192 static av_always_inline void
01193 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
01194                      const int16_t *ubuf[2], const int16_t *vbuf[2],
01195                      const int16_t *abuf[2], uint8_t *dest, int dstW,
01196                      int yalpha, int uvalpha, int y,
01197                      enum PixelFormat target, int hasAlpha)
01198 {
01199     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
01200                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
01201                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
01202                   *abuf0 = hasAlpha ? abuf[0] : NULL,
01203                   *abuf1 = hasAlpha ? abuf[1] : NULL;
01204     int  yalpha1 = 4095 - yalpha;
01205     int uvalpha1 = 4095 - uvalpha;
01206     int i;
01207 
01208     for (i = 0; i < (dstW >> 1); i++) {
01209         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
01210         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
01211         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
01212         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
01213         int A1, A2;
01214         const void *r =  c->table_rV[V],
01215                    *g = (c->table_gU[U] + c->table_gV[V]),
01216                    *b =  c->table_bU[U];
01217 
01218         if (hasAlpha) {
01219             A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
01220             A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
01221         }
01222 
01223         yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
01224                       r, g, b, y, target, hasAlpha);
01225     }
01226 }
01227 
01228 static av_always_inline void
01229 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
01230                      const int16_t *ubuf[2], const int16_t *vbuf[2],
01231                      const int16_t *abuf0, uint8_t *dest, int dstW,
01232                      int uvalpha, int y, enum PixelFormat target,
01233                      int hasAlpha)
01234 {
01235     const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
01236                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
01237     int i;
01238 
01239     if (uvalpha < 2048) {
01240         for (i = 0; i < (dstW >> 1); i++) {
01241             int Y1 = buf0[i * 2]     >> 7;
01242             int Y2 = buf0[i * 2 + 1] >> 7;
01243             int U  = ubuf1[i]        >> 7;
01244             int V  = vbuf1[i]        >> 7;
01245             int A1, A2;
01246             const void *r =  c->table_rV[V],
01247                        *g = (c->table_gU[U] + c->table_gV[V]),
01248                        *b =  c->table_bU[U];
01249 
01250             if (hasAlpha) {
01251                 A1 = abuf0[i * 2    ] >> 7;
01252                 A2 = abuf0[i * 2 + 1] >> 7;
01253             }
01254 
01255             yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
01256                           r, g, b, y, target, hasAlpha);
01257         }
01258     } else {
01259         for (i = 0; i < (dstW >> 1); i++) {
01260             int Y1 =  buf0[i * 2]          >> 7;
01261             int Y2 =  buf0[i * 2 + 1]      >> 7;
01262             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
01263             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
01264             int A1, A2;
01265             const void *r =  c->table_rV[V],
01266                        *g = (c->table_gU[U] + c->table_gV[V]),
01267                        *b =  c->table_bU[U];
01268 
01269             if (hasAlpha) {
01270                 A1 = abuf0[i * 2    ] >> 7;
01271                 A2 = abuf0[i * 2 + 1] >> 7;
01272             }
01273 
01274             yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
01275                           r, g, b, y, target, hasAlpha);
01276         }
01277     }
01278 }
01279 
/*
 * Define <name><ext>_X_c(): a plain function that forwards all of its
 * arguments to <name><base>_X_c_template(), appending fmt and hasAlpha as
 * the two trailing arguments.
 */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, \
                                const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, \
                                const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, \
                                uint8_t *dest, int dstW, int y) \
{ \
    name ## base ## _X_c_template(c, \
                                  lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, \
                                  fmt, hasAlpha); \
}
/*
 * Define the full set of entry points for one output format:
 * <name><ext>_X_c() (via YUV2RGBWRAPPERX), <name><ext>_2_c() and
 * <name><ext>_1_c(). Each forwards its arguments to the matching
 * <name><base>_*_c_template() with fmt and hasAlpha appended.
 */
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, \
                                const int16_t *buf[2], \
                                const int16_t *ubuf[2], \
                                const int16_t *vbuf[2], \
                                const int16_t *abuf[2], \
                                uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, dest, dstW, \
                                  yalpha, uvalpha, y, \
                                  fmt, hasAlpha); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, \
                                const int16_t *buf0, \
                                const int16_t *ubuf[2], \
                                const int16_t *vbuf[2], \
                                const int16_t *abuf0, \
                                uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, dstW, \
                                  uvalpha, y, \
                                  fmt, hasAlpha); \
}
01311 
01312 #if CONFIG_SMALL
01313 YUV2RGBWRAPPER(yuv2rgb,,  32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
01314 YUV2RGBWRAPPER(yuv2rgb,,  32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
01315 #else
01316 #if CONFIG_SWSCALE_ALPHA
01317 YUV2RGBWRAPPER(yuv2rgb,, a32_1,  PIX_FMT_RGB32_1,   1);
01318 YUV2RGBWRAPPER(yuv2rgb,, a32,    PIX_FMT_RGB32,     1);
01319 #endif
01320 YUV2RGBWRAPPER(yuv2rgb,, x32_1,  PIX_FMT_RGB32_1,   0);
01321 YUV2RGBWRAPPER(yuv2rgb,, x32,    PIX_FMT_RGB32,     0);
01322 #endif
01323 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,   0);
01324 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,   0);
01325 YUV2RGBWRAPPER(yuv2rgb,,  16,    PIX_FMT_RGB565,    0);
01326 YUV2RGBWRAPPER(yuv2rgb,,  15,    PIX_FMT_RGB555,    0);
01327 YUV2RGBWRAPPER(yuv2rgb,,  12,    PIX_FMT_RGB444,    0);
01328 YUV2RGBWRAPPER(yuv2rgb,,   8,    PIX_FMT_RGB8,      0);
01329 YUV2RGBWRAPPER(yuv2rgb,,   4,    PIX_FMT_RGB4,      0);
01330 YUV2RGBWRAPPER(yuv2rgb,,   4b,   PIX_FMT_RGB4_BYTE, 0);
01331 
01332 static av_always_inline void
01333 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
01334                           const int16_t **lumSrc, int lumFilterSize,
01335                           const int16_t *chrFilter, const int16_t **chrUSrc,
01336                           const int16_t **chrVSrc, int chrFilterSize,
01337                           const int16_t **alpSrc, uint8_t *dest,
01338                           int dstW, int y, enum PixelFormat target, int hasAlpha)
01339 {
01340     int i;
01341     int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
01342 
01343     for (i = 0; i < dstW; i++) {
01344         int j;
01345         int Y = 1<<9;
01346         int U = (1<<9)-(128 << 19);
01347         int V = (1<<9)-(128 << 19);
01348         int av_unused A;
01349         int R, G, B;
01350 
01351         for (j = 0; j < lumFilterSize; j++) {
01352             Y += lumSrc[j][i] * lumFilter[j];
01353         }
01354         for (j = 0; j < chrFilterSize; j++) {
01355             U += chrUSrc[j][i] * chrFilter[j];
01356             V += chrVSrc[j][i] * chrFilter[j];
01357         }
01358         Y >>= 10;
01359         U >>= 10;
01360         V >>= 10;
01361         if (hasAlpha) {
01362             A = 1 << 18;
01363             for (j = 0; j < lumFilterSize; j++) {
01364                 A += alpSrc[j][i] * lumFilter[j];
01365             }
01366             A >>= 19;
01367             if (A & 0x100)
01368                 A = av_clip_uint8(A);
01369         }
01370         Y -= c->yuv2rgb_y_offset;
01371         Y *= c->yuv2rgb_y_coeff;
01372         Y += 1 << 21;
01373         R = Y + V*c->yuv2rgb_v2r_coeff;
01374         G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
01375         B = Y +                          U*c->yuv2rgb_u2b_coeff;
01376         if ((R | G | B) & 0xC0000000) {
01377             R = av_clip_uintp2(R, 30);
01378             G = av_clip_uintp2(G, 30);
01379             B = av_clip_uintp2(B, 30);
01380         }
01381 
01382         switch(target) {
01383         case PIX_FMT_ARGB:
01384             dest[0] = hasAlpha ? A : 255;
01385             dest[1] = R >> 22;
01386             dest[2] = G >> 22;
01387             dest[3] = B >> 22;
01388             break;
01389         case PIX_FMT_RGB24:
01390             dest[0] = R >> 22;
01391             dest[1] = G >> 22;
01392             dest[2] = B >> 22;
01393             break;
01394         case PIX_FMT_RGBA:
01395             dest[0] = R >> 22;
01396             dest[1] = G >> 22;
01397             dest[2] = B >> 22;
01398             dest[3] = hasAlpha ? A : 255;
01399             break;
01400         case PIX_FMT_ABGR:
01401             dest[0] = hasAlpha ? A : 255;
01402             dest[1] = B >> 22;
01403             dest[2] = G >> 22;
01404             dest[3] = R >> 22;
01405             break;
01406         case PIX_FMT_BGR24:
01407             dest[0] = B >> 22;
01408             dest[1] = G >> 22;
01409             dest[2] = R >> 22;
01410             break;
01411         case PIX_FMT_BGRA:
01412             dest[0] = B >> 22;
01413             dest[1] = G >> 22;
01414             dest[2] = R >> 22;
01415             dest[3] = hasAlpha ? A : 255;
01416             break;
01417         }
01418         dest += step;
01419     }
01420 }
01421 
01422 #if CONFIG_SMALL
01423 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
01424 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
01425 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
01426 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
01427 #else
01428 #if CONFIG_SWSCALE_ALPHA
01429 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  1);
01430 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  1);
01431 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  1);
01432 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  1);
01433 #endif
01434 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA,  0);
01435 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR,  0);
01436 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA,  0);
01437 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB,  0);
01438 #endif
01439 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0);
01440 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0);
01441 
/**
 * Fill a rectangular region of a plane with a constant byte.
 *
 * Starting at line y, the first width bytes of height consecutive lines
 * are set to val; consecutive lines are stride bytes apart. Nothing is
 * written when height is not positive.
 */
static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                       int width, int height,
                                       int y, uint8_t val)
{
    uint8_t *row = plane + stride * y;
    int remaining;

    for (remaining = height; remaining > 0; remaining--, row += stride)
        memset(row, val, width);
}
01453 
/* Read one 16-bit component at pos using the byte order given by "origin". */
#define input_pixel(pos) (!isBE(origin) ? AV_RL16(pos) : AV_RB16(pos))

/* Map the locals r_b (first stored component) and b_r (third stored
 * component) to red and blue: for the BGR48 origins the first component is
 * blue, for every other origin it is red. */
#define r ((origin != PIX_FMT_BGR48BE && origin != PIX_FMT_BGR48LE) ? r_b : b_r)
#define b ((origin != PIX_FMT_BGR48BE && origin != PIX_FMT_BGR48LE) ? b_r : r_b)
01458 
01459 static av_always_inline void
01460 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
01461                     enum PixelFormat origin)
01462 {
01463     int i;
01464     for (i = 0; i < width; i++) {
01465         unsigned int r_b = input_pixel(&src[i*3+0]);
01466         unsigned int   g = input_pixel(&src[i*3+1]);
01467         unsigned int b_r = input_pixel(&src[i*3+2]);
01468 
01469         dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01470     }
01471 }
01472 
01473 static av_always_inline void
01474 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
01475                     const uint16_t *src1, const uint16_t *src2,
01476                     int width, enum PixelFormat origin)
01477 {
01478     int i;
01479     assert(src1==src2);
01480     for (i = 0; i < width; i++) {
01481         int r_b = input_pixel(&src1[i*3+0]);
01482         int   g = input_pixel(&src1[i*3+1]);
01483         int b_r = input_pixel(&src1[i*3+2]);
01484 
01485         dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01486         dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01487     }
01488 }
01489 
01490 static av_always_inline void
01491 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
01492                           const uint16_t *src1, const uint16_t *src2,
01493                           int width, enum PixelFormat origin)
01494 {
01495     int i;
01496     assert(src1==src2);
01497     for (i = 0; i < width; i++) {
01498         int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
01499         int   g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
01500         int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
01501 
01502         dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01503         dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01504     }
01505 }
01506 
01507 #undef r
01508 #undef b
01509 #undef input_pixel
01510 
/*
 * rgb48funcs(pattern, BE_LE, origin) defines three static functions named
 * <pattern>48<BE_LE>ToY_c, <pattern>48<BE_LE>ToUV_c and
 * <pattern>48<BE_LE>ToUV_half_c.  Each one casts its byte pointers to
 * uint16_t pointers and forwards to the matching rgb48To*_c_template with
 * `origin` passed as the pixel format.  The unused0/unused1/unused
 * parameters are ignored.
 */
01511 #define rgb48funcs(pattern, BE_LE, origin) \
01512 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
01513                                     int width, uint32_t *unused) \
01514 { \
01515     const uint16_t *src = (const uint16_t *) _src; \
01516     uint16_t *dst = (uint16_t *) _dst; \
01517     rgb48ToY_c_template(dst, src, width, origin); \
01518 } \
01519  \
01520 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
01521                                     const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
01522                                     int width, uint32_t *unused) \
01523 { \
01524     const uint16_t *src1 = (const uint16_t *) _src1, \
01525                    *src2 = (const uint16_t *) _src2; \
01526     uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
01527     rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
01528 } \
01529  \
01530 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
01531                                     const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
01532                                     int width, uint32_t *unused) \
01533 { \
01534     const uint16_t *src1 = (const uint16_t *) _src1, \
01535                    *src2 = (const uint16_t *) _src2; \
01536     uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
01537     rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
01538 }
01539 
/* One set of readers per component order (rgb/bgr) and endianness (LE/BE). */
01540 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
01541 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
01542 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
01543 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
01544 
01545 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
01546                          origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
01547                         (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
01548 
01549 static av_always_inline void
01550 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
01551                        int width, enum PixelFormat origin,
01552                        int shr,   int shg,   int shb, int shp,
01553                        int maskr, int maskg, int maskb,
01554                        int rsh,   int gsh,   int bsh, int S)
01555 {
01556     const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
01557               rnd = (32<<((S)-1)) + (1<<(S-7));
01558     int i;
01559 
01560     for (i = 0; i < width; i++) {
01561         int px = input_pixel(i) >> shp;
01562         int b = (px & maskb) >> shb;
01563         int g = (px & maskg) >> shg;
01564         int r = (px & maskr) >> shr;
01565 
01566         dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
01567     }
01568 }
01569 
01570 static av_always_inline void
01571 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
01572                         const uint8_t *src, int width,
01573                         enum PixelFormat origin,
01574                         int shr,   int shg,   int shb, int shp,
01575                         int maskr, int maskg, int maskb,
01576                         int rsh,   int gsh,   int bsh, int S)
01577 {
01578     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
01579               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
01580               rnd = (256<<((S)-1)) + (1<<(S-7));
01581     int i;
01582 
01583     for (i = 0; i < width; i++) {
01584         int px = input_pixel(i) >> shp;
01585         int b = (px & maskb) >> shb;
01586         int g = (px & maskg) >> shg;
01587         int r = (px & maskr) >> shr;
01588 
01589         dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
01590         dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
01591     }
01592 }
01593 
/**
 * Convert a line of packed 16- or 32-bit RGB to U and V with 2:1 horizontal
 * subsampling: each output sample is computed from the SUM of two adjacent
 * input pixels, and the final shift is one bit larger ((S)-6+1) than in the
 * non-subsampled template to compensate.
 *
 * The two pixels are added as whole words; the component fields are only
 * separated afterwards, which is why the masks are widened by one bit.
 */
01594 static av_always_inline void
01595 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
01596                              const uint8_t *src, int width,
01597                              enum PixelFormat origin,
01598                              int shr,   int shg,   int shb, int shp,
01599                              int maskr, int maskg, int maskb,
01600                              int rsh,   int gsh,   int bsh, int S)
01601 {
    /* rnd is computed with an unsigned 256U<<S but stored in an int; it is
     * cast back to unsigned where it is used below.
     * maskgx selects every bit that belongs to neither red nor blue. */
01602     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
01603               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
01604               rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb);
01605     int i;
01606 
    /* widen each mask by one bit so it also covers the carry of a two-pixel sum */
01607     maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
01608     for (i = 0; i < width; i++) {
01609         int px0 = input_pixel(2 * i + 0) >> shp;
01610         int px1 = input_pixel(2 * i + 1) >> shp;
        /* g: sum of the non-red/blue bits of both pixels;
         * rb: what is left, i.e. the summed red and blue fields */
01611         int b, r, g = (px0 & maskgx) + (px1 & maskgx);
01612         int rb = px0 + px1 - g;
01613 
01614         b = (rb & maskb) >> shb;
        /* NOTE(review): green is only shifted, not masked, when shp is set or
         * for the 565 formats -- presumably because no stray bits can sit
         * above green there; confirm before changing. */
01615         if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
01616             origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
01617             g >>= shg;
01618         } else {
01619             g = (g  & maskg) >> shg;
01620         }
01621         r = (rb & maskr) >> shr;
01622 
        /* the (unsigned) cast makes the whole sum unsigned before the shift */
01623         dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
01624         dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
01625     }
01626 }
01627 
01628 #undef input_pixel
01629 
/*
 * rgb16_32_wrapper(fmt, name, ...) defines three static functions,
 * <name>ToY_c, <name>ToUV_c and <name>ToUV_half_c.  Each casts its
 * destination pointer(s) to int16_t* and forwards to the corresponding
 * rgb16_32To*_c_template, passing fmt as the pixel format together with
 * the per-format shift/mask constants.  The unused*/dummy parameters are
 * ignored.
 */
01630 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
01631                          maskg, maskb, rsh, gsh, bsh, S) \
01632 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
01633                           int width, uint32_t *unused) \
01634 { \
01635     rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \
01636                            shr, shg, shb, shp, \
01637                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
01638 } \
01639  \
01640 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
01641                            const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
01642                            int width, uint32_t *unused) \
01643 { \
01644     rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt,  \
01645                             shr, shg, shb, shp, \
01646                             maskr, maskg, maskb, rsh, gsh, bsh, S); \
01647 } \
01648  \
01649 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
01650                                 const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
01651                                 int width, uint32_t *unused) \
01652 { \
01653     rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
01654                                  shr, shg, shb, shp, \
01655                                  maskr, maskg, maskb, rsh, gsh, bsh, S); \
01656 }
01657 
/* Column order: fmt, name, shr, shg, shb, shp, maskr, maskg, maskb, rsh, gsh, bsh, S.
 * The LE and BE rows of a 16-bit format share the same constants; they differ
 * only in fmt, which selects the byte order inside input_pixel(). */
01658 rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
01659 rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
01660 rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
01661 rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
01662 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
01663 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
01664 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
01665 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
01666 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
01667 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
01668 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
01669 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
01670 
01671 static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
01672                          const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
01673                          int width, enum PixelFormat origin)
01674 {
01675     int i;
01676     for (i = 0; i < width; i++) {
01677         unsigned int g   = gsrc[2*i] + gsrc[2*i+1];
01678         unsigned int b   = bsrc[2*i] + bsrc[2*i+1];
01679         unsigned int r   = rsrc[2*i] + rsrc[2*i+1];
01680 
01681         dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
01682         dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
01683     }
01684 }
01685 
/**
 * Extract the alpha channel from packed 4-byte pixels whose alpha is the
 * first byte, scaling it up by 6 bits (value << 6).
 * unused1, unused2 and unused are ignored.
 */
static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    const uint8_t *px = src;
    int x;

    for (x = 0; x < width; x++, px += 4)
        dst[x] = px[0] << 6;
}
01693 
/**
 * Extract the alpha channel from packed 4-byte pixels whose alpha is the
 * last byte, scaling it up by 6 bits (value << 6).
 * unused1, unused2 and unused are ignored.
 */
static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
    const uint8_t *px = src;
    int x;

    for (x = 0; x < width; x++, px += 4)
        dst[x] = px[3] << 6;
}
01701 
/**
 * Look up every palette index of the line in `pal` and store bits 24..31
 * of the palette entry, shifted left by 6, as the alpha sample.
 * unused1 and unused2 are ignored.
 */
static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
{
    int x;

    for (x = 0; x < width; x++) {
        const uint32_t entry = pal[src[x]];

        dst[x] = (entry >> 24) << 6;
    }
}
01711 
/**
 * Look up every palette index of the line in `pal` and store the lowest
 * byte of the palette entry, shifted left by 6, as the luma sample.
 * unused1 and unused2 are ignored.
 *
 * `width` is an int, like in every other input converter of this file and
 * like the function pointer type these converters are called through;
 * it used to be declared as long here.
 */
static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d= src[i];

        dst[i]= (pal[d] & 0xFF)<<6;
    }
}
01721 
/**
 * Look up every palette index of the line in `pal`; byte 1 (bits 8..15) of
 * the entry becomes the U sample and byte 2 (bits 16..23) the V sample,
 * both shifted left by 6.
 * src1 and src2 must be the same pointer (asserted); unused0 is ignored.
 */
static void palToUV_c(uint16_t *dstU, int16_t *dstV,
                           const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                           int width, uint32_t *pal)
{
    int x;

    assert(src1 == src2);
    for (x = 0; x < width; x++) {
        const int entry = pal[src1[x]];
        const uint8_t u = (uint8_t)(entry >> 8);
        const uint8_t v = (uint8_t)(entry >> 16);

        dstU[x] = u << 6;
        dstV[x] = v << 6;
    }
}
01735 
/**
 * Expand 1 bit-per-pixel input, most significant bit first, to luma.
 * The source bytes are inverted first, so a 0 bit yields 16383 and a
 * 1 bit yields 0.  A trailing partial byte is handled when width is not
 * a multiple of 8.  unused1, unused2 and unused are ignored.
 */
static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width, uint32_t *unused)
{
    const int whole_bytes = width/8;
    const int tail_bits   = width&7;
    int byte, bit;

    /* complete bytes: 8 output samples each */
    for (byte = 0; byte < whole_bytes; byte++) {
        const int inverted = ~src[byte];

        for (bit = 0; bit < 8; bit++)
            dst[8*byte+bit] = (inverted & (0x80 >> bit)) ? 16383 : 0;
    }
    /* leftover pixels in the last, partially used byte */
    if (tail_bits) {
        const int inverted = ~src[byte];

        for (bit = 0; bit < tail_bits; bit++)
            dst[8*byte+bit] = (inverted & (0x80 >> bit)) ? 16383 : 0;
    }
}
01750 
/**
 * Expand 1 bit-per-pixel input, most significant bit first, to luma:
 * a 1 bit yields 16383 and a 0 bit yields 0.  A trailing partial byte is
 * handled when width is not a multiple of 8.
 * unused1, unused2 and unused are ignored.
 */
static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width, uint32_t *unused)
{
    const int whole_bytes = width/8;
    const int tail_bits   = width&7;
    int byte, bit;

    /* complete bytes: 8 output samples each */
    for (byte = 0; byte < whole_bytes; byte++) {
        const int bits = src[byte];

        for (bit = 0; bit < 8; bit++)
            dst[8*byte+bit] = (bits & (0x80 >> bit)) ? 16383 : 0;
    }
    /* leftover pixels in the last, partially used byte */
    if (tail_bits) {
        const int bits = src[byte];

        for (bit = 0; bit < tail_bits; bit++)
            dst[8*byte+bit] = (bits & (0x80 >> bit)) ? 16383 : 0;
    }
}
01765 
01766 //FIXME yuy2* can read up to 7 samples too much
01767 
/**
 * Copy every even-indexed byte of the packed input line to dst
 * (dst[i] = src[2*i]).  unused1, unused2 and unused are ignored.
 */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width,
                      uint32_t *unused)
{
    const uint8_t *in = src;
    int x;

    for (x = 0; x < width; x++, in += 2)
        dst[x] = in[0];
}
01775 
/**
 * De-interleave chroma from 4-byte groups of the packed line:
 * byte 1 of each group goes to dstU, byte 3 to dstV.
 * src1 and src2 must be the same pointer (asserted after the loop);
 * unused0 and unused are ignored.
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    const uint8_t *group = src1;
    int x;

    for (x = 0; x < width; x++, group += 4) {
        dstU[x] = group[1];
        dstV[x] = group[3];
    }
    assert(src1 == src2);
}
01786 
/**
 * Copy a line of 16-bit samples while swapping the two bytes of every
 * sample (av_bswap16).  width counts 16-bit samples.
 * unused1, unused2 and unused are ignored.
 */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2,  int width, uint32_t *unused)
{
    const uint16_t *in  = (const uint16_t *) _src;
    uint16_t       *out = (uint16_t *) _dst;
    int n;

    for (n = 0; n < width; n++)
        out[n] = av_bswap16(in[n]);
}
01796 
/**
 * Copy two lines of 16-bit chroma samples (_src1 -> _dstU, _src2 -> _dstV)
 * while swapping the two bytes of every sample (av_bswap16).
 * width counts 16-bit samples; unused0 and unused are ignored.
 */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    const uint16_t *in_u  = (const uint16_t *) _src1;
    const uint16_t *in_v  = (const uint16_t *) _src2;
    uint16_t       *out_u = (uint16_t *) _dstU;
    uint16_t       *out_v = (uint16_t *) _dstV;
    int n;

    for (n = 0; n < width; n++) {
        out_u[n] = av_bswap16(in_u[n]);
        out_v[n] = av_bswap16(in_v[n]);
    }
}
01809 
01810 /* This is almost identical to yuy2ToY_c/yuy2ToUV_c, and exists only because
01811  * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/**
 * Copy every odd-indexed byte of the packed input line to dst
 * (dst[i] = src[2*i+1]).  unused1, unused2 and unused are ignored.
 */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width,
                      uint32_t *unused)
{
    const uint8_t *in = src;
    int x;

    for (x = 0; x < width; x++, in += 2)
        dst[x] = in[1];
}
01819 
/**
 * De-interleave chroma from 4-byte groups of the packed line:
 * byte 0 of each group goes to dstU, byte 2 to dstV.
 * src1 and src2 must be the same pointer (asserted after the loop);
 * unused0 and unused are ignored.
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    const uint8_t *group = src1;
    int x;

    for (x = 0; x < width; x++, group += 4) {
        dstU[x] = group[0];
        dstV[x] = group[2];
    }
    assert(src1 == src2);
}
01830 
01831 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
01832                                         const uint8_t *src, int width)
01833 {
01834     int i;
01835     for (i = 0; i < width; i++) {
01836         dst1[i] = src[2*i+0];
01837         dst2[i] = src[2*i+1];
01838     }
01839 }
01840 
/**
 * Chroma reader for interleaved input whose pairs are ordered U, V:
 * de-interleaves src1 into dstU (first byte) and dstV (second byte).
 * unused0, src2 and unused are ignored.
 */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *unused0,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    /* first byte of every pair -> U, second -> V */
    nvXXtoUV_c(dstU, dstV, src1, width);
}
01847 
/**
 * Chroma reader for interleaved input whose pairs are ordered V, U:
 * de-interleaves src1 into dstV (first byte) and dstU (second byte).
 * unused0, src2 and unused are ignored.
 */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *unused0,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    /* first byte of every pair -> V, second -> U */
    nvXXtoUV_c(dstV, dstU, src1, width);
}
01854 
01855 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
01856 
01857 static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
01858                        int width, uint32_t *unused)
01859 {
01860     int i;
01861     for (i=0; i<width; i++) {
01862         int b= src[i*3+0];
01863         int g= src[i*3+1];
01864         int r= src[i*3+2];
01865 
01866         dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
01867     }
01868 }
01869 
01870 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
01871                         const uint8_t *src2, int width, uint32_t *unused)
01872 {
01873     int i;
01874     for (i=0; i<width; i++) {
01875         int b= src1[3*i + 0];
01876         int g= src1[3*i + 1];
01877         int r= src1[3*i + 2];
01878 
01879         dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
01880         dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
01881     }
01882     assert(src1 == src2);
01883 }
01884 
01885 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
01886                              const uint8_t *src2, int width, uint32_t *unused)
01887 {
01888     int i;
01889     for (i=0; i<width; i++) {
01890         int b= src1[6*i + 0] + src1[6*i + 3];
01891         int g= src1[6*i + 1] + src1[6*i + 4];
01892         int r= src1[6*i + 2] + src1[6*i + 5];
01893 
01894         dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
01895         dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
01896     }
01897     assert(src1 == src2);
01898 }
01899 
01900 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
01901                        uint32_t *unused)
01902 {
01903     int i;
01904     for (i=0; i<width; i++) {
01905         int r= src[i*3+0];
01906         int g= src[i*3+1];
01907         int b= src[i*3+2];
01908 
01909         dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
01910     }
01911 }
01912 
01913 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
01914                         const uint8_t *src2, int width, uint32_t *unused)
01915 {
01916     int i;
01917     assert(src1==src2);
01918     for (i=0; i<width; i++) {
01919         int r= src1[3*i + 0];
01920         int g= src1[3*i + 1];
01921         int b= src1[3*i + 2];
01922 
01923         dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
01924         dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
01925     }
01926 }
01927 
01928 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
01929                                     const uint8_t *src2, int width, uint32_t *unused)
01930 {
01931     int i;
01932     assert(src1==src2);
01933     for (i=0; i<width; i++) {
01934         int r= src1[6*i + 0] + src1[6*i + 3];
01935         int g= src1[6*i + 1] + src1[6*i + 4];
01936         int b= src1[6*i + 2] + src1[6*i + 5];
01937 
01938         dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
01939         dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
01940     }
01941 }
01942 
01943 static void planar_rgb_to_y(uint16_t *dst, const uint8_t *src[4], int width)
01944 {
01945     int i;
01946     for (i = 0; i < width; i++) {
01947         int g = src[0][i];
01948         int b = src[1][i];
01949         int r = src[2][i];
01950 
01951         dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
01952     }
01953 }
01954 
01955 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
01956 {
01957     int i;
01958     const uint16_t **src = (const uint16_t **) _src;
01959     uint16_t *dst = (uint16_t *) _dst;
01960     for (i = 0; i < width; i++) {
01961         int g = AV_RL16(src[0] + i);
01962         int b = AV_RL16(src[1] + i);
01963         int r = AV_RL16(src[2] + i);
01964 
01965         dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
01966     }
01967 }
01968 
01969 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
01970 {
01971     int i;
01972     const uint16_t **src = (const uint16_t **) _src;
01973     uint16_t *dst = (uint16_t *) _dst;
01974     for (i = 0; i < width; i++) {
01975         int g = AV_RB16(src[0] + i);
01976         int b = AV_RB16(src[1] + i);
01977         int r = AV_RB16(src[2] + i);
01978 
01979         dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
01980     }
01981 }
01982 
01983 static void planar_rgb_to_uv(uint16_t *dstU, uint16_t *dstV, const uint8_t *src[4], int width)
01984 {
01985     int i;
01986     for (i = 0; i < width; i++) {
01987         int g = src[0][i];
01988         int b = src[1][i];
01989         int r = src[2][i];
01990 
01991         dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
01992         dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
01993     }
01994 }
01995 
01996 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
01997 {
01998     int i;
01999     const uint16_t **src = (const uint16_t **) _src;
02000     uint16_t *dstU = (uint16_t *) _dstU;
02001     uint16_t *dstV = (uint16_t *) _dstV;
02002     for (i = 0; i < width; i++) {
02003         int g = AV_RL16(src[0] + i);
02004         int b = AV_RL16(src[1] + i);
02005         int r = AV_RL16(src[2] + i);
02006 
02007         dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
02008         dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
02009     }
02010 }
02011 
02012 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
02013 {
02014     int i;
02015     const uint16_t **src = (const uint16_t **) _src;
02016     uint16_t *dstU = (uint16_t *) _dstU;
02017     uint16_t *dstV = (uint16_t *) _dstV;
02018     for (i = 0; i < width; i++) {
02019         int g = AV_RB16(src[0] + i);
02020         int b = AV_RB16(src[1] + i);
02021         int r = AV_RB16(src[2] + i);
02022 
02023         dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
02024         dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
02025     }
02026 }
02027 
02028 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
02029                            const int16_t *filter,
02030                            const int16_t *filterPos, int filterSize)
02031 {
02032     int i;
02033     int32_t *dst = (int32_t *) _dst;
02034     const uint16_t *src = (const uint16_t *) _src;
02035     int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
02036     int sh = bits - 4;
02037 
02038     if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
02039         sh= 9;
02040 
02041     for (i = 0; i < dstW; i++) {
02042         int j;
02043         int srcPos = filterPos[i];
02044         int val = 0;
02045 
02046         for (j = 0; j < filterSize; j++) {
02047             val += src[srcPos + j] * filter[filterSize * i + j];
02048         }
02049         // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
02050         dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
02051     }
02052 }
02053 
02054 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
02055                            const int16_t *filter,
02056                            const int16_t *filterPos, int filterSize)
02057 {
02058     int i;
02059     const uint16_t *src = (const uint16_t *) _src;
02060     int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
02061 
02062     if(sh<15)
02063         sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
02064 
02065     for (i = 0; i < dstW; i++) {
02066         int j;
02067         int srcPos = filterPos[i];
02068         int val = 0;
02069 
02070         for (j = 0; j < filterSize; j++) {
02071             val += src[srcPos + j] * filter[filterSize * i + j];
02072         }
02073         // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
02074         dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
02075     }
02076 }
02077 
02078 // bilinear / bicubic scaling
02079 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
02080                           const int16_t *filter, const int16_t *filterPos,
02081                           int filterSize)
02082 {
02083     int i;
02084     for (i=0; i<dstW; i++) {
02085         int j;
02086         int srcPos= filterPos[i];
02087         int val=0;
02088         for (j=0; j<filterSize; j++) {
02089             val += ((int)src[srcPos + j])*filter[filterSize*i + j];
02090         }
02091         //filter += hFilterSize;
02092         dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
02093         //dst[i] = val>>7;
02094     }
02095 }
02096 
02097 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
02098                           const int16_t *filter, const int16_t *filterPos,
02099                           int filterSize)
02100 {
02101     int i;
02102     int32_t *dst = (int32_t *) _dst;
02103     for (i=0; i<dstW; i++) {
02104         int j;
02105         int srcPos= filterPos[i];
02106         int val=0;
02107         for (j=0; j<filterSize; j++) {
02108             val += ((int)src[srcPos + j])*filter[filterSize*i + j];
02109         }
02110         //filter += hFilterSize;
02111         dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
02112         //dst[i] = val>>7;
02113     }
02114 }
02115 
02116 //FIXME all pal and rgb srcFormats could do this conversion as well
02117 //FIXME all scalers more complex than bilinear could do half of this transform
/**
 * In-place chroma range expansion on 16-bit intermediate samples:
 * each sample is clamped to at most 30775 and mapped through
 * (x*4663 - 9289992) >> 12.
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    enum { CLAMP = 30775, MUL = 4663, SUB = 9289992, SHIFT = 12 };
    int x;

    for (x = 0; x < width; x++) {
        dstU[x] = (FFMIN(dstU[x],CLAMP)*MUL - SUB)>>SHIFT; //-264
        dstV[x] = (FFMIN(dstV[x],CLAMP)*MUL - SUB)>>SHIFT; //-264
    }
}
/**
 * In-place chroma range compression on 16-bit intermediate samples:
 * each sample is mapped through (x*1799 + 4081085) >> 11.
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    enum { MUL = 1799, ADD = 4081085, SHIFT = 11 };
    int x;

    for (x = 0; x < width; x++) {
        dstU[x] = (dstU[x]*MUL + ADD)>>SHIFT; //1469
        dstV[x] = (dstV[x]*MUL + ADD)>>SHIFT; //1469
    }
}
/**
 * In-place luma range expansion on 16-bit intermediate samples:
 * each sample is clamped to at most 30189 and mapped through
 * (x*19077 - 39057361) >> 14.
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    enum { CLAMP = 30189, MUL = 19077, SUB = 39057361, SHIFT = 14 };
    int x;

    for (x = 0; x < width; x++) {
        const int clamped = FFMIN(dst[x],CLAMP);

        dst[x] = (clamped*MUL - SUB)>>SHIFT;
    }
}
/**
 * In-place luma range compression on 16-bit intermediate samples:
 * each sample is mapped through (x*14071 + 33561947) >> 14.
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    enum { MUL = 14071, ADD = 33561947, SHIFT = 14 };
    int x;

    for (x = 0; x < width; x++) {
        const int scaled = dst[x]*MUL + ADD;

        dst[x] = scaled>>SHIFT;
    }
}
02146 
/**
 * In-place chroma range expansion on 32-bit intermediate samples
 * (the buffers are passed as int16_t* but accessed as int32_t*).
 * Each sample is clamped to at most 30775<<4 and mapped through
 * (x*4663 - (9289992<<4)) >> 12.
 *
 * The product is evaluated in 64 bits: at the clamp value
 * (30775<<4)*4663 = 2296061200 does not fit into a 32-bit int, so the
 * plain int expression overflowed (undefined behaviour) even though the
 * final result fits.
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    const int32_t limit  = 30775<<4;
    const int64_t offset = 9289992<<4;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    int i;

    for (i = 0; i < width; i++) {
        int64_t clamped;

        clamped = dstU[i] < limit ? dstU[i] : limit;
        dstU[i] = (int32_t)((clamped*4663 - offset)>>12); //-264
        clamped = dstV[i] < limit ? dstV[i] : limit;
        dstV[i] = (int32_t)((clamped*4663 - offset)>>12); //-264
    }
}
/**
 * In-place chroma range compression on 32-bit intermediate samples
 * (the buffers are passed as int16_t* but accessed as int32_t*).
 * Each sample is mapped through (x*1799 + (4081085<<4)) >> 11.
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    enum { MUL = 1799, ADD = 4081085<<4, SHIFT = 11 };
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    int x;

    for (x = 0; x < width; x++) {
        u[x] = (u[x]*MUL + ADD)>>SHIFT; //1469
        v[x] = (v[x]*MUL + ADD)>>SHIFT; //1469
    }
}
/**
 * In-place luma range expansion on 32-bit intermediate samples
 * (the buffer is passed as int16_t* but accessed as int32_t*).
 * Each sample is clamped to at most 30189<<4 and mapped through
 * (x*4769 - (39057361<<2)) >> 12.
 *
 * The product is evaluated in 64 bits: at the clamp value
 * (30189<<4)*4769 = 2303541456 does not fit into a 32-bit int, so the
 * plain int expression overflowed (undefined behaviour) even though the
 * final result fits.
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    const int32_t limit  = 30189<<4;
    const int64_t offset = 39057361<<2;
    int32_t *dst = (int32_t *) _dst;
    int i;

    for (i = 0; i < width; i++) {
        const int64_t clamped = dst[i] < limit ? dst[i] : limit;

        dst[i] = (int32_t)((clamped*4769 - offset)>>12);
    }
}
/**
 * In-place luma range compression on 32-bit intermediate samples
 * (the buffer is passed as int16_t* but accessed as int32_t*).
 * Each sample is mapped through
 * (x*(14071/4) + (33561947<<4)/4) >> 12.
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    enum { MUL = 14071/4, ADD = (33561947<<4)/4, SHIFT = 12 };
    int32_t *y = (int32_t *) _dst;
    int x;

    for (x = 0; x < width; x++) {
        const int scaled = y[x]*MUL + ADD;

        y[x] = scaled>>SHIFT;
    }
}
02181 
02182 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
02183                            const uint8_t *src, int srcW, int xInc)
02184 {
02185     int i;
02186     unsigned int xpos=0;
02187     for (i=0;i<dstWidth;i++) {
02188         register unsigned int xx=xpos>>16;
02189         register unsigned int xalpha=(xpos&0xFFFF)>>9;
02190         dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
02191         xpos+=xInc;
02192     }
02193     for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
02194         dst[i] = src[srcW-1]*128;
02195 }
02196 
02197 // *** horizontal scale Y line to temp buffer
02198 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
02199                                      const uint8_t *src_in[4], int srcW, int xInc,
02200                                      const int16_t *hLumFilter,
02201                                      const int16_t *hLumFilterPos, int hLumFilterSize,
02202                                      uint8_t *formatConvBuffer,
02203                                      uint32_t *pal, int isAlpha)
02204 {
02205     void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
02206     void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
02207     const uint8_t *src = src_in[isAlpha ? 3 : 0];
02208 
02209     if (toYV12) {
02210         toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
02211         src= formatConvBuffer;
02212     } else if (c->readLumPlanar && !isAlpha) {
02213         c->readLumPlanar(formatConvBuffer, src_in, srcW);
02214         src = formatConvBuffer;
02215     }
02216 
02217     if (!c->hyscale_fast) {
02218         c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
02219     } else { // fast bilinear upscale / crap downscale
02220         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
02221     }
02222 
02223     if (convertRange)
02224         convertRange(dst, dstWidth);
02225 }
02226 
02227 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
02228                            int dstWidth, const uint8_t *src1,
02229                            const uint8_t *src2, int srcW, int xInc)
02230 {
02231     int i;
02232     unsigned int xpos=0;
02233     for (i=0;i<dstWidth;i++) {
02234         register unsigned int xx=xpos>>16;
02235         register unsigned int xalpha=(xpos&0xFFFF)>>9;
02236         dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
02237         dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
02238         xpos+=xInc;
02239     }
02240     for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
02241         dst1[i] = src1[srcW-1]*128;
02242         dst2[i] = src2[srcW-1]*128;
02243     }
02244 }
02245 
02246 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
02247                                      const uint8_t *src_in[4],
02248                                      int srcW, int xInc, const int16_t *hChrFilter,
02249                                      const int16_t *hChrFilterPos, int hChrFilterSize,
02250                                      uint8_t *formatConvBuffer, uint32_t *pal)
02251 {
02252     const uint8_t *src1 = src_in[1], *src2 = src_in[2];
02253     if (c->chrToYV12) {
02254         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
02255         c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
02256         src1= formatConvBuffer;
02257         src2= buf2;
02258     } else if (c->readChrPlanar) {
02259         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
02260         c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
02261         src1= formatConvBuffer;
02262         src2= buf2;
02263     }
02264 
02265     if (!c->hcscale_fast) {
02266         c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
02267         c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
02268     } else { // fast bilinear upscale / crap downscale
02269         c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
02270     }
02271 
02272     if (c->chrConvertRange)
02273         c->chrConvertRange(dst1, dst2, dstWidth);
02274 }
02275 
02276 static av_always_inline void
02277 find_c_packed_planar_out_funcs(SwsContext *c,
02278                                yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
02279                                yuv2interleavedX_fn *yuv2nv12cX,
02280                                yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
02281                                yuv2packedX_fn *yuv2packedX)
02282 {
02283     enum PixelFormat dstFormat = c->dstFormat;
02284 
02285     if (is16BPS(dstFormat)) {
02286         *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
02287         *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
02288     } else if (is9_OR_10BPS(dstFormat)) {
02289         if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
02290             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
02291             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c  : yuv2plane1_9LE_c;
02292         } else {
02293             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c  : yuv2planeX_10LE_c;
02294             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c  : yuv2plane1_10LE_c;
02295         }
02296     } else {
02297         *yuv2plane1 = yuv2plane1_8_c;
02298         *yuv2planeX = yuv2planeX_8_c;
02299         if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
02300             *yuv2nv12cX = yuv2nv12cX_c;
02301     }
02302 
02303     if(c->flags & SWS_FULL_CHR_H_INT) {
02304         switch (dstFormat) {
02305             case PIX_FMT_RGBA:
02306 #if CONFIG_SMALL
02307                 *yuv2packedX = yuv2rgba32_full_X_c;
02308 #else
02309 #if CONFIG_SWSCALE_ALPHA
02310                 if (c->alpPixBuf) {
02311                     *yuv2packedX = yuv2rgba32_full_X_c;
02312                 } else
02313 #endif /* CONFIG_SWSCALE_ALPHA */
02314                 {
02315                     *yuv2packedX = yuv2rgbx32_full_X_c;
02316                 }
02317 #endif /* !CONFIG_SMALL */
02318                 break;
02319             case PIX_FMT_ARGB:
02320 #if CONFIG_SMALL
02321                 *yuv2packedX = yuv2argb32_full_X_c;
02322 #else
02323 #if CONFIG_SWSCALE_ALPHA
02324                 if (c->alpPixBuf) {
02325                     *yuv2packedX = yuv2argb32_full_X_c;
02326                 } else
02327 #endif /* CONFIG_SWSCALE_ALPHA */
02328                 {
02329                     *yuv2packedX = yuv2xrgb32_full_X_c;
02330                 }
02331 #endif /* !CONFIG_SMALL */
02332                 break;
02333             case PIX_FMT_BGRA:
02334 #if CONFIG_SMALL
02335                 *yuv2packedX = yuv2bgra32_full_X_c;
02336 #else
02337 #if CONFIG_SWSCALE_ALPHA
02338                 if (c->alpPixBuf) {
02339                     *yuv2packedX = yuv2bgra32_full_X_c;
02340                 } else
02341 #endif /* CONFIG_SWSCALE_ALPHA */
02342                 {
02343                     *yuv2packedX = yuv2bgrx32_full_X_c;
02344                 }
02345 #endif /* !CONFIG_SMALL */
02346                 break;
02347             case PIX_FMT_ABGR:
02348 #if CONFIG_SMALL
02349                 *yuv2packedX = yuv2abgr32_full_X_c;
02350 #else
02351 #if CONFIG_SWSCALE_ALPHA
02352                 if (c->alpPixBuf) {
02353                     *yuv2packedX = yuv2abgr32_full_X_c;
02354                 } else
02355 #endif /* CONFIG_SWSCALE_ALPHA */
02356                 {
02357                     *yuv2packedX = yuv2xbgr32_full_X_c;
02358                 }
02359 #endif /* !CONFIG_SMALL */
02360                 break;
02361             case PIX_FMT_RGB24:
02362             *yuv2packedX = yuv2rgb24_full_X_c;
02363             break;
02364         case PIX_FMT_BGR24:
02365             *yuv2packedX = yuv2bgr24_full_X_c;
02366             break;
02367         }
02368         if(!*yuv2packedX)
02369             goto YUV_PACKED;
02370     } else {
02371         YUV_PACKED:
02372         switch (dstFormat) {
02373         case PIX_FMT_GRAY16BE:
02374             *yuv2packed1 = yuv2gray16BE_1_c;
02375             *yuv2packed2 = yuv2gray16BE_2_c;
02376             *yuv2packedX = yuv2gray16BE_X_c;
02377             break;
02378         case PIX_FMT_GRAY16LE:
02379             *yuv2packed1 = yuv2gray16LE_1_c;
02380             *yuv2packed2 = yuv2gray16LE_2_c;
02381             *yuv2packedX = yuv2gray16LE_X_c;
02382             break;
02383         case PIX_FMT_MONOWHITE:
02384             *yuv2packed1 = yuv2monowhite_1_c;
02385             *yuv2packed2 = yuv2monowhite_2_c;
02386             *yuv2packedX = yuv2monowhite_X_c;
02387             break;
02388         case PIX_FMT_MONOBLACK:
02389             *yuv2packed1 = yuv2monoblack_1_c;
02390             *yuv2packed2 = yuv2monoblack_2_c;
02391             *yuv2packedX = yuv2monoblack_X_c;
02392             break;
02393         case PIX_FMT_YUYV422:
02394             *yuv2packed1 = yuv2yuyv422_1_c;
02395             *yuv2packed2 = yuv2yuyv422_2_c;
02396             *yuv2packedX = yuv2yuyv422_X_c;
02397             break;
02398         case PIX_FMT_UYVY422:
02399             *yuv2packed1 = yuv2uyvy422_1_c;
02400             *yuv2packed2 = yuv2uyvy422_2_c;
02401             *yuv2packedX = yuv2uyvy422_X_c;
02402             break;
02403         case PIX_FMT_RGB48LE:
02404             *yuv2packed1 = yuv2rgb48le_1_c;
02405             *yuv2packed2 = yuv2rgb48le_2_c;
02406             *yuv2packedX = yuv2rgb48le_X_c;
02407             break;
02408         case PIX_FMT_RGB48BE:
02409             *yuv2packed1 = yuv2rgb48be_1_c;
02410             *yuv2packed2 = yuv2rgb48be_2_c;
02411             *yuv2packedX = yuv2rgb48be_X_c;
02412             break;
02413         case PIX_FMT_BGR48LE:
02414             *yuv2packed1 = yuv2bgr48le_1_c;
02415             *yuv2packed2 = yuv2bgr48le_2_c;
02416             *yuv2packedX = yuv2bgr48le_X_c;
02417             break;
02418         case PIX_FMT_BGR48BE:
02419             *yuv2packed1 = yuv2bgr48be_1_c;
02420             *yuv2packed2 = yuv2bgr48be_2_c;
02421             *yuv2packedX = yuv2bgr48be_X_c;
02422             break;
02423         case PIX_FMT_RGB32:
02424         case PIX_FMT_BGR32:
02425 #if CONFIG_SMALL
02426             *yuv2packed1 = yuv2rgb32_1_c;
02427             *yuv2packed2 = yuv2rgb32_2_c;
02428             *yuv2packedX = yuv2rgb32_X_c;
02429 #else
02430 #if CONFIG_SWSCALE_ALPHA
02431                 if (c->alpPixBuf) {
02432                     *yuv2packed1 = yuv2rgba32_1_c;
02433                     *yuv2packed2 = yuv2rgba32_2_c;
02434                     *yuv2packedX = yuv2rgba32_X_c;
02435                 } else
02436 #endif /* CONFIG_SWSCALE_ALPHA */
02437                 {
02438                     *yuv2packed1 = yuv2rgbx32_1_c;
02439                     *yuv2packed2 = yuv2rgbx32_2_c;
02440                     *yuv2packedX = yuv2rgbx32_X_c;
02441                 }
02442 #endif /* !CONFIG_SMALL */
02443             break;
02444         case PIX_FMT_RGB32_1:
02445         case PIX_FMT_BGR32_1:
02446 #if CONFIG_SMALL
02447                 *yuv2packed1 = yuv2rgb32_1_1_c;
02448                 *yuv2packed2 = yuv2rgb32_1_2_c;
02449                 *yuv2packedX = yuv2rgb32_1_X_c;
02450 #else
02451 #if CONFIG_SWSCALE_ALPHA
02452                 if (c->alpPixBuf) {
02453                     *yuv2packed1 = yuv2rgba32_1_1_c;
02454                     *yuv2packed2 = yuv2rgba32_1_2_c;
02455                     *yuv2packedX = yuv2rgba32_1_X_c;
02456                 } else
02457 #endif /* CONFIG_SWSCALE_ALPHA */
02458                 {
02459                     *yuv2packed1 = yuv2rgbx32_1_1_c;
02460                     *yuv2packed2 = yuv2rgbx32_1_2_c;
02461                     *yuv2packedX = yuv2rgbx32_1_X_c;
02462                 }
02463 #endif /* !CONFIG_SMALL */
02464                 break;
02465         case PIX_FMT_RGB24:
02466             *yuv2packed1 = yuv2rgb24_1_c;
02467             *yuv2packed2 = yuv2rgb24_2_c;
02468             *yuv2packedX = yuv2rgb24_X_c;
02469             break;
02470         case PIX_FMT_BGR24:
02471             *yuv2packed1 = yuv2bgr24_1_c;
02472             *yuv2packed2 = yuv2bgr24_2_c;
02473             *yuv2packedX = yuv2bgr24_X_c;
02474             break;
02475         case PIX_FMT_RGB565LE:
02476         case PIX_FMT_RGB565BE:
02477         case PIX_FMT_BGR565LE:
02478         case PIX_FMT_BGR565BE:
02479             *yuv2packed1 = yuv2rgb16_1_c;
02480             *yuv2packed2 = yuv2rgb16_2_c;
02481             *yuv2packedX = yuv2rgb16_X_c;
02482             break;
02483         case PIX_FMT_RGB555LE:
02484         case PIX_FMT_RGB555BE:
02485         case PIX_FMT_BGR555LE:
02486         case PIX_FMT_BGR555BE:
02487             *yuv2packed1 = yuv2rgb15_1_c;
02488             *yuv2packed2 = yuv2rgb15_2_c;
02489             *yuv2packedX = yuv2rgb15_X_c;
02490             break;
02491         case PIX_FMT_RGB444LE:
02492         case PIX_FMT_RGB444BE:
02493         case PIX_FMT_BGR444LE:
02494         case PIX_FMT_BGR444BE:
02495             *yuv2packed1 = yuv2rgb12_1_c;
02496             *yuv2packed2 = yuv2rgb12_2_c;
02497             *yuv2packedX = yuv2rgb12_X_c;
02498             break;
02499         case PIX_FMT_RGB8:
02500         case PIX_FMT_BGR8:
02501             *yuv2packed1 = yuv2rgb8_1_c;
02502             *yuv2packed2 = yuv2rgb8_2_c;
02503             *yuv2packedX = yuv2rgb8_X_c;
02504             break;
02505         case PIX_FMT_RGB4:
02506         case PIX_FMT_BGR4:
02507             *yuv2packed1 = yuv2rgb4_1_c;
02508             *yuv2packed2 = yuv2rgb4_2_c;
02509             *yuv2packedX = yuv2rgb4_X_c;
02510             break;
02511         case PIX_FMT_RGB4_BYTE:
02512         case PIX_FMT_BGR4_BYTE:
02513             *yuv2packed1 = yuv2rgb4b_1_c;
02514             *yuv2packed2 = yuv2rgb4b_2_c;
02515             *yuv2packedX = yuv2rgb4b_X_c;
02516             break;
02517         }
02518     }
02519 }
02520 
/* Set to 1 to enable the DEBUG_BUFFERS() trace output. */
#define DEBUG_SWSCALE_BUFFERS 0

/* Log at AV_LOG_DEBUG level when DEBUG_SWSCALE_BUFFERS is non-zero.
 * Expects a variable named `c` in the calling scope, which is handed to
 * av_log() as its first argument. Wrapped in do { } while (0) so the macro
 * acts as a single statement and cannot capture a following `else`. */
#define DEBUG_BUFFERS(...)                              \
    do {                                                \
        if (DEBUG_SWSCALE_BUFFERS)                      \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);       \
    } while (0)
02523 
02524 static int swScale(SwsContext *c, const uint8_t* src[],
02525                    int srcStride[], int srcSliceY,
02526                    int srcSliceH, uint8_t* dst[], int dstStride[])
02527 {
02528     /* load a few things into local vars to make the code more readable? and faster */
02529     const int srcW= c->srcW;
02530     const int dstW= c->dstW;
02531     const int dstH= c->dstH;
02532     const int chrDstW= c->chrDstW;
02533     const int chrSrcW= c->chrSrcW;
02534     const int lumXInc= c->lumXInc;
02535     const int chrXInc= c->chrXInc;
02536     const enum PixelFormat dstFormat= c->dstFormat;
02537     const int flags= c->flags;
02538     int16_t *vLumFilterPos= c->vLumFilterPos;
02539     int16_t *vChrFilterPos= c->vChrFilterPos;
02540     int16_t *hLumFilterPos= c->hLumFilterPos;
02541     int16_t *hChrFilterPos= c->hChrFilterPos;
02542     int16_t *hLumFilter= c->hLumFilter;
02543     int16_t *hChrFilter= c->hChrFilter;
02544     int32_t *lumMmxFilter= c->lumMmxFilter;
02545     int32_t *chrMmxFilter= c->chrMmxFilter;
02546     int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
02547     const int vLumFilterSize= c->vLumFilterSize;
02548     const int vChrFilterSize= c->vChrFilterSize;
02549     const int hLumFilterSize= c->hLumFilterSize;
02550     const int hChrFilterSize= c->hChrFilterSize;
02551     int16_t **lumPixBuf= c->lumPixBuf;
02552     int16_t **chrUPixBuf= c->chrUPixBuf;
02553     int16_t **chrVPixBuf= c->chrVPixBuf;
02554     int16_t **alpPixBuf= c->alpPixBuf;
02555     const int vLumBufSize= c->vLumBufSize;
02556     const int vChrBufSize= c->vChrBufSize;
02557     uint8_t *formatConvBuffer= c->formatConvBuffer;
02558     const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
02559     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
02560     int lastDstY;
02561     uint32_t *pal=c->pal_yuv;
02562     int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
02563 
02564     yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
02565     yuv2planarX_fn yuv2planeX = c->yuv2planeX;
02566     yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
02567     yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
02568     yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
02569     yuv2packedX_fn yuv2packedX = c->yuv2packedX;
02570 
02571     /* vars which will change and which we need to store back in the context */
02572     int dstY= c->dstY;
02573     int lumBufIndex= c->lumBufIndex;
02574     int chrBufIndex= c->chrBufIndex;
02575     int lastInLumBuf= c->lastInLumBuf;
02576     int lastInChrBuf= c->lastInChrBuf;
02577 
02578     if (isPacked(c->srcFormat)) {
02579         src[0]=
02580         src[1]=
02581         src[2]=
02582         src[3]= src[0];
02583         srcStride[0]=
02584         srcStride[1]=
02585         srcStride[2]=
02586         srcStride[3]= srcStride[0];
02587     }
02588     srcStride[1]<<= c->vChrDrop;
02589     srcStride[2]<<= c->vChrDrop;
02590 
02591     DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
02592                   src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
02593                   dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
02594     DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
02595                    srcSliceY,    srcSliceH,    dstY,    dstH);
02596     DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
02597                    vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
02598 
02599     if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
02600         static int warnedAlready=0; //FIXME move this into the context perhaps
02601         if (flags & SWS_PRINT_INFO && !warnedAlready) {
02602             av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
02603                    "         ->cannot do aligned memory accesses anymore\n");
02604             warnedAlready=1;
02605         }
02606     }
02607 
02608     if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
02609         || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
02610         || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
02611     ) {
02612         static int warnedAlready=0;
02613         int cpu_flags = av_get_cpu_flags();
02614         if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
02615             av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
02616             warnedAlready=1;
02617         }
02618     }
02619 
02620     /* Note the user might start scaling the picture in the middle so this
02621        will not get executed. This is not really intended but works
02622        currently, so people might do it. */
02623     if (srcSliceY ==0) {
02624         lumBufIndex=-1;
02625         chrBufIndex=-1;
02626         dstY=0;
02627         lastInLumBuf= -1;
02628         lastInChrBuf= -1;
02629     }
02630 
02631     if (!should_dither) {
02632         c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
02633     }
02634     lastDstY= dstY;
02635 
02636     for (;dstY < dstH; dstY++) {
02637         const int chrDstY= dstY>>c->chrDstVSubSample;
02638         uint8_t *dest[4] = {
02639             dst[0] + dstStride[0] * dstY,
02640             dst[1] + dstStride[1] * chrDstY,
02641             dst[2] + dstStride[2] * chrDstY,
02642             (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
02643         };
02644         int use_mmx_vfilter= c->use_mmx_vfilter;
02645 
02646         const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
02647         const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
02648         const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
02649         int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
02650         int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
02651         int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
02652         int enough_lines;
02653 
02654         //handle holes (FAST_BILINEAR & weird filters)
02655         if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
02656         if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
02657         assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
02658         assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
02659 
02660         DEBUG_BUFFERS("dstY: %d\n", dstY);
02661         DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
02662                          firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
02663         DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
02664                          firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
02665 
02666         // Do we have enough lines in this slice to output the dstY line
02667         enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
02668 
02669         if (!enough_lines) {
02670             lastLumSrcY = srcSliceY + srcSliceH - 1;
02671             lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
02672             DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
02673                                             lastLumSrcY, lastChrSrcY);
02674         }
02675 
02676         //Do horizontal scaling
02677         while(lastInLumBuf < lastLumSrcY) {
02678             const uint8_t *src1[4] = {
02679                 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
02680                 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
02681                 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
02682                 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
02683             };
02684             lumBufIndex++;
02685             assert(lumBufIndex < 2*vLumBufSize);
02686             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
02687             assert(lastInLumBuf + 1 - srcSliceY >= 0);
02688             hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
02689                     hLumFilter, hLumFilterPos, hLumFilterSize,
02690                     formatConvBuffer,
02691                     pal, 0);
02692             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
02693                 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
02694                         lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
02695                         formatConvBuffer,
02696                         pal, 1);
02697             lastInLumBuf++;
02698             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
02699                                lumBufIndex,    lastInLumBuf);
02700         }
02701         while(lastInChrBuf < lastChrSrcY) {
02702             const uint8_t *src1[4] = {
02703                 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
02704                 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
02705                 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
02706                 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
02707             };
02708             chrBufIndex++;
02709             assert(chrBufIndex < 2*vChrBufSize);
02710             assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
02711             assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
02712             //FIXME replace parameters through context struct (some at least)
02713 
02714             if (c->needs_hcscale)
02715                 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
02716                           chrDstW, src1, chrSrcW, chrXInc,
02717                           hChrFilter, hChrFilterPos, hChrFilterSize,
02718                           formatConvBuffer, pal);
02719             lastInChrBuf++;
02720             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
02721                                chrBufIndex,    lastInChrBuf);
02722         }
02723         //wrap buf index around to stay inside the ring buffer
02724         if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
02725         if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
02726         if (!enough_lines)
02727             break; //we can't output a dstY line so let's try with the next slice
02728 
02729 #if HAVE_MMX
02730         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
02731 #endif
02732         if (should_dither) {
02733             c->chrDither8 = dither_8x8_128[chrDstY & 7];
02734             c->lumDither8 = dither_8x8_128[dstY & 7];
02735         }
02736         if (dstY >= dstH-2) {
02737             // hmm looks like we can't use MMX here without overwriting this array's tail
02738             find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX,  &yuv2nv12cX,
02739                                            &yuv2packed1, &yuv2packed2, &yuv2packedX);
02740             use_mmx_vfilter= 0;
02741         }
02742 
02743         {
02744             const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
02745             const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02746             const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02747             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
02748             int16_t *vLumFilter= c->vLumFilter;
02749             int16_t *vChrFilter= c->vChrFilter;
02750 
02751             if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
02752                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
02753 
02754                 vLumFilter +=    dstY * vLumFilterSize;
02755                 vChrFilter += chrDstY * vChrFilterSize;
02756 
02757                 av_assert0(use_mmx_vfilter != (
02758                                yuv2planeX == yuv2planeX_10BE_c
02759                             || yuv2planeX == yuv2planeX_10LE_c
02760                             || yuv2planeX == yuv2planeX_9BE_c
02761                             || yuv2planeX == yuv2planeX_9LE_c
02762                             || yuv2planeX == yuv2planeX_16BE_c
02763                             || yuv2planeX == yuv2planeX_16LE_c
02764                             || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
02765 
02766                 if(use_mmx_vfilter){
02767                     vLumFilter= c->lumMmxFilter;
02768                     vChrFilter= c->chrMmxFilter;
02769                 }
02770 
02771                 if (vLumFilterSize == 1) {
02772                     yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
02773                 } else {
02774                     yuv2planeX(vLumFilter, vLumFilterSize,
02775                                lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
02776                 }
02777 
02778                 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
02779                     if (yuv2nv12cX) {
02780                         yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
02781                     } else if (vChrFilterSize == 1) {
02782                         yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
02783                         yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
02784                     } else {
02785                         yuv2planeX(vChrFilter, vChrFilterSize,
02786                                    chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
02787                         yuv2planeX(vChrFilter, vChrFilterSize,
02788                                    chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
02789                     }
02790                 }
02791 
02792                 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
02793                     if(use_mmx_vfilter){
02794                         vLumFilter= c->alpMmxFilter;
02795                     }
02796                     if (vLumFilterSize == 1) {
02797                         yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
02798                     } else {
02799                         yuv2planeX(vLumFilter, vLumFilterSize,
02800                                    alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
02801                     }
02802                 }
02803             } else {
02804                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
02805                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
02806                 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
02807                     int chrAlpha = vChrFilter[2 * dstY + 1];
02808                     yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
02809                                 alpPixBuf ? *alpSrcPtr : NULL,
02810                                 dest[0], dstW, chrAlpha, dstY);
02811                 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
02812                     int lumAlpha = vLumFilter[2 * dstY + 1];
02813                     int chrAlpha = vChrFilter[2 * dstY + 1];
02814                     lumMmxFilter[2] =
02815                     lumMmxFilter[3] = vLumFilter[2 * dstY   ] * 0x10001;
02816                     chrMmxFilter[2] =
02817                     chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
02818                     yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
02819                                 alpPixBuf ? alpSrcPtr : NULL,
02820                                 dest[0], dstW, lumAlpha, chrAlpha, dstY);
02821                 } else { //general RGB
02822                     yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
02823                                 lumSrcPtr, vLumFilterSize,
02824                                 vChrFilter + dstY * vChrFilterSize,
02825                                 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
02826                                 alpSrcPtr, dest[0], dstW, dstY);
02827                 }
02828             }
02829         }
02830     }
02831 
02832     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
02833         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
02834 
02835 #if HAVE_MMX2
02836     if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
02837         __asm__ volatile("sfence":::"memory");
02838 #endif
02839     emms_c();
02840 
02841     /* store changed local vars back in the context */
02842     c->dstY= dstY;
02843     c->lumBufIndex= lumBufIndex;
02844     c->chrBufIndex= chrBufIndex;
02845     c->lastInLumBuf= lastInLumBuf;
02846     c->lastInChrBuf= lastInChrBuf;
02847 
02848     return dstY - lastDstY;
02849 }
02850 
02851 static av_cold void sws_init_swScale_c(SwsContext *c)
02852 {
02853     enum PixelFormat srcFormat = c->srcFormat;
02854 
02855     find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
02856                                    &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
02857                                    &c->yuv2packedX);
02858 
02859     c->chrToYV12 = NULL;
02860     switch(srcFormat) {
02861         case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
02862         case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
02863         case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
02864         case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
02865         case PIX_FMT_RGB8     :
02866         case PIX_FMT_BGR8     :
02867         case PIX_FMT_PAL8     :
02868         case PIX_FMT_BGR4_BYTE:
02869         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
02870         case PIX_FMT_GBRP9LE:
02871         case PIX_FMT_GBRP10LE:
02872         case PIX_FMT_GBRP16LE:  c->readChrPlanar = planar_rgb16le_to_uv; break;
02873         case PIX_FMT_GBRP9BE:
02874         case PIX_FMT_GBRP10BE:
02875         case PIX_FMT_GBRP16BE:  c->readChrPlanar = planar_rgb16be_to_uv; break;
02876         case PIX_FMT_GBRP:      c->readChrPlanar = planar_rgb_to_uv; break;
02877 #if HAVE_BIGENDIAN
02878         case PIX_FMT_YUV444P9LE:
02879         case PIX_FMT_YUV422P9LE:
02880         case PIX_FMT_YUV420P9LE:
02881         case PIX_FMT_YUV422P10LE:
02882         case PIX_FMT_YUV420P10LE:
02883         case PIX_FMT_YUV444P10LE:
02884         case PIX_FMT_YUV420P16LE:
02885         case PIX_FMT_YUV422P16LE:
02886         case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
02887 #else
02888         case PIX_FMT_YUV444P9BE:
02889         case PIX_FMT_YUV422P9BE:
02890         case PIX_FMT_YUV420P9BE:
02891         case PIX_FMT_YUV444P10BE:
02892         case PIX_FMT_YUV422P10BE:
02893         case PIX_FMT_YUV420P10BE:
02894         case PIX_FMT_YUV420P16BE:
02895         case PIX_FMT_YUV422P16BE:
02896         case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
02897 #endif
02898     }
02899     if (c->chrSrcHSubSample) {
02900         switch(srcFormat) {
02901         case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
02902         case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
02903         case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
02904         case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
02905         case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_half_c;   break;
02906         case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c;  break;
02907         case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_half_c;   break;
02908         case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
02909         case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
02910         case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
02911         case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
02912         case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_half_c;   break;
02913         case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c;  break;
02914         case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_half_c;   break;
02915         case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
02916         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
02917         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
02918         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
02919         case PIX_FMT_GBR24P  : c->chrToYV12 = gbr24pToUV_half_c;  break;
02920         }
02921     } else {
02922         switch(srcFormat) {
02923         case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
02924         case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
02925         case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
02926         case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
02927         case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_c;   break;
02928         case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c;  break;
02929         case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_c;   break;
02930         case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
02931         case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
02932         case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
02933         case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
02934         case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_c;   break;
02935         case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c;  break;
02936         case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_c;   break;
02937         case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
02938         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
02939         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
02940         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
02941         }
02942     }
02943 
02944     c->lumToYV12 = NULL;
02945     c->alpToYV12 = NULL;
02946     switch (srcFormat) {
02947     case PIX_FMT_GBRP9LE:
02948     case PIX_FMT_GBRP10LE:
02949     case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
02950     case PIX_FMT_GBRP9BE:
02951     case PIX_FMT_GBRP10BE:
02952     case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
02953     case PIX_FMT_GBRP:     c->readLumPlanar = planar_rgb_to_y; break;
02954 #if HAVE_BIGENDIAN
02955     case PIX_FMT_YUV444P9LE:
02956     case PIX_FMT_YUV422P9LE:
02957     case PIX_FMT_YUV420P9LE:
02958     case PIX_FMT_YUV422P10LE:
02959     case PIX_FMT_YUV420P10LE:
02960     case PIX_FMT_YUV444P10LE:
02961     case PIX_FMT_YUV420P16LE:
02962     case PIX_FMT_YUV422P16LE:
02963     case PIX_FMT_YUV444P16LE:
02964     case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
02965 #else
02966     case PIX_FMT_YUV444P9BE:
02967     case PIX_FMT_YUV422P9BE:
02968     case PIX_FMT_YUV420P9BE:
02969     case PIX_FMT_YUV444P10BE:
02970     case PIX_FMT_YUV422P10BE:
02971     case PIX_FMT_YUV420P10BE:
02972     case PIX_FMT_YUV420P16BE:
02973     case PIX_FMT_YUV422P16BE:
02974     case PIX_FMT_YUV444P16BE:
02975     case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
02976 #endif
02977     case PIX_FMT_YUYV422  :
02978     case PIX_FMT_Y400A    : c->lumToYV12 = yuy2ToY_c; break;
02979     case PIX_FMT_UYVY422  : c->lumToYV12 = uyvyToY_c;    break;
02980     case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c;   break;
02981     case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
02982     case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
02983     case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
02984     case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
02985     case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c;   break;
02986     case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
02987     case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
02988     case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
02989     case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
02990     case PIX_FMT_RGB8     :
02991     case PIX_FMT_BGR8     :
02992     case PIX_FMT_PAL8     :
02993     case PIX_FMT_BGR4_BYTE:
02994     case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
02995     case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
02996     case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
02997     case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c;  break;
02998     case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
02999     case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
03000     case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
03001     case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
03002     case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
03003     case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
03004     case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
03005     }
03006     if (c->alpPixBuf) {
03007         switch (srcFormat) {
03008         case PIX_FMT_BGRA:
03009         case PIX_FMT_RGBA:  c->alpToYV12 = rgbaToA_c; break;
03010         case PIX_FMT_ABGR:
03011         case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
03012         case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
03013         case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
03014         }
03015     }
03016 
03017 
03018     if (c->srcBpc == 8) {
03019         if (c->dstBpc <= 10) {
03020             c->hyScale = c->hcScale = hScale8To15_c;
03021             if (c->flags & SWS_FAST_BILINEAR) {
03022                 c->hyscale_fast = hyscale_fast_c;
03023                 c->hcscale_fast = hcscale_fast_c;
03024             }
03025         } else {
03026             c->hyScale = c->hcScale = hScale8To19_c;
03027         }
03028     } else {
03029         c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
03030     }
03031 
03032     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
03033         if (c->dstBpc <= 10) {
03034             if (c->srcRange) {
03035                 c->lumConvertRange = lumRangeFromJpeg_c;
03036                 c->chrConvertRange = chrRangeFromJpeg_c;
03037             } else {
03038                 c->lumConvertRange = lumRangeToJpeg_c;
03039                 c->chrConvertRange = chrRangeToJpeg_c;
03040             }
03041         } else {
03042             if (c->srcRange) {
03043                 c->lumConvertRange = lumRangeFromJpeg16_c;
03044                 c->chrConvertRange = chrRangeFromJpeg16_c;
03045             } else {
03046                 c->lumConvertRange = lumRangeToJpeg16_c;
03047                 c->chrConvertRange = chrRangeToJpeg16_c;
03048             }
03049         }
03050     }
03051 
03052     if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
03053           srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
03054         c->needs_hcscale = 1;
03055 }
03056 
03057 SwsFunc ff_getSwsFunc(SwsContext *c)
03058 {
03059     sws_init_swScale_c(c);
03060 
03061     if (HAVE_MMX)
03062         ff_sws_init_swScale_mmx(c);
03063     if (HAVE_ALTIVEC)
03064         ff_sws_init_swScale_altivec(c);
03065 
03066     return swScale;
03067 }