FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/avassert.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/mem_internal.h"
31 #include "libavutil/pixdesc.h"
32 
/* Two 64-bit dither words; the bytes of each word repeat 01 03 and 02 00
 * respectively. ff_updateMMXDitherTables() indexes this table with dstY & 1
 * to set greenDither when the destination format is neither RGB555 nor
 * BGR555. The first DECLARE_ALIGNED argument (8) is presumably the alignment
 * in bytes - confirm in mem_internal.h. */
33 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
34  0x0103010301030103LL,
35  0x0200020002000200LL,};
36 
/* Two 64-bit dither words; the bytes of each word repeat 06 02 and 00 04
 * respectively. ff_updateMMXDitherTables() uses entry dstY & 1 for
 * blueDither (and for greenDither on RGB555/BGR555 output) and entry
 * (dstY + 1) & 1 for redDither. */
37 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
38  0x0602060206020602LL,
39  0x0004000400040004LL,};
40 
41 #if HAVE_INLINE_ASM
42 
/* 64-bit constants declared through DECLARE_ASM_CONST / DECLARE_ASM_ALIGNED.
 * None of them is referenced by the C code visible in this file; presumably
 * they are consumed by name from inline assembly in the template included
 * below - TODO confirm against swscale_template.c. */

/* Every byte is 0xF8 / 0xFC. */
43 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
44 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
45 
/* Three masks that each select every third byte, at three different phases;
 * OR-ed together they cover all eight bytes. */
46 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
47 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
48 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
49 
/* Every byte 0x10, every byte 0x80, and every 16-bit word 0x0001. */
50 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
51 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
52 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
53 
54 
55 // MMXEXT versions: include swscale_template.c once with COMPILE_TEMPLATE_MMXEXT
// set to 1 and RENAME(a) defined to append "_mmxext" to its argument.
// RENAME and COMPILE_TEMPLATE_MMXEXT are #undef'd first so any earlier
// definition does not leak into this instantiation.
56 #if HAVE_MMXEXT_INLINE
57 #undef RENAME
58 #undef COMPILE_TEMPLATE_MMXEXT
59 #define COMPILE_TEMPLATE_MMXEXT 1
60 #define RENAME(a) a ## _mmxext
61 #include "swscale_template.c"
62 #endif
63 
/**
 * Refresh the per-output-line state stored in the scaler context for
 * destination line dstY.
 *
 * Two things are updated:
 *  - c->blueDither / c->greenDither / c->redDither, chosen from ff_dither8
 *    and ff_dither4 by the parity of dstY;
 *  - c->lumMmxFilter, c->chrMmxFilter and (when alpha is in use)
 *    c->alpMmxFilter, which are filled with source-line pointers and
 *    vertical filter coefficients. This part only runs while
 *    dstY < dstH - 2; the reason for that bound is not visible here.
 *
 * Only plane[1] (called chrU here) is consulted for chroma; how the second
 * chroma plane is located is not visible in this function.
 *
 * @param c    scaler context that is read and updated
 * @param dstY index of the destination line about to be produced
 */
64 void ff_updateMMXDitherTables(SwsInternal *c, int dstY)
65 {
66  const int dstH= c->opts.dst_h;
67  const int flags= c->opts.flags;
68 
 /* All planes come from the second-to-last slice descriptor (numSlice-2). */
69  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
70  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
71  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
72 
73  int hasAlpha = c->needAlpha;
74  int32_t *vLumFilterPos= c->vLumFilterPos;
75  int32_t *vChrFilterPos= c->vChrFilterPos;
76  int16_t *vLumFilter= c->vLumFilter;
77  int16_t *vChrFilter= c->vChrFilter;
78  int32_t *lumMmxFilter= c->lumMmxFilter;
79  int32_t *chrMmxFilter= c->chrMmxFilter;
80  int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
81  const int vLumFilterSize= c->vLumFilterSize;
82  const int vChrFilterSize= c->vChrFilterSize;
 /* Chroma output line: dstY scaled down by the vertical chroma subsampling shift. */
83  const int chrDstY= dstY>>c->chrDstVSubSample;
84  const int firstLumSrcY= vLumFilterPos[dstY]; // first luma source line needed for dstY
85  const int firstChrSrcY= vChrFilterPos[chrDstY]; // first chroma source line needed for chrDstY
86 
 /* Dither words alternate with line parity. Red uses the opposite parity
  * from blue; green uses the 8-step table only for RGB555/BGR555 output. */
87  c->blueDither= ff_dither8[dstY&1];
88  if (c->opts.dst_format == AV_PIX_FMT_RGB555 || c->opts.dst_format == AV_PIX_FMT_BGR555)
89  c->greenDither= ff_dither8[dstY&1];
90  else
91  c->greenDither= ff_dither4[dstY&1];
92  c->redDither= ff_dither8[(dstY+1)&1];
93  if (dstY < dstH - 2) {
 /* Line-pointer arrays positioned so that entry [i] is the pointer for
  * source line first*SrcY + i (the plane's line[] is offset by sliceY). */
94  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
95  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
96  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
97 
98  int i;
 /* Luma filter window starts before source line 0 or ends past src_h:
  * build a clamped pointer list in lumPlane->tmp and use that instead. */
99  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->opts.src_h) {
100  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
101 
 /* neg: number of leading taps that fall before source line 0.
  * end: number of taps up to the end of the source (capped at the filter size).
  * Note that this declaration of i shadows the outer one. */
102  int neg = -firstLumSrcY, i, end = FFMIN(c->opts.src_h - firstLumSrcY, vLumFilterSize);
 /* Leading taps all reuse entry [neg], i.e. the pointer at offset -firstLumSrcY. */
103  for (i = 0; i < neg; i++)
104  tmpY[i] = lumSrcPtr[neg];
105  for ( ; i < end; i++)
106  tmpY[i] = lumSrcPtr[i];
 /* Trailing taps repeat the last pointer already written. */
107  for ( ; i < vLumFilterSize; i++)
108  tmpY[i] = tmpY[i-1];
109  lumSrcPtr = tmpY;
110 
 /* Alpha shares the luma filter geometry, so it is clamped the same way. */
111  if (alpSrcPtr) {
112  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
113  for (i = 0; i < neg; i++)
114  tmpA[i] = alpSrcPtr[neg];
115  for ( ; i < end; i++)
116  tmpA[i] = alpSrcPtr[i];
117  for ( ; i < vLumFilterSize; i++)
118  tmpA[i] = tmpA[i - 1];
119  alpSrcPtr = tmpA;
120  }
121  }
 /* Same clamping for chroma, against chrSrcH and the chroma filter size. */
122  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
123  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
124  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
125  for (i = 0; i < neg; i++) {
126  tmpU[i] = chrUSrcPtr[neg];
127  }
128  for ( ; i < end; i++) {
129  tmpU[i] = chrUSrcPtr[i];
130  }
131  for ( ; i < vChrFilterSize; i++) {
132  tmpU[i] = tmpU[i - 1];
133  }
134  chrUSrcPtr = tmpU;
135  }
136 
 /* SWS_ACCURATE_RND layout: taps are handled in pairs (i += 2). For each
  * pair the first line pointer is stored at index s*i, the second at
  * s*i + APCK_PTR2/4, and the value coef[i] + coef[i+1] * 65536 is written
  * to the two consecutive slots starting at s*i + APCK_COEF/4. With a
  * single-tap filter the second pointer repeats the first and the second
  * coefficient is taken as 0. The APCK_* values are divided by 4 to index
  * the int32_t array, so they look like byte offsets - confirm in
  * swscale_internal.h. */
137  if (flags & SWS_ACCURATE_RND) {
138  int s= APCK_SIZE / 8;
139  for (i=0; i<vLumFilterSize; i+=2) {
 /* The pointer is stored through a const void ** cast into the int32_t table. */
140  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
141  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
142  lumMmxFilter[s*i+APCK_COEF/4 ]=
143  lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
144  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
 /* Alpha entries use the alpha line pointers but copy the luma coefficients. */
145  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
146  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
147  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
148  alpMmxFilter[s*i+APCK_COEF/4 ]=
149  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
150  }
151  }
152  for (i=0; i<vChrFilterSize; i+=2) {
153  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
154  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
155  chrMmxFilter[s*i+APCK_COEF/4 ]=
156  chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
157  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
158  }
159  } else {
 /* Default layout: four int32_t slots per tap. The line pointer is
  * stored starting at slot 0 (pointer-sized write), and slots 2 and 3
  * both hold the 16-bit coefficient replicated into the low and high
  * halves (uint16_t value * 0x10001). */
160  for (i=0; i<vLumFilterSize; i++) {
161  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
162  lumMmxFilter[4*i+2]=
163  lumMmxFilter[4*i+3]=
164  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
 /* Alpha again reuses the luma coefficient word. */
165  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
166  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
167  alpMmxFilter[4*i+2]=
168  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
169  }
170  }
171  for (i=0; i<vChrFilterSize; i++) {
172  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
173  chrMmxFilter[4*i+2]=
174  chrMmxFilter[4*i+3]=
175  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
176  }
177  }
178  }
179 }
180 #endif /* HAVE_INLINE_ASM */
181 
/* Declares the external routine ff_yuv2yuvX_<opt>() and defines a static
 * wrapper yuv2yuvX_<opt>() whose argument list matches what is assigned to
 * c->yuv2planeX later in this file.
 *
 * The wrapper does not use `src`. When dstW > 0 it calls the external
 * routine with filterSize - 1, srcOffset 0, dest - offset and dstW + offset;
 * when dstW <= 0 it does nothing. The `step` macro parameter is accepted but
 * not used by this variant. */
182 #define YUV2YUVX_FUNC_MMX(opt, step) \
183 void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
184  uint8_t *dest, int dstW, \
185  const uint8_t *dither, int offset); \
186 static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
187  const int16_t **src, uint8_t *dest, int dstW, \
188  const uint8_t *dither, int offset) \
189 { \
190  if(dstW > 0) \
191  ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, dstW + offset, dither, offset); \
192  return; \
193 }
194 
/* Declares the external routine ff_yuv2yuvX_<opt>() and defines a static
 * wrapper yuv2yuvX_<opt>() that processes the line in blocks of `step` pixels.
 *
 *  - If dest is not 16-byte aligned, the whole call is forwarded to
 *    yuv2yuvX_mmxext() and the wrapper returns.
 *  - Otherwise the largest multiple of `step` pixels (pixelsProcessed) is
 *    handed to ff_yuv2yuvX_<opt>(), and any remaining dstW % step pixels are
 *    handed to ff_yuv2yuvX_mmxext() with srcOffset = pixelsProcessed.
 *
 * The expansion references yuv2yuvX_mmxext and ff_yuv2yuvX_mmxext, so
 * YUV2YUVX_FUNC_MMX(mmxext, ...) must have been expanded earlier in the
 * translation unit. */
195 #define YUV2YUVX_FUNC(opt, step) \
196 void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
197  uint8_t *dest, int dstW, \
198  const uint8_t *dither, int offset); \
199 static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
200  const int16_t **src, uint8_t *dest, int dstW, \
201  const uint8_t *dither, int offset) \
202 { \
203  int remainder = (dstW % step); \
204  int pixelsProcessed = dstW - remainder; \
205  if(((uintptr_t)dest) & 15){ \
206  yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset); \
207  return; \
208  } \
209  if(pixelsProcessed > 0) \
210  ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, pixelsProcessed + offset, dither, offset); \
211  if(remainder > 0){ \
212  ff_yuv2yuvX_mmxext(filter, filterSize - 1, pixelsProcessed, dest - offset, pixelsProcessed + remainder + offset, dither, offset); \
213  } \
214  return; \
215 }
216 
/* Instantiate the yuv2yuvX wrappers. The mmxext wrapper ignores its step
 * argument; the sse3 and avx2 wrappers use block sizes of 32 and 64 pixels.
 * The sse3/avx2 expansions call the mmxext wrapper and routine, which are
 * only defined here when HAVE_MMXEXT_EXTERNAL is set. */
217 #if HAVE_MMXEXT_EXTERNAL
218 YUV2YUVX_FUNC_MMX(mmxext, 16)
219 #endif
220 #if HAVE_SSE3_EXTERNAL
221 YUV2YUVX_FUNC(sse3, 32)
222 #endif
223 #if HAVE_AVX2_EXTERNAL
224 YUV2YUVX_FUNC(avx2, 64)
225 #endif
226 
/* Expands to the prototype (without a trailing semicolon) of
 * ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>(). */
227 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
228 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
229  SwsInternal *c, int16_t *data, \
230  int dstW, const uint8_t *src, \
231  const int16_t *filter, \
232  const int32_t *filterPos, int filterSize)
233 
/* Prototypes for from_bpc 8, 9, 10, 12, 14 and 16, each with to_bpc 15 and 19,
 * for one filter_n / opt combination. */
234 #define SCALE_FUNCS(filter_n, opt) \
235  SCALE_FUNC(filter_n, 8, 15, opt); \
236  SCALE_FUNC(filter_n, 9, 15, opt); \
237  SCALE_FUNC(filter_n, 10, 15, opt); \
238  SCALE_FUNC(filter_n, 12, 15, opt); \
239  SCALE_FUNC(filter_n, 14, 15, opt); \
240  SCALE_FUNC(filter_n, 16, 15, opt); \
241  SCALE_FUNC(filter_n, 8, 19, opt); \
242  SCALE_FUNC(filter_n, 9, 19, opt); \
243  SCALE_FUNC(filter_n, 10, 19, opt); \
244  SCALE_FUNC(filter_n, 12, 19, opt); \
245  SCALE_FUNC(filter_n, 14, 19, opt); \
246  SCALE_FUNC(filter_n, 16, 19, opt)
247 
/* filter_n variants 4, 8 and X. Not expanded in the visible part of this file. */
248 #define SCALE_FUNCS_MMX(opt) \
249  SCALE_FUNCS(4, opt); \
250  SCALE_FUNCS(8, opt); \
251  SCALE_FUNCS(X, opt)
252 
/* filter_n variants 4, 8, X4 and X8. */
253 #define SCALE_FUNCS_SSE(opt) \
254  SCALE_FUNCS(4, opt); \
255  SCALE_FUNCS(8, opt); \
256  SCALE_FUNCS(X4, opt); \
257  SCALE_FUNCS(X8, opt)
258 
/* Full prototype sets for sse2, ssse3 and sse4. */
259 SCALE_FUNCS_SSE(sse2);
260 SCALE_FUNCS_SSE(ssse3);
261 SCALE_FUNCS_SSE(sse4);
262 
/* For avx2 only the 8-to-15 variants with filter_n 4 and X4 are declared. */
263 SCALE_FUNC(4, 8, 15, avx2);
264 SCALE_FUNC(X4, 8, 15, avx2);
265 
/* Expands to the prototype (without a trailing semicolon) of
 * ff_yuv2planeX_<size>_<opt>(). */
266 #define VSCALEX_FUNC(size, opt) \
267 void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
268  const int16_t **src, uint8_t *dest, int dstW, \
269  const uint8_t *dither, int offset)
/* Prototypes for sizes 8, 9 and 10 of one opt. */
270 #define VSCALEX_FUNCS(opt) \
271  VSCALEX_FUNC(8, opt); \
272  VSCALEX_FUNC(9, opt); \
273  VSCALEX_FUNC(10, opt)
274 
/* Declared: size 8 for mmxext; sizes 8/9/10 for sse2, sse4 and avx;
 * size 16 for sse4 only. */
275 VSCALEX_FUNC(8, mmxext);
276 VSCALEX_FUNCS(sse2);
277 VSCALEX_FUNCS(sse4);
278 VSCALEX_FUNC(16, sse4);
279 VSCALEX_FUNCS(avx);
280 
/* Expands to the prototype (without a trailing semicolon) of
 * ff_yuv2plane1_<size>_<opt>(). */
281 #define VSCALE_FUNC(size, opt) \
282 void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
283  const uint8_t *dither, int offset)
/* Prototypes for sizes 8 and 16 using opt1, and sizes 9 and 10 using opt2. */
284 #define VSCALE_FUNCS(opt1, opt2) \
285  VSCALE_FUNC(8, opt1); \
286  VSCALE_FUNC(9, opt2); \
287  VSCALE_FUNC(10, opt2); \
288  VSCALE_FUNC(16, opt1)
289 
/* Declared for sse2 and avx (opt1 == opt2 in both cases), plus size 16 for sse4. */
290 VSCALE_FUNCS(sse2, sse2);
291 VSCALE_FUNC(16, sse4);
292 VSCALE_FUNCS(avx, avx);
293 
/* Expands to the prototype (without a trailing semicolon) of
 * ff_<fmt>ToY_<opt>(). */
294 #define INPUT_Y_FUNC(fmt, opt) \
295 void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
296  const uint8_t *unused1, const uint8_t *unused2, \
297  int w, uint32_t *unused, void *opq)
/* Expands to the prototype (without a trailing semicolon) of
 * ff_<fmt>ToUV_<opt>(). */
298 #define INPUT_UV_FUNC(fmt, opt) \
299 void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
300  const uint8_t *unused0, \
301  const uint8_t *src1, \
302  const uint8_t *src2, \
303  int w, uint32_t *unused, void *opq)
/* Both the ToY and the ToUV prototype for one fmt / opt pair. */
304 #define INPUT_FUNC(fmt, opt) \
305  INPUT_Y_FUNC(fmt, opt); \
306  INPUT_UV_FUNC(fmt, opt)
/* Prototype set for one opt: ToY and ToUV for uyvy, yuyv, rgba, bgra, argb,
 * abgr, rgb24 and bgr24; ToUV only for nv12 and nv21. */
307 #define INPUT_FUNCS(opt) \
308  INPUT_FUNC(uyvy, opt); \
309  INPUT_FUNC(yuyv, opt); \
310  INPUT_UV_FUNC(nv12, opt); \
311  INPUT_UV_FUNC(nv21, opt); \
312  INPUT_FUNC(rgba, opt); \
313  INPUT_FUNC(bgra, opt); \
314  INPUT_FUNC(argb, opt); \
315  INPUT_FUNC(abgr, opt); \
316  INPUT_FUNC(rgb24, opt); \
317  INPUT_FUNC(bgr24, opt)
318 
/* Full sets for sse2, ssse3 and avx; for avx2 only the six packed-RGB
 * formats are declared. */
319 INPUT_FUNCS(sse2);
320 INPUT_FUNCS(ssse3);
321 INPUT_FUNCS(avx);
322 INPUT_FUNC(rgba, avx2);
323 INPUT_FUNC(bgra, avx2);
324 INPUT_FUNC(argb, avx2);
325 INPUT_FUNC(abgr, avx2);
326 INPUT_FUNC(rgb24, avx2);
327 INPUT_FUNC(bgr24, avx2);
328 
329 #if ARCH_X86_64
/* Expands to the prototype (without a trailing semicolon) of
 * ff_yuv2<fmt>cX_<opt>(). Declared below for nv12 and nv21 with avx2. */
330 #define YUV2NV_DECL(fmt, opt) \
331 void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
332  const int16_t *filter, int filterSize, \
333  const int16_t **u, const int16_t **v, \
334  uint8_t *dst, int dstWidth)
335 
336 YUV2NV_DECL(nv12, avx2);
337 YUV2NV_DECL(nv21, avx2);
338 
/* Expands to the prototype (without a trailing semicolon) of
 * ff_yuv2<fmt>_full_X_<opt>(). */
339 #define YUV2GBRP_FN_DECL(fmt, opt) \
340 void ff_yuv2##fmt##_full_X_ ##opt(SwsInternal *c, const int16_t *lumFilter, \
341  const int16_t **lumSrcx, int lumFilterSize, \
342  const int16_t *chrFilter, const int16_t **chrUSrcx, \
343  const int16_t **chrVSrcx, int chrFilterSize, \
344  const int16_t **alpSrcx, uint8_t **dest, \
345  int dstW, int y)
346 
/* Prototypes for every planar GBR output variant listed below (gbrp/gbrap at
 * 8 bits, then the le and be forms of the 9/10/12/14/16-bit and f32 variants)
 * for one opt. */
347 #define YUV2GBRP_DECL(opt) \
348 YUV2GBRP_FN_DECL(gbrp, opt); \
349 YUV2GBRP_FN_DECL(gbrap, opt); \
350 YUV2GBRP_FN_DECL(gbrp9le, opt); \
351 YUV2GBRP_FN_DECL(gbrp10le, opt); \
352 YUV2GBRP_FN_DECL(gbrap10le, opt); \
353 YUV2GBRP_FN_DECL(gbrp12le, opt); \
354 YUV2GBRP_FN_DECL(gbrap12le, opt); \
355 YUV2GBRP_FN_DECL(gbrp14le, opt); \
356 YUV2GBRP_FN_DECL(gbrp16le, opt); \
357 YUV2GBRP_FN_DECL(gbrap16le, opt); \
358 YUV2GBRP_FN_DECL(gbrpf32le, opt); \
359 YUV2GBRP_FN_DECL(gbrapf32le, opt); \
360 YUV2GBRP_FN_DECL(gbrp9be, opt); \
361 YUV2GBRP_FN_DECL(gbrp10be, opt); \
362 YUV2GBRP_FN_DECL(gbrap10be, opt); \
363 YUV2GBRP_FN_DECL(gbrp12be, opt); \
364 YUV2GBRP_FN_DECL(gbrap12be, opt); \
365 YUV2GBRP_FN_DECL(gbrp14be, opt); \
366 YUV2GBRP_FN_DECL(gbrp16be, opt); \
367 YUV2GBRP_FN_DECL(gbrap16be, opt); \
368 YUV2GBRP_FN_DECL(gbrpf32be, opt); \
369 YUV2GBRP_FN_DECL(gbrapf32be, opt)
370 
/* Declared for sse2, sse4 and avx2. */
371 YUV2GBRP_DECL(sse2);
372 YUV2GBRP_DECL(sse4);
373 YUV2GBRP_DECL(avx2);
374 
/* The three *_FN_DECL macros expand to the prototypes (without a trailing
 * semicolon) of ff_planar_<fmt>_to_y_<opt>(), ff_planar_<fmt>_to_uv_<opt>()
 * and ff_planar_<fmt>_to_a_<opt>(). */
375 #define INPUT_PLANAR_RGB_Y_FN_DECL(fmt, opt) \
376 void ff_planar_##fmt##_to_y_##opt(uint8_t *dst, \
377  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
378  void *opq)
379 
380 #define INPUT_PLANAR_RGB_UV_FN_DECL(fmt, opt) \
381 void ff_planar_##fmt##_to_uv_##opt(uint8_t *dstU, uint8_t *dstV, \
382  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
383  void *opq)
384 
385 #define INPUT_PLANAR_RGB_A_FN_DECL(fmt, opt) \
386 void ff_planar_##fmt##_to_a_##opt(uint8_t *dst, \
387  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
388  void *opq)
389 
390 
/* The RGBXX_{A,Y,UV}_DECL macros declare both the <fmt>le and <fmt>be
 * variant of the corresponding function. */
391 #define INPUT_PLANAR_RGBXX_A_DECL(fmt, opt) \
392 INPUT_PLANAR_RGB_A_FN_DECL(fmt##le, opt); \
393 INPUT_PLANAR_RGB_A_FN_DECL(fmt##be, opt)
394 
395 #define INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt) \
396 INPUT_PLANAR_RGB_Y_FN_DECL(fmt##le, opt); \
397 INPUT_PLANAR_RGB_Y_FN_DECL(fmt##be, opt)
398 
399 #define INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt) \
400 INPUT_PLANAR_RGBXX_UV_FN_PLACEHOLDER
452 #endif
453 
/* Statement macro that installs the range-conversion callbacks on a context
 * variable named `c`, which must be in scope at the expansion site.
 * When c->opts.src_range is non-zero the ff_{lum,chr}RangeFromJpeg<bpc>_<opt>
 * pair is selected, otherwise the ff_{lum,chr}RangeToJpeg<bpc>_<opt> pair. */
454 #define RANGE_CONVERT_FUNCS(opt, bpc) do { \
455  if (c->opts.src_range) { \
456  c->lumConvertRange = ff_lumRangeFromJpeg##bpc##_##opt; \
457  c->chrConvertRange = ff_chrRangeFromJpeg##bpc##_##opt; \
458  } else { \
459  c->lumConvertRange = ff_lumRangeToJpeg##bpc##_##opt; \
460  c->chrConvertRange = ff_chrRangeToJpeg##bpc##_##opt; \
461  } \
462 } while (0)
463 
/* Declares the four range-conversion routines for one opt / bpc pair:
 * ff_lumRangeFromJpeg, ff_chrRangeFromJpeg, ff_lumRangeToJpeg and
 * ff_chrRangeToJpeg, each suffixed with <bpc>_<opt>. Every prototype is
 * terminated inside the macro, so invocations take no trailing semicolon.
 * The last line ends with a continuation backslash, so the blank line that
 * follows is part of the definition. */
464 #define RANGE_CONVERT_FUNCS_DECL(opt, bpc) \
465 void ff_lumRangeFromJpeg##bpc##_##opt(int16_t *dst, int width, \
466  uint32_t coeff, int64_t offset); \
467 void ff_chrRangeFromJpeg##bpc##_##opt(int16_t *dstU, int16_t *dstV, int width, \
468  uint32_t coeff, int64_t offset); \
469 void ff_lumRangeToJpeg##bpc##_##opt(int16_t *dst, int width, \
470  uint32_t coeff, int64_t offset); \
471 void ff_chrRangeToJpeg##bpc##_##opt(int16_t *dstU, int16_t *dstV, int width, \
472  uint32_t coeff, int64_t offset); \
473 
475 RANGE_CONVERT_FUNCS_DECL(sse4, 16)
477 RANGE_CONVERT_FUNCS_DECL(avx2, 16)
478 
480 {
481  int cpu_flags = av_get_cpu_flags();
483  if (c->dstBpc <= 14) {
484  RANGE_CONVERT_FUNCS(avx2, 8);
485  } else {
486  RANGE_CONVERT_FUNCS(avx2, 16);
487  }
488  } else if (EXTERNAL_SSE2(cpu_flags) && c->dstBpc <= 14) {
489  RANGE_CONVERT_FUNCS(sse2, 8);
490  } else if (EXTERNAL_SSE4(cpu_flags) && c->dstBpc > 14) {
491  RANGE_CONVERT_FUNCS(sse4, 16);
492  }
493 }
494 
496 {
497  int cpu_flags = av_get_cpu_flags();
498 
499 #if HAVE_MMXEXT_INLINE
501  sws_init_swscale_mmxext(c);
502 #endif
503  if(c->use_mmx_vfilter && !(c->opts.flags & SWS_ACCURATE_RND)) {
504 #if HAVE_MMXEXT_EXTERNAL
506  c->yuv2planeX = yuv2yuvX_mmxext;
507 #endif
508 #if HAVE_SSE3_EXTERNAL
510  c->yuv2planeX = yuv2yuvX_sse3;
511 #endif
512 #if HAVE_AVX2_EXTERNAL
514  c->yuv2planeX = yuv2yuvX_avx2;
515 #endif
516  }
517 #if ARCH_X86_32 && !HAVE_ALIGNED_STACK
518  // The better yuv2planeX_8 functions need aligned stack on x86-32,
519  // so we use MMXEXT in this case if they are not available.
520  if (EXTERNAL_MMXEXT(cpu_flags)) {
521  if (c->dstBpc == 8 && !c->use_mmx_vfilter)
522  c->yuv2planeX = ff_yuv2planeX_8_mmxext;
523  }
524 #endif /* ARCH_X86_32 && !HAVE_ALIGNED_STACK */
525 
526 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
527  if (c->srcBpc == 8) { \
528  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
529  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
530  } else if (c->srcBpc == 9) { \
531  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
532  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
533  } else if (c->srcBpc == 10) { \
534  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
535  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
536  } else if (c->srcBpc == 12) { \
537  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
538  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
539  } else if (c->srcBpc == 14 || ((c->opts.src_format==AV_PIX_FMT_PAL8||isAnyRGB(c->opts.src_format)) && av_pix_fmt_desc_get(c->opts.src_format)->comp[0].depth<16)) { \
540  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
541  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
542  } else { /* c->srcBpc == 16 */ \
543  av_assert0(c->srcBpc == 16);\
544  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
545  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
546  } \
547 } while (0)
548 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
549 switch(c->dstBpc){ \
550  case 16: do_16_case; break; \
551  case 10: if (!isBE(c->opts.dst_format) && !isSemiPlanarYUV(c->opts.dst_format)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
552  case 9: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
553  case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
554  }
555 #define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
556  switch(c->dstBpc){ \
557  case 16: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2plane1_16_ ## opt; break; \
558  case 10: if (!isBE(c->opts.dst_format) && !isSemiPlanarYUV(c->opts.dst_format)) vscalefn = ff_yuv2plane1_10_ ## opt; break; \
559  case 9: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2plane1_9_ ## opt; break; \
560  case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break; \
561  default: av_assert0(c->dstBpc>8); \
562  }
563 #define case_rgb(x, X, opt) \
564  case AV_PIX_FMT_ ## X: \
565  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
566  if (!c->chrSrcHSubSample) \
567  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
568  break
569 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
570  switch (filtersize) { \
571  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
572  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
573  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
574  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
575  break; \
576  }
577  if (EXTERNAL_SSE2(cpu_flags)) {
578  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
579  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
580  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
581  HAVE_ALIGNED_STACK || ARCH_X86_64);
582  if (!(c->opts.flags & SWS_ACCURATE_RND))
583  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2);
584 
585  switch (c->opts.src_format) {
586  case AV_PIX_FMT_YA8:
587  c->lumToYV12 = ff_yuyvToY_sse2;
588  if (c->needAlpha)
589  c->alpToYV12 = ff_uyvyToY_sse2;
590  break;
591  case AV_PIX_FMT_YUYV422:
592  c->lumToYV12 = ff_yuyvToY_sse2;
593  c->chrToYV12 = ff_yuyvToUV_sse2;
594  break;
595  case AV_PIX_FMT_UYVY422:
596  c->lumToYV12 = ff_uyvyToY_sse2;
597  c->chrToYV12 = ff_uyvyToUV_sse2;
598  break;
599  case AV_PIX_FMT_NV12:
600  c->chrToYV12 = ff_nv12ToUV_sse2;
601  break;
602  case AV_PIX_FMT_NV21:
603  c->chrToYV12 = ff_nv21ToUV_sse2;
604  break;
605  case_rgb(rgb24, RGB24, sse2);
606  case_rgb(bgr24, BGR24, sse2);
607  case_rgb(bgra, BGRA, sse2);
608  case_rgb(rgba, RGBA, sse2);
609  case_rgb(abgr, ABGR, sse2);
610  case_rgb(argb, ARGB, sse2);
611  default:
612  break;
613  }
614  }
615  if (EXTERNAL_SSSE3(cpu_flags)) {
616  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
617  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
618  switch (c->opts.src_format) {
619  case_rgb(rgb24, RGB24, ssse3);
620  case_rgb(bgr24, BGR24, ssse3);
621  default:
622  break;
623  }
624  }
625  if (EXTERNAL_SSE4(cpu_flags)) {
626  /* Xto15 don't need special sse4 functions */
627  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
628  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
629  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
630  if (!isBE(c->opts.dst_format)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
631  HAVE_ALIGNED_STACK || ARCH_X86_64);
632  if (c->dstBpc == 16 && !isBE(c->opts.dst_format) && !(c->opts.flags & SWS_ACCURATE_RND))
633  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
634  }
635 
636  if (EXTERNAL_AVX(cpu_flags)) {
637  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
638  HAVE_ALIGNED_STACK || ARCH_X86_64);
639  if (!(c->opts.flags & SWS_ACCURATE_RND))
640  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx);
641 
642  switch (c->opts.src_format) {
643  case AV_PIX_FMT_YUYV422:
644  c->chrToYV12 = ff_yuyvToUV_avx;
645  break;
646  case AV_PIX_FMT_UYVY422:
647  c->chrToYV12 = ff_uyvyToUV_avx;
648  break;
649  case AV_PIX_FMT_NV12:
650  c->chrToYV12 = ff_nv12ToUV_avx;
651  break;
652  case AV_PIX_FMT_NV21:
653  c->chrToYV12 = ff_nv21ToUV_avx;
654  break;
655  case_rgb(rgb24, RGB24, avx);
656  case_rgb(bgr24, BGR24, avx);
657  case_rgb(bgra, BGRA, avx);
658  case_rgb(rgba, RGBA, avx);
659  case_rgb(abgr, ABGR, avx);
660  case_rgb(argb, ARGB, avx);
661  default:
662  break;
663  }
664  }
665 
666 #if ARCH_X86_64
667 #define ASSIGN_AVX2_SCALE_FUNC(hscalefn, filtersize) \
668  switch (filtersize) { \
669  case 4: hscalefn = ff_hscale8to15_4_avx2; break; \
670  default: hscalefn = ff_hscale8to15_X4_avx2; break; \
671  break; \
672  }
673 
675  if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
676  ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
677  ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
678  }
679  }
680 
682  if (ARCH_X86_64)
683  switch (c->opts.src_format) {
684  case_rgb(rgb24, RGB24, avx2);
685  case_rgb(bgr24, BGR24, avx2);
686  case_rgb(bgra, BGRA, avx2);
687  case_rgb(rgba, RGBA, avx2);
688  case_rgb(abgr, ABGR, avx2);
689  case_rgb(argb, ARGB, avx2);
690  }
691  if (!(c->opts.flags & SWS_ACCURATE_RND)) // FIXME
692  switch (c->opts.dst_format) {
693  case AV_PIX_FMT_NV12:
694  case AV_PIX_FMT_NV24:
695  c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
696  break;
697  case AV_PIX_FMT_NV21:
698  case AV_PIX_FMT_NV42:
699  c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
700  break;
701  default:
702  break;
703  }
704  }
705 
706 
707 #define INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(fmt, name, opt) \
708  case fmt: \
709  c->readAlpPlanar = ff_planar_##name##_to_a_##opt;
710 
711 #define INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
712  case rgba_fmt: \
713  case rgb_fmt: \
714  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
715  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
716  break;
717 
718 #define INPUT_PLANER_RGB_YUV_FUNC_CASE(fmt, name, opt) \
719  case fmt: \
720  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
721  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
722  break;
723 
724 #define INPUT_PLANER_RGB_UV_FUNC_CASE(fmt, name, opt) \
725  case fmt: \
726  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
727  break;
728 
729 #define INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
730  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
731  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
732  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
733  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
734 
735 #define INPUT_PLANER_RGBAXX_UVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
736  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
737  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
738  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
739  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
740 
741 #define INPUT_PLANER_RGBAXX_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
742  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##LE, rgba_fmt##LE, name##le, opt) \
743  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##BE, rgba_fmt##BE, name##be, opt)
744 
745 #define INPUT_PLANER_RGBXX_YUV_FUNC_CASE(rgb_fmt, name, opt) \
746  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
747  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
748 
749 #define INPUT_PLANER_RGBXX_UV_FUNC_CASE(rgb_fmt, name, opt) \
750  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
751  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
752 
753 #define INPUT_PLANER_RGB_YUVA_ALL_CASES(opt) \
754  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, opt) \
755  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, opt) \
756  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, opt) \
757  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, opt) \
758  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, opt) \
759  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, opt) \
760  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, opt) \
761  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, opt)
762 
763 
764  if (EXTERNAL_SSE2(cpu_flags)) {
765  switch (c->opts.src_format) {
766  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, sse2);
767  INPUT_PLANER_RGB_UV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse2);
768  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse2);
769  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse2);
770  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse2);
771  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse2);
772  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse2);
773  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse2);
774  default:
775  break;
776  }
777  }
778 
779  if (EXTERNAL_SSE4(cpu_flags)) {
780  switch (c->opts.src_format) {
781  case AV_PIX_FMT_GBRAP:
782  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse4);
783  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse4);
784  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse4);
785  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse4);
786  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse4);
787  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse4);
788  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse4);
789  default:
790  break;
791  }
792  }
793 
795  switch (c->opts.src_format) {
796  INPUT_PLANER_RGB_YUVA_ALL_CASES(avx2)
797  default:
798  break;
799  }
800  }
801 
802  if(c->opts.flags & SWS_FULL_CHR_H_INT) {
803 
804 #define YUV2ANYX_FUNC_CASE(fmt, name, opt) \
805  case fmt: \
806  c->yuv2anyX = ff_yuv2##name##_full_X_##opt; \
807  break;
808 
809 #define YUV2ANYX_GBRAP_CASES(opt) \
810  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP, gbrp, opt) \
811  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP, gbrap, opt) \
812  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9LE, gbrp9le, opt) \
813  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10LE, gbrp10le, opt) \
814  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10LE, gbrap10le, opt) \
815  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12LE, gbrp12le, opt) \
816  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12LE, gbrap12le, opt) \
817  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14LE, gbrp14le, opt) \
818  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16LE, gbrp16le, opt) \
819  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16LE, gbrap16le, opt) \
820  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32LE, gbrpf32le, opt) \
821  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32LE, gbrapf32le, opt) \
822  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9BE, gbrp9be, opt) \
823  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10BE, gbrp10be, opt) \
824  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10BE, gbrap10be, opt) \
825  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12BE, gbrp12be, opt) \
826  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12BE, gbrap12be, opt) \
827  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14BE, gbrp14be, opt) \
828  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16BE, gbrp16be, opt) \
829  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16BE, gbrap16be, opt) \
830  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32BE, gbrpf32be, opt) \
831  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32BE, gbrapf32be, opt)
832 
833  if (EXTERNAL_SSE2(cpu_flags)) {
834  switch (c->opts.dst_format) {
835  YUV2ANYX_GBRAP_CASES(sse2)
836  default:
837  break;
838  }
839  }
840 
841  if (EXTERNAL_SSE4(cpu_flags)) {
842  switch (c->opts.dst_format) {
843  YUV2ANYX_GBRAP_CASES(sse4)
844  default:
845  break;
846  }
847  }
848 
850  switch (c->opts.dst_format) {
851  YUV2ANYX_GBRAP_CASES(avx2)
852  default:
853  break;
854  }
855  }
856  }
857 
858 #endif
859 }
AV_PIX_FMT_GBRAP16
#define AV_PIX_FMT_GBRAP16
Definition: pixfmt.h:525
DECLARE_ASM_ALIGNED
#define DECLARE_ASM_ALIGNED(n, t, v)
Definition: mem_internal.h:89
APCK_PTR2
#define APCK_PTR2
Definition: swscale_internal.h:65
cpu.h
SwsPlane::line
uint8_t ** line
line buffer
Definition: swscale_internal.h:1095
AV_PIX_FMT_YA8
@ AV_PIX_FMT_YA8
8 bits gray, 8 bits alpha
Definition: pixfmt.h:140
mem_internal.h
av_unused
#define av_unused
Definition: attributes.h:131
EXTERNAL_AVX2_FAST
#define EXTERNAL_AVX2_FAST(flags)
Definition: cpu.h:79
pixdesc.h
DECLARE_ASM_CONST
#define DECLARE_ASM_CONST(n, t, v)
Definition: mem_internal.h:90
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
INPUT_FUNC
#define INPUT_FUNC(fmt, opt)
Definition: swscale.c:304
INPUT_FUNCS
#define INPUT_FUNCS(opt)
Definition: swscale.c:307
rgb
Definition: rpzaenc.c:60
RANGE_CONVERT_FUNCS_DECL
#define RANGE_CONVERT_FUNCS_DECL(opt, bpc)
Definition: swscale.c:464
AV_PIX_FMT_GBRP14
#define AV_PIX_FMT_GBRP14
Definition: pixfmt.h:520
RANGE_CONVERT_FUNCS
#define RANGE_CONVERT_FUNCS(opt, bpc)
Definition: swscale.c:454
AV_PIX_FMT_GBRAP
@ AV_PIX_FMT_GBRAP
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:212
AV_PIX_FMT_GBRP10
#define AV_PIX_FMT_GBRP10
Definition: pixfmt.h:518
AV_CPU_FLAG_SLOW_GATHER
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
Definition: cpu.h:59
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
AV_PIX_FMT_GBRAP10
#define AV_PIX_FMT_GBRAP10
Definition: pixfmt.h:522
intreadwrite.h
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_PIX_FMT_GBRAP12
#define AV_PIX_FMT_GBRAP12
Definition: pixfmt.h:523
APCK_COEF
#define APCK_COEF
Definition: swscale_internal.h:66
SwsPlane::tmp
uint8_t ** tmp
Tmp line buffer used by mmx code.
Definition: swscale_internal.h:1096
SCALE_FUNC
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt)
Definition: swscale.c:227
VSCALE_FUNCS
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale.c:284
if
if(ret)
Definition: filter_design.txt:179
VSCALEX_FUNC
#define VSCALEX_FUNC(size, opt)
Definition: swscale.c:266
ff_sws_init_range_convert_x86
av_cold void ff_sws_init_range_convert_x86(SwsInternal *c)
Definition: swscale.c:479
AV_PIX_FMT_GBRP16
#define AV_PIX_FMT_GBRP16
Definition: pixfmt.h:521
NULL
#define NULL
Definition: coverity.c:32
ASSIGN_SSE_SCALE_FUNC
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:74
EXTERNAL_SSE3
#define EXTERNAL_SSE3(flags)
Definition: cpu.h:62
SwsPlane
Slice plane.
Definition: swscale_internal.h:1090
ASSIGN_VSCALEX_FUNC
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
ASSIGN_VSCALE_FUNC
#define ASSIGN_VSCALE_FUNC(vscalefn, opt)
AV_PIX_FMT_GBRP9
#define AV_PIX_FMT_GBRP9
Definition: pixfmt.h:517
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
isBE
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:748
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:104
cpu.h
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
AV_PIX_FMT_GBRPF32
#define AV_PIX_FMT_GBRPF32
Definition: pixfmt.h:532
AV_PIX_FMT_BGR555
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:495
attributes.h
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
ff_sws_init_swscale_x86
av_cold void ff_sws_init_swscale_x86(SwsInternal *c)
Definition: swscale.c:495
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
AV_PIX_FMT_GBRP12
#define AV_PIX_FMT_GBRP12
Definition: pixfmt.h:519
AV_PIX_FMT_NV24
@ AV_PIX_FMT_NV24
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:371
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:490
swscale_internal.h
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AV_PIX_FMT_NV21
@ AV_PIX_FMT_NV21
as above, but U and V bytes are swapped
Definition: pixfmt.h:97
ff_updateMMXDitherTables
void ff_updateMMXDitherTables(SwsInternal *c, int dstY)
AV_PIX_FMT_NV42
@ AV_PIX_FMT_NV42
as above, but U and V bytes are swapped
Definition: pixfmt.h:372
swscale_template.c
ff_dither8
const uint64_t ff_dither8[2]
Definition: swscale.c:37
SwsInternal
Definition: swscale_internal.h:317
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:96
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
SWS_FULL_CHR_H_INT
@ SWS_FULL_CHR_H_INT
Perform full chroma upsampling when upscaling to RGB.
Definition: swscale.h:132
AV_PIX_FMT_UYVY422
@ AV_PIX_FMT_UYVY422
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:88
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
U
#define U(x)
Definition: vpx_arith.h:37
AV_PIX_FMT_GBRAPF32
#define AV_PIX_FMT_GBRAPF32
Definition: pixfmt.h:533
YUV2YUVX_FUNC_MMX
#define YUV2YUVX_FUNC_MMX(opt, step)
Definition: swscale.c:182
INLINE_MMXEXT
#define INLINE_MMXEXT(flags)
Definition: cpu.h:88
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
SwsPlane::sliceY
int sliceY
index of first line
Definition: swscale_internal.h:1093
VSCALE_FUNC
#define VSCALE_FUNC(size, opt)
Definition: swscale.c:281
case_rgb
#define case_rgb(x, X, opt)
int32_t
int32_t
Definition: audioconvert.c:56
RGBA
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:42
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:482
YUV2YUVX_FUNC
#define YUV2YUVX_FUNC(opt, step)
Definition: swscale.c:195
SWS_ACCURATE_RND
@ SWS_ACCURATE_RND
Force bit-exact output.
Definition: swscale.h:155
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
SCALE_FUNCS_SSE
#define SCALE_FUNCS_SSE(opt)
Definition: swscale.c:253
APCK_SIZE
#define APCK_SIZE
Definition: swscale_internal.h:67
VSCALEX_FUNCS
#define VSCALEX_FUNCS(opt)
Definition: swscale.c:270
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
swscale.h
ff_dither4
const uint64_t ff_dither4[2]
Definition: swscale.c:33