FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/avassert.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/mem_internal.h"
31 #include "libavutil/pixdesc.h"
32 
33 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
34  0x0103010301030103LL,
35  0x0200020002000200LL,};
36 
37 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
38  0x0602060206020602LL,
39  0x0004000400040004LL,};
40 
41 #if HAVE_INLINE_ASM
42 
43 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
44 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
45 
46 DECLARE_ASM_CONST(8, uint64_t, M24A) = 0x00FF0000FF0000FFLL;
47 DECLARE_ASM_CONST(8, uint64_t, M24B) = 0xFF0000FF0000FF00LL;
48 DECLARE_ASM_CONST(8, uint64_t, M24C) = 0x0000FF0000FF0000LL;
49 
50 // MMXEXT versions
51 #if HAVE_MMXEXT_INLINE
52 #undef RENAME
53 #undef COMPILE_TEMPLATE_MMXEXT
54 #define COMPILE_TEMPLATE_MMXEXT 1
55 #define RENAME(a) a ## _mmxext
56 #include "swscale_template.c"
57 #endif
58 
59 void ff_updateMMXDitherTables(SwsInternal *c, int dstY)
60 {
61  const int dstH= c->opts.dst_h;
62  const int flags= c->opts.flags;
63 
64  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
65  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
66  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
67 
68  int hasAlpha = c->needAlpha;
69  int32_t *vLumFilterPos= c->vLumFilterPos;
70  int32_t *vChrFilterPos= c->vChrFilterPos;
71  int16_t *vLumFilter= c->vLumFilter;
72  int16_t *vChrFilter= c->vChrFilter;
73  int32_t *lumMmxFilter= c->lumMmxFilter;
74  int32_t *chrMmxFilter= c->chrMmxFilter;
75  int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
76  const int vLumFilterSize= c->vLumFilterSize;
77  const int vChrFilterSize= c->vChrFilterSize;
78  const int chrDstY= dstY>>c->chrDstVSubSample;
79  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
80  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
81 
82  c->blueDither= ff_dither8[dstY&1];
83  if (c->opts.dst_format == AV_PIX_FMT_RGB555 || c->opts.dst_format == AV_PIX_FMT_BGR555)
84  c->greenDither= ff_dither8[dstY&1];
85  else
86  c->greenDither= ff_dither4[dstY&1];
87  c->redDither= ff_dither8[(dstY+1)&1];
88  if (dstY < dstH - 2) {
89  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
90  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
91  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
92 
93  int i;
94  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->opts.src_h) {
95  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
96 
97  int neg = -firstLumSrcY, i, end = FFMIN(c->opts.src_h - firstLumSrcY, vLumFilterSize);
98  for (i = 0; i < neg; i++)
99  tmpY[i] = lumSrcPtr[neg];
100  for ( ; i < end; i++)
101  tmpY[i] = lumSrcPtr[i];
102  for ( ; i < vLumFilterSize; i++)
103  tmpY[i] = tmpY[i-1];
104  lumSrcPtr = tmpY;
105 
106  if (alpSrcPtr) {
107  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
108  for (i = 0; i < neg; i++)
109  tmpA[i] = alpSrcPtr[neg];
110  for ( ; i < end; i++)
111  tmpA[i] = alpSrcPtr[i];
112  for ( ; i < vLumFilterSize; i++)
113  tmpA[i] = tmpA[i - 1];
114  alpSrcPtr = tmpA;
115  }
116  }
117  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
118  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
119  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
120  for (i = 0; i < neg; i++) {
121  tmpU[i] = chrUSrcPtr[neg];
122  }
123  for ( ; i < end; i++) {
124  tmpU[i] = chrUSrcPtr[i];
125  }
126  for ( ; i < vChrFilterSize; i++) {
127  tmpU[i] = tmpU[i - 1];
128  }
129  chrUSrcPtr = tmpU;
130  }
131 
132  if (flags & SWS_ACCURATE_RND) {
133  int s= APCK_SIZE / 8;
134  for (i=0; i<vLumFilterSize; i+=2) {
135  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
136  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
137  lumMmxFilter[s*i+APCK_COEF/4 ]=
138  lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
139  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
140  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
141  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
142  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
143  alpMmxFilter[s*i+APCK_COEF/4 ]=
144  alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
145  }
146  }
147  for (i=0; i<vChrFilterSize; i+=2) {
148  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
149  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
150  chrMmxFilter[s*i+APCK_COEF/4 ]=
151  chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
152  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
153  }
154  } else {
155  for (i=0; i<vLumFilterSize; i++) {
156  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
157  lumMmxFilter[4*i+2]=
158  lumMmxFilter[4*i+3]=
159  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
160  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
161  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
162  alpMmxFilter[4*i+2]=
163  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
164  }
165  }
166  for (i=0; i<vChrFilterSize; i++) {
167  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
168  chrMmxFilter[4*i+2]=
169  chrMmxFilter[4*i+3]=
170  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
171  }
172  }
173  }
174 }
175 #endif /* HAVE_INLINE_ASM */
176 
/*
 * Declares the external ff_yuv2yuvX_<opt>() routine and defines a static
 * yuv2yuvX_<opt>() adapter around it.
 *
 * The adapter does nothing for dstW <= 0. Otherwise it forwards
 * filterSize - 1, a source offset of 0, and a destination pointer/width
 * shifted by `offset` (dest - offset, dstW + offset). `src` is not
 * forwarded and `step` is unused by this variant.
 */
#define YUV2YUVX_FUNC_MMX(opt, step)                                          \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize,                \
                        int srcOffset, uint8_t *dest, int dstW,               \
                        const uint8_t *dither, int offset);                   \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize,            \
                            const int16_t **src, uint8_t *dest, int dstW,     \
                            const uint8_t *dither, int offset)                \
{                                                                             \
    if (dstW <= 0)                                                            \
        return;                                                               \
    ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset,              \
                       dstW + offset, dither, offset);                        \
}
189 
/*
 * Declares the external ff_yuv2yuvX_<opt>() routine and defines a static
 * yuv2yuvX_<opt>() adapter that processes `step` pixels per iteration.
 *
 *  - If `dest` is not 16-byte aligned, the whole row is handed to
 *    yuv2yuvX_mmxext().
 *  - Otherwise the largest multiple of `step` pixels goes through the <opt>
 *    routine, and the remaining dstW % step pixels go through
 *    ff_yuv2yuvX_mmxext(), starting at source offset `bulk`.
 *
 * Requires yuv2yuvX_mmxext()/ff_yuv2yuvX_mmxext() to be declared already
 * (see YUV2YUVX_FUNC_MMX(mmxext, ...) below).
 */
#define YUV2YUVX_FUNC(opt, step)                                              \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize,                \
                        int srcOffset, uint8_t *dest, int dstW,               \
                        const uint8_t *dither, int offset);                   \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize,            \
                            const int16_t **src, uint8_t *dest, int dstW,     \
                            const uint8_t *dither, int offset)                \
{                                                                             \
    const int tail = dstW % step;                                             \
    const int bulk = dstW - tail;                                             \
                                                                              \
    if (((uintptr_t)dest) & 15) {                                             \
        yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset); \
        return;                                                               \
    }                                                                         \
    if (bulk > 0) {                                                           \
        ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset,          \
                           bulk + offset, dither, offset);                    \
    }                                                                         \
    if (tail > 0) {                                                           \
        ff_yuv2yuvX_mmxext(filter, filterSize - 1, bulk, dest - offset,       \
                           bulk + tail + offset, dither, offset);             \
    }                                                                         \
}

/* Instantiate one adapter per instruction set that has external assembly. */
#if HAVE_MMXEXT_EXTERNAL
YUV2YUVX_FUNC_MMX(mmxext, 16)
#endif
#if HAVE_SSE3_EXTERNAL
YUV2YUVX_FUNC(sse3, 32)
#endif
#if HAVE_AVX2_EXTERNAL
YUV2YUVX_FUNC(avx2, 64)
#endif
221 
222 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
223 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
224  SwsInternal *c, int16_t *data, \
225  int dstW, const uint8_t *src, \
226  const int16_t *filter, \
227  const int32_t *filterPos, int filterSize)
228 
229 #define SCALE_FUNCS(filter_n, opt) \
230  SCALE_FUNC(filter_n, 8, 15, opt); \
231  SCALE_FUNC(filter_n, 9, 15, opt); \
232  SCALE_FUNC(filter_n, 10, 15, opt); \
233  SCALE_FUNC(filter_n, 12, 15, opt); \
234  SCALE_FUNC(filter_n, 14, 15, opt); \
235  SCALE_FUNC(filter_n, 16, 15, opt); \
236  SCALE_FUNC(filter_n, 8, 19, opt); \
237  SCALE_FUNC(filter_n, 9, 19, opt); \
238  SCALE_FUNC(filter_n, 10, 19, opt); \
239  SCALE_FUNC(filter_n, 12, 19, opt); \
240  SCALE_FUNC(filter_n, 14, 19, opt); \
241  SCALE_FUNC(filter_n, 16, 19, opt)
242 
243 #define SCALE_FUNCS_MMX(opt) \
244  SCALE_FUNCS(4, opt); \
245  SCALE_FUNCS(8, opt); \
246  SCALE_FUNCS(X, opt)
247 
248 #define SCALE_FUNCS_SSE(opt) \
249  SCALE_FUNCS(4, opt); \
250  SCALE_FUNCS(8, opt); \
251  SCALE_FUNCS(X4, opt); \
252  SCALE_FUNCS(X8, opt)
253 
254 SCALE_FUNCS_SSE(sse2);
255 SCALE_FUNCS_SSE(ssse3);
256 SCALE_FUNCS_SSE(sse4);
257 
258 SCALE_FUNC(4, 8, 15, avx2);
259 SCALE_FUNC(X4, 8, 15, avx2);
260 
/* Prototype of one external multi-tap vertical scaler, ff_yuv2planeX_<size>_<opt>(). */
#define VSCALEX_FUNC(size, opt)                                               \
void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter,                \
                                        int filterSize,                       \
                                        const int16_t **src,                  \
                                        uint8_t *dest, int dstW,              \
                                        const uint8_t *dither, int offset)

/* The 8-, 9- and 10-bit variants for one instruction set. */
#define VSCALEX_FUNCS(opt)                                                    \
    VSCALEX_FUNC(8,  opt);                                                    \
    VSCALEX_FUNC(9,  opt);                                                    \
    VSCALEX_FUNC(10, opt)

VSCALEX_FUNC(8, mmxext);
VSCALEX_FUNCS(sse2);
VSCALEX_FUNCS(sse4);
VSCALEX_FUNC(16, sse4);
VSCALEX_FUNCS(avx);
275 
/* Prototype of one external single-line vertical output routine, ff_yuv2plane1_<size>_<opt>(). */
#define VSCALE_FUNC(size, opt)                                                \
void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst,     \
                                        int dstW, const uint8_t *dither,      \
                                        int offset)

/*
 * Declares the 8- and 16-bit variants for opt1 and the 9- and 10-bit
 * variants for opt2.
 */
#define VSCALE_FUNCS(opt1, opt2)                                              \
    VSCALE_FUNC(8,  opt1);                                                    \
    VSCALE_FUNC(9,  opt2);                                                    \
    VSCALE_FUNC(10, opt2);                                                    \
    VSCALE_FUNC(16, opt1)

VSCALE_FUNCS(sse2, sse2);
VSCALE_FUNC(16, sse4);
VSCALE_FUNCS(avx, avx);
288 
/* Prototype of an external packed-input luma reader, ff_<fmt>ToY_<opt>(). */
#define INPUT_Y_FUNC(fmt, opt)                                                \
void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src,              \
                               const uint8_t *unused1,                        \
                               const uint8_t *unused2,                        \
                               int w, uint32_t *unused, void *opq)

/* Prototype of an external packed-input chroma reader, ff_<fmt>ToUV_<opt>(). */
#define INPUT_UV_FUNC(fmt, opt)                                               \
void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV,                 \
                                const uint8_t *unused0,                       \
                                const uint8_t *src1,                          \
                                const uint8_t *src2,                          \
                                int w, uint32_t *unused, void *opq)

/* Luma and chroma reader for one format. */
#define INPUT_FUNC(fmt, opt)                                                  \
    INPUT_Y_FUNC(fmt, opt);                                                   \
    INPUT_UV_FUNC(fmt, opt)

/* Readers for the six packed RGB layouts. */
#define INPUT_RGB_FUNCS(opt)                                                  \
    INPUT_FUNC(rgba,  opt);                                                   \
    INPUT_FUNC(bgra,  opt);                                                   \
    INPUT_FUNC(argb,  opt);                                                   \
    INPUT_FUNC(abgr,  opt);                                                   \
    INPUT_FUNC(rgb24, opt);                                                   \
    INPUT_FUNC(bgr24, opt)

/* Full set: uyvy/yuyv (Y + UV), nv12/nv21 (UV only) and the RGB layouts. */
#define INPUT_FUNCS(opt)                                                      \
    INPUT_FUNC(uyvy, opt);                                                    \
    INPUT_FUNC(yuyv, opt);                                                    \
    INPUT_UV_FUNC(nv12, opt);                                                 \
    INPUT_UV_FUNC(nv21, opt);                                                 \
    INPUT_RGB_FUNCS(opt)

INPUT_FUNCS(sse2);
INPUT_FUNCS(ssse3);
INPUT_FUNCS(avx);
/* AVX2 only declares the RGB readers. */
INPUT_RGB_FUNCS(avx2);
323 
324 #if ARCH_X86_64
/* Prototype of an external interleaved-chroma writer, ff_yuv2<fmt>cX_<opt>(). */
#define YUV2NV_DECL(fmt, opt)                                                 \
void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format,                  \
                                  const uint8_t *dither,                      \
                                  const int16_t *filter, int filterSize,      \
                                  const int16_t **u, const int16_t **v,       \
                                  uint8_t *dst, int dstWidth)

YUV2NV_DECL(nv12, avx2);
YUV2NV_DECL(nv21, avx2);
333 
334 #define YUV2GBRP_FN_DECL(fmt, opt) \
335 void ff_yuv2##fmt##_full_X_ ##opt(SwsInternal *c, const int16_t *lumFilter, \
336  const int16_t **lumSrcx, int lumFilterSize, \
337  const int16_t *chrFilter, const int16_t **chrUSrcx, \
338  const int16_t **chrVSrcx, int chrFilterSize, \
339  const int16_t **alpSrcx, uint8_t **dest, \
340  int dstW, int y)
341 
342 #define YUV2GBRP_DECL(opt) \
343 YUV2GBRP_FN_DECL(gbrp, opt); \
344 YUV2GBRP_FN_DECL(gbrap, opt); \
345 YUV2GBRP_FN_DECL(gbrp9le, opt); \
346 YUV2GBRP_FN_DECL(gbrp10le, opt); \
347 YUV2GBRP_FN_DECL(gbrap10le, opt); \
348 YUV2GBRP_FN_DECL(gbrp12le, opt); \
349 YUV2GBRP_FN_DECL(gbrap12le, opt); \
350 YUV2GBRP_FN_DECL(gbrp14le, opt); \
351 YUV2GBRP_FN_DECL(gbrp16le, opt); \
352 YUV2GBRP_FN_DECL(gbrap16le, opt); \
353 YUV2GBRP_FN_DECL(gbrpf32le, opt); \
354 YUV2GBRP_FN_DECL(gbrapf32le, opt); \
355 YUV2GBRP_FN_DECL(gbrp9be, opt); \
356 YUV2GBRP_FN_DECL(gbrp10be, opt); \
357 YUV2GBRP_FN_DECL(gbrap10be, opt); \
358 YUV2GBRP_FN_DECL(gbrp12be, opt); \
359 YUV2GBRP_FN_DECL(gbrap12be, opt); \
360 YUV2GBRP_FN_DECL(gbrp14be, opt); \
361 YUV2GBRP_FN_DECL(gbrp16be, opt); \
362 YUV2GBRP_FN_DECL(gbrap16be, opt); \
363 YUV2GBRP_FN_DECL(gbrpf32be, opt); \
364 YUV2GBRP_FN_DECL(gbrapf32be, opt)
365 
366 YUV2GBRP_DECL(sse2);
367 YUV2GBRP_DECL(sse4);
368 YUV2GBRP_DECL(avx2);
369 
/*
 * Prototypes of the external planar-RGB input readers:
 * ff_planar_<fmt>_to_y_<opt>(), ..._to_uv_<opt>() and ..._to_a_<opt>().
 */
#define INPUT_PLANAR_RGB_Y_FN_DECL(fmt, opt)                                  \
void ff_planar_##fmt##_to_y_##opt(uint8_t *dst, const uint8_t *src[4],        \
                                  int w, int32_t *rgb2yuv, void *opq)

#define INPUT_PLANAR_RGB_UV_FN_DECL(fmt, opt)                                 \
void ff_planar_##fmt##_to_uv_##opt(uint8_t *dstU, uint8_t *dstV,              \
                                   const uint8_t *src[4],                     \
                                   int w, int32_t *rgb2yuv, void *opq)

#define INPUT_PLANAR_RGB_A_FN_DECL(fmt, opt)                                  \
void ff_planar_##fmt##_to_a_##opt(uint8_t *dst, const uint8_t *src[4],        \
                                  int w, int32_t *rgb2yuv, void *opq)

/* Little- and big-endian pair of one reader kind. */
#define INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)                                   \
INPUT_PLANAR_RGB_A_FN_DECL(fmt##le, opt);                                     \
INPUT_PLANAR_RGB_A_FN_DECL(fmt##be, opt)

#define INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt)                                   \
INPUT_PLANAR_RGB_Y_FN_DECL(fmt##le, opt);                                     \
INPUT_PLANAR_RGB_Y_FN_DECL(fmt##be, opt)

#define INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt)                                  \
INPUT_PLANAR_RGB_UV_FN_DECL(fmt##le, opt);                                    \
INPUT_PLANAR_RGB_UV_FN_DECL(fmt##be, opt)

/* Combinations of reader kinds for one format (both endiannesses). */
#define INPUT_PLANAR_RGBXX_YUVA_DECL(fmt, opt)                                \
INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt);                                          \
INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt);                                         \
INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)

#define INPUT_PLANAR_RGBXX_YUV_DECL(fmt, opt)                                 \
INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt);                                          \
INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt)

#define INPUT_PLANAR_RGBXX_UVA_DECL(fmt, opt)                                 \
INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt);                                         \
INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)

/* Alpha readers: 8-bit rgb plus the 10/12/16-bit and float formats. */
#define INPUT_PLANAR_RGB_A_ALL_DECL(opt)                                      \
INPUT_PLANAR_RGB_A_FN_DECL(rgb, opt);                                         \
INPUT_PLANAR_RGBXX_A_DECL(rgb10,  opt);                                       \
INPUT_PLANAR_RGBXX_A_DECL(rgb12,  opt);                                       \
INPUT_PLANAR_RGBXX_A_DECL(rgb16,  opt);                                       \
INPUT_PLANAR_RGBXX_A_DECL(rgbf32, opt)

/* Luma readers: 8-bit rgb plus the 9..16-bit and float formats. */
#define INPUT_PLANAR_RGB_Y_ALL_DECL(opt)                                      \
INPUT_PLANAR_RGB_Y_FN_DECL(rgb, opt);                                         \
INPUT_PLANAR_RGBXX_Y_DECL(rgb9,   opt);                                       \
INPUT_PLANAR_RGBXX_Y_DECL(rgb10,  opt);                                       \
INPUT_PLANAR_RGBXX_Y_DECL(rgb12,  opt);                                       \
INPUT_PLANAR_RGBXX_Y_DECL(rgb14,  opt);                                       \
INPUT_PLANAR_RGBXX_Y_DECL(rgb16,  opt);                                       \
INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, opt)

/* Chroma readers: 8-bit rgb plus the 9..16-bit and float formats. */
#define INPUT_PLANAR_RGB_UV_ALL_DECL(opt)                                     \
INPUT_PLANAR_RGB_UV_FN_DECL(rgb, opt);                                        \
INPUT_PLANAR_RGBXX_UV_DECL(rgb9,   opt);                                      \
INPUT_PLANAR_RGBXX_UV_DECL(rgb10,  opt);                                      \
INPUT_PLANAR_RGBXX_UV_DECL(rgb12,  opt);                                      \
INPUT_PLANAR_RGBXX_UV_DECL(rgb14,  opt);                                      \
INPUT_PLANAR_RGBXX_UV_DECL(rgb16,  opt);                                      \
INPUT_PLANAR_RGBXX_UV_DECL(rgbf32, opt)

/* SSE2: luma only for rgbf32; chroma and alpha for every format. */
INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, sse2);
INPUT_PLANAR_RGB_UV_ALL_DECL(sse2);
INPUT_PLANAR_RGB_A_ALL_DECL(sse2);

/* SSE4: luma and chroma for every format; alpha only for rgbf32. */
INPUT_PLANAR_RGB_Y_ALL_DECL(sse4);
INPUT_PLANAR_RGB_UV_ALL_DECL(sse4);
INPUT_PLANAR_RGBXX_A_DECL(rgbf32, sse4);

/* AVX2: everything. */
INPUT_PLANAR_RGB_Y_ALL_DECL(avx2);
INPUT_PLANAR_RGB_UV_ALL_DECL(avx2);
INPUT_PLANAR_RGB_A_ALL_DECL(avx2);
447 #endif
448 
/*
 * Installs the luma/chroma range-conversion routines of instruction set
 * `opt` and sample depth `bpc` on the SwsInternal named `c` in the expanding
 * scope: the FromJpeg pair when c->opts.src_range is set, the ToJpeg pair
 * otherwise.
 */
#define RANGE_CONVERT_FUNCS(opt, bpc) do {                                    \
    if (c->opts.src_range) {                                                  \
        c->lumConvertRange = ff_lumRangeFromJpeg##bpc##_##opt;                \
        c->chrConvertRange = ff_chrRangeFromJpeg##bpc##_##opt;                \
    } else {                                                                  \
        c->lumConvertRange = ff_lumRangeToJpeg##bpc##_##opt;                  \
        c->chrConvertRange = ff_chrRangeToJpeg##bpc##_##opt;                  \
    }                                                                         \
} while (0)

/*
 * Declares the four external range-conversion routines for one
 * (instruction set, depth) pair. The expansion ends in ';', so invocations
 * take no trailing semicolon.
 */
#define RANGE_CONVERT_FUNCS_DECL(opt, bpc)                                    \
void ff_lumRangeFromJpeg##bpc##_##opt(int16_t *dst, int width,                \
                                      uint32_t coeff, int64_t offset);        \
void ff_chrRangeFromJpeg##bpc##_##opt(int16_t *dstU, int16_t *dstV,           \
                                      int width,                              \
                                      uint32_t coeff, int64_t offset);        \
void ff_lumRangeToJpeg##bpc##_##opt(int16_t *dst, int width,                  \
                                    uint32_t coeff, int64_t offset);          \
void ff_chrRangeToJpeg##bpc##_##opt(int16_t *dstU, int16_t *dstV,             \
                                    int width,                                \
                                    uint32_t coeff, int64_t offset);

/*
 * One declaration set per combination that the range-convert initializer
 * installs through RANGE_CONVERT_FUNCS(): (sse2, 8), (sse4, 16), (avx2, 8)
 * and (avx2, 16). The (sse2, 8) and (avx2, 8) lines had been dropped from
 * this listing although the initializer references those symbols.
 */
RANGE_CONVERT_FUNCS_DECL(sse2, 8)
RANGE_CONVERT_FUNCS_DECL(sse4, 16)
RANGE_CONVERT_FUNCS_DECL(avx2, 8)
RANGE_CONVERT_FUNCS_DECL(avx2, 16)
473 
475 {
476  int cpu_flags = av_get_cpu_flags();
478  if (c->dstBpc <= 14) {
479  RANGE_CONVERT_FUNCS(avx2, 8);
480  } else {
481  RANGE_CONVERT_FUNCS(avx2, 16);
482  }
483  } else if (EXTERNAL_SSE2(cpu_flags) && c->dstBpc <= 14) {
484  RANGE_CONVERT_FUNCS(sse2, 8);
485  } else if (EXTERNAL_SSE4(cpu_flags) && c->dstBpc > 14) {
486  RANGE_CONVERT_FUNCS(sse4, 16);
487  }
488 }
489 
491 {
492  int cpu_flags = av_get_cpu_flags();
493 
494 #if HAVE_MMXEXT_INLINE
496  sws_init_swscale_mmxext(c);
497 #endif
498  if(c->use_mmx_vfilter && !(c->opts.flags & SWS_ACCURATE_RND)) {
499 #if HAVE_MMXEXT_EXTERNAL
501  c->yuv2planeX = yuv2yuvX_mmxext;
502 #endif
503 #if HAVE_SSE3_EXTERNAL
505  c->yuv2planeX = yuv2yuvX_sse3;
506 #endif
507 #if HAVE_AVX2_EXTERNAL
509  c->yuv2planeX = yuv2yuvX_avx2;
510 #endif
511  }
512 #if ARCH_X86_32 && !HAVE_ALIGNED_STACK
513  // The better yuv2planeX_8 functions need aligned stack on x86-32,
514  // so we use MMXEXT in this case if they are not available.
515  if (EXTERNAL_MMXEXT(cpu_flags)) {
516  if (c->dstBpc == 8 && !c->use_mmx_vfilter)
517  c->yuv2planeX = ff_yuv2planeX_8_mmxext;
518  }
519 #endif /* ARCH_X86_32 && !HAVE_ALIGNED_STACK */
520 
521 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
522  if (c->srcBpc == 8) { \
523  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
524  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
525  } else if (c->srcBpc == 9) { \
526  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
527  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
528  } else if (c->srcBpc == 10) { \
529  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
530  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
531  } else if (c->srcBpc == 12) { \
532  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
533  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
534  } else if (c->srcBpc == 14 || ((c->opts.src_format==AV_PIX_FMT_PAL8||isAnyRGB(c->opts.src_format)) && av_pix_fmt_desc_get(c->opts.src_format)->comp[0].depth<16)) { \
535  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
536  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
537  } else { /* c->srcBpc == 16 */ \
538  av_assert0(c->srcBpc == 16);\
539  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
540  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
541  } \
542 } while (0)
543 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
544 switch(c->dstBpc){ \
545  case 16: do_16_case; break; \
546  case 10: if (!isBE(c->opts.dst_format) && !isSemiPlanarYUV(c->opts.dst_format)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
547  case 9: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
548  case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
549  }
550 #define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
551  switch(c->dstBpc){ \
552  case 16: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2plane1_16_ ## opt; break; \
553  case 10: if (!isBE(c->opts.dst_format) && !isSemiPlanarYUV(c->opts.dst_format)) vscalefn = ff_yuv2plane1_10_ ## opt; break; \
554  case 9: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2plane1_9_ ## opt; break; \
555  case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break; \
556  default: av_assert0(c->dstBpc>8); \
557  }
558 #define case_rgb(x, X, opt) \
559  case AV_PIX_FMT_ ## X: \
560  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
561  if (!c->chrSrcHSubSample) \
562  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
563  break
564 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
565  switch (filtersize) { \
566  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
567  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
568  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
569  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
570  break; \
571  }
572  if (EXTERNAL_SSE2(cpu_flags)) {
573  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
574  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
575  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
576  HAVE_ALIGNED_STACK || ARCH_X86_64);
577  if (!(c->opts.flags & SWS_ACCURATE_RND))
578  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2);
579 
580  switch (c->opts.src_format) {
581  case AV_PIX_FMT_YA8:
582  c->lumToYV12 = ff_yuyvToY_sse2;
583  if (c->needAlpha)
584  c->alpToYV12 = ff_uyvyToY_sse2;
585  break;
586  case AV_PIX_FMT_YUYV422:
587  c->lumToYV12 = ff_yuyvToY_sse2;
588  c->chrToYV12 = ff_yuyvToUV_sse2;
589  break;
590  case AV_PIX_FMT_UYVY422:
591  c->lumToYV12 = ff_uyvyToY_sse2;
592  c->chrToYV12 = ff_uyvyToUV_sse2;
593  break;
594  case AV_PIX_FMT_NV12:
595  c->chrToYV12 = ff_nv12ToUV_sse2;
596  break;
597  case AV_PIX_FMT_NV21:
598  c->chrToYV12 = ff_nv21ToUV_sse2;
599  break;
600  case_rgb(rgb24, RGB24, sse2);
601  case_rgb(bgr24, BGR24, sse2);
602  case_rgb(bgra, BGRA, sse2);
603  case_rgb(rgba, RGBA, sse2);
604  case_rgb(abgr, ABGR, sse2);
605  case_rgb(argb, ARGB, sse2);
606  default:
607  break;
608  }
609  }
610  if (EXTERNAL_SSSE3(cpu_flags)) {
611  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
612  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
613  switch (c->opts.src_format) {
614  case_rgb(rgb24, RGB24, ssse3);
615  case_rgb(bgr24, BGR24, ssse3);
616  default:
617  break;
618  }
619  }
620  if (EXTERNAL_SSE4(cpu_flags)) {
621  /* Xto15 don't need special sse4 functions */
622  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
623  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
624  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
625  if (!isBE(c->opts.dst_format)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
626  HAVE_ALIGNED_STACK || ARCH_X86_64);
627  if (c->dstBpc == 16 && !isBE(c->opts.dst_format) && !(c->opts.flags & SWS_ACCURATE_RND))
628  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
629  }
630 
631  if (EXTERNAL_AVX(cpu_flags)) {
632  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
633  HAVE_ALIGNED_STACK || ARCH_X86_64);
634  if (!(c->opts.flags & SWS_ACCURATE_RND))
635  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx);
636 
637  switch (c->opts.src_format) {
638  case AV_PIX_FMT_YUYV422:
639  c->chrToYV12 = ff_yuyvToUV_avx;
640  break;
641  case AV_PIX_FMT_UYVY422:
642  c->chrToYV12 = ff_uyvyToUV_avx;
643  break;
644  case AV_PIX_FMT_NV12:
645  c->chrToYV12 = ff_nv12ToUV_avx;
646  break;
647  case AV_PIX_FMT_NV21:
648  c->chrToYV12 = ff_nv21ToUV_avx;
649  break;
650  case_rgb(rgb24, RGB24, avx);
651  case_rgb(bgr24, BGR24, avx);
652  case_rgb(bgra, BGRA, avx);
653  case_rgb(rgba, RGBA, avx);
654  case_rgb(abgr, ABGR, avx);
655  case_rgb(argb, ARGB, avx);
656  default:
657  break;
658  }
659  }
660 
661 #if ARCH_X86_64
662 #define ASSIGN_AVX2_SCALE_FUNC(hscalefn, filtersize) \
663  switch (filtersize) { \
664  case 4: hscalefn = ff_hscale8to15_4_avx2; break; \
665  default: hscalefn = ff_hscale8to15_X4_avx2; break; \
666  break; \
667  }
668 
670  if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
671  ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
672  ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
673  }
674  }
675 
677  if (ARCH_X86_64)
678  switch (c->opts.src_format) {
679  case_rgb(rgb24, RGB24, avx2);
680  case_rgb(bgr24, BGR24, avx2);
681  case_rgb(bgra, BGRA, avx2);
682  case_rgb(rgba, RGBA, avx2);
683  case_rgb(abgr, ABGR, avx2);
684  case_rgb(argb, ARGB, avx2);
685  }
686  if (!(c->opts.flags & SWS_ACCURATE_RND)) // FIXME
687  switch (c->opts.dst_format) {
688  case AV_PIX_FMT_NV12:
689  case AV_PIX_FMT_NV24:
690  c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
691  break;
692  case AV_PIX_FMT_NV21:
693  case AV_PIX_FMT_NV42:
694  c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
695  break;
696  default:
697  break;
698  }
699  }
700 
701 
702 #define INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(fmt, name, opt) \
703  case fmt: \
704  c->readAlpPlanar = ff_planar_##name##_to_a_##opt;
705 
706 #define INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
707  case rgba_fmt: \
708  case rgb_fmt: \
709  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
710  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
711  break;
712 
713 #define INPUT_PLANER_RGB_YUV_FUNC_CASE(fmt, name, opt) \
714  case fmt: \
715  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
716  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
717  break;
718 
719 #define INPUT_PLANER_RGB_UV_FUNC_CASE(fmt, name, opt) \
720  case fmt: \
721  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
722  break;
723 
724 #define INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
725  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
726  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
727  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
728  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
729 
730 #define INPUT_PLANER_RGBAXX_UVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
731  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
732  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
733  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
734  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
735 
736 #define INPUT_PLANER_RGBAXX_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
737  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##LE, rgba_fmt##LE, name##le, opt) \
738  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##BE, rgba_fmt##BE, name##be, opt)
739 
740 #define INPUT_PLANER_RGBXX_YUV_FUNC_CASE(rgb_fmt, name, opt) \
741  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
742  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
743 
744 #define INPUT_PLANER_RGBXX_UV_FUNC_CASE(rgb_fmt, name, opt) \
745  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
746  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
747 
748 #define INPUT_PLANER_RGB_YUVA_ALL_CASES(opt) \
749  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, opt) \
750  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, opt) \
751  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, opt) \
752  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, opt) \
753  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, opt) \
754  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, opt) \
755  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, opt) \
756  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, opt)
757 
758 
759  if (EXTERNAL_SSE2(cpu_flags)) {
760  switch (c->opts.src_format) {
761  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, sse2);
762  INPUT_PLANER_RGB_UV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse2);
763  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse2);
764  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse2);
765  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse2);
766  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse2);
767  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse2);
768  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse2);
769  default:
770  break;
771  }
772  }
773 
774  if (EXTERNAL_SSE4(cpu_flags)) {
775  switch (c->opts.src_format) {
776  case AV_PIX_FMT_GBRAP:
777  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse4);
778  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse4);
779  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse4);
780  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse4);
781  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse4);
782  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse4);
783  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse4);
784  default:
785  break;
786  }
787  }
788 
790  switch (c->opts.src_format) {
791  INPUT_PLANER_RGB_YUVA_ALL_CASES(avx2)
792  default:
793  break;
794  }
795  }
796 
797  if(c->opts.flags & SWS_FULL_CHR_H_INT) {
798 
799 #define YUV2ANYX_FUNC_CASE(fmt, name, opt) \
800  case fmt: \
801  c->yuv2anyX = ff_yuv2##name##_full_X_##opt; \
802  break;
803 
804 #define YUV2ANYX_GBRAP_CASES(opt) \
805  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP, gbrp, opt) \
806  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP, gbrap, opt) \
807  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9LE, gbrp9le, opt) \
808  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10LE, gbrp10le, opt) \
809  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10LE, gbrap10le, opt) \
810  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12LE, gbrp12le, opt) \
811  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12LE, gbrap12le, opt) \
812  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14LE, gbrp14le, opt) \
813  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16LE, gbrp16le, opt) \
814  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16LE, gbrap16le, opt) \
815  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32LE, gbrpf32le, opt) \
816  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32LE, gbrapf32le, opt) \
817  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9BE, gbrp9be, opt) \
818  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10BE, gbrp10be, opt) \
819  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10BE, gbrap10be, opt) \
820  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12BE, gbrp12be, opt) \
821  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12BE, gbrap12be, opt) \
822  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14BE, gbrp14be, opt) \
823  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16BE, gbrp16be, opt) \
824  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16BE, gbrap16be, opt) \
825  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32BE, gbrpf32be, opt) \
826  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32BE, gbrapf32be, opt)
827 
828  if (EXTERNAL_SSE2(cpu_flags)) {
829  switch (c->opts.dst_format) {
830  YUV2ANYX_GBRAP_CASES(sse2)
831  default:
832  break;
833  }
834  }
835 
836  if (EXTERNAL_SSE4(cpu_flags)) {
837  switch (c->opts.dst_format) {
838  YUV2ANYX_GBRAP_CASES(sse4)
839  default:
840  break;
841  }
842  }
843 
845  switch (c->opts.dst_format) {
846  YUV2ANYX_GBRAP_CASES(avx2)
847  default:
848  break;
849  }
850  }
851  }
852 
853 #endif
854 }
AV_PIX_FMT_GBRAP16
#define AV_PIX_FMT_GBRAP16
Definition: pixfmt.h:538
APCK_PTR2
#define APCK_PTR2
Definition: swscale_internal.h:65
cpu.h
SwsPlane::line
uint8_t ** line
line buffer
Definition: swscale_internal.h:1095
AV_PIX_FMT_YA8
@ AV_PIX_FMT_YA8
8 bits gray, 8 bits alpha
Definition: pixfmt.h:140
mem_internal.h
av_unused
#define av_unused
Definition: attributes.h:131
EXTERNAL_AVX2_FAST
#define EXTERNAL_AVX2_FAST(flags)
Definition: cpu.h:79
pixdesc.h
DECLARE_ASM_CONST
#define DECLARE_ASM_CONST(n, t, v)
Definition: mem_internal.h:90
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
INPUT_FUNC
#define INPUT_FUNC(fmt, opt)
Definition: swscale.c:299
INPUT_FUNCS
#define INPUT_FUNCS(opt)
Definition: swscale.c:302
rgb
Definition: rpzaenc.c:60
RANGE_CONVERT_FUNCS_DECL
#define RANGE_CONVERT_FUNCS_DECL(opt, bpc)
Definition: swscale.c:459
AV_PIX_FMT_GBRP14
#define AV_PIX_FMT_GBRP14
Definition: pixfmt.h:533
RANGE_CONVERT_FUNCS
#define RANGE_CONVERT_FUNCS(opt, bpc)
Definition: swscale.c:449
AV_PIX_FMT_GBRAP
@ AV_PIX_FMT_GBRAP
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:212
AV_PIX_FMT_GBRP10
#define AV_PIX_FMT_GBRP10
Definition: pixfmt.h:531
AV_CPU_FLAG_SLOW_GATHER
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
Definition: cpu.h:59
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
AV_PIX_FMT_GBRAP10
#define AV_PIX_FMT_GBRAP10
Definition: pixfmt.h:535
intreadwrite.h
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_PIX_FMT_GBRAP12
#define AV_PIX_FMT_GBRAP12
Definition: pixfmt.h:536
APCK_COEF
#define APCK_COEF
Definition: swscale_internal.h:66
SwsPlane::tmp
uint8_t ** tmp
Tmp line buffer used by mmx code.
Definition: swscale_internal.h:1096
SCALE_FUNC
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt)
Definition: swscale.c:222
VSCALE_FUNCS
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale.c:279
if
if(ret)
Definition: filter_design.txt:179
VSCALEX_FUNC
#define VSCALEX_FUNC(size, opt)
Definition: swscale.c:261
ff_sws_init_range_convert_x86
av_cold void ff_sws_init_range_convert_x86(SwsInternal *c)
Definition: swscale.c:474
AV_PIX_FMT_GBRP16
#define AV_PIX_FMT_GBRP16
Definition: pixfmt.h:534
NULL
#define NULL
Definition: coverity.c:32
ASSIGN_SSE_SCALE_FUNC
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:74
EXTERNAL_SSE3
#define EXTERNAL_SSE3(flags)
Definition: cpu.h:62
SwsPlane
Slice plane.
Definition: swscale_internal.h:1090
ASSIGN_VSCALEX_FUNC
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
ASSIGN_VSCALE_FUNC
#define ASSIGN_VSCALE_FUNC(vscalefn, opt)
AV_PIX_FMT_GBRP9
#define AV_PIX_FMT_GBRP9
Definition: pixfmt.h:530
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not [...] (excerpt truncated by the documentation generator)
Definition: undefined.txt:32
isBE
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:748
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:104
cpu.h
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:424
AV_PIX_FMT_GBRPF32
#define AV_PIX_FMT_GBRPF32
Definition: pixfmt.h:547
AV_PIX_FMT_BGR555
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:508
attributes.h
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
ff_sws_init_swscale_x86
av_cold void ff_sws_init_swscale_x86(SwsInternal *c)
Definition: swscale.c:490
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
AV_PIX_FMT_GBRP12
#define AV_PIX_FMT_GBRP12
Definition: pixfmt.h:532
AV_PIX_FMT_NV24
@ AV_PIX_FMT_NV24
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:371
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:503
swscale_internal.h
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AV_PIX_FMT_NV21
@ AV_PIX_FMT_NV21
as AV_PIX_FMT_NV12, but U and V bytes are swapped
Definition: pixfmt.h:97
ff_updateMMXDitherTables
void ff_updateMMXDitherTables(SwsInternal *c, int dstY)
AV_PIX_FMT_NV42
@ AV_PIX_FMT_NV42
as AV_PIX_FMT_NV24, but U and V bytes are swapped
Definition: pixfmt.h:372
swscale_template.c
ff_dither8
const uint64_t ff_dither8[2]
Definition: swscale.c:37
SwsInternal
Definition: swscale_internal.h:317
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:96
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
SWS_FULL_CHR_H_INT
@ SWS_FULL_CHR_H_INT
Perform full chroma upsampling when upscaling to RGB.
Definition: swscale.h:132
AV_PIX_FMT_UYVY422
@ AV_PIX_FMT_UYVY422
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:88
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
U
#define U(x)
Definition: vpx_arith.h:37
AV_PIX_FMT_GBRAPF32
#define AV_PIX_FMT_GBRAPF32
Definition: pixfmt.h:548
YUV2YUVX_FUNC_MMX
#define YUV2YUVX_FUNC_MMX(opt, step)
Definition: swscale.c:177
INLINE_MMXEXT
#define INLINE_MMXEXT(flags)
Definition: cpu.h:88
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
SwsPlane::sliceY
int sliceY
index of first line
Definition: swscale_internal.h:1093
VSCALE_FUNC
#define VSCALE_FUNC(size, opt)
Definition: swscale.c:276
case_rgb
#define case_rgb(x, X, opt)
int32_t
int32_t
Definition: audioconvert.c:56
RGBA
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:42
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:482
YUV2YUVX_FUNC
#define YUV2YUVX_FUNC(opt, step)
Definition: swscale.c:190
SWS_ACCURATE_RND
@ SWS_ACCURATE_RND
Force bit-exact output.
Definition: swscale.h:155
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
SCALE_FUNCS_SSE
#define SCALE_FUNCS_SSE(opt)
Definition: swscale.c:248
APCK_SIZE
#define APCK_SIZE
Definition: swscale_internal.h:67
VSCALEX_FUNCS
#define VSCALEX_FUNCS(opt)
Definition: swscale.c:265
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
swscale.h
ff_dither4
const uint64_t ff_dither4[2]
Definition: swscale.c:33