FFmpeg
sw_scale.c
Go to the documentation of this file.
1 /*
2  *
3  * This file is part of FFmpeg.
4  *
5  * FFmpeg is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * FFmpeg is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18  */
19 
#include <string.h>

#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "libavutil/mem_internal.h"

#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"

#include "checkasm.h"
31 
/*
 * Fill a byte buffer with values from rnd().
 *
 * buf  - byte pointer to the start of the buffer (callers cast to uint8_t *).
 * size - number of bytes to fill.
 *
 * The buffer is written with one AV_WN32() store every 4 bytes, so when size
 * is not a multiple of 4 the last store runs past buf + size; callers must
 * pad their buffers accordingly.
 *
 * Both arguments are parenthesized so that arbitrary expressions can be
 * passed safely.
 */
#define randomize_buffers(buf, size)        \
    do {                                    \
        int j;                              \
        for (j = 0; j < (size); j += 4)     \
            AV_WN32((buf) + j, rnd());      \
    } while (0)
38 
/**
 * Plain C reference for the 8-bit vertical scaler under test.
 * This corresponds to the yuv2planeX_8_c function.
 *
 * @param filter     filterSize coefficients, one per source line
 * @param filterSize number of taps (and of entries in src)
 * @param src        array of filterSize pointers to source lines
 * @param dest       output line, dstW bytes are written
 * @param dstW       number of output pixels
 * @param dither     8-entry dither table
 * @param offset     added to the pixel index when picking the dither entry
 */
static void yuv2planeX_8_ref(const int16_t *filter, int filterSize,
                             const int16_t **src, uint8_t *dest, int dstW,
                             const uint8_t *dither, int offset)
{
    for (int x = 0; x < dstW; x++) {
        // Start the accumulator from the dither value for this column.
        int acc = dither[(x + offset) & 7] << 12;

        for (int tap = 0; tap < filterSize; tap++)
            acc += src[tap][x] * filter[tap];

        dest[x] = av_clip_uint8(acc >> 19);
    }
}
54 
/**
 * Compare two byte buffers with a per-byte tolerance.
 *
 * @param ref      reference buffer
 * @param test     buffer to check
 * @param n        number of bytes to compare
 * @param accuracy largest absolute per-byte difference that is accepted
 * @return 1 if any byte pair differs by more than accuracy, 0 otherwise
 */
static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
{
    const uint8_t *const ref_end = ref + n;

    while (ref != ref_end) {
        const int delta = *ref++ - *test++;

        if (abs(delta) > accuracy)
            return 1;
    }
    return 0;
}
63 
/**
 * Hex-dump a byte buffer to stdout, eight bytes per row.
 *
 * Each row starts with the index of its first byte (plus offset) and is
 * terminated by a newline, including a final partial row.
 *
 * @param p      bytes to print
 * @param len    number of bytes to print
 * @param offset value added to the printed row addresses
 */
static void print_data(uint8_t *p, size_t len, size_t offset)
{
    for (size_t row = 0; row < len; row += 8) {
        // The last row may hold fewer than eight bytes.
        const size_t row_end = (len - row < 8) ? len : row + 8;

        printf("0x%04zx: ", row + offset);
        for (size_t k = row; k < row_end; k++)
            printf("0x%02x ", (uint32_t) p[k]);
        printf("\n");
    }
}
80 
/**
 * Find the first differing byte of two buffers and dump its surroundings.
 *
 * When a mismatch is found, up to 32 bytes of each buffer are printed to
 * stdout, starting at an 8-byte aligned position at or before the mismatch
 * (one extra row of leading context when the mismatch is at index 8 or
 * later). The dump is clamped to len so that a mismatch close to the end of
 * the buffers never causes a read past them.
 *
 * @param a   first buffer
 * @param b   second buffer
 * @param len number of valid bytes in both buffers
 * @return index of the first mismatching byte, or len if the buffers match
 */
static size_t show_differences(uint8_t *a, uint8_t *b, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (a[i] != b[i]) {
            // Step back one row when possible, then align down to a row
            // boundary so the mismatch is shown with some context.
            const size_t offset    = (i >= 8 ? i - 8 : i) & ~(size_t)7;
            // Only dump bytes that actually belong to the buffers.
            const size_t remaining = len - offset;
            const size_t dump_len  = remaining < 32 ? remaining : 32;

            printf("test a:\n");
            print_data(&a[offset], dump_len, offset);
            printf("\ntest b:\n");
            print_data(&b[offset], dump_len, offset);
            printf("\n");
            return i;
        }
    }
    return len;
}
99 
100 static void check_yuv2yuv1(int accurate)
101 {
102  SwsContext *sws;
103  SwsInternal *c;
104  int osi, isi;
105  int dstW, offset;
106  size_t fail_offset;
107  const int input_sizes[] = {8, 24, 128, 144, 256, 512};
108  #define LARGEST_INPUT_SIZE 512
109 
110  const int offsets[] = {0, 3, 8, 11, 16, 19};
111  const int OFFSET_SIZES = sizeof(offsets)/sizeof(offsets[0]);
112  const char *accurate_str = (accurate) ? "accurate" : "approximate";
113 
114  declare_func(void,
115  const int16_t *src, uint8_t *dest,
116  int dstW, const uint8_t *dither, int offset);
117 
118  LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_INPUT_SIZE]);
119  LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE]);
120  LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE]);
121  LOCAL_ALIGNED_8(uint8_t, dither, [8]);
122 
123  randomize_buffers((uint8_t*)dither, 8);
124  randomize_buffers((uint8_t*)src_pixels, LARGEST_INPUT_SIZE * sizeof(int16_t));
126  if (accurate)
128  if (sws_init_context(sws, NULL, NULL) < 0)
129  fail();
130 
131  c = sws_internal(sws);
133  for (isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); ++isi) {
134  dstW = input_sizes[isi];
135  for (osi = 0; osi < OFFSET_SIZES; osi++) {
136  offset = offsets[osi];
137  if (check_func(c->yuv2plane1, "yuv2yuv1_%d_%d_%s", offset, dstW, accurate_str)){
138  memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
139  memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
140 
141  call_ref(src_pixels, dst0, dstW, dither, offset);
142  call_new(src_pixels, dst1, dstW, dither, offset);
143  if (cmp_off_by_n(dst0, dst1, dstW * sizeof(dst0[0]), accurate ? 0 : 2)) {
144  fail();
145  printf("failed: yuv2yuv1_%d_%di_%s\n", offset, dstW, accurate_str);
146  fail_offset = show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
147  printf("failing values: src: 0x%04x dither: 0x%02x dst-c: %02x dst-asm: %02x\n",
148  (int) src_pixels[fail_offset],
149  (int) dither[(fail_offset + fail_offset) & 7],
150  (int) dst0[fail_offset],
151  (int) dst1[fail_offset]);
152  }
153  if(dstW == LARGEST_INPUT_SIZE)
154  bench_new(src_pixels, dst1, dstW, dither, offset);
155  }
156  }
157  }
159 }
160 
161 static void check_yuv2yuvX(int accurate)
162 {
163  SwsContext *sws;
164  SwsInternal *c;
165  int fsi, osi, isi, i, j;
166  int dstW;
167 #define LARGEST_FILTER 16
168  // ff_yuv2planeX_8_sse2 can't handle odd filter sizes
169  const int filter_sizes[] = {2, 4, 8, 16};
170  const int FILTER_SIZES = sizeof(filter_sizes)/sizeof(filter_sizes[0]);
171 #define LARGEST_INPUT_SIZE 512
172  static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
173  const char *accurate_str = (accurate) ? "accurate" : "approximate";
174 
175  declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter,
176  int filterSize, const int16_t **src, uint8_t *dest,
177  int dstW, const uint8_t *dither, int offset);
178 
179  const int16_t **src;
180  LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
181  LOCAL_ALIGNED_16(int16_t, filter_coeff, [LARGEST_FILTER]);
182  LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE]);
183  LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE]);
185  union VFilterData{
186  const int16_t *src;
187  uint16_t coeff[8];
188  } *vFilterData;
189  uint8_t d_val = rnd();
190  memset(dither, d_val, LARGEST_INPUT_SIZE);
191  randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));
193  if (accurate)
195  if (sws_init_context(sws, NULL, NULL) < 0)
196  fail();
197 
198  c = sws_internal(sws);
200  for(isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); ++isi){
201  dstW = input_sizes[isi];
202  for(osi = 0; osi < 64; osi += 16){
203  if (dstW <= osi)
204  continue;
205  for (fsi = 0; fsi < FILTER_SIZES; ++fsi) {
206  // Generate filter coefficients for the given filter size,
207  // with some properties:
208  // - The coefficients add up to the intended sum (4096, 1<<12)
209  // - The coefficients contain negative values
210  // - The filter intermediates don't overflow for worst case
211  // inputs (all positive coefficients are coupled with
212  // input_max and all negative coefficients with input_min,
213  // or vice versa).
214  // Produce a filter with all coefficients set to
215  // -((1<<12)/(filter_size-1)) except for one (randomly chosen)
216  // which is set to ((1<<13)-1).
217  for (i = 0; i < filter_sizes[fsi]; ++i)
218  filter_coeff[i] = -((1 << 12) / (filter_sizes[fsi] - 1));
219  filter_coeff[rnd() % filter_sizes[fsi]] = (1 << 13) - 1;
220 
221  src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]);
222  vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData));
223  memset(vFilterData, 0, (filter_sizes[fsi] + 2) * sizeof(union VFilterData));
224  for (i = 0; i < filter_sizes[fsi]; ++i) {
225  src[i] = &src_pixels[i * LARGEST_INPUT_SIZE];
226  vFilterData[i].src = src[i] - osi;
227  for(j = 0; j < 4; ++j)
228  vFilterData[i].coeff[j + 4] = filter_coeff[i];
229  }
230  if (check_func(c->yuv2planeX, "yuv2yuvX_%d_%d_%d_%s", filter_sizes[fsi], osi, dstW, accurate_str)){
231  // use vFilterData for the mmx function
232  const int16_t *filter = c->use_mmx_vfilter ? (const int16_t*)vFilterData : &filter_coeff[0];
233  memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
234  memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
235 
236  // We can't use call_ref here, because we don't know if use_mmx_vfilter was set for that
237  // function or not, so we can't pass it the parameters correctly.
238  yuv2planeX_8_ref(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
239 
240  call_new(filter, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
241  if (cmp_off_by_n(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]), accurate ? 0 : 2)) {
242  fail();
243  printf("failed: yuv2yuvX_%d_%d_%d_%s\n", filter_sizes[fsi], osi, dstW, accurate_str);
244  show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
245  }
246  if(dstW == LARGEST_INPUT_SIZE)
247  bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
248 
249  }
250  av_freep(&src);
251  av_freep(&vFilterData);
252  }
253  }
254  }
256 #undef FILTER_SIZES
257 }
258 
259 #undef SRC_PIXELS
260 #define SRC_PIXELS 512
261 
262 static void check_hscale(void)
263 {
264 #define MAX_FILTER_WIDTH 40
265 #define FILTER_SIZES 6
266  static const int filter_sizes[FILTER_SIZES] = { 4, 8, 12, 16, 32, 40 };
267 
268 #define HSCALE_PAIRS 2
269  static const int hscale_pairs[HSCALE_PAIRS][2] = {
270  { 8, 14 },
271  { 8, 18 },
272  };
273 
274 #define LARGEST_INPUT_SIZE 512
275  static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
276 
277  int i, j, fsi, hpi, width, dstWi;
278  SwsContext *sws;
279  SwsInternal *c;
280 
281  // padded
282  LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);
283  LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);
284  LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);
285 
286  // padded
288  LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);
289  LOCAL_ALIGNED_32(int16_t, filterAvx2, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);
290  LOCAL_ALIGNED_32(int32_t, filterPosAvx, [SRC_PIXELS]);
291 
292  // The dst parameter here is either int16_t or int32_t but we use void* to
293  // just cover both cases.
294  declare_func(void, void *c, void *dst, int dstW,
295  const uint8_t *src, const int16_t *filter,
296  const int32_t *filterPos, int filterSize);
297 
299  if (sws_init_context(sws, NULL, NULL) < 0)
300  fail();
301 
302  c = sws_internal(sws);
304 
305  for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {
306  for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
307  for (dstWi = 0; dstWi < FF_ARRAY_ELEMS(input_sizes); dstWi++) {
308  width = filter_sizes[fsi];
309 
310  c->srcBpc = hscale_pairs[hpi][0];
311  c->dstBpc = hscale_pairs[hpi][1];
312  c->hLumFilterSize = c->hChrFilterSize = width;
313 
314  for (i = 0; i < SRC_PIXELS; i++) {
315  filterPos[i] = i;
316  filterPosAvx[i] = i;
317 
318  // These filter cofficients are chosen to try break two corner
319  // cases, namely:
320  //
321  // - Negative filter coefficients. The filters output signed
322  // values, and it should be possible to end up with negative
323  // output values.
324  //
325  // - Positive clipping. The hscale filter function has clipping
326  // at (1<<15) - 1
327  //
328  // The coefficients sum to the 1.0 point for the hscale
329  // functions (1 << 14).
330 
331  for (j = 0; j < width; j++) {
332  filter[i * width + j] = -((1 << 14) / (width - 1));
333  }
334  filter[i * width + (rnd() % width)] = ((1 << 15) - 1);
335  }
336 
337  for (i = 0; i < MAX_FILTER_WIDTH; i++) {
338  // These values should be unused in SIMD implementations but
339  // may still be read, random coefficients here should help show
340  // issues where they are used in error.
341 
342  filter[SRC_PIXELS * width + i] = rnd();
343  }
344  sws->dst_w = c->chrDstW = input_sizes[dstWi];
346  memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
347  ff_shuffle_filter_coefficients(c, filterPosAvx, width, filterAvx2, sws->dst_w);
348 
349  if (check_func(c->hcScale, "hscale_%d_to_%d__fs_%d_dstW_%d", c->srcBpc, c->dstBpc + 1, width, sws->dst_w)) {
350  memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
351  memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));
352 
353  call_ref(NULL, dst0, sws->dst_w, src, filter, filterPos, width);
354  call_new(NULL, dst1, sws->dst_w, src, filterAvx2, filterPosAvx, width);
355  if (memcmp(dst0, dst1, sws->dst_w * sizeof(dst0[0])))
356  fail();
357  bench_new(NULL, dst0, sws->dst_w, src, filter, filterPosAvx, width);
358  }
359  }
360  }
361  }
363 }
364 
/**
 * checkasm entry point for the software scaler tests.
 *
 * Runs each group of checks and reports it under its own name; the yuv2yuv1
 * and yuv2yuvX groups are run in both approximate (0) and accurate (1) mode.
 */
void checkasm_check_sw_scale(void)
{
    check_hscale();
    report("hscale");
    check_yuv2yuv1(0);
    check_yuv2yuv1(1);
    report("yuv2yuv1");
    check_yuv2yuvX(0);
    check_yuv2yuvX(1);
    report("yuv2yuvX");
}
FILTER_SIZES
#define FILTER_SIZES
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:190
check_yuv2yuv1
static void check_yuv2yuv1(int accurate)
Definition: sw_scale.c:100
mem_internal.h
sws_freeContext
void sws_freeContext(SwsContext *swsContext)
Free the swscaler context swsContext.
Definition: utils.c:2447
check_func
#define check_func(func,...)
Definition: checkasm.h:184
b
#define b
Definition: input.c:41
test
Definition: idctdsp.c:35
SwsContext::flags
unsigned flags
Bitmask of SWS_*.
Definition: swscale.h:187
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
call_ref
#define call_ref(...)
Definition: checkasm.h:199
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
print_data
static void print_data(uint8_t *p, size_t len, size_t offset)
Definition: sw_scale.c:64
fail
#define fail()
Definition: checkasm.h:193
checkasm.h
sws_init_context
av_warn_unused_result int sws_init_context(SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter)
Initialize the swscaler context sws_context.
Definition: utils.c:2082
val
static double val(void *priv, double ch)
Definition: aeval.c:77
check_hscale
static void check_hscale(void)
Definition: sw_scale.c:262
rnd
#define rnd()
Definition: checkasm.h:177
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
ff_shuffle_filter_coefficients
int ff_shuffle_filter_coefficients(SwsInternal *c, int *filterPos, int filterSize, int16_t *filter, int dstW)
Definition: utils.c:316
intreadwrite.h
offsets
static const int offsets[]
Definition: hevc_pel.c:34
LARGEST_FILTER
#define LARGEST_FILTER
cmp_off_by_n
static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
Definition: sw_scale.c:55
input_sizes
static const int input_sizes[]
Definition: sw_rgb.c:347
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:130
yuv2planeX_8_ref
static void yuv2planeX_8_ref(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
Definition: sw_scale.c:39
LOCAL_ALIGNED_8
#define LOCAL_ALIGNED_8(t, v,...)
Definition: mem_internal.h:128
HSCALE_PAIRS
#define HSCALE_PAIRS
SRC_PIXELS
#define SRC_PIXELS
Definition: sw_scale.c:260
call_new
#define call_new(...)
Definition: checkasm.h:302
NULL
#define NULL
Definition: coverity.c:32
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:132
abs
#define abs(x)
Definition: cuda_runtime.h:35
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
sws_alloc_context
SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext and set its fields to default values.
Definition: utils.c:1227
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
check_yuv2yuvX
static void check_yuv2yuvX(int accurate)
Definition: sw_scale.c:161
printf
printf("static const uint8_t my_array[100] = {\n")
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
show_differences
static size_t show_differences(uint8_t *a, uint8_t *b, size_t len)
Definition: sw_scale.c:81
sws
static SwsContext * sws[3]
Definition: swscale.c:69
report
#define report
Definition: checkasm.h:196
bench_new
#define bench_new(...)
Definition: checkasm.h:373
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
ff_sws_init_scale
void ff_sws_init_scale(SwsInternal *c)
Definition: swscale.c:691
common.h
LARGEST_INPUT_SIZE
#define LARGEST_INPUT_SIZE
swscale_internal.h
len
int len
Definition: vorbis_enc_data.h:426
SwsInternal
Definition: swscale_internal.h:317
randomize_buffers
#define randomize_buffers(buf, size)
Definition: sw_scale.c:32
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:30
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:117
av_clip_uint8
#define av_clip_uint8
Definition: common.h:106
mem.h
MAX_FILTER_WIDTH
#define MAX_FILTER_WIDTH
SwsContext::dst_w
int dst_w
Definition: swscale.h:221
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:188
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
int32_t
int32_t
Definition: audioconvert.c:56
coeff
static const double coeff[2][5]
Definition: vf_owdenoise.c:80
sws_internal
static SwsInternal * sws_internal(const SwsContext *sws)
Definition: swscale_internal.h:74
SWS_ACCURATE_RND
@ SWS_ACCURATE_RND
Force bit-exact output.
Definition: swscale.h:155
width
#define width
Definition: dsp.h:85
checkasm_check_sw_scale
void checkasm_check_sw_scale(void)
Definition: sw_scale.c:365
SwsContext
Main external API structure.
Definition: swscale.h:174
src
#define src
Definition: vp8dsp.c:248
swscale.h
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:62