FFmpeg
slice.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/mem.h"
22 #include "swscale_internal.h"
23 
24 static void free_lines(SwsSlice *s)
25 {
26  int i;
27  for (i = 0; i < 2; ++i) {
28  int n = s->plane[i].available_lines;
29  int j;
30  for (j = 0; j < n; ++j) {
31  av_freep(&s->plane[i].line[j]);
32  if (s->is_ring)
33  s->plane[i].line[j+n] = NULL;
34  }
35  }
36 
37  for (i = 0; i < 4; ++i)
38  memset(s->plane[i].line, 0, sizeof(uint8_t*) * s->plane[i].available_lines * (s->is_ring ? 3 : 1));
39  s->should_free_lines = 0;
40 }
41 
42 /*
43  slice lines contains extra bytes for vectorial code thus @size
44  is the allocated memory size and @width is the number of pixels
45 */
46 static int alloc_lines(SwsSlice *s, int size, int width)
47 {
48  int i;
49  int idx[2] = {3, 2};
50 
51  s->should_free_lines = 1;
52  s->width = width;
53 
54  for (i = 0; i < 2; ++i) {
55  int n = s->plane[i].available_lines;
56  int j;
57  int ii = idx[i];
58 
59  av_assert0(n == s->plane[ii].available_lines);
60  for (j = 0; j < n; ++j) {
61  // chroma plane line U and V are expected to be contiguous in memory
62  // by mmx vertical scaler code
63  s->plane[i].line[j] = av_malloc(size * 2 + 32);
64  if (!s->plane[i].line[j]) {
65  free_lines(s);
66  return AVERROR(ENOMEM);
67  }
68  s->plane[ii].line[j] = s->plane[i].line[j] + size + 16;
69  if (s->is_ring) {
70  s->plane[i].line[j+n] = s->plane[i].line[j];
71  s->plane[ii].line[j+n] = s->plane[ii].line[j];
72  }
73  }
74  }
75 
76  return 0;
77 }
78 
79 static int alloc_slice(SwsSlice *s, enum AVPixelFormat fmt, int lumLines, int chrLines, int h_sub_sample, int v_sub_sample, int ring)
80 {
81  int i;
82  int size[4] = { lumLines,
83  chrLines,
84  chrLines,
85  lumLines };
86 
87  s->h_chr_sub_sample = h_sub_sample;
88  s->v_chr_sub_sample = v_sub_sample;
89  s->fmt = fmt;
90  s->is_ring = ring;
91  s->should_free_lines = 0;
92 
93  for (i = 0; i < 4; ++i) {
94  int n = size[i] * ( ring == 0 ? 1 : 3);
95  s->plane[i].line = av_calloc(n, sizeof(*s->plane[i].line));
96  if (!s->plane[i].line)
97  return AVERROR(ENOMEM);
98 
99  s->plane[i].tmp = ring ? s->plane[i].line + size[i] * 2 : NULL;
100  s->plane[i].available_lines = size[i];
101  s->plane[i].sliceY = 0;
102  s->plane[i].sliceH = 0;
103  }
104  return 0;
105 }
106 
107 static void free_slice(SwsSlice *s)
108 {
109  int i;
110  if (s) {
111  if (s->should_free_lines)
112  free_lines(s);
113  for (i = 0; i < 4; ++i) {
114  av_freep(&s->plane[i].line);
115  s->plane[i].tmp = NULL;
116  }
117  }
118 }
119 
120 int ff_rotate_slice(SwsSlice *s, int lum, int chr)
121 {
122  int i;
123  if (lum) {
124  for (i = 0; i < 4; i+=3) {
125  int n = s->plane[i].available_lines;
126  int l = lum - s->plane[i].sliceY;
127 
128  if (l >= n * 2) {
129  s->plane[i].sliceY += n;
130  s->plane[i].sliceH -= n;
131  }
132  }
133  }
134  if (chr) {
135  for (i = 1; i < 3; ++i) {
136  int n = s->plane[i].available_lines;
137  int l = chr - s->plane[i].sliceY;
138 
139  if (l >= n * 2) {
140  s->plane[i].sliceY += n;
141  s->plane[i].sliceH -= n;
142  }
143  }
144  }
145  return 0;
146 }
147 
148 int ff_init_slice_from_src(SwsSlice * s, uint8_t *const src[4], const int stride[4],
149  int srcW, int lumY, int lumH, int chrY, int chrH, int relative)
150 {
151  int i = 0;
152 
153  const int start[4] = {lumY,
154  chrY,
155  chrY,
156  lumY};
157 
158  const int end[4] = {lumY +lumH,
159  chrY + chrH,
160  chrY + chrH,
161  lumY + lumH};
162 
163  s->width = srcW;
164 
165  for (i = 0; i < 4 && src[i] != NULL; ++i) {
166  uint8_t *const src_i = src[i] + (relative ? 0 : start[i]) * stride[i];
167  int j;
168  int first = s->plane[i].sliceY;
169  int n = s->plane[i].available_lines;
170  int lines = end[i] - start[i];
171  int tot_lines = end[i] - first;
172 
173  if (start[i] >= first && n >= tot_lines) {
174  s->plane[i].sliceH = FFMAX(tot_lines, s->plane[i].sliceH);
175  for (j = 0; j < lines; j+= 1)
176  s->plane[i].line[start[i] - first + j] = src_i + j * stride[i];
177  } else {
178  s->plane[i].sliceY = start[i];
179  lines = lines > n ? n : lines;
180  s->plane[i].sliceH = lines;
181  for (j = 0; j < lines; j+= 1)
182  s->plane[i].line[j] = src_i + j * stride[i];
183  }
184 
185  }
186 
187  return 0;
188 }
189 
190 static void fill_ones(SwsSlice *s, int n, int bpc)
191 {
192  int i, j, k, size, end;
193 
194  for (i = 0; i < 4; ++i) {
195  size = s->plane[i].available_lines;
196  for (j = 0; j < size; ++j) {
197  if (bpc == 16) {
198  end = (n>>1) + 1;
199  for (k = 0; k < end; ++k)
200  ((int32_t*)(s->plane[i].line[j]))[k] = 1<<18;
201  } else if (bpc == 32) {
202  end = (n>>2) + 1;
203  for (k = 0; k < end; ++k)
204  ((int64_t*)(s->plane[i].line[j]))[k] = 1LL<<34;
205  } else {
206  end = n + 1;
207  for (k = 0; k < end; ++k)
208  ((int16_t*)(s->plane[i].line[j]))[k] = 1<<14;
209  }
210  }
211  }
212 }
213 
214 /*
215  Calculates the minimum ring buffer size, it should be able to store vFilterSize
216  more n lines where n is the max difference between each adjacent slice which
217  outputs a line.
218  The n lines are needed only when there is not enough src lines to output a single
219  dst line, then we should buffer these lines to process them on the next call to scale.
220 */
221 static void get_min_buffer_size(SwsInternal *c, int *out_lum_size, int *out_chr_size)
222 {
223  int lumY;
224  int dstH = c->opts.dst_h;
225  int chrDstH = c->chrDstH;
226  int *lumFilterPos = c->vLumFilterPos;
227  int *chrFilterPos = c->vChrFilterPos;
228  int lumFilterSize = c->vLumFilterSize;
229  int chrFilterSize = c->vChrFilterSize;
230  int chrSubSample = c->chrSrcVSubSample;
231 
232  *out_lum_size = lumFilterSize;
233  *out_chr_size = chrFilterSize;
234 
235  for (lumY = 0; lumY < dstH; lumY++) {
236  int chrY = (int64_t)lumY * chrDstH / dstH;
237  int nextSlice = FFMAX(lumFilterPos[lumY] + lumFilterSize - 1,
238  ((chrFilterPos[chrY] + chrFilterSize - 1)
239  << chrSubSample));
240 
241  nextSlice >>= chrSubSample;
242  nextSlice <<= chrSubSample;
243  (*out_lum_size) = FFMAX((*out_lum_size), nextSlice - lumFilterPos[lumY]);
244  (*out_chr_size) = FFMAX((*out_chr_size), (nextSlice >> chrSubSample) - chrFilterPos[chrY]);
245  }
246 }
247 
248 
249 
251 {
252  int i;
253  int index;
254  int num_ydesc;
255  int num_cdesc;
256  int num_vdesc = isPlanarYUV(c->opts.dst_format) && !isGray(c->opts.dst_format) ? 2 : 1;
257  int need_lum_conv = c->lumToYV12 || c->readLumPlanar || c->alpToYV12 || c->readAlpPlanar;
258  int need_chr_conv = c->chrToYV12 || c->readChrPlanar;
259  int need_gamma = c->is_internal_gamma;
260  int srcIdx, dstIdx;
261  int dst_stride = FFALIGN(c->opts.dst_w * sizeof(int16_t) + 66, 16);
262 
263  uint32_t * pal = usePal(c->opts.src_format) ? c->pal_yuv : (uint32_t*)c->input_rgb2yuv_table;
264  int res = 0;
265 
266  int lumBufSize;
267  int chrBufSize;
268 
269  get_min_buffer_size(c, &lumBufSize, &chrBufSize);
270  lumBufSize = FFMAX(lumBufSize, c->vLumFilterSize + MAX_LINES_AHEAD);
271  chrBufSize = FFMAX(chrBufSize, c->vChrFilterSize + MAX_LINES_AHEAD);
272 
273  if (c->dstBpc == 16)
274  dst_stride <<= 1;
275 
276  if (c->dstBpc == 32)
277  dst_stride <<= 2;
278 
279  num_ydesc = need_lum_conv ? 2 : 1;
280  num_cdesc = need_chr_conv ? 2 : 1;
281 
282  c->numSlice = FFMAX(num_ydesc, num_cdesc) + 2;
283  c->numDesc = num_ydesc + num_cdesc + num_vdesc + (need_gamma ? 2 : 0);
284  c->descIndex[0] = num_ydesc + (need_gamma ? 1 : 0);
285  c->descIndex[1] = num_ydesc + num_cdesc + (need_gamma ? 1 : 0);
286 
287  if (isFloat16(c->opts.src_format)) {
288  c->h2f_tables = av_malloc(sizeof(*c->h2f_tables));
289  if (!c->h2f_tables)
290  return AVERROR(ENOMEM);
291  ff_init_half2float_tables(c->h2f_tables);
292  c->input_opaque = c->h2f_tables;
293  }
294 
295  c->desc = av_calloc(c->numDesc, sizeof(*c->desc));
296  if (!c->desc)
297  return AVERROR(ENOMEM);
298  c->slice = av_calloc(c->numSlice, sizeof(*c->slice));
299  if (!c->slice) {
300  res = AVERROR(ENOMEM);
301  goto cleanup;
302  }
303 
304  res = alloc_slice(&c->slice[0], c->opts.src_format, c->opts.src_h, c->chrSrcH, c->chrSrcHSubSample, c->chrSrcVSubSample, 0);
305  if (res < 0) goto cleanup;
306  for (i = 1; i < c->numSlice-2; ++i) {
307  res = alloc_slice(&c->slice[i], c->opts.src_format, lumBufSize, chrBufSize, c->chrSrcHSubSample, c->chrSrcVSubSample, 0);
308  if (res < 0) goto cleanup;
309  res = alloc_lines(&c->slice[i], FFALIGN(c->opts.src_w*2+78, 16), c->opts.src_w);
310  if (res < 0) goto cleanup;
311  }
312  // horizontal scaler output
313  res = alloc_slice(&c->slice[i], c->opts.src_format, lumBufSize, chrBufSize, c->chrDstHSubSample, c->chrDstVSubSample, 1);
314  if (res < 0) goto cleanup;
315  res = alloc_lines(&c->slice[i], dst_stride, c->opts.dst_w);
316  if (res < 0) goto cleanup;
317 
318  fill_ones(&c->slice[i], dst_stride>>1, c->dstBpc);
319 
320  // vertical scaler output
321  ++i;
322  res = alloc_slice(&c->slice[i], c->opts.dst_format, c->opts.dst_h, c->chrDstH, c->chrDstHSubSample, c->chrDstVSubSample, 0);
323  if (res < 0) goto cleanup;
324 
325  index = 0;
326  srcIdx = 0;
327  dstIdx = 1;
328 
329  if (need_gamma) {
330  res = ff_init_gamma_convert(c->desc + index, c->slice + srcIdx, c->inv_gamma);
331  if (res < 0) goto cleanup;
332  ++index;
333  }
334 
335  if (need_lum_conv) {
336  res = ff_init_desc_fmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal);
337  if (res < 0) goto cleanup;
338  c->desc[index].alpha = c->needAlpha;
339  ++index;
340  srcIdx = dstIdx;
341  }
342 
343 
344  dstIdx = FFMAX(num_ydesc, num_cdesc);
345  res = ff_init_desc_hscale(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], c->hLumFilter, c->hLumFilterPos, c->hLumFilterSize, c->lumXInc);
346  if (res < 0) goto cleanup;
347  c->desc[index].alpha = c->needAlpha;
348 
349 
350  ++index;
351  {
352  srcIdx = 0;
353  dstIdx = 1;
354  if (need_chr_conv) {
355  res = ff_init_desc_cfmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal);
356  if (res < 0) goto cleanup;
357  ++index;
358  srcIdx = dstIdx;
359  }
360 
361  dstIdx = FFMAX(num_ydesc, num_cdesc);
362  if (c->needs_hcscale)
363  res = ff_init_desc_chscale(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], c->hChrFilter, c->hChrFilterPos, c->hChrFilterSize, c->chrXInc);
364  else
365  res = ff_init_desc_no_chr(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx]);
366  if (res < 0) goto cleanup;
367  }
368 
369  ++index;
370  {
371  srcIdx = c->numSlice - 2;
372  dstIdx = c->numSlice - 1;
373  res = ff_init_vscale(c, c->desc + index, c->slice + srcIdx, c->slice + dstIdx);
374  if (res < 0) goto cleanup;
375  }
376 
377  ++index;
378  if (need_gamma) {
379  res = ff_init_gamma_convert(c->desc + index, c->slice + dstIdx, c->gamma);
380  if (res < 0) goto cleanup;
381  }
382 
383  return 0;
384 
385 cleanup:
387  return res;
388 }
389 
391 {
392  int i;
393  if (c->desc) {
394  for (i = 0; i < c->numDesc; ++i)
395  av_freep(&c->desc[i].instance);
396  av_freep(&c->desc);
397  }
398 
399  if (c->slice) {
400  for (i = 0; i < c->numSlice; ++i)
401  free_slice(&c->slice[i]);
402  av_freep(&c->slice);
403  }
404  av_freep(&c->h2f_tables);
405  return 0;
406 }
ff_init_desc_cfmt_convert
int ff_init_desc_cfmt_convert(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint32_t *pal)
initializes chr pixel format conversion descriptor
Definition: hscale.c:236
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
fill_ones
static void fill_ones(SwsSlice *s, int n, int bpc)
Definition: slice.c:190
ff_rotate_slice
int ff_rotate_slice(SwsSlice *s, int lum, int chr)
Definition: slice.c:120
int64_t
long long int64_t
Definition: coverity.c:34
cleanup
static av_cold void cleanup(FlashSV2Context *s)
Definition: flashsv2enc.c:130
ff_init_desc_hscale
int ff_init_desc_hscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int *filter_pos, int filter_size, int xInc)
initializes lum horizontal scaling descriptor
Definition: hscale.c:145
isGray
#define isGray(x)
Definition: swscale.c:42
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
ff_init_desc_no_chr
int ff_init_desc_no_chr(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst)
Definition: hscale.c:282
first
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
Definition: rate_distortion.txt:12
s
#define s(width, name)
Definition: cbs_vp9.c:198
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
ff_free_filters
int ff_free_filters(SwsInternal *c)
Definition: slice.c:390
NULL
#define NULL
Definition: coverity.c:32
isFloat16
static av_always_inline int isFloat16(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:885
ff_init_desc_chscale
int ff_init_desc_chscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int *filter_pos, int filter_size, int xInc)
initializes chr horizontal scaling descriptor
Definition: hscale.c:251
index
int index
Definition: gxfenc.c:90
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
free_lines
static void free_lines(SwsSlice *s)
Definition: slice.c:24
alloc_lines
static int alloc_lines(SwsSlice *s, int size, int width)
Definition: slice.c:46
usePal
static av_always_inline int usePal(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:932
size
int size
Definition: twinvq_data.h:10344
ff_init_vscale
int ff_init_vscale(SwsInternal *c, SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst)
initializes vertical scaling descriptors
Definition: vscale.c:214
free_slice
static void free_slice(SwsSlice *s)
Definition: slice.c:107
alloc_slice
static int alloc_slice(SwsSlice *s, enum AVPixelFormat fmt, int lumLines, int chrLines, int h_sub_sample, int v_sub_sample, int ring)
Definition: slice.c:79
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
ff_init_gamma_convert
int ff_init_gamma_convert(SwsFilterDescriptor *desc, SwsSlice *src, uint16_t *table)
initializes gamma conversion descriptor
Definition: gamma.c:59
swscale_internal.h
get_min_buffer_size
static void get_min_buffer_size(SwsInternal *c, int *out_lum_size, int *out_chr_size)
Definition: slice.c:221
SwsSlice
Struct which defines a slice of an image to be scaled or an output for a scaled slice.
Definition: swscale_internal.h:1115
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
stride
#define stride
Definition: h264pred_template.c:537
ff_init_slice_from_src
int ff_init_slice_from_src(SwsSlice *s, uint8_t *const src[4], const int stride[4], int srcW, int lumY, int lumH, int chrY, int chrH, int relative)
Definition: slice.c:148
SwsInternal
Definition: swscale_internal.h:331
ff_init_half2float_tables
void ff_init_half2float_tables(Half2FloatTables *t)
Definition: half2float.c:39
isPlanarYUV
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
Definition: vf_dnn_processing.c:162
mem.h
ff_init_filters
int ff_init_filters(SwsInternal *c)
Definition: slice.c:250
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
lum
static double lum(void *priv, double x, double y, int plane)
Definition: vf_fftfilt.c:107
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
ff_init_desc_fmt_convert
int ff_init_desc_fmt_convert(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint32_t *pal)
initializes lum pixel format conversion descriptor
Definition: hscale.c:128
int32_t
int32_t
Definition: audioconvert.c:56
width
#define width
Definition: dsp.h:85
MAX_LINES_AHEAD
#define MAX_LINES_AHEAD
Definition: swscale_internal.h:1195
src
#define src
Definition: vp8dsp.c:248