FFmpeg
dnn_io_proc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "dnn_io_proc.h"
22 #include "libavutil/imgutils.h"
23 #include "libswscale/swscale.h"
24 #include "libavutil/avassert.h"
26 
28 {
29  switch (dt)
30  {
31  case DNN_FLOAT:
32  return sizeof(float);
33  case DNN_UINT8:
34  return sizeof(uint8_t);
35  default:
36  av_assert0(!"not supported yet.");
37  return 1;
38  }
39 }
40 
/*
 * ff_proc_from_dnn_to_frame: write the data of a DNN output tensor into the
 * planes of an AVFrame, converting the element type with swscale.
 *
 * NOTE(review): several source lines are missing from this listing (the
 * embedded line numbers skip). The function header, listing line 41, is one
 * of them; the cross-reference index at the end of this page records it as
 *   int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
 * The other gaps are flagged inline.
 *
 * frame    destination; format, width, height, data[] and linesize[] are read
 * output   source tensor; dt, scale, mean, layout, dims[1] and data are read
 * log_ctx  handed to av_log() for error messages
 *
 * Returns 0 on success, AVERROR(EINVAL) when the line size cannot be computed
 * or a scale context cannot be created, AVERROR(ENOSYS) for an unsupported
 * dt/scale/mean combination or frame format, and AVERROR(ENOMEM) when the
 * DL_NCHW staging buffer cannot be allocated.
 */
42 {
43  struct SwsContext *sws_ctx;
44  int ret = 0;
45  int linesize[4] = { 0 };
46  void **dst_data = NULL;
    /* Staging buffer: only allocated for the DL_NCHW layout, released at err:. */
47  void *middle_data = NULL;
48  uint8_t *planar_data[4] = { 0 };
    /* Bytes in one 8-bit plane of width x height samples. */
49  int plane_size = frame->width * frame->height * sizeof(uint8_t);
50  enum AVPixelFormat src_fmt = AV_PIX_FMT_NONE;
51  int src_datatype_size = get_datatype_size(output->dt);
52 
53  int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
54  if (bytewidth < 0) {
55  return AVERROR(EINVAL);
56  }
    /* Pick the gray pixel format that describes one tensor element. */
57  /* scale == 1 and mean == 0 and dt == UINT8: passthrough */
58  if (fabsf(output->scale - 1) < 1e-6f && fabsf(output->mean) < 1e-6 && output->dt == DNN_UINT8)
59  src_fmt = AV_PIX_FMT_GRAY8;
60  /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */
61  else if ((fabsf(output->scale - 255) < 1e-6f || fabsf(output->scale) < 1e-6f) &&
62  fabsf(output->mean) < 1e-6 && output->dt == DNN_FLOAT)
63  src_fmt = AV_PIX_FMT_GRAYF32;
64  else {
    /* NOTE(review): the message reads "doesn't type: UINT8" -- a word appears
     * to be missing, and "UINT8" is printed whatever output->dt actually is. */
65  av_log(log_ctx, AV_LOG_ERROR, "dnn_process output data doesn't type: UINT8 "
66  "scale: %f, mean: %f\n", output->scale, output->mean);
67  return AVERROR(ENOSYS);
68  }
69 
    /* Default: convert straight into the frame's own planes. */
70  dst_data = (void **)frame->data;
71  linesize[0] = frame->linesize[0];
72  if (output->layout == DL_NCHW) {
    /* For NCHW the converted bytes go to a staging buffer first.
     * Presumably dims[1] is the channel count for this layout -- confirm. */
73  middle_data = av_malloc(plane_size * output->dims[1]);
74  if (!middle_data) {
75  ret = AVERROR(ENOMEM);
76  goto err;
77  }
    /* NOTE(review): dst_data now points at a single pointer, yet it is later
     * passed to sws_scale() as a plane-pointer array -- verify that only
     * element 0 is ever read on that path. */
78  dst_data = &middle_data;
79  linesize[0] = frame->width * 3;
80  }
81 
82  switch (frame->format) {
83  case AV_PIX_FMT_RGB24:
84  case AV_PIX_FMT_BGR24:
    /* The image is described to swscale as a gray image three times as wide,
     * so every byte/element is converted on its own. */
85  sws_ctx = sws_getContext(frame->width * 3,
86  frame->height,
87  src_fmt,
88  frame->width * 3,
89  frame->height,
    /* NOTE(review): listing line 90 (the destination pixel format argument)
     * is missing from this extract. */
91  0, NULL, NULL, NULL);
92  if (!sws_ctx) {
93  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
94  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
95  av_get_pix_fmt_name(src_fmt), frame->width * 3, frame->height,
    /* NOTE(review): listing line 96 (the remaining av_log arguments and the
     * end of the call) is missing from this extract. */
97  ret = AVERROR(EINVAL);
98  goto err;
99  }
    /* NOTE(review): the return value of sws_scale() is not checked. */
100  sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
101  (const int[4]){frame->width * 3 * src_datatype_size, 0, 0, 0}, 0, frame->height,
102  (uint8_t * const*)dst_data, linesize);
103  sws_freeContext(sws_ctx);
104  // convert data from planar to packed
105  if (output->layout == DL_NCHW) {
106  sws_ctx = sws_getContext(frame->width,
107  frame->height,
    /* NOTE(review): listing line 108 (the source pixel format argument) is
     * missing from this extract. */
109  frame->width,
110  frame->height,
111  frame->format,
112  0, NULL, NULL, NULL);
113  if (!sws_ctx) {
114  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
115  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
    /* NOTE(review): listing lines 116-117 (the av_log arguments) are missing
     * from this extract. */
118  ret = AVERROR(EINVAL);
119  goto err;
120  }
    /* Select which third of the staging buffer feeds each source plane; the
     * choice depends on the frame's channel order. The resulting plane order
     * looks like G, B, R -- the format argument itself is on a missing line,
     * so confirm. */
121  if (frame->format == AV_PIX_FMT_RGB24) {
122  planar_data[0] = (uint8_t *)middle_data + plane_size;
123  planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
124  planar_data[2] = (uint8_t *)middle_data;
125  } else if (frame->format == AV_PIX_FMT_BGR24) {
126  planar_data[0] = (uint8_t *)middle_data + plane_size;
127  planar_data[1] = (uint8_t *)middle_data;
128  planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
129  }
    /* Interleave the three staged planes into the packed frame. */
130  sws_scale(sws_ctx, (const uint8_t * const *)planar_data,
131  (const int [4]){frame->width * sizeof(uint8_t),
132  frame->width * sizeof(uint8_t),
133  frame->width * sizeof(uint8_t), 0},
134  0, frame->height, frame->data, frame->linesize);
135  sws_freeContext(sws_ctx);
136  }
137  break;
138  case AV_PIX_FMT_GRAYF32:
    /* NOTE(review): listing line 139 (the start of this call, presumably
     * av_image_copy_plane with the frame plane as destination) is missing
     * from this extract -- confirm against the real source. */
140  output->data, bytewidth,
141  bytewidth, frame->height);
142  break;
143  case AV_PIX_FMT_YUV420P:
144  case AV_PIX_FMT_YUV422P:
145  case AV_PIX_FMT_YUV444P:
146  case AV_PIX_FMT_YUV410P:
147  case AV_PIX_FMT_YUV411P:
148  case AV_PIX_FMT_GRAY8:
149  case AV_PIX_FMT_NV12:
    /* For these formats only a single plane is passed as source to sws_scale(). */
150  sws_ctx = sws_getContext(frame->width,
151  frame->height,
    /* NOTE(review): listing line 152 (source pixel format) is missing. */
153  frame->width,
154  frame->height,
    /* NOTE(review): listing line 155 (destination pixel format) is missing. */
156  0, NULL, NULL, NULL);
157  if (!sws_ctx) {
158  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
159  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
    /* NOTE(review): listing lines 160-161 (the av_log arguments) are missing. */
162  ret = AVERROR(EINVAL);
163  goto err;
164  }
165  sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
166  (const int[4]){frame->width * src_datatype_size, 0, 0, 0}, 0, frame->height,
167  (uint8_t * const*)frame->data, frame->linesize);
168  sws_freeContext(sws_ctx);
169  break;
170  default:
    /* NOTE(review): listing line 171 is missing from this extract. */
172  ret = AVERROR(ENOSYS);
173  goto err;
174  }
175 
    /* Common exit; middle_data is still NULL here unless the NCHW staging
     * buffer was allocated. */
176 err:
177  av_free(middle_data);
178  return ret;
179 }
180 
/*
 * ff_proc_from_frame_to_dnn: fill a DNN input tensor from the planes of an
 * AVFrame, converting the element type with swscale.
 *
 * NOTE(review): several source lines are missing from this listing (the
 * embedded line numbers skip). The function header, listing line 181, is one
 * of them; the cross-reference index at the end of this page records it as
 *   int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
 * The other gaps are flagged inline.
 *
 * frame    source; format, width, height, data[] and linesize[] are read
 * input    destination tensor; dt, scale, mean, layout and dims[1] are read,
 *          data is written
 * log_ctx  handed to av_log() for error messages
 *
 * Returns 0 on success, AVERROR(EINVAL) when the line size cannot be computed
 * or a scale context cannot be created, AVERROR(ENOSYS) for an unsupported
 * dt/scale/mean combination or frame format, and AVERROR(ENOMEM) when the
 * DL_NCHW staging buffer cannot be allocated.
 */
182 {
183  struct SwsContext *sws_ctx;
184  int ret = 0;
185  int linesize[4] = { 0 };
186  void **src_data = NULL;
    /* Staging buffer: only allocated for the DL_NCHW layout, released at err:. */
187  void *middle_data = NULL;
188  uint8_t *planar_data[4] = { 0 };
    /* Bytes in one 8-bit plane of width x height samples. */
189  int plane_size = frame->width * frame->height * sizeof(uint8_t);
190  enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE;
191  int dst_datatype_size = get_datatype_size(input->dt);
192  int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
193  if (bytewidth < 0) {
194  return AVERROR(EINVAL);
195  }
    /* Pick the gray pixel format that describes one tensor element. */
196  /* scale == 1 and mean == 0 and dt == UINT8: passthrough */
197  if (fabsf(input->scale - 1) < 1e-6f && fabsf(input->mean) < 1e-6 && input->dt == DNN_UINT8)
198  dst_fmt = AV_PIX_FMT_GRAY8;
199  /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */
200  else if ((fabsf(input->scale - 255) < 1e-6f || fabsf(input->scale) < 1e-6f) &&
201  fabsf(input->mean) < 1e-6 && input->dt == DNN_FLOAT)
202  dst_fmt = AV_PIX_FMT_GRAYF32;
203  else {
    /* NOTE(review): "UINT8" is printed whatever input->dt actually is. */
204  av_log(log_ctx, AV_LOG_ERROR, "dnn_process input data doesn't support type: UINT8 "
205  "scale: %f, mean: %f\n", input->scale, input->mean);
206  return AVERROR(ENOSYS);
207  }
208 
    /* Default: convert straight from the frame's own planes. */
209  src_data = (void **)frame->data;
210  linesize[0] = frame->linesize[0];
211  if (input->layout == DL_NCHW) {
    /* For NCHW the frame is first split into planes held in a staging buffer.
     * Presumably dims[1] is the channel count for this layout -- confirm. */
212  middle_data = av_malloc(plane_size * input->dims[1]);
213  if (!middle_data) {
214  ret = AVERROR(ENOMEM);
215  goto err;
216  }
    /* NOTE(review): src_data now points at a single pointer, yet it is later
     * passed to sws_scale() as a plane-pointer array -- verify that only
     * element 0 is ever read on that path. */
217  src_data = &middle_data;
218  linesize[0] = frame->width * 3;
219  }
220 
221  switch (frame->format) {
222  case AV_PIX_FMT_RGB24:
223  case AV_PIX_FMT_BGR24:
224  // convert data from packed to planar
225  if (input->layout == DL_NCHW) {
226  sws_ctx = sws_getContext(frame->width,
227  frame->height,
228  frame->format,
229  frame->width,
230  frame->height,
    /* NOTE(review): listing line 231 (the destination pixel format argument)
     * is missing from this extract. */
232  0, NULL, NULL, NULL);
233  if (!sws_ctx) {
234  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
235  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
    /* NOTE(review): listing lines 236-237 (the av_log arguments) are missing
     * from this extract. */
238  ret = AVERROR(EINVAL);
239  goto err;
240  }
    /* Select which third of the staging buffer receives each destination
     * plane; the choice depends on the frame's channel order. The resulting
     * plane order looks like G, B, R -- the format argument itself is on a
     * missing line, so confirm. */
241  if (frame->format == AV_PIX_FMT_RGB24) {
242  planar_data[0] = (uint8_t *)middle_data + plane_size;
243  planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
244  planar_data[2] = (uint8_t *)middle_data;
245  } else if (frame->format == AV_PIX_FMT_BGR24) {
246  planar_data[0] = (uint8_t *)middle_data + plane_size;
247  planar_data[1] = (uint8_t *)middle_data;
248  planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
249  }
    /* NOTE(review): the return value of sws_scale() is not checked. */
250  sws_scale(sws_ctx, (const uint8_t * const *)frame->data,
251  frame->linesize, 0, frame->height, planar_data,
252  (const int [4]){frame->width * sizeof(uint8_t),
253  frame->width * sizeof(uint8_t),
254  frame->width * sizeof(uint8_t), 0});
255  sws_freeContext(sws_ctx);
256  }
    /* The image is described to swscale as a gray image three times as wide,
     * so every byte is converted to one tensor element on its own. */
257  sws_ctx = sws_getContext(frame->width * 3,
258  frame->height,
    /* NOTE(review): listing line 259 (the source pixel format argument) is
     * missing from this extract. */
260  frame->width * 3,
261  frame->height,
262  dst_fmt,
263  0, NULL, NULL, NULL);
264  if (!sws_ctx) {
265  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
266  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
    /* NOTE(review): listing line 267 (the first three av_log arguments) is
     * missing from this extract. */
268  av_get_pix_fmt_name(dst_fmt),frame->width * 3, frame->height);
269  ret = AVERROR(EINVAL);
270  goto err;
271  }
272  sws_scale(sws_ctx, (const uint8_t **)src_data,
273  linesize, 0, frame->height,
274  (uint8_t * const [4]){input->data, 0, 0, 0},
275  (const int [4]){frame->width * 3 * dst_datatype_size, 0, 0, 0});
276  sws_freeContext(sws_ctx);
277  break;
278  case AV_PIX_FMT_GRAYF32:
    /* Plain row-by-row copy of plane 0; no element conversion is done here. */
279  av_image_copy_plane(input->data, bytewidth,
280  frame->data[0], frame->linesize[0],
281  bytewidth, frame->height);
282  break;
283  case AV_PIX_FMT_YUV420P:
284  case AV_PIX_FMT_YUV422P:
285  case AV_PIX_FMT_YUV444P:
286  case AV_PIX_FMT_YUV410P:
287  case AV_PIX_FMT_YUV411P:
288  case AV_PIX_FMT_GRAY8:
289  case AV_PIX_FMT_NV12:
    /* The destination passed to sws_scale() below has a single plane. */
290  sws_ctx = sws_getContext(frame->width,
291  frame->height,
    /* NOTE(review): listing line 292 (source pixel format) is missing. */
293  frame->width,
294  frame->height,
295  dst_fmt,
296  0, NULL, NULL, NULL);
297  if (!sws_ctx) {
298  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
299  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
    /* NOTE(review): listing lines 300-301 (the av_log arguments) are missing. */
302  ret = AVERROR(EINVAL);
303  goto err;
304  }
305  sws_scale(sws_ctx, (const uint8_t **)frame->data,
306  frame->linesize, 0, frame->height,
307  (uint8_t * const [4]){input->data, 0, 0, 0},
308  (const int [4]){frame->width * dst_datatype_size, 0, 0, 0});
309  sws_freeContext(sws_ctx);
310  break;
311  default:
    /* NOTE(review): listing line 312 is missing from this extract. */
313  ret = AVERROR(ENOSYS);
314  goto err;
315  }
    /* Common exit; middle_data is still NULL here unless the NCHW staging
     * buffer was allocated. */
316 err:
317  av_free(middle_data);
318  return ret;
319 }
320 
322 {
323  if (data->dt == DNN_UINT8) {
324  switch (data->order) {
325  case DCO_BGR:
326  return AV_PIX_FMT_BGR24;
327  case DCO_RGB:
328  return AV_PIX_FMT_RGB24;
329  default:
330  av_assert0(!"unsupported data pixel format.\n");
331  return AV_PIX_FMT_BGR24;
332  }
333  }
334 
335  av_assert0(!"unsupported data type.\n");
336  return AV_PIX_FMT_BGR24;
337 }
338 
/*
 * ff_frame_to_dnn_classify: scale the region of one detection bounding box
 * of a frame into a DNN input tensor.
 *
 * frame       source frame; format, data[] and linesize[] are read
 * input       destination tensor; scale, mean, layout and dims[] are read,
 *             data is written
 * bbox_index  index handed to av_get_detection_bbox()
 * log_ctx     handed to av_log() for error messages
 *
 * Returns AVERROR(ENOSYS) for an unsupported scale/mean or the NCHW layout,
 * AVERROR(EINVAL) when the scale context cannot be created, a negative value
 * from av_image_fill_linesizes() on failure, and otherwise whatever
 * av_image_fill_linesizes() returned (the sws_scale() result is not used).
 *
 * NOTE(review): several source lines are missing from this listing (the
 * embedded line numbers skip); the gaps are flagged inline.
 */
339 int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index, void *log_ctx)
340 {
341  const AVPixFmtDescriptor *desc;
342  int offsetx[4], offsety[4];
343  uint8_t *bbox_data[4];
344  struct SwsContext *sws_ctx;
345  int linesizes[4];
346  int ret = 0;
347  enum AVPixelFormat fmt;
348  int left, top, width, height;
349  int width_idx, height_idx;
    /* NOTE(review): listing line 350 is missing; "header" is used below
     * without a visible declaration. */
351  const AVDetectionBBox *bbox;
    /* NOTE(review): listing line 352 is missing; "sd" is used below without a
     * visible declaration (presumably the frame's detection-bbox side data --
     * confirm). */
353  av_assert0(sd);
354 
355  /* (scale != 1 and scale != 0) or mean != 0 */
356  if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||
357  fabsf(input->mean) > 1e-6f) {
358  av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support "
359  "scale: %f, mean: %f\n", input->scale, input->mean);
360  return AVERROR(ENOSYS);
361  }
362 
363  if (input->layout == DL_NCHW) {
364  av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support layout: NCHW\n");
365  return AVERROR(ENOSYS);
366  }
367 
368  width_idx = dnn_get_width_idx_by_layout(input->layout);
369  height_idx = dnn_get_height_idx_by_layout(input->layout);
370 
371  header = (const AVDetectionBBoxHeader *)sd->data;
    /* NOTE(review): bbox_index is not range-checked here and the returned
     * pointer is used directly -- presumably the caller guarantees a valid
     * index; confirm. */
372  bbox = av_get_detection_bbox(header, bbox_index);
373 
    /* Source rectangle, in pixels, taken from the bounding box. */
374  left = bbox->x;
375  width = bbox->w;
376  top = bbox->y;
377  height = bbox->h;
378 
    /* Scale from the box size to the tensor's width and height. */
379  fmt = get_pixel_format(input);
380  sws_ctx = sws_getContext(width, height, frame->format,
381  input->dims[width_idx],
382  input->dims[height_idx], fmt,
    /* NOTE(review): listing line 383 (flags and the remaining arguments of
     * this call) is missing from this extract. */
384  if (!sws_ctx) {
385  av_log(log_ctx, AV_LOG_ERROR, "Failed to create scale context for the conversion "
386  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
    /* NOTE(review): listing line 387 (the first three av_log arguments) is
     * missing from this extract. */
388  av_get_pix_fmt_name(fmt),
389  input->dims[width_idx],
390  input->dims[height_idx]);
391  return AVERROR(EINVAL);
392  }
393 
394  ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]);
395  if (ret < 0) {
    /* NOTE(review): this log message has no trailing newline. */
396  av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes");
397  sws_freeContext(sws_ctx);
398  return ret;
399  }
400 
    /* NOTE(review): listing line 401 is missing; "desc" is dereferenced below
     * without a visible assignment. */
    /* Planes 1 and 2 use offsets shifted by the chroma subsampling factors;
     * planes 0 and 3 use the full-resolution offsets. */
402  offsetx[1] = offsetx[2] = AV_CEIL_RSHIFT(left, desc->log2_chroma_w);
403  offsetx[0] = offsetx[3] = left;
404 
405  offsety[1] = offsety[2] = AV_CEIL_RSHIFT(top, desc->log2_chroma_h);
406  offsety[0] = offsety[3] = top;
407 
    /* Point each plane at the top-left sample of the box.
     * NOTE(review): the loop is bounded only by the first NULL entry of
     * frame->data, while bbox_data/offsetx/offsety hold 4 entries; entries of
     * bbox_data past the last plane stay uninitialized. The horizontal offset
     * is added in bytes, which presumably assumes one byte per sample --
     * confirm for packed or high-bit-depth formats. */
408  for (int k = 0; frame->data[k]; k++)
409  bbox_data[k] = frame->data[k] + offsety[k] * frame->linesize[k] + offsetx[k];
410 
    /* NOTE(review): the return value of sws_scale() is not checked. */
411  sws_scale(sws_ctx, (const uint8_t *const *)&bbox_data, frame->linesize,
412  0, height,
413  (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);
414 
415  sws_freeContext(sws_ctx);
416 
417  return ret;
418 }
419 
/*
 * ff_frame_to_dnn_detect: scale a frame into a DNN input tensor.
 *
 * NOTE(review): several source lines are missing from this listing (the
 * embedded line numbers skip). The function header, listing line 420, is one
 * of them; the cross-reference index at the end of this page records it as
 *   int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)
 * The other gaps are flagged inline.
 *
 * frame    source frame; data[], linesize[] and height are read
 * input    destination tensor; scale, mean, layout and dims[] are read,
 *          data is written
 * log_ctx  handed to av_log() for error messages
 *
 * Returns AVERROR(ENOSYS) for an unsupported scale/mean or the NCHW layout,
 * AVERROR(EINVAL) when the scale context cannot be created, a negative value
 * from av_image_fill_linesizes() on failure, and otherwise whatever
 * av_image_fill_linesizes() returned (the sws_scale() result is not used).
 */
421 {
422  struct SwsContext *sws_ctx;
423  int linesizes[4];
424  int ret = 0, width_idx, height_idx;
    /* NOTE(review): listing line 425 is missing; "fmt" is used below without
     * a visible declaration. */
426 
427  /* (scale != 1 and scale != 0) or mean != 0 */
428  if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||
429  fabsf(input->mean) > 1e-6f) {
430  av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support "
431  "scale: %f, mean: %f\n", input->scale, input->mean);
432  return AVERROR(ENOSYS);
433  }
434 
435  if (input->layout == DL_NCHW) {
436  av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support layout: NCHW\n");
437  return AVERROR(ENOSYS);
438  }
439 
440  width_idx = dnn_get_width_idx_by_layout(input->layout);
441  height_idx = dnn_get_height_idx_by_layout(input->layout);
442 
    /* NOTE(review): listing line 443 (the start of the call that creates
     * sws_ctx, with its source size/format arguments) is missing. */
444  input->dims[width_idx],
445  input->dims[height_idx], fmt,
    /* NOTE(review): listing line 446 (flags and the remaining arguments of
     * this call) is missing from this extract. */
447  if (!sws_ctx) {
448  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
449  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
    /* NOTE(review): listing line 450 (the first three av_log arguments) is
     * missing from this extract. */
451  av_get_pix_fmt_name(fmt), input->dims[width_idx],
452  input->dims[height_idx]);
453  return AVERROR(EINVAL);
454  }
455 
456  ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]);
457  if (ret < 0) {
    /* NOTE(review): this log message has no trailing newline. */
458  av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes");
459  sws_freeContext(sws_ctx);
460  return ret;
461  }
462 
    /* Whole frame in, single packed plane out.
     * NOTE(review): the return value of sws_scale() is not checked. */
463  sws_scale(sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 0, frame->height,
464  (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);
465 
466  sws_freeContext(sws_ctx);
467  return ret;
468 }
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
av_frame_get_side_data
AVFrameSideData * av_frame_get_side_data(const AVFrame *frame, enum AVFrameSideDataType type)
Definition: frame.c:838
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2962
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:344
AVFrame::width
int width
Definition: frame.h:416
data
const char data[16]
Definition: mxf.c:148
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
sws_scale
int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])
swscale wrapper, so we don't need to export the SwsContext.
Definition: swscale.c:1205
dnn_io_proc.h
AVDetectionBBox::y
int y
Definition: detection_bbox.h:32
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:365
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
av_image_copy_plane
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
Definition: imgutils.c:374
SWS_FAST_BILINEAR
#define SWS_FAST_BILINEAR
Definition: swscale.h:65
dnn_get_width_idx_by_layout
static int dnn_get_width_idx_by_layout(DNNLayout layout)
Definition: dnn_interface.h:137
get_pixel_format
static enum AVPixelFormat get_pixel_format(DNNData *data)
Definition: dnn_io_proc.c:321
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
av_get_detection_bbox
static av_always_inline AVDetectionBBox * av_get_detection_bbox(const AVDetectionBBoxHeader *header, unsigned int idx)
Definition: detection_bbox.h:84
avassert.h
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
float
float
Definition: af_crystalizer.c:121
width
#define width
av_image_fill_linesizes
int av_image_fill_linesizes(int linesizes[4], enum AVPixelFormat pix_fmt, int width)
Fill plane linesizes for an image with pixel format pix_fmt and width width.
Definition: imgutils.c:89
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
DNNData
Definition: dnn_interface.h:65
DL_NCHW
@ DL_NCHW
Definition: dnn_interface.h:61
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
AV_PIX_FMT_GRAYF32
#define AV_PIX_FMT_GRAYF32
Definition: pixfmt.h:511
frame
static AVFrame * frame
Definition: demux_decode.c:54
if
if(ret)
Definition: filter_design.txt:179
ff_proc_from_frame_to_dnn
int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
Definition: dnn_io_proc.c:181
ff_frame_to_dnn_detect
int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)
Definition: dnn_io_proc.c:420
NULL
#define NULL
Definition: coverity.c:32
AVDetectionBBoxHeader
Definition: detection_bbox.h:56
AV_PIX_FMT_GRAY8
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:81
f
f
Definition: af_crystalizer.c:121
AV_PIX_FMT_RGB24
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:75
AVDetectionBBox::w
int w
Definition: detection_bbox.h:33
sws_getContext
struct SwsContext * sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, int dstW, int dstH, enum AVPixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
Allocate and return an SwsContext.
Definition: utils.c:2094
avpriv_report_missing_feature
void avpriv_report_missing_feature(void *avc, const char *msg,...) av_printf_format(2
Log a generic warning message about a missing feature.
AVFrameSideData::data
uint8_t * data
Definition: frame.h:252
DNNDataType
DNNDataType
Definition: dnn_interface.h:37
AVFrame::format
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames,...
Definition: frame.h:431
get_datatype_size
static int get_datatype_size(DNNDataType dt)
Definition: dnn_io_proc.c:27
header
static const uint8_t header[24]
Definition: sdr2.c:68
height
#define height
DNN_FLOAT
@ DNN_FLOAT
Definition: dnn_interface.h:37
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
av_image_get_linesize
int av_image_get_linesize(enum AVPixelFormat pix_fmt, int width, int plane)
Compute the size of an image line with format pix_fmt and width width for the plane plane.
Definition: imgutils.c:76
ret
ret
Definition: filter_design.txt:187
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:96
AVDetectionBBox::h
int h
Definition: detection_bbox.h:34
DNN_UINT8
@ DNN_UINT8
Definition: dnn_interface.h:37
left
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left
Definition: snow.txt:386
AVFrame::height
int height
Definition: frame.h:416
sws_freeContext
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
Definition: utils.c:2425
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
AVDetectionBBox::x
int x
Distance in pixels from the left/top edge of the frame, together with width and height,...
Definition: detection_bbox.h:31
DCO_RGB
@ DCO_RGB
Definition: dnn_interface.h:42
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
desc
const char * desc
Definition: libsvtav1.c:75
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
dnn_get_height_idx_by_layout
static int dnn_get_height_idx_by_layout(DNNLayout layout)
Definition: dnn_interface.h:142
AVFrameSideData
Structure to hold side data for an AVFrame.
Definition: frame.h:250
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
ff_frame_to_dnn_classify
int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index, void *log_ctx)
Definition: dnn_io_proc.c:339
av_free
#define av_free(p)
Definition: tableprint_vlc.h:33
AV_PIX_FMT_YUV411P
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:80
imgutils.h
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
For video, a positive or negative value, which is typically indicating the size in bytes of each pict...
Definition: frame.h:389
AV_PIX_FMT_YUV410P
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:79
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
AVDetectionBBox
Definition: detection_bbox.h:26
DCO_BGR
@ DCO_BGR
Definition: dnn_interface.h:41
SwsContext
Definition: swscale_internal.h:299
detection_bbox.h
swscale.h
AV_FRAME_DATA_DETECTION_BBOXES
@ AV_FRAME_DATA_DETECTION_BBOXES
Bounding boxes for object detection and classification, as described by AVDetectionBBoxHeader.
Definition: frame.h:194
av_get_pix_fmt_name
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2882
ff_proc_from_dnn_to_frame
int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
Definition: dnn_io_proc.c:41