FFmpeg
dnn_io_proc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "dnn_io_proc.h"
22 #include "libavutil/imgutils.h"
23 #include "libavutil/mem.h"
24 #include "libswscale/swscale.h"
25 #include "libavutil/avassert.h"
27 
/*
 * get_datatype_size: size in bytes of one element of a DNN data type.
 *
 * NOTE(review): the declaration line (listing line 28) is absent from this
 * extract; the symbol index at the end of the page gives it as
 *     static int get_datatype_size(DNNDataType dt)
 *
 * Returns sizeof(float) for DNN_FLOAT and sizeof(uint8_t) for DNN_UINT8.
 * Any other value reaches av_assert0() with a condition that is always
 * false (the negation of a string literal); the `return 1` that follows it
 * is the value returned should execution continue past the assertion.
 */
29 {
30  switch (dt)
31  {
32  case DNN_FLOAT:
33  return sizeof(float);
34  case DNN_UINT8:
35  return sizeof(uint8_t);
36  default:
 /* !"literal" evaluates to 0, so this assertion fails whenever it is reached. */
37  av_assert0(!"not supported yet.");
38  return 1;
39  }
40 }
41 
/*
 * ff_proc_from_dnn_to_frame: write the DNN result held in `output` into `frame`.
 *
 * NOTE(review): the declaration line (listing line 42) is absent from this
 * extract; the symbol index at the end of the page gives it as
 *     int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
 * Listing lines 91, 109, 117, 153, 156 and 172 are absent as well, so a few
 * argument lists below are shown incomplete.
 *
 * Accepted (scale, mean, dt) combinations, compared with a 1e-6 tolerance:
 *   - scale ~ 1, mean ~ 0, dt == DNN_UINT8            -> src_fmt = AV_PIX_FMT_GRAY8
 *   - scale ~ 255 or ~ 0, mean ~ 0, dt == DNN_FLOAT   -> src_fmt = AV_PIX_FMT_GRAYF32
 * Every other combination is logged and rejected.
 *
 * Return value:
 *   0                 on success
 *   AVERROR(EINVAL)   av_image_get_linesize() failed, or a scaler context
 *                     could not be created
 *   AVERROR(ENOMEM)   the NCHW staging buffer could not be allocated
 *   AVERROR(ENOSYS)   unsupported scale/mean/dt combination or frame format
 */
43 {
44  struct SwsContext *sws_ctx;
45  int ret = 0;
46  int linesize[4] = { 0 };
47  void **dst_data = NULL;
48  void *middle_data = NULL;
49  uint8_t *planar_data[4] = { 0 };
 /* Number of bytes in one 8-bit plane covering the whole frame. */
50  int plane_size = frame->width * frame->height * sizeof(uint8_t);
51  enum AVPixelFormat src_fmt = AV_PIX_FMT_NONE;
 /* Bytes per element of the model output; used for the source strides below. */
52  int src_datatype_size = get_datatype_size(output->dt);
53 
 /* Line size in bytes of plane 0 for the frame's own format and width. */
54  int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
55  if (bytewidth < 0) {
56  return AVERROR(EINVAL);
57  }
58  /* scale == 1 and mean == 0 and dt == UINT8: passthrough */
59  if (fabsf(output->scale - 1) < 1e-6f && fabsf(output->mean) < 1e-6 && output->dt == DNN_UINT8)
60  src_fmt = AV_PIX_FMT_GRAY8;
61  /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */
62  else if ((fabsf(output->scale - 255) < 1e-6f || fabsf(output->scale) < 1e-6f) &&
63  fabsf(output->mean) < 1e-6 && output->dt == DNN_FLOAT)
64  src_fmt = AV_PIX_FMT_GRAYF32;
65  else {
 /* NOTE(review): the message reads "doesn't type" (the input-side function
  * says "doesn't support type") and always prints "UINT8", whatever
  * output->dt is. */
66  av_log(log_ctx, AV_LOG_ERROR, "dnn_process output data doesn't type: UINT8 "
67  "scale: %f, mean: %f\n", output->scale, output->mean);
68  return AVERROR(ENOSYS);
69  }
70 
 /* Default: the first conversion writes straight into the frame. For NCHW
  * layout it writes into a temporary buffer instead; only the RGB24/BGR24
  * case below uses dst_data/linesize and repacks that buffer into the frame. */
71  dst_data = (void **)frame->data;
72  linesize[0] = frame->linesize[0];
73  if (output->layout == DL_NCHW) {
74  middle_data = av_malloc(plane_size * output->dims[1]);
75  if (!middle_data) {
76  ret = AVERROR(ENOMEM);
77  goto err;
78  }
79  dst_data = &middle_data;
 /* NOTE(review): the stride hard-codes 3 bytes per pixel while the
  * allocation uses output->dims[1] planes — presumably dims[1] == 3 on
  * this path; confirm. */
80  linesize[0] = frame->width * 3;
81  }
82 
83  switch (frame->format) {
84  case AV_PIX_FMT_RGB24:
85  case AV_PIX_FMT_BGR24:
 /* The packed 3-component image is treated as one gray plane that is
  * three times as wide, so every component is converted independently.
  * The destination-format argument sits on listing line 91, which is
  * absent here; the error message below names AV_PIX_FMT_GRAY8. */
86  sws_ctx = sws_getContext(frame->width * 3,
87  frame->height,
88  src_fmt,
89  frame->width * 3,
90  frame->height,
92  0, NULL, NULL, NULL);
93  if (!sws_ctx) {
94  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
95  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
96  av_get_pix_fmt_name(src_fmt), frame->width * 3, frame->height,
97  av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height);
98  ret = AVERROR(EINVAL);
99  goto err;
100  }
 /* Source: the model output as a single plane whose stride is
  * width * 3 elements of src_datatype_size bytes each. */
101  sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
102  (const int[4]){frame->width * 3 * src_datatype_size, 0, 0, 0}, 0, frame->height,
103  (uint8_t * const*)dst_data, linesize);
104  sws_freeContext(sws_ctx);
105  // convert data from planar to packed
106  if (output->layout == DL_NCHW) {
 /* The source-format argument (listing line 109) and the first
  * av_get_pix_fmt_name() argument line of the log call (117) are absent
  * from this extract. NOTE(review): presumably AV_PIX_FMT_GBRP, which
  * appears in the page's symbol index — confirm. */
107  sws_ctx = sws_getContext(frame->width,
108  frame->height,
110  frame->width,
111  frame->height,
112  frame->format,
113  0, NULL, NULL, NULL);
114  if (!sws_ctx) {
115  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
116  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
118  av_get_pix_fmt_name(frame->format),frame->width, frame->height);
119  ret = AVERROR(EINVAL);
120  goto err;
121  }
 /* Plane k of the staging buffer starts at k * plane_size. Source planes
  * 0..2 are taken from buffer planes 1,2,0 for RGB24 and 1,0,2 for BGR24. */
122  if (frame->format == AV_PIX_FMT_RGB24) {
123  planar_data[0] = (uint8_t *)middle_data + plane_size;
124  planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
125  planar_data[2] = (uint8_t *)middle_data;
126  } else if (frame->format == AV_PIX_FMT_BGR24) {
127  planar_data[0] = (uint8_t *)middle_data + plane_size;
128  planar_data[1] = (uint8_t *)middle_data;
129  planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
130  }
 /* Each source plane is tightly packed: stride == frame->width bytes. */
131  sws_scale(sws_ctx, (const uint8_t * const *)planar_data,
132  (const int [4]){frame->width * sizeof(uint8_t),
133  frame->width * sizeof(uint8_t),
134  frame->width * sizeof(uint8_t), 0},
135  0, frame->height, frame->data, frame->linesize);
136  sws_freeContext(sws_ctx);
137  }
138  break;
139  case AV_PIX_FMT_GRAYF32:
 /* Row-by-row copy into plane 0; source rows are read with a stride of
  * exactly bytewidth bytes. */
140  av_image_copy_plane(frame->data[0], frame->linesize[0],
141  output->data, bytewidth,
142  bytewidth, frame->height);
143  break;
144  case AV_PIX_FMT_YUV420P:
145  case AV_PIX_FMT_YUV422P:
146  case AV_PIX_FMT_YUV444P:
147  case AV_PIX_FMT_YUV410P:
148  case AV_PIX_FMT_YUV411P:
149  case AV_PIX_FMT_GRAY8:
150  case AV_PIX_FMT_NV12:
 /* Both pixel-format arguments (listing lines 153 and 156) are absent
  * from this extract; the error message below names src_fmt and
  * AV_PIX_FMT_GRAY8. NOTE(review): confirm against the real file. */
151  sws_ctx = sws_getContext(frame->width,
152  frame->height,
154  frame->width,
155  frame->height,
157  0, NULL, NULL, NULL);
158  if (!sws_ctx) {
159  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
160  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
161  av_get_pix_fmt_name(src_fmt), frame->width, frame->height,
162  av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height);
163  ret = AVERROR(EINVAL);
164  goto err;
165  }
 /* A single source plane (the model output) is passed; the destination
  * is the frame's own data/linesize arrays. */
166  sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
167  (const int[4]){frame->width * src_datatype_size, 0, 0, 0}, 0, frame->height,
168  (uint8_t * const*)frame->data, frame->linesize);
169  sws_freeContext(sws_ctx);
170  break;
171  default:
 /* Listing line 172 is absent from this extract. */
173  ret = AVERROR(ENOSYS);
174  goto err;
175  }
176 
 /* Reached on success as well (fall-through after the switch, ret == 0).
  * middle_data is still NULL here unless the NCHW branch allocated it. */
177 err:
178  av_free(middle_data);
179  return ret;
180 }
181 
/*
 * ff_proc_from_frame_to_dnn: fill the DNN input buffer `input->data` from `frame`.
 *
 * NOTE(review): the declaration line (listing line 182) is absent from this
 * extract; the symbol index at the end of the page gives it as
 *     int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
 * Listing lines 232, 238, 260, 293, 301 and 313 are absent as well, so a few
 * argument lists below are shown incomplete.
 *
 * Accepted (scale, mean, dt) combinations, compared with a 1e-6 tolerance:
 *   - scale ~ 1, mean ~ 0, dt == DNN_UINT8            -> dst_fmt = AV_PIX_FMT_GRAY8
 *   - scale ~ 255 or ~ 0, mean ~ 0, dt == DNN_FLOAT   -> dst_fmt = AV_PIX_FMT_GRAYF32
 * Every other combination is logged and rejected.
 *
 * Return value:
 *   0                 on success
 *   AVERROR(EINVAL)   av_image_get_linesize() failed, or a scaler context
 *                     could not be created
 *   AVERROR(ENOMEM)   the NCHW staging buffer could not be allocated
 *   AVERROR(ENOSYS)   unsupported scale/mean/dt combination or frame format
 */
183 {
184  struct SwsContext *sws_ctx;
185  int ret = 0;
186  int linesize[4] = { 0 };
187  void **src_data = NULL;
188  void *middle_data = NULL;
189  uint8_t *planar_data[4] = { 0 };
 /* Number of bytes in one 8-bit plane covering the whole frame. */
190  int plane_size = frame->width * frame->height * sizeof(uint8_t);
191  enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE;
 /* Bytes per element of the model input; used for the destination strides below. */
192  int dst_datatype_size = get_datatype_size(input->dt);
 /* Line size in bytes of plane 0 for the frame's own format and width. */
193  int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
194  if (bytewidth < 0) {
195  return AVERROR(EINVAL);
196  }
197  /* scale == 1 and mean == 0 and dt == UINT8: passthrough */
198  if (fabsf(input->scale - 1) < 1e-6f && fabsf(input->mean) < 1e-6 && input->dt == DNN_UINT8)
199  dst_fmt = AV_PIX_FMT_GRAY8;
200  /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */
201  else if ((fabsf(input->scale - 255) < 1e-6f || fabsf(input->scale) < 1e-6f) &&
202  fabsf(input->mean) < 1e-6 && input->dt == DNN_FLOAT)
203  dst_fmt = AV_PIX_FMT_GRAYF32;
204  else {
 /* NOTE(review): the message always prints "UINT8", whatever input->dt is. */
205  av_log(log_ctx, AV_LOG_ERROR, "dnn_process input data doesn't support type: UINT8 "
206  "scale: %f, mean: %f\n", input->scale, input->mean);
207  return AVERROR(ENOSYS);
208  }
209 
 /* Default: the gray conversion reads straight from the frame. For NCHW
  * layout it reads from a temporary buffer that the RGB24/BGR24 case below
  * fills with separated planes first. */
210  src_data = (void **)frame->data;
211  linesize[0] = frame->linesize[0];
212  if (input->layout == DL_NCHW) {
213  middle_data = av_malloc(plane_size * input->dims[1]);
214  if (!middle_data) {
215  ret = AVERROR(ENOMEM);
216  goto err;
217  }
218  src_data = &middle_data;
 /* NOTE(review): the stride hard-codes 3 bytes per pixel while the
  * allocation uses input->dims[1] planes — presumably dims[1] == 3 on
  * this path; confirm. */
219  linesize[0] = frame->width * 3;
220  }
221 
222  switch (frame->format) {
223  case AV_PIX_FMT_RGB24:
224  case AV_PIX_FMT_BGR24:
225  // convert data from packed (frame->data) to planar (planar_data, inside middle_data)
226  if (input->layout == DL_NCHW) {
 /* The destination-format argument (listing line 232) and the last
  * argument line of the log call (238) are absent from this extract.
  * NOTE(review): presumably AV_PIX_FMT_GBRP, which appears in the page's
  * symbol index — confirm. */
227  sws_ctx = sws_getContext(frame->width,
228  frame->height,
229  frame->format,
230  frame->width,
231  frame->height,
233  0, NULL, NULL, NULL);
234  if (!sws_ctx) {
235  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
236  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
237  av_get_pix_fmt_name(frame->format), frame->width, frame->height,
239  ret = AVERROR(EINVAL);
240  goto err;
241  }
 /* Plane k of the staging buffer starts at k * plane_size. Destination
  * planes 0..2 map to buffer planes 1,2,0 for RGB24 and 1,0,2 for BGR24. */
242  if (frame->format == AV_PIX_FMT_RGB24) {
243  planar_data[0] = (uint8_t *)middle_data + plane_size;
244  planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
245  planar_data[2] = (uint8_t *)middle_data;
246  } else if (frame->format == AV_PIX_FMT_BGR24) {
247  planar_data[0] = (uint8_t *)middle_data + plane_size;
248  planar_data[1] = (uint8_t *)middle_data;
249  planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
250  }
 /* Each destination plane is tightly packed: stride == frame->width bytes. */
251  sws_scale(sws_ctx, (const uint8_t * const *)frame->data,
252  frame->linesize, 0, frame->height, planar_data,
253  (const int [4]){frame->width * sizeof(uint8_t),
254  frame->width * sizeof(uint8_t),
255  frame->width * sizeof(uint8_t), 0});
256  sws_freeContext(sws_ctx);
257  }
 /* The 3-component image is treated as one gray plane that is three times
  * as wide, so every component is converted independently. The
  * source-format argument sits on listing line 260, which is absent here;
  * the error message below names AV_PIX_FMT_GRAY8. */
258  sws_ctx = sws_getContext(frame->width * 3,
259  frame->height,
261  frame->width * 3,
262  frame->height,
263  dst_fmt,
264  0, NULL, NULL, NULL);
265  if (!sws_ctx) {
266  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
267  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
268  av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height,
269  av_get_pix_fmt_name(dst_fmt),frame->width * 3, frame->height);
270  ret = AVERROR(EINVAL);
271  goto err;
272  }
 /* Destination: the model input as a single plane whose stride is
  * width * 3 elements of dst_datatype_size bytes each. */
273  sws_scale(sws_ctx, (const uint8_t **)src_data,
274  linesize, 0, frame->height,
275  (uint8_t * const [4]){input->data, 0, 0, 0},
276  (const int [4]){frame->width * 3 * dst_datatype_size, 0, 0, 0});
277  sws_freeContext(sws_ctx);
278  break;
279  case AV_PIX_FMT_GRAYF32:
 /* Row-by-row copy of plane 0; destination rows are written with a stride
  * of exactly bytewidth bytes. */
280  av_image_copy_plane(input->data, bytewidth,
281  frame->data[0], frame->linesize[0],
282  bytewidth, frame->height);
283  break;
284  case AV_PIX_FMT_YUV420P:
285  case AV_PIX_FMT_YUV422P:
286  case AV_PIX_FMT_YUV444P:
287  case AV_PIX_FMT_YUV410P:
288  case AV_PIX_FMT_YUV411P:
289  case AV_PIX_FMT_GRAY8:
290  case AV_PIX_FMT_NV12:
 /* The source-format argument (listing line 293) and the first
  * av_get_pix_fmt_name() argument line of the log call (301) are absent
  * from this extract. */
291  sws_ctx = sws_getContext(frame->width,
292  frame->height,
294  frame->width,
295  frame->height,
296  dst_fmt,
297  0, NULL, NULL, NULL);
298  if (!sws_ctx) {
299  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
300  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
302  av_get_pix_fmt_name(dst_fmt),frame->width, frame->height);
303  ret = AVERROR(EINVAL);
304  goto err;
305  }
 /* The frame's data/linesize arrays are the source; a single destination
  * plane (the model input) is passed. */
306  sws_scale(sws_ctx, (const uint8_t **)frame->data,
307  frame->linesize, 0, frame->height,
308  (uint8_t * const [4]){input->data, 0, 0, 0},
309  (const int [4]){frame->width * dst_datatype_size, 0, 0, 0});
310  sws_freeContext(sws_ctx);
311  break;
312  default:
 /* Listing line 313 is absent from this extract. */
314  ret = AVERROR(ENOSYS);
315  goto err;
316  }
 /* Reached on success as well (fall-through after the switch, ret == 0).
  * middle_data is still NULL here unless the NCHW branch allocated it. */
317 err:
318  av_free(middle_data);
319  return ret;
320 }
321 
/*
 * get_pixel_format: map a DNN data description to a packed 8-bit pixel format.
 *
 * NOTE(review): the declaration line (listing line 322) is absent from this
 * extract; the symbol index at the end of the page gives it as
 *     static enum AVPixelFormat get_pixel_format(DNNData *data)
 *
 * For data->dt == DNN_UINT8, returns AV_PIX_FMT_BGR24 for DCO_BGR and
 * AV_PIX_FMT_RGB24 for DCO_RGB. Any other order, and any other data type,
 * reaches av_assert0() with a condition that is always false; the
 * AV_PIX_FMT_BGR24 returned after each assertion is the value returned
 * should execution continue past it.
 */
323 {
324  if (data->dt == DNN_UINT8) {
325  switch (data->order) {
326  case DCO_BGR:
327  return AV_PIX_FMT_BGR24;
328  case DCO_RGB:
329  return AV_PIX_FMT_RGB24;
330  default:
 /* !"literal" evaluates to 0, so this assertion fails whenever it is reached. */
331  av_assert0(!"unsupported data pixel format.\n");
332  return AV_PIX_FMT_BGR24;
333  }
334  }
335 
 /* Only DNN_UINT8 is handled above; every other dt ends up here. */
336  av_assert0(!"unsupported data type.\n");
337  return AV_PIX_FMT_BGR24;
338 }
339 
/*
 * ff_frame_to_dnn_classify: scale one detection bounding box of `frame`
 * into the DNN input buffer `input->data`.
 *
 * frame       source picture; the box is looked up through `sd`/`header`
 *             (see the note on missing lines below)
 * input       destination description: dims[], layout, dt, order, scale,
 *             mean and the data buffer
 * bbox_index  index passed to av_get_detection_bbox()
 * log_ctx     context handed to av_log()
 *
 * NOTE(review): listing lines 351, 353, 385, 389 and 410 are absent from
 * this extract. `header` and `sd` are used but their declarations are not
 * shown, the tail of the sws_getContext() call is cut, and nothing visible
 * fills `max_step` after its zero-initialisation (the page's symbol index
 * lists av_frame_get_side_data, AV_FRAME_DATA_DETECTION_BBOXES,
 * SWS_FAST_BILINEAR and av_image_fill_max_pixsteps, presumably used on
 * those lines — confirm against the real file).
 *
 * Return value:
 *   AVERROR(ENOSYS)  scale is neither ~1 nor ~0, or mean is not ~0, or the
 *                    layout is DL_NCHW
 *   AVERROR(EINVAL)  the scaler context could not be created
 *   negative value   propagated from av_image_fill_linesizes()
 *   otherwise        the non-negative value av_image_fill_linesizes()
 *                    returned (ret is not reset before the final return;
 *                    the sws_scale() result is not checked)
 */
340 int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index, void *log_ctx)
341 {
342  const AVPixFmtDescriptor *desc;
343  int offsetx[4], offsety[4];
344  uint8_t *bbox_data[4];
345  struct SwsContext *sws_ctx;
346  int linesizes[4];
347  int ret = 0;
348  enum AVPixelFormat fmt;
349  int left, top, width, height;
350  int width_idx, height_idx;
352  const AVDetectionBBox *bbox;
354  int max_step[4] = { 0 };
 /* The detection side data must be present; its lookup is on a line that
  * is absent from this extract. */
355  av_assert0(sd);
356 
357  /* (scale != 1 and scale != 0) or mean != 0 */
358  if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||
359  fabsf(input->mean) > 1e-6f) {
360  av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support "
361  "scale: %f, mean: %f\n", input->scale, input->mean);
362  return AVERROR(ENOSYS);
363  }
364 
365  if (input->layout == DL_NCHW) {
366  av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support layout: NCHW\n");
367  return AVERROR(ENOSYS);
368  }
369 
 /* Positions of the width and height entries inside input->dims[]. */
370  width_idx = dnn_get_width_idx_by_layout(input->layout);
371  height_idx = dnn_get_height_idx_by_layout(input->layout);
372 
373  header = (const AVDetectionBBoxHeader *)sd->data;
374  bbox = av_get_detection_bbox(header, bbox_index);
375 
 /* Box geometry in frame pixels. */
376  left = bbox->x;
377  width = bbox->w;
378  top = bbox->y;
379  height = bbox->h;
380 
 /* Scale the box (width x height, frame format) to the model's input size
  * in the packed format chosen by get_pixel_format(). */
381  fmt = get_pixel_format(input);
382  sws_ctx = sws_getContext(width, height, frame->format,
383  input->dims[width_idx],
384  input->dims[height_idx], fmt,
386  if (!sws_ctx) {
387  av_log(log_ctx, AV_LOG_ERROR, "Failed to create scale context for the conversion "
388  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
390  av_get_pix_fmt_name(fmt),
391  input->dims[width_idx],
392  input->dims[height_idx]);
393  return AVERROR(EINVAL);
394  }
395 
 /* Destination strides for a `fmt` image as wide as the model input. */
396  ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]);
397  if (ret < 0) {
398  av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes");
399  sws_freeContext(sws_ctx);
400  return ret;
401  }
402 
 /* Per-plane offset of the box's top-left corner: planes 1 and 2 use the
  * offsets shifted by the format's chroma subsampling (rounded up), planes
  * 0 and 3 use the offsets unchanged. */
403  desc = av_pix_fmt_desc_get(frame->format);
404  offsetx[1] = offsetx[2] = AV_CEIL_RSHIFT(left, desc->log2_chroma_w);
405  offsetx[0] = offsetx[3] = left;
406 
407  offsety[1] = offsety[2] = AV_CEIL_RSHIFT(top, desc->log2_chroma_h);
408  offsety[0] = offsety[3] = top;
409 
 /* Build plane pointers that start at the box's corner. The loop runs
  * until the first NULL entry of frame->data; entries of bbox_data beyond
  * that are left unset.
  * NOTE(review): the loop has no explicit k < 4 bound even though
  * bbox_data, offsetx, offsety and max_step hold 4 entries — it relies on
  * frame->data having a NULL within the first 5 entries. */
411  for (int k = 0; frame->data[k]; k++)
412  bbox_data[k] = frame->data[k] + offsety[k] * frame->linesize[k] + offsetx[k] * max_step[k];
413 
 /* Convert `height` source rows starting at row 0 of the box into the
  * single destination plane input->data. */
414  sws_scale(sws_ctx, (const uint8_t *const *)&bbox_data, frame->linesize,
415  0, height,
416  (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);
417 
418  sws_freeContext(sws_ctx);
419 
420  return ret;
421 }
422 
/*
 * ff_frame_to_dnn_detect: scale the whole `frame` into the DNN input buffer
 * `input->data`.
 *
 * NOTE(review): the declaration line (listing line 423) is absent from this
 * extract; the symbol index at the end of the page gives it as
 *     int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)
 * Listing lines 428 and 449 are absent as well: `fmt` is used below but its
 * declaration is not shown (presumably initialised from
 * get_pixel_format(input) — confirm), and the tail of the sws_getContext()
 * call is cut.
 *
 * Return value:
 *   AVERROR(ENOSYS)  scale is neither ~1 nor ~0, or mean is not ~0, or the
 *                    layout is DL_NCHW
 *   AVERROR(EINVAL)  the scaler context could not be created
 *   negative value   propagated from av_image_fill_linesizes()
 *   otherwise        the non-negative value av_image_fill_linesizes()
 *                    returned (ret is not reset before the final return;
 *                    the sws_scale() result is not checked)
 */
424 {
425  struct SwsContext *sws_ctx;
426  int linesizes[4];
427  int ret = 0, width_idx, height_idx;
429 
430  /* (scale != 1 and scale != 0) or mean != 0 */
431  if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||
432  fabsf(input->mean) > 1e-6f) {
433  av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support "
434  "scale: %f, mean: %f\n", input->scale, input->mean);
435  return AVERROR(ENOSYS);
436  }
437 
438  if (input->layout == DL_NCHW) {
439  av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support layout: NCHW\n");
440  return AVERROR(ENOSYS);
441  }
442 
 /* Positions of the width and height entries inside input->dims[]. */
443  width_idx = dnn_get_width_idx_by_layout(input->layout);
444  height_idx = dnn_get_height_idx_by_layout(input->layout);
445 
 /* Scale from the frame's own size and format to the model's input size
  * in format `fmt`. */
446  sws_ctx = sws_getContext(frame->width, frame->height, frame->format,
447  input->dims[width_idx],
448  input->dims[height_idx], fmt,
450  if (!sws_ctx) {
451  av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
452  "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
453  av_get_pix_fmt_name(frame->format), frame->width, frame->height,
454  av_get_pix_fmt_name(fmt), input->dims[width_idx],
455  input->dims[height_idx]);
456  return AVERROR(EINVAL);
457  }
458 
 /* Destination strides for a `fmt` image as wide as the model input. */
459  ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]);
460  if (ret < 0) {
461  av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes");
462  sws_freeContext(sws_ctx);
463  return ret;
464  }
465 
 /* Convert all frame rows into the single destination plane input->data. */
466  sws_scale(sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 0, frame->height,
467  (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);
468 
469  sws_freeContext(sws_ctx);
470  return ret;
471 }
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
AVERROR
Filter design notes: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: for each input and each output, the filter provides the list of supported formats. For video that means pixel format; for audio that means channel layout and sample format. These lists are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. (Next section: frame references, ownership and permissions.)
av_frame_get_side_data
AVFrameSideData * av_frame_get_side_data(const AVFrame *frame, enum AVFrameSideDataType type)
Definition: frame.c:980
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3170
output
filter_frame: for filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, …
Definition: filter_design.txt:225
sws_freeContext
void sws_freeContext(SwsContext *swsContext)
Free the swscaler context swsContext.
Definition: utils.c:2475
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:403
data
const char data[16]
Definition: mxf.c:149
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
dnn_io_proc.h
AVDetectionBBox::y
int y
Definition: detection_bbox.h:32
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
av_image_copy_plane
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
Definition: imgutils.c:374
SWS_FAST_BILINEAR
@ SWS_FAST_BILINEAR
Scaler selection options.
Definition: swscale.h:98
dnn_get_width_idx_by_layout
static int dnn_get_width_idx_by_layout(DNNLayout layout)
Definition: dnn_interface.h:197
get_pixel_format
static enum AVPixelFormat get_pixel_format(DNNData *data)
Definition: dnn_io_proc.c:322
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
av_get_detection_bbox
static av_always_inline AVDetectionBBox * av_get_detection_bbox(const AVDetectionBBoxHeader *header, unsigned int idx)
Definition: detection_bbox.h:84
avassert.h
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:209
float
float
Definition: af_crystalizer.c:122
av_image_fill_linesizes
int av_image_fill_linesizes(int linesizes[4], enum AVPixelFormat pix_fmt, int width)
Fill plane linesizes for an image with pixel format pix_fmt and width width.
Definition: imgutils.c:89
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
DNNData
Definition: dnn_interface.h:69
DL_NCHW
@ DL_NCHW
Definition: dnn_interface.h:65
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
AV_PIX_FMT_GRAYF32
#define AV_PIX_FMT_GRAYF32
Definition: pixfmt.h:535
if
if(ret)
Definition: filter_design.txt:179
ff_proc_from_frame_to_dnn
int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
Definition: dnn_io_proc.c:182
ff_frame_to_dnn_detect
int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)
Definition: dnn_io_proc.c:423
NULL
#define NULL
Definition: coverity.c:32
AVDetectionBBoxHeader
Definition: detection_bbox.h:56
AV_PIX_FMT_GRAY8
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:81
f
f
Definition: af_crystalizer.c:122
AV_PIX_FMT_RGB24
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:75
height
#define height
Definition: dsp.h:85
AVDetectionBBox::w
int w
Definition: detection_bbox.h:33
avpriv_report_missing_feature
void avpriv_report_missing_feature(void *avc, const char *msg,...) av_printf_format(2
Log a generic warning message about a missing feature.
AVFrameSideData::data
uint8_t * data
Definition: frame.h:267
DNNDataType
DNNDataType
Definition: dnn_interface.h:41
get_datatype_size
static int get_datatype_size(DNNDataType dt)
Definition: dnn_io_proc.c:28
header
static const uint8_t header[24]
Definition: sdr2.c:68
DNN_FLOAT
@ DNN_FLOAT
Definition: dnn_interface.h:41
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
av_image_get_linesize
int av_image_get_linesize(enum AVPixelFormat pix_fmt, int width, int plane)
Compute the size of an image line with format pix_fmt and width width for the plane plane.
Definition: imgutils.c:76
ret
ret
Definition: filter_design.txt:187
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:96
frame
…these buffered frames must be flushed immediately if a new input produces new output. The filter must not call request_frame to get more; it must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs, the filter must be ready for frames arriving randomly on any input; any filter with several inputs will most likely require some kind of queuing mechanism. It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced. request_frame: for filters that do not use the activate() callback, this method is called when a frame is wanted on an output. For a source, it should directly call filter_frame on the corresponding output. For a filter, if there are queued frames already ready, one of these frames should be pushed. If not, the filter should request a frame on one of its inputs, repeatedly until at least one frame has been pushed. Return, or at least make progress towards producing, a frame.
Definition: filter_design.txt:264
AVDetectionBBox::h
int h
Definition: detection_bbox.h:34
DNN_UINT8
@ DNN_UINT8
Definition: dnn_interface.h:41
sws_getContext
SwsContext * sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, int dstW, int dstH, enum AVPixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
Allocate and return an SwsContext.
Definition: utils.c:2144
left
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left
Definition: snow.txt:386
sws_scale
int attribute_align_arg sws_scale(SwsContext *sws, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])
swscale wrapper, so we don't need to export the SwsContext.
Definition: swscale.c:1505
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
AVDetectionBBox::x
int x
Distance in pixels from the left/top edge of the frame, together with width and height,...
Definition: detection_bbox.h:31
DCO_RGB
@ DCO_RGB
Definition: dnn_interface.h:46
av_image_fill_max_pixsteps
void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4], const AVPixFmtDescriptor *pixdesc)
Compute the max pixel step for each plane of an image with a format described by pixdesc.
Definition: imgutils.c:35
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
desc
const char * desc
Definition: libsvtav1.c:79
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
mem.h
dnn_get_height_idx_by_layout
static int dnn_get_height_idx_by_layout(DNNLayout layout)
Definition: dnn_interface.h:202
AVFrameSideData
Structure to hold side data for an AVFrame.
Definition: frame.h:265
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
ff_frame_to_dnn_classify
int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index, void *log_ctx)
Definition: dnn_io_proc.c:340
av_free
#define av_free(p)
Definition: tableprint_vlc.h:33
AV_PIX_FMT_YUV411P
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:80
imgutils.h
AV_PIX_FMT_YUV410P
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:79
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
AVDetectionBBox
Definition: detection_bbox.h:26
width
#define width
Definition: dsp.h:85
DCO_BGR
@ DCO_BGR
Definition: dnn_interface.h:45
SwsContext
Main external API structure.
Definition: swscale.h:182
detection_bbox.h
swscale.h
AV_FRAME_DATA_DETECTION_BBOXES
@ AV_FRAME_DATA_DETECTION_BBOXES
Bounding boxes for object detection and classification, as described by AVDetectionBBoxHeader.
Definition: frame.h:194
av_get_pix_fmt_name
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:3090
ff_proc_from_dnn_to_frame
int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
Definition: dnn_io_proc.c:42