FFmpeg
vf_siti.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Boris Baracaldo
3  * Copyright (c) 2022 Thilo Borgmann
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Calculate Spatial Info (SI) and Temporal Info (TI) scores
25  */
26 
27 #include <math.h>
28 
29 #include "libavutil/imgutils.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/mem.h"
32 #include "libavutil/opt.h"
33 
34 #include "avfilter.h"
35 #include "filters.h"
36 #include "video.h"
37 
/* Sobel kernel for the x direction, flattened row-major from a 3x3 matrix. */
static const int X_FILTER[9] = {
    +1,  0, -1,   /* top row    */
    +2,  0, -2,   /* middle row */
    +1,  0, -1    /* bottom row */
};
43 
/* Sobel kernel for the y direction, flattened row-major from a 3x3 matrix. */
static const int Y_FILTER[9] = {
    +1, +2, +1,   /* top row    */
     0,  0,  0,   /* middle row */
    -1, -2, -1    /* bottom row */
};
49 
50 typedef struct SiTiContext {
51  const AVClass *class;
53  int width, height;
54  uint64_t nb_frames;
55  uint8_t *prev_frame;
56  float max_si;
57  float max_ti;
58  float min_si;
59  float min_ti;
60  float sum_si;
61  float sum_ti;
63  float *motion_matrix;
66 } SiTiContext;
67 
68 static const enum AVPixelFormat pix_fmts[] = {
73 };
74 
76 {
77  // User options but no input data
78  SiTiContext *s = ctx->priv;
79  s->max_si = 0;
80  s->max_ti = 0;
81  return 0;
82 }
83 
85 {
86  SiTiContext *s = ctx->priv;
87 
88  if (s->print_summary) {
89  float avg_si = s->sum_si / s->nb_frames;
90  float avg_ti = s->sum_ti / s->nb_frames;
92  "SITI Summary:\nTotal frames: %"PRId64"\n\n"
93  "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
94  "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
95  s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
96  );
97  }
98 
99  av_freep(&s->prev_frame);
100  av_freep(&s->gradient_matrix);
101  av_freep(&s->motion_matrix);
102 }
103 
105 {
106  // Video input data avilable
107  AVFilterContext *ctx = inlink->dst;
108  SiTiContext *s = ctx->priv;
109  int max_pixsteps[4];
110  size_t pixel_sz;
111  size_t data_sz;
112  size_t gradient_sz;
113  size_t motion_sz;
114 
116  av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
117 
118  // free previous buffers in case they are allocated already
119  av_freep(&s->prev_frame);
120  av_freep(&s->gradient_matrix);
121  av_freep(&s->motion_matrix);
122 
123  s->pixel_depth = max_pixsteps[0];
124  s->width = inlink->w;
125  s->height = inlink->h;
126  pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
127  data_sz = s->width * pixel_sz * s->height;
128 
129  s->prev_frame = av_malloc(data_sz);
130 
131  gradient_sz = (s->width - 2) * sizeof(float) * (s->height - 2);
132  s->gradient_matrix = av_malloc(gradient_sz);
133 
134  motion_sz = s->width * sizeof(float) * s->height;
135  s->motion_matrix = av_malloc(motion_sz);
136 
137  if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
138  return AVERROR(ENOMEM);
139  }
140 
141  return 0;
142 }
143 
144 // Determine whether the video is in full or limited range. If not defined, assume limited.
146 {
147  // If color range not specified, fallback to pixel format
148  if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
149  return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
150  return frame->color_range == AVCOL_RANGE_JPEG;
151 }
152 
/**
 * Map a limited-range luma sample onto the full range.
 *
 * The conversion is unconditional; callers decide whether it is needed.
 *
 * @param factor scale of the 8-bit limits (1 for 8 bits, 4 for 10 bits)
 * @param y      limited-range sample
 * @return the sample rescaled to [0, 256 * factor - 1]
 */
static uint16_t convert_full_range(int factor, uint16_t y)
{
    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
    const int black      = 16 * factor;
    const int span       = 235 * factor - black;
    const int full_scale = 256 * factor - 1;
    int clipped          = y - black;

    // Clamp to [0, span]: lower bound first, then upper bound
    if (clipped < 0)
        clipped = 0;
    if (clipped > span)
        clipped = span;

    return full_scale * clipped / span;
}
168 
169 // Applies sobel convolution
170 static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
171 {
172  double x_conv_sum;
173  double y_conv_sum;
174  float gradient;
175  int ki;
176  int kj;
177  int index;
178  uint16_t data;
179  int filter_width = 3;
180  int filter_size = filter_width * filter_width;
181  int stride = linesize / s->pixel_depth;
182  // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
183  int factor = s->pixel_depth == 1 ? 1 : 4;
184 
185  // Dst matrix is smaller than src since we ignore edges that can't be convolved
186  #define CONVOLVE(bps) \
187  { \
188  uint##bps##_t *vsrc = (uint##bps##_t*)src; \
189  for (int j = 1; j < s->height - 1; j++) { \
190  for (int i = 1; i < s->width - 1; i++) { \
191  x_conv_sum = 0.0; \
192  y_conv_sum = 0.0; \
193  for (int k = 0; k < filter_size; k++) { \
194  ki = k % filter_width - 1; \
195  kj = floor(k / filter_width) - 1; \
196  index = (j + kj) * stride + (i + ki); \
197  data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
198  x_conv_sum += data * X_FILTER[k]; \
199  y_conv_sum += data * Y_FILTER[k]; \
200  } \
201  gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
202  dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
203  } \
204  } \
205  }
206 
207  if (s->pixel_depth == 2) {
208  CONVOLVE(16);
209  } else {
210  CONVOLVE(8);
211  }
212 }
213 
214 // Calculate pixel difference between current and previous frame, and update previous
215 static void calculate_motion(SiTiContext *s, const uint8_t *curr,
216  float *motion_matrix, int linesize)
217 {
218  int stride = linesize / s->pixel_depth;
219  float motion;
220  int curr_index;
221  int prev_index;
222  uint16_t curr_data;
223  // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
224  int factor = s->pixel_depth == 1 ? 1 : 4;
225 
226  // Previous frame is already converted to full range
227  #define CALCULATE(bps) \
228  { \
229  uint##bps##_t *vsrc = (uint##bps##_t*)curr; \
230  uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame; \
231  for (int j = 0; j < s->height; j++) { \
232  for (int i = 0; i < s->width; i++) { \
233  motion = 0; \
234  curr_index = j * stride + i; \
235  prev_index = j * s->width + i; \
236  curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \
237  if (s->nb_frames > 1) \
238  motion = curr_data - vdst[prev_index]; \
239  vdst[prev_index] = curr_data; \
240  motion_matrix[j * s->width + i] = motion; \
241  } \
242  } \
243  }
244 
245  if (s->pixel_depth == 2) {
246  CALCULATE(16);
247  } else {
248  CALCULATE(8);
249  }
250 }
251 
252 static float std_deviation(float *img_metrics, int width, int height)
253 {
254  int size = height * width;
255  double mean = 0.0;
256  double sqr_diff = 0;
257 
258  for (int j = 0; j < height; j++)
259  for (int i = 0; i < width; i++)
260  mean += img_metrics[j * width + i];
261 
262  mean /= size;
263 
264  for (int j = 0; j < height; j++) {
265  for (int i = 0; i < width; i++) {
266  float mean_diff = img_metrics[j * width + i] - mean;
267  sqr_diff += (mean_diff * mean_diff);
268  }
269  }
270  sqr_diff = sqr_diff / size;
271  return sqrt(sqr_diff);
272 }
273 
274 static void set_meta(AVDictionary **metadata, const char *key, float d)
275 {
276  char value[128];
277  snprintf(value, sizeof(value), "%0.2f", d);
278  av_dict_set(metadata, key, value, 0);
279 }
280 
282 {
283  AVFilterContext *ctx = inlink->dst;
284  SiTiContext *s = ctx->priv;
285  float si;
286  float ti;
287 
288  s->full_range = is_full_range(frame);
289  s->nb_frames++;
290 
291  // Calculate si and ti
292  convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
293  calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
294  si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
295  ti = std_deviation(s->motion_matrix, s->width, s->height);
296 
297  // Calculate statistics
298  s->max_si = fmaxf(si, s->max_si);
299  s->max_ti = fmaxf(ti, s->max_ti);
300  s->sum_si += si;
301  s->sum_ti += ti;
302  s->min_si = s->nb_frames == 1 ? si : fminf(si, s->min_si);
303  s->min_ti = s->nb_frames == 1 ? ti : fminf(ti, s->min_ti);
304 
305  // Set si ti information in frame metadata
306  set_meta(&frame->metadata, "lavfi.siti.si", si);
307  set_meta(&frame->metadata, "lavfi.siti.ti", ti);
308 
309  return ff_filter_frame(inlink->dst->outputs[0], frame);
310 }
311 
312 #define OFFSET(x) offsetof(SiTiContext, x)
313 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
314 
315 static const AVOption siti_options[] = {
316  { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
317  { NULL }
318 };
319 
321 
323  {
324  .name = "default",
325  .type = AVMEDIA_TYPE_VIDEO,
326  .config_props = config_input,
327  .filter_frame = filter_frame,
328  },
329 };
330 
332  .name = "siti",
333  .description = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
334  .priv_size = sizeof(SiTiContext),
335  .priv_class = &siti_class,
336  .init = init,
337  .uninit = uninit,
342 };
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
avfilter_vf_siti_inputs
static const AVFilterPad avfilter_vf_siti_inputs[]
Definition: vf_siti.c:322
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
SiTiContext::full_range
int full_range
Definition: vf_siti.c:64
FILTER_PIXFMTS_ARRAY
#define FILTER_PIXFMTS_ARRAY(array)
Definition: filters.h:242
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1061
SiTiContext::nb_frames
uint64_t nb_frames
Definition: vf_siti.c:54
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3170
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
calculate_motion
static void calculate_motion(SiTiContext *s, const uint8_t *curr, float *motion_matrix, int linesize)
Definition: vf_siti.c:215
std_deviation
static float std_deviation(float *img_metrics, int width, int height)
Definition: vf_siti.c:252
X_FILTER
static const int X_FILTER[9]
Definition: vf_siti.c:38
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:262
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:389
AVCOL_RANGE_JPEG
@ AVCOL_RANGE_JPEG
Full range content.
Definition: pixfmt.h:717
AVOption
AVOption.
Definition: opt.h:429
data
const char data[16]
Definition: mxf.c:149
AV_PIX_FMT_YUV420P10
#define AV_PIX_FMT_YUV420P10
Definition: pixfmt.h:502
convert_full_range
static uint16_t convert_full_range(int factor, uint16_t y)
Definition: vf_siti.c:154
AVDictionary
Definition: dict.c:34
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:205
video.h
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
CONVOLVE
#define CONVOLVE(bps)
AVCOL_RANGE_NB
@ AVCOL_RANGE_NB
Not part of ABI.
Definition: pixfmt.h:718
CALCULATE
#define CALCULATE(bps)
SiTiContext::prev_frame
uint8_t * prev_frame
Definition: vf_siti.c:55
config_input
static int config_input(AVFilterLink *inlink)
Definition: vf_siti.c:104
SiTiContext::height
int height
Definition: vf_siti.c:53
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:38
is_full_range
static int is_full_range(AVFrame *frame)
Definition: vf_siti.c:145
av_cold
#define av_cold
Definition: attributes.h:90
ff_video_default_filterpad
const AVFilterPad ff_video_default_filterpad[1]
An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_VIDEO.
Definition: video.c:37
SiTiContext::min_si
float min_si
Definition: vf_siti.c:58
AV_PIX_FMT_YUVJ422P
@ AV_PIX_FMT_YUVJ422P
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:86
float
float
Definition: af_crystalizer.c:122
SiTiContext
Definition: vf_siti.c:50
s
#define s(width, name)
Definition: cbs_vp9.c:198
pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: vf_siti.c:68
SiTiContext::sum_si
float sum_si
Definition: vf_siti.c:60
fminf
float fminf(float, float)
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_siti.c:84
filters.h
ctx
AVFormatContext * ctx
Definition: movenc.c:49
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
key
const char * key
Definition: hwcontext_opencl.c:189
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:263
Y_FILTER
static const int Y_FILTER[9]
Definition: vf_siti.c:44
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:75
NULL
#define NULL
Definition: coverity.c:32
AV_PIX_FMT_YUVJ420P
@ AV_PIX_FMT_YUVJ420P
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:85
SiTiContext::max_ti
float max_ti
Definition: vf_siti.c:57
SiTiContext::width
int width
Definition: vf_siti.c:53
AV_PIX_FMT_YUV422P10
#define AV_PIX_FMT_YUV422P10
Definition: pixfmt.h:503
OFFSET
#define OFFSET(x)
Definition: vf_siti.c:312
AVCOL_RANGE_UNSPECIFIED
@ AVCOL_RANGE_UNSPECIFIED
Definition: pixfmt.h:683
index
int index
Definition: gxfenc.c:90
set_meta
static void set_meta(AVDictionary **metadata, const char *key, float d)
Definition: vf_siti.c:274
SiTiContext::pixel_depth
int pixel_depth
Definition: vf_siti.c:52
ff_vf_siti
const AVFilter ff_vf_siti
Definition: vf_siti.c:331
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
height
#define height
Definition: dsp.h:85
shift
static int shift(int a, int b)
Definition: bonk.c:261
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
filter_frame
static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
Definition: vf_siti.c:281
fmaxf
float fmaxf(float, float)
size
int size
Definition: twinvq_data.h:10344
SiTiContext::min_ti
float min_ti
Definition: vf_siti.c:59
init
static av_cold int init(AVFilterContext *ctx)
Definition: vf_siti.c:75
AV_LOG_INFO
#define AV_LOG_INFO
Standard information.
Definition: log.h:220
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
SiTiContext::sum_ti
float sum_ti
Definition: vf_siti.c:61
internal.h
value
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default value
Definition: writing_filters.txt:86
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:44
stride
#define stride
Definition: h264pred_template.c:537
AVFilter
Filter definition.
Definition: avfilter.h:201
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(siti)
SiTiContext::motion_matrix
float * motion_matrix
Definition: vf_siti.c:63
SiTiContext::print_summary
int print_summary
Definition: vf_siti.c:65
SiTiContext::gradient_matrix
float * gradient_matrix
Definition: vf_siti.c:62
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
avfilter.h
AVFILTER_FLAG_METADATA_ONLY
#define AVFILTER_FLAG_METADATA_ONLY
The filter is a "metadata" filter - it does not modify the frame data in any way.
Definition: avfilter.h:168
mean
static float mean(const float *input, int size)
Definition: vf_nnedi.c:866
av_image_fill_max_pixsteps
void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4], const AVPixFmtDescriptor *pixdesc)
Compute the max pixel step for each plane of an image with a format described by pixdesc.
Definition: imgutils.c:35
AVFilterContext
An instance of a filter.
Definition: avfilter.h:457
factor
static const int factor[16]
Definition: vf_pp7.c:80
desc
const char * desc
Definition: libsvtav1.c:79
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
mem.h
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Underlying C type is int.
Definition: opt.h:327
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
av_dict_set
int av_dict_set(AVDictionary **pm, const char *key, const char *value, int flags)
Set the given entry in *pm, overwriting an existing entry.
Definition: dict.c:88
SiTiContext::max_si
float max_si
Definition: vf_siti.c:56
FLAGS
#define FLAGS
Definition: vf_siti.c:313
imgutils.h
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:482
siti_options
static const AVOption siti_options[]
Definition: vf_siti.c:315
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
width
#define width
Definition: dsp.h:85
snprintf
#define snprintf
Definition: snprintf.h:34
src
#define src
Definition: vp8dsp.c:248
convolve_sobel
static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
Definition: vf_siti.c:170