FFmpeg
vf_siti.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Boris Baracaldo
3  * Copyright (c) 2022 Thilo Borgmann
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Calculate Spatial Info (SI) and Temporal Info (TI) scores
25  */
26 
27 #include <math.h>
28 
29 #include "libavutil/imgutils.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/opt.h"
32 
33 #include "avfilter.h"
34 #include "internal.h"
35 #include "video.h"
36 
/* 3x3 Sobel kernel for the horizontal gradient, stored row by row. */
static const int X_FILTER[9] = {
    +1, 0, -1,
    +2, 0, -2,
    +1, 0, -1,
};
42 
/* 3x3 Sobel kernel for the vertical gradient, stored row by row. */
static const int Y_FILTER[9] = {
    +1, +2, +1,
     0,  0,  0,
    -1, -2, -1,
};
48 
49 typedef struct SiTiContext {
50  const AVClass *class;
52  int width, height;
53  uint64_t nb_frames;
54  uint8_t *prev_frame;
55  float max_si;
56  float max_ti;
57  float min_si;
58  float min_ti;
59  float sum_si;
60  float sum_ti;
62  float *motion_matrix;
65 } SiTiContext;
66 
67 static const enum AVPixelFormat pix_fmts[] = {
72 };
73 
75 {
76  // User options but no input data
77  SiTiContext *s = ctx->priv;
78  s->max_si = 0;
79  s->max_ti = 0;
80  return 0;
81 }
82 
84 {
85  SiTiContext *s = ctx->priv;
86 
87  if (s->print_summary) {
88  float avg_si = s->sum_si / s->nb_frames;
89  float avg_ti = s->sum_ti / s->nb_frames;
91  "SITI Summary:\nTotal frames: %"PRId64"\n\n"
92  "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
93  "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
94  s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
95  );
96  }
97 
98  av_freep(&s->prev_frame);
99  av_freep(&s->gradient_matrix);
100  av_freep(&s->motion_matrix);
101 }
102 
104 {
105  // Video input data avilable
106  AVFilterContext *ctx = inlink->dst;
107  SiTiContext *s = ctx->priv;
108  int max_pixsteps[4];
109  size_t pixel_sz;
110  size_t data_sz;
111  size_t gradient_sz;
112  size_t motion_sz;
113 
115  av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
116 
117  // free previous buffers in case they are allocated already
118  av_freep(&s->prev_frame);
119  av_freep(&s->gradient_matrix);
120  av_freep(&s->motion_matrix);
121 
122  s->pixel_depth = max_pixsteps[0];
123  s->width = inlink->w;
124  s->height = inlink->h;
125  pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
126  data_sz = s->width * pixel_sz * s->height;
127 
128  s->prev_frame = av_malloc(data_sz);
129 
130  gradient_sz = (s->width - 2) * sizeof(float) * (s->height - 2);
131  s->gradient_matrix = av_malloc(gradient_sz);
132 
133  motion_sz = s->width * sizeof(float) * s->height;
134  s->motion_matrix = av_malloc(motion_sz);
135 
136  if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
137  return AVERROR(ENOMEM);
138  }
139 
140  return 0;
141 }
142 
143 // Determine whether the video is in full or limited range. If not defined, assume limited.
145 {
146  // If color range not specified, fallback to pixel format
150 }
151 
/**
 * Map a limited-range luma sample to full range.
 *
 * For 8 bits, limited range goes from 16 to 235; for 10 bits the range is
 * multiplied by 4. Samples outside the limited range are clamped to it
 * before scaling.
 *
 * @param factor range multiplier relative to 8 bits (callers in this file
 *               pass 1 or 4); must be positive
 * @param y      limited-range sample
 * @return the sample rescaled to [0, 256 * factor - 1]
 */
static uint16_t convert_full_range(int factor, uint16_t y)
{
    const int shift       = 16 * factor;
    const int limit_upper = 235 * factor - shift;
    const int full_upper  = 256 * factor - 1;
    int limit_y           = y - shift;

    // Plain integer clamp: exact, and no float round trip through libm
    if (limit_y < 0)
        limit_y = 0;
    else if (limit_y > limit_upper)
        limit_y = limit_upper;

    return full_upper * limit_y / limit_upper;
}
167 
168 // Applies sobel convolution
169 static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
170 {
171  double x_conv_sum;
172  double y_conv_sum;
173  float gradient;
174  int ki;
175  int kj;
176  int index;
177  uint16_t data;
178  int filter_width = 3;
179  int filter_size = filter_width * filter_width;
180  int stride = linesize / s->pixel_depth;
181  // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
182  int factor = s->pixel_depth == 1 ? 1 : 4;
183 
184  // Dst matrix is smaller than src since we ignore edges that can't be convolved
185  #define CONVOLVE(bps) \
186  { \
187  uint##bps##_t *vsrc = (uint##bps##_t*)src; \
188  for (int j = 1; j < s->height - 1; j++) { \
189  for (int i = 1; i < s->width - 1; i++) { \
190  x_conv_sum = 0.0; \
191  y_conv_sum = 0.0; \
192  for (int k = 0; k < filter_size; k++) { \
193  ki = k % filter_width - 1; \
194  kj = floor(k / filter_width) - 1; \
195  index = (j + kj) * stride + (i + ki); \
196  data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
197  x_conv_sum += data * X_FILTER[k]; \
198  y_conv_sum += data * Y_FILTER[k]; \
199  } \
200  gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
201  dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
202  } \
203  } \
204  }
205 
206  if (s->pixel_depth == 2) {
207  CONVOLVE(16);
208  } else {
209  CONVOLVE(8);
210  }
211 }
212 
213 // Calculate pixel difference between current and previous frame, and update previous
214 static void calculate_motion(SiTiContext *s, const uint8_t *curr,
215  float *motion_matrix, int linesize)
216 {
217  int stride = linesize / s->pixel_depth;
218  float motion;
219  int curr_index;
220  int prev_index;
221  uint16_t curr_data;
222  // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
223  int factor = s->pixel_depth == 1 ? 1 : 4;
224 
225  // Previous frame is already converted to full range
226  #define CALCULATE(bps) \
227  { \
228  uint##bps##_t *vsrc = (uint##bps##_t*)curr; \
229  uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame; \
230  for (int j = 0; j < s->height; j++) { \
231  for (int i = 0; i < s->width; i++) { \
232  motion = 0; \
233  curr_index = j * stride + i; \
234  prev_index = j * s->width + i; \
235  curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \
236  if (s->nb_frames > 1) \
237  motion = curr_data - vdst[prev_index]; \
238  vdst[prev_index] = curr_data; \
239  motion_matrix[j * s->width + i] = motion; \
240  } \
241  } \
242  }
243 
244  if (s->pixel_depth == 2) {
245  CALCULATE(16);
246  } else {
247  CALCULATE(8);
248  }
249 }
250 
251 static float std_deviation(float *img_metrics, int width, int height)
252 {
253  int size = height * width;
254  double mean = 0.0;
255  double sqr_diff = 0;
256 
257  for (int j = 0; j < height; j++)
258  for (int i = 0; i < width; i++)
259  mean += img_metrics[j * width + i];
260 
261  mean /= size;
262 
263  for (int j = 0; j < height; j++) {
264  for (int i = 0; i < width; i++) {
265  float mean_diff = img_metrics[j * width + i] - mean;
266  sqr_diff += (mean_diff * mean_diff);
267  }
268  }
269  sqr_diff = sqr_diff / size;
270  return sqrt(sqr_diff);
271 }
272 
273 static void set_meta(AVDictionary **metadata, const char *key, float d)
274 {
275  char value[128];
276  snprintf(value, sizeof(value), "%0.2f", d);
277  av_dict_set(metadata, key, value, 0);
278 }
279 
281 {
282  AVFilterContext *ctx = inlink->dst;
283  SiTiContext *s = ctx->priv;
284  float si;
285  float ti;
286 
287  s->full_range = is_full_range(frame);
288  s->nb_frames++;
289 
290  // Calculate si and ti
291  convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
292  calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
293  si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
294  ti = std_deviation(s->motion_matrix, s->width, s->height);
295 
296  // Calculate statistics
297  s->max_si = fmaxf(si, s->max_si);
298  s->max_ti = fmaxf(ti, s->max_ti);
299  s->sum_si += si;
300  s->sum_ti += ti;
301  s->min_si = s->nb_frames == 1 ? si : fminf(si, s->min_si);
302  s->min_ti = s->nb_frames == 1 ? ti : fminf(ti, s->min_ti);
303 
304  // Set si ti information in frame metadata
305  set_meta(&frame->metadata, "lavfi.siti.si", si);
306  set_meta(&frame->metadata, "lavfi.siti.ti", ti);
307 
308  return ff_filter_frame(inlink->dst->outputs[0], frame);
309 }
310 
311 #define OFFSET(x) offsetof(SiTiContext, x)
312 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
313 
314 static const AVOption siti_options[] = {
315  { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
316  { NULL }
317 };
318 
320 
322  {
323  .name = "default",
324  .type = AVMEDIA_TYPE_VIDEO,
325  .config_props = config_input,
326  .filter_frame = filter_frame,
327  },
328 };
329 
331  .name = "siti",
332  .description = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
333  .priv_size = sizeof(SiTiContext),
334  .priv_class = &siti_class,
335  .init = init,
336  .uninit = uninit,
341 };
AVFrame::color_range
enum AVColorRange color_range
MPEG vs JPEG YUV range.
Definition: frame.h:623
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
avfilter_vf_siti_inputs
static const AVFilterPad avfilter_vf_siti_inputs[]
Definition: vf_siti.c:321
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
SiTiContext::full_range
int full_range
Definition: vf_siti.c:63
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1018
SiTiContext::nb_frames
uint64_t nb_frames
Definition: vf_siti.c:53
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2962
FILTER_PIXFMTS_ARRAY
#define FILTER_PIXFMTS_ARRAY(array)
Definition: internal.h:162
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
calculate_motion
static void calculate_motion(SiTiContext *s, const uint8_t *curr, float *motion_matrix, int linesize)
Definition: vf_siti.c:214
std_deviation
static float std_deviation(float *img_metrics, int width, int height)
Definition: vf_siti.c:251
X_FILTER
static const int X_FILTER[9]
Definition: vf_siti.c:37
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:344
AVCOL_RANGE_JPEG
@ AVCOL_RANGE_JPEG
Full range content.
Definition: pixfmt.h:683
AVOption
AVOption.
Definition: opt.h:346
data
const char data[16]
Definition: mxf.c:148
AV_PIX_FMT_YUV420P10
#define AV_PIX_FMT_YUV420P10
Definition: pixfmt.h:478
convert_full_range
static uint16_t convert_full_range(int factor, uint16_t y)
Definition: vf_siti.c:153
AVDictionary
Definition: dict.c:34
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:170
video.h
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:365
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
CONVOLVE
#define CONVOLVE(bps)
AVCOL_RANGE_NB
@ AVCOL_RANGE_NB
Not part of ABI.
Definition: pixfmt.h:684
CALCULATE
#define CALCULATE(bps)
SiTiContext::prev_frame
uint8_t * prev_frame
Definition: vf_siti.c:54
config_input
static int config_input(AVFilterLink *inlink)
Definition: vf_siti.c:103
SiTiContext::height
int height
Definition: vf_siti.c:52
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:33
is_full_range
static int is_full_range(AVFrame *frame)
Definition: vf_siti.c:144
av_cold
#define av_cold
Definition: attributes.h:90
ff_video_default_filterpad
const AVFilterPad ff_video_default_filterpad[1]
An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_VIDEO.
Definition: video.c:37
SiTiContext::min_si
float min_si
Definition: vf_siti.c:57
AV_PIX_FMT_YUVJ422P
@ AV_PIX_FMT_YUVJ422P
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:86
float
float
Definition: af_crystalizer.c:121
SiTiContext
Definition: vf_siti.c:49
width
#define width
s
#define s(width, name)
Definition: cbs_vp9.c:198
pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: vf_siti.c:67
SiTiContext::sum_si
float sum_si
Definition: vf_siti.c:59
fminf
float fminf(float, float)
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_siti.c:83
ctx
AVFormatContext * ctx
Definition: movenc.c:48
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
key
const char * key
Definition: hwcontext_opencl.c:189
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:182
Y_FILTER
static const int Y_FILTER[9]
Definition: vf_siti.c:43
frame
static AVFrame * frame
Definition: demux_decode.c:54
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
AV_PIX_FMT_YUVJ420P
@ AV_PIX_FMT_YUVJ420P
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:85
SiTiContext::max_ti
float max_ti
Definition: vf_siti.c:56
SiTiContext::width
int width
Definition: vf_siti.c:52
AV_PIX_FMT_YUV422P10
#define AV_PIX_FMT_YUV422P10
Definition: pixfmt.h:479
OFFSET
#define OFFSET(x)
Definition: vf_siti.c:311
AVCOL_RANGE_UNSPECIFIED
@ AVCOL_RANGE_UNSPECIFIED
Definition: pixfmt.h:649
index
int index
Definition: gxfenc.c:89
set_meta
static void set_meta(AVDictionary **metadata, const char *key, float d)
Definition: vf_siti.c:273
SiTiContext::pixel_depth
int pixel_depth
Definition: vf_siti.c:51
ff_vf_siti
const AVFilter ff_vf_siti
Definition: vf_siti.c:330
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:106
shift
static int shift(int a, int b)
Definition: bonk.c:262
filter_frame
static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
Definition: vf_siti.c:280
fmaxf
float fmaxf(float, float)
size
int size
Definition: twinvq_data.h:10344
AVFrame::format
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames,...
Definition: frame.h:431
height
#define height
SiTiContext::min_ti
float min_ti
Definition: vf_siti.c:58
init
static av_cold int init(AVFilterContext *ctx)
Definition: vf_siti.c:74
AV_LOG_INFO
#define AV_LOG_INFO
Standard information.
Definition: log.h:191
internal.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
SiTiContext::sum_ti
float sum_ti
Definition: vf_siti.c:60
internal.h
value
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default value
Definition: writing_filters.txt:86
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:39
stride
#define stride
Definition: h264pred_template.c:537
AVFilter
Filter definition.
Definition: avfilter.h:166
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(siti)
SiTiContext::motion_matrix
float * motion_matrix
Definition: vf_siti.c:62
SiTiContext::print_summary
int print_summary
Definition: vf_siti.c:64
SiTiContext::gradient_matrix
float * gradient_matrix
Definition: vf_siti.c:61
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
avfilter.h
AVFrame::metadata
AVDictionary * metadata
metadata.
Definition: frame.h:662
AVFILTER_FLAG_METADATA_ONLY
#define AVFILTER_FLAG_METADATA_ONLY
The filter is a "metadata" filter - it does not modify the frame data in any way.
Definition: avfilter.h:133
mean
static float mean(const float *input, int size)
Definition: vf_nnedi.c:862
av_image_fill_max_pixsteps
void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4], const AVPixFmtDescriptor *pixdesc)
Compute the max pixel step for each plane of an image with a format described by pixdesc.
Definition: imgutils.c:35
AVFilterContext
An instance of a filter.
Definition: avfilter.h:407
factor
static const int factor[16]
Definition: vf_pp7.c:78
desc
const char * desc
Definition: libsvtav1.c:75
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:251
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:183
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
av_dict_set
int av_dict_set(AVDictionary **pm, const char *key, const char *value, int flags)
Set the given entry in *pm, overwriting an existing entry.
Definition: dict.c:88
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
SiTiContext::max_si
float max_si
Definition: vf_siti.c:55
d
d
Definition: ffmpeg_filter.c:409
FLAGS
#define FLAGS
Definition: vf_siti.c:312
imgutils.h
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:482
siti_options
static const AVOption siti_options[]
Definition: vf_siti.c:314
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
For video, a positive or negative value, which is typically indicating the size in bytes of each pict...
Definition: frame.h:389
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
snprintf
#define snprintf
Definition: snprintf.h:34
convolve_sobel
static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
Definition: vf_siti.c:169