FFmpeg
vf_fspp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3  * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4  * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  */
22 
23 /**
24  * @file
25  * Fast Simple Post-processing filter
26  * This implementation is based on an algorithm described in
27  * "Aria Nosratinia Embedded Post-Processing for
28  * Enhancement of Compressed Images (1999)"
29  * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30  * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31  * them can be performed once per block, not per pixel. This allows for much
32  * higher speed.
33  *
34  * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35  * project, and ported by Arwa Arif for FFmpeg.
36  */
37 
38 #include "libavutil/emms.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/mem.h"
41 #include "libavutil/mem_internal.h"
42 #include "libavutil/opt.h"
43 #include "libavutil/pixdesc.h"
44 
45 #include "filters.h"
46 #include "qp_table.h"
47 #include "vf_fspp.h"
48 #include "video.h"
49 
/* Byte offset of field x inside FSPPContext; binds an option to its storage. */
50 #define OFFSET(x) offsetof(FSPPContext, x)
/* Flag combination shared by every entry of the option table below. */
51 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
/*
 * Options of the fspp filter.
 * Each entry gives, in order: option name, help text, the FSPPContext field
 * that receives the value, the value type, the default, the minimum, the
 * maximum and the option flags. The table ends with a { NULL } entry.
 */
52 static const AVOption fspp_options[] = {
 // stored in log2_count; filter() derives its row step as 6 - log2_count
53  { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
 // 0 (the default) leaves the quantizer unforced; filter_frame() then uses the frame's QP table
54  { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
 // added to a base of 16 in filter_frame() to form the threshold scaling bias
55  { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
 // when 0, filter_frame() keeps using the QP table saved from the last non-B frame
56  { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
57  { NULL }
58 };
59 
61 
/*
 * 8x8 dither table with values in the range 0..63.
 * store_slice_c() and store_slice2_c() select one row per output line
 * (dither[y]) and add the entry, shifted right by log2_scale, to each
 * accumulated sample before the final down-shift to 8 bits.
 */
62 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
63  { 0, 48, 12, 60, 3, 51, 15, 63, },
64  { 32, 16, 44, 28, 35, 19, 47, 31, },
65  { 8, 56, 4, 52, 11, 59, 7, 55, },
66  { 40, 24, 36, 20, 43, 27, 39, 23, },
67  { 2, 50, 14, 62, 1, 49, 13, 61, },
68  { 34, 18, 46, 30, 33, 17, 45, 29, },
69  { 10, 58, 6, 54, 9, 57, 5, 53, },
70  { 42, 26, 38, 22, 41, 25, 37, 21, },
71 };
72 
/*
 * Base thresholds, 64 entries laid out as 8 rows of 8.
 * filter_frame() scales every entry by (bias / 71.0), rounds it, and packs
 * the results into FSPPContext::threshold_mtx_noq.
 */
73 static const short custom_threshold[64] = {
74 // The values (largest is 296) must not be too high:
75 // that makes the result depend too strongly on the quantizer,
76 // or may overflow (to be checked), which shows up as flashing.
77  71, 296, 295, 237, 71, 40, 38, 19,
78  245, 193, 185, 121, 102, 73, 53, 27,
79  158, 129, 141, 107, 97, 73, 50, 26,
80  102, 116, 109, 98, 82, 66, 45, 23,
81  71, 94, 95, 81, 70, 56, 38, 20,
82  56, 77, 74, 66, 56, 44, 30, 15,
83  38, 53, 50, 45, 38, 30, 21, 11,
84  20, 27, 26, 23, 20, 15, 11, 5
85 };
86 
87 //This func reads from 1 slice, 1 and clears 0 & 1
88 static void store_slice_c(uint8_t *dst, int16_t *src,
89  ptrdiff_t dst_stride, ptrdiff_t src_stride,
90  ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
91 {
92  int y, x;
93 #define STORE(pos) \
94  temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
95  src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
96  if (temp & 0x100) temp = ~(temp >> 31); \
97  dst[x + pos] = temp;
98 
99  for (y = 0; y < height; y++) {
100  const uint8_t *d = dither[y];
101  for (x = 0; x < width; x += 8) {
102  int temp;
103  STORE(0);
104  STORE(1);
105  STORE(2);
106  STORE(3);
107  STORE(4);
108  STORE(5);
109  STORE(6);
110  STORE(7);
111  }
112  src += src_stride;
113  dst += dst_stride;
114  }
115 }
116 
117 //This func reads from 2 slices, 0 & 2 and clears 2-nd
118 static void store_slice2_c(uint8_t *dst, int16_t *src,
119  ptrdiff_t dst_stride, ptrdiff_t src_stride,
120  ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
121 {
122  int y, x;
123 #define STORE2(pos) \
124  temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
125  src[x + pos + 16 * src_stride] = 0; \
126  if (temp & 0x100) temp = ~(temp >> 31); \
127  dst[x + pos] = temp;
128 
129  for (y = 0; y < height; y++) {
130  const uint8_t *d = dither[y];
131  for (x = 0; x < width; x += 8) {
132  int temp;
133  STORE2(0);
134  STORE2(1);
135  STORE2(2);
136  STORE2(3);
137  STORE2(4);
138  STORE2(5);
139  STORE2(6);
140  STORE2(7);
141  }
142  src += src_stride;
143  dst += dst_stride;
144  }
145 }
146 
/*
 * Scale the 64-entry threshold table by the quantizer q.
 *
 * thr_adr_noq  unscaled thresholds (64 entries, read only)
 * thr_adr      receives q * thr_adr_noq[i] for each of the 64 entries,
 *              converted back to int16_t
 * q            quantizer to multiply with
 */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    const int16_t *in = thr_adr_noq;
    const int16_t *const in_end = thr_adr_noq + 64;
    int16_t *out = thr_adr;

    while (in < in_end)
        *out++ = q * *in++;
}
153 
/*
 * Filter one 8-bit plane.
 *
 * The plane is copied into the scratch buffer p->src with 8 pixels of
 * mirrored padding on each side, and rows 8..23 of the accumulator p->temp
 * are cleared. The main loop then walks the padded image in row steps of
 * `step`; for every row position it runs the row forward-DCT, the combined
 * column DCT/threshold/IDCT and the row inverse-DCT callbacks across the
 * width, accumulating into p->temp. Each time 8 output rows are complete
 * they are written to dst through the store_slice / store_slice2 callbacks.
 *
 * p           filter context (scratch buffers, callbacks, options)
 * dst, src    destination and source plane; the function returns without
 *             filtering if either is NULL
 * dst_stride  line size of dst
 * src_stride  line size of src
 * width       plane width in pixels
 * height      plane height in pixels
 * qp_store    quantizer table, read only when p->qp is 0
 * qp_stride   row pitch of qp_store
 * is_luma     non-zero for the luma plane: selects p->temp_stride as scratch
 *             pitch and disables the chroma shift in the QP lookup
 */
154 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
155  int dst_stride, int src_stride,
156  int width, int height,
157  uint8_t *qp_store, int qp_stride, int is_luma)
158 {
159  int x, x0, y, es, qy, t;
160 
 // pitch of the scratch buffers: preallocated value for luma, width + 16 for chroma
161  const int stride = is_luma ? p->temp_stride : (width + 16);
 // row increment of the main loop
162  const int step = 6 - p->log2_count;
 // right-shifts turning pixel coordinates into qp_store indices:
 // 4 for luma, reduced by the chroma subsampling shifts otherwise
163  const int qpsh = 4 - p->hsub * !is_luma;
164  const int qpsv = 4 - p->vsub * !is_luma;
165 
 // one aligned buffer split into two halves: `block` receives row_fdct output,
 // `block3` receives column_fidct output
166  DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
167  int16_t *block = (int16_t *)block_align;
168  int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
169 
 // zero the first 4 * 8 * BLOCKSZ bytes of block3 (the size argument is a byte count)
170  memset(block3, 0, 4 * 8 * BLOCKSZ);
171 
172  if (!src || !dst) return;
173 
 // copy every source row to offset (8, 8) of p->src and mirror 8 pixels
 // outwards across its left and right edges
174  for (y = 0; y < height; y++) {
175  int index = 8 + 8 * stride + y * stride;
176  memcpy(p->src + index, src + y * src_stride, width);
177  for (x = 0; x < 8; x++) {
178  p->src[index - x - 1] = p->src[index + x ];
179  p->src[index + width + x ] = p->src[index + width - x - 1];
180  }
181  }
182 
 // mirror 8 full rows above the top edge and below the bottom edge
183  for (y = 0; y < 8; y++) {
184  memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
185  memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
186  }
187  //FIXME (try edge emu)
188 
 // clear accumulator rows 8..23 (width entries each, starting at column 8)
189  for (y = 8; y < 24; y++)
190  memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
191 
192  for (y = step; y < height + 8; y += step) { //step= 1,2
193  const int y1 = y - 8 + step; //l5-7 l4-6;
 // QP table row for this position: (y - 4) clamped to [0, height - 1],
 // shifted by qpsv and scaled by the table pitch
194  qy = y - 4;
195 
196  if (qy > height - 1) qy = height - 1;
197  if (qy < 0) qy = 0;
198 
199  qy = (qy >> qpsv) * qp_stride;
 // fill the start of `block`; the calls below continue writing at block + 8 * 8
200  p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
201 
 // walk the row in chunks of 8 * (BLOCKSZ - 1) columns
202  for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
203  p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
204 
 // forced QP: threshold_mtx is already set up, process the whole chunk in one call
205  if (p->qp)
206  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
207  else
 // per-block QP: look the quantizer up every 8 columns and rebuild
 // threshold_mtx only when the normalized value changes
208  for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
209  t = x + x0 - 2; //the exact value is t=x+x0-2-(y&1), but the result is the same
210 
211  if (t < 0) t = 0; //t always < width-2
212 
213  t = qp_store[qy + (t >> qpsh)];
214  t = ff_norm_qscale(t, p->qscale_type);
215 
216  if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
217  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
218  }
 // inverse row transform, accumulated into row (y & 15) of p->temp
219  p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
 // carry the tail of both buffers over to their start for the next chunk
220  memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
221  memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
222  }
223 
 // remaining columns that did not fill a whole chunk
224  es = width + 8 - x0; // 8, ...
225  if (es > 8)
226  p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
227 
 // the tail uses whatever threshold_mtx currently holds (no QP lookup here)
228  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
229  if (es > 3)
230  p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
231 
 // y1 is a non-zero multiple of 8: 8 rows are finished and written out;
 // bit 3 of y1 selects which of the two store callbacks is used
232  if (!(y1 & 7) && y1) {
233  if (y1 & 8)
234  p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
235  dst_stride, stride, width, 8, 5 - p->log2_count);
236  else
237  p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
238  dst_stride, stride, width, 8, 5 - p->log2_count);
239  }
240  }
241 
 // write the last, partial slice of (y & 7) rows
242  if (y & 7) { // height % 8 != 0
243  if (y & 8)
244  p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
245  dst_stride, stride, width, y&7, 5 - p->log2_count);
246  else
247  p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
248  dst_stride, stride, width, y&7, 5 - p->log2_count);
249  }
250 }
251 
/*
 * Column pass of the filter: forward DCT, coefficient thresholding and
 * inverse DCT along columns, fused into one loop.
 *
 * thr_adr  threshold table; threshold[k * 8] is the threshold for
 *          coefficient d<k>, and the pointer advances by one per column
 * data     input values (filter() passes the buffer written by row_fdct)
 * output   accumulator (filter() later hands it to row_idct); rows 0..5 are
 *          added to, rows 6 and 7 are overwritten
 * cnt      loop count, decremented by 2 per outer iteration; every outer
 *          iteration handles DCTSIZE columns and then skips 8 more entries
 *          in both data and output
 *
 * NOTE(review): THRESHOLD(), MULTIPLY16H() and the FIX_* constants come from
 * headers not shown here; THRESHOLD(r, x, t) presumably sets r to x or to 0
 * depending on how x compares with t - confirm against vf_fspp.h.
 */
252 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
253 {
254  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
255  int_simd16_t tmp10, tmp11, tmp12, tmp13;
256  int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
257  int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
258 
259  int16_t *dataptr;
260  int16_t *wsptr;
261  int16_t *threshold;
262  int ctr;
263 
264  dataptr = data;
265  wsptr = output;
266 
267  for (; cnt > 0; cnt -= 2) { //start positions
 // restart at the beginning of the threshold table for every start position
268  threshold = (int16_t *)thr_adr;//threshold_mtx
269  for (ctr = DCTSIZE; ctr > 0; ctr--) {
270  // Process columns from input, add to output.
 // butterfly of the 8 samples of this column: sums in tmp0..3, differences in tmp4..7
271  tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
272  tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
273 
274  tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
275  tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
276 
277  tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
278  tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
279 
280  tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
281  tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
282 
283  // Even part of FDCT
284 
285  tmp10 = tmp0 + tmp3;
286  tmp13 = tmp0 - tmp3;
287  tmp11 = tmp1 + tmp2;
288  tmp12 = tmp1 - tmp2;
289 
290  d0 = tmp10 + tmp11;
291  d4 = tmp10 - tmp11;
292 
293  z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
294  d2 = tmp13 + z1;
295  d6 = tmp13 - z1;
296 
297  // Even part of IDCT
298 
 // the even coefficients d0, d2, d4, d6 are thresholded into tmp0..tmp3
299  THRESHOLD(tmp0, d0, threshold[0 * 8]);
300  THRESHOLD(tmp1, d2, threshold[2 * 8]);
301  THRESHOLD(tmp2, d4, threshold[4 * 8]);
302  THRESHOLD(tmp3, d6, threshold[6 * 8]);
 // rounding offset for the >> 2 below
303  tmp0 += 2;
304  tmp10 = (tmp0 + tmp2) >> 2;
305  tmp11 = (tmp0 - tmp2) >> 2;
306 
307  tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
308  tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
309 
310  tmp0 = tmp10 + tmp13; //->temps
311  tmp3 = tmp10 - tmp13; //->temps
312  tmp1 = tmp11 + tmp12; //->temps
313  tmp2 = tmp11 - tmp12; //->temps
314 
315  // Odd part of FDCT
316 
317  tmp10 = tmp4 + tmp5;
318  tmp11 = tmp5 + tmp6;
319  tmp12 = tmp6 + tmp7;
320 
321  z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
322  z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
323  z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
324  z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
325 
326  z11 = tmp7 + z3;
327  z13 = tmp7 - z3;
328 
329  d5 = z13 + z2;
330  d3 = z13 - z2;
331  d1 = z11 + z4;
332  d7 = z11 - z4;
333 
334  // Odd part of IDCT
335 
 // the odd coefficients d1, d3, d5, d7 are thresholded into tmp4..tmp7
336  THRESHOLD(tmp4, d1, threshold[1 * 8]);
337  THRESHOLD(tmp5, d3, threshold[3 * 8]);
338  THRESHOLD(tmp6, d5, threshold[5 * 8]);
339  THRESHOLD(tmp7, d7, threshold[7 * 8]);
340 
341  //The SIMD version takes a shortcut here when tmp5, tmp6 and tmp7 are all 0
342  z13 = tmp6 + tmp5;
343  z10 = (tmp6 - tmp5) << 1;
344  z11 = tmp4 + tmp7;
345  z12 = (tmp4 - tmp7) << 1;
346 
347  tmp7 = (z11 + z13) >> 2; //+2 !
348  tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
349  z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
350  tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
351  tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
352 
353  tmp6 = tmp12 - tmp7;
354  tmp5 = tmp11 - tmp6;
355  tmp4 = tmp10 + tmp5;
356 
 // rows 0..5 accumulate onto what is already in the output buffer;
 // rows 6 and 7 are plain stores
357  wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
358  wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
359  wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
360  wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
361  wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
362  wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
363  wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
364  wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
365  //
366  dataptr++; //next column
367  wsptr++;
368  threshold++;
369  }
370  dataptr += 8; //skip each second start pos
371  wsptr += 8;
372  }
373 }
374 
/*
 * Row pass of the inverse DCT, accumulated into the output buffer.
 *
 * workspace      input coefficients, DCTSIZE entries per iteration, in the
 *                order written by row_fdct_c()/column_fidct_c()
 *                (entries 0..3 hold the even part, 4..7 the odd part)
 * output_adr     accumulator; each iteration adds 8 values to one column of
 *                it, then moves one entry to the right
 * output_stride  distance, in int16_t entries, between vertically adjacent
 *                output values
 * cnt            iteration count divided by 4 (the loop runs 4 * cnt times)
 *
 * NOTE(review): MULTIPLY16H(), DESCALE() and the FIX_* constants are defined
 * in headers not shown here.
 */
375 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
376 {
377  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
378  int_simd16_t tmp10, tmp11, tmp12, tmp13;
379  int_simd16_t z5, z10, z11, z12, z13;
380  int16_t *outptr;
381  int16_t *wsptr;
382 
383  cnt *= 4;
384  wsptr = workspace;
385  outptr = output_adr;
386  for (; cnt > 0; cnt--) {
387  // Even part
388  //The SIMD version reads a 4x4 block and transposes it
389  tmp10 = wsptr[2] + wsptr[3];
390  tmp11 = wsptr[2] - wsptr[3];
391 
392  tmp13 = wsptr[0] + wsptr[1];
393  tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
394 
395  tmp0 = tmp10 + tmp13; //->temps
396  tmp3 = tmp10 - tmp13; //->temps
397  tmp1 = tmp11 + tmp12;
398  tmp2 = tmp11 - tmp12;
399 
400  // Odd part
401  //Also transpose, with previous:
402  // ---- ---- ||||
403  // ---- ---- idct ||||
404  // ---- ---- ---> ||||
405  // ---- ---- ||||
406  z13 = wsptr[4] + wsptr[5];
407  z10 = wsptr[4] - wsptr[5];
408  z11 = wsptr[6] + wsptr[7];
409  z12 = wsptr[6] - wsptr[7];
410 
411  tmp7 = z11 + z13;
412  tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
413 
414  z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
415  tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
416  tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
417 
418  tmp6 = (tmp12 << 3) - tmp7;
419  tmp5 = (tmp11 << 3) - tmp6;
420  tmp4 = (tmp10 << 3) + tmp5;
421 
422  // Final output stage: descale and write column
 // all 8 results are added to the existing accumulator contents
423  outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
424  outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
425  outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
426  outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
427  outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
428  outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
429  outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
430  outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
 // next output column
431  outptr++;
432 
433  wsptr += DCTSIZE; // advance pointer to next row
434  }
435 }
436 
/*
 * Forward DCT pass over 8-bit pixels.
 *
 * Every iteration reads 8 pixels spaced line_size apart (pixels[line_size * 0]
 * .. pixels[line_size * 7]), transforms them and writes DCTSIZE coefficients
 * to data; it then moves one pixel to the right.
 *
 * data       output coefficients, DCTSIZE entries per iteration; the even
 *            part is stored in entries 2, 3, 0, 1 and the odd part in 4..7
 * pixels     first input pixel
 * line_size  distance in bytes between the 8 pixels of one transform
 * cnt        iteration count divided by 4 (the loop runs 4 * cnt times)
 *
 * NOTE(review): MULTIPLY16H() and the FIX_* constants are defined in headers
 * not shown here.
 */
437 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
438 {
439  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
440  int_simd16_t tmp10, tmp11, tmp12, tmp13;
441  int_simd16_t z1, z2, z3, z4, z5, z11, z13;
442  int16_t *dataptr;
443 
444  cnt *= 4;
445  // Pass 1: process rows.
446 
447  dataptr = data;
448  for (; cnt > 0; cnt--) {
 // butterfly of the 8 input pixels: sums in tmp0..3, differences in tmp4..7
449  tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
450  tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
451  tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
452  tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
453  tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
454  tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
455  tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
456  tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
457 
458  // Even part
459 
460  tmp10 = tmp0 + tmp3;
461  tmp13 = tmp0 - tmp3;
462  tmp11 = tmp1 + tmp2;
463  tmp12 = tmp1 - tmp2;
464  //Even columns are written first. This gives a different column order
465  //in column_fidct(), but columns are processed independently, so this is fine.
466  //Later, row_idct() reads the columns in the same order.
467  dataptr[2] = tmp10 + tmp11;
468  dataptr[3] = tmp10 - tmp11;
469 
470  z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
471  dataptr[0] = tmp13 + z1;
472  dataptr[1] = tmp13 - z1;
473 
474  // Odd part
475 
476  tmp10 = (tmp4 + tmp5) << 2;
477  tmp11 = (tmp5 + tmp6) << 2;
478  tmp12 = (tmp6 + tmp7) << 2;
479 
480  z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
481  z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
482  z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
483  z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
484 
485  z11 = tmp7 + z3;
486  z13 = tmp7 - z3;
487 
488  dataptr[4] = z13 + z2;
489  dataptr[5] = z13 - z2;
490  dataptr[6] = z11 + z4;
491  dataptr[7] = z11 - z4;
492 
493  pixels++; // advance pointer to next column
494  dataptr += DCTSIZE;
495  }
496 }
497 
498 static const enum AVPixelFormat pix_fmts[] = {
506 };
507 
509 {
510  AVFilterContext *ctx = inlink->dst;
511  FSPPContext *fspp = ctx->priv;
512  const int h = FFALIGN(inlink->h + 16, 16);
514 
515  fspp->hsub = desc->log2_chroma_w;
516  fspp->vsub = desc->log2_chroma_h;
517 
518  fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
519  fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
520  fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
521 
522  if (!fspp->temp || !fspp->src)
523  return AVERROR(ENOMEM);
524 
525  fspp->store_slice = store_slice_c;
527  fspp->mul_thrmat = mul_thrmat_c;
529  fspp->row_idct = row_idct_c;
530  fspp->row_fdct = row_fdct_c;
531 
532 #if ARCH_X86
533  ff_fspp_init_x86(fspp);
534 #endif
535 
536  return 0;
537 }
538 
540 {
541  AVFilterContext *ctx = inlink->dst;
542  FSPPContext *fspp = ctx->priv;
543  AVFilterLink *outlink = ctx->outputs[0];
544  AVFrame *out = in;
545 
546  int qp_stride = 0;
547  int8_t *qp_table = NULL;
548  int i, bias;
549  int ret = 0;
550  int custom_threshold_m[64];
551 
552  bias = (1 << 4) + fspp->strength;
553 
554  for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
555  custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
556 
557  for (i = 0; i < 8; i++) {
558  fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
559  |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
560  |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
561  |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
562 
563  fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
564  |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
565  |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
566  |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
567  }
568 
569  if (fspp->qp)
570  fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
571 
572  /* if we are not in a constant user quantizer mode and we don't want to use
573  * the quantizers from the B-frames (B-frames often have a higher QP), we
574  * need to save the qp table from the last non B-frame; this is what the
575  * following code block does */
576  if (!fspp->qp && (fspp->use_bframe_qp || in->pict_type != AV_PICTURE_TYPE_B)) {
577  ret = ff_qp_table_extract(in, &qp_table, &qp_stride, NULL, &fspp->qscale_type);
578  if (ret < 0) {
579  av_frame_free(&in);
580  return ret;
581  }
582 
583  if (!fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
584  av_freep(&fspp->non_b_qp_table);
585  fspp->non_b_qp_table = qp_table;
586  fspp->non_b_qp_stride = qp_stride;
587  }
588  }
589 
590  if (fspp->log2_count && !ctx->is_disabled) {
591  if (!fspp->use_bframe_qp && fspp->non_b_qp_table) {
592  qp_table = fspp->non_b_qp_table;
593  qp_stride = fspp->non_b_qp_stride;
594  }
595 
596  if (qp_table || fspp->qp) {
597  const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
598  const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
599 
600  /* get a new frame if in-place is not possible or if the dimensions
601  * are not multiple of 8 */
602  if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
603  const int aligned_w = FFALIGN(inlink->w, 8);
604  const int aligned_h = FFALIGN(inlink->h, 8);
605 
606  out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
607  if (!out) {
608  av_frame_free(&in);
609  ret = AVERROR(ENOMEM);
610  goto finish;
611  }
613  out->width = in->width;
614  out->height = in->height;
615  }
616 
617  filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
618  inlink->w, inlink->h, qp_table, qp_stride, 1);
619  filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
620  cw, ch, qp_table, qp_stride, 0);
621  filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
622  cw, ch, qp_table, qp_stride, 0);
623  emms_c();
624  }
625  }
626 
627  if (in != out) {
628  if (in->data[3])
629  av_image_copy_plane(out->data[3], out->linesize[3],
630  in ->data[3], in ->linesize[3],
631  inlink->w, inlink->h);
632  av_frame_free(&in);
633  }
634  ret = ff_filter_frame(outlink, out);
635 finish:
636  if (qp_table != fspp->non_b_qp_table)
637  av_freep(&qp_table);
638  return ret;
639 }
640 
642 {
643  FSPPContext *fspp = ctx->priv;
644  av_freep(&fspp->temp);
645  av_freep(&fspp->src);
646  av_freep(&fspp->non_b_qp_table);
647 }
648 
/*
 * Input pads of the filter: a single video pad named "default".
 * config_input is registered as its config_props callback and filter_frame
 * as its filter_frame callback.
 */
649 static const AVFilterPad fspp_inputs[] = {
650  {
651  .name = "default",
652  .type = AVMEDIA_TYPE_VIDEO,
653  .config_props = config_input,
654  .filter_frame = filter_frame,
655  },
656 };
657 
659  .p.name = "fspp",
660  .p.description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
661  .p.priv_class = &fspp_class,
663  .priv_size = sizeof(FSPPContext),
664  .uninit = uninit,
668 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:116
ff_vf_fspp
const FFFilter ff_vf_fspp
Definition: vf_fspp.c:658
MULTIPLY16H
#define MULTIPLY16H(x, k)
Definition: vf_fspp.h:37
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
FIX_0_707106781
#define FIX_0_707106781
Definition: jfdctfst.c:117
FIX_0_541196100
#define FIX_0_541196100
Definition: jfdctfst.c:116
FSPPContext::column_fidct
void(* column_fidct)(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
Definition: vf_fspp.h:83
store_slice2_c
static void store_slice2_c(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.c:118
vf_fspp.h
qp_table.h
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
FSPPContext::hsub
int hsub
Definition: vf_fspp.h:61
STORE
#define STORE(pos)
mem_internal.h
FILTER_PIXFMTS_ARRAY
#define FILTER_PIXFMTS_ARRAY(array)
Definition: filters.h:242
out
FILE * out
Definition: movenc.c:55
FSPPContext::threshold_mtx_noq
uint64_t threshold_mtx_noq[8 *2]
Definition: vf_fspp.h:56
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1062
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3170
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
FLAGS
#define FLAGS
Definition: vf_fspp.c:51
FSPPContext::store_slice
void(* store_slice)(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.h:73
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:163
FSPPContext::vsub
int vsub
Definition: vf_fspp.h:62
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:262
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:403
pixdesc.h
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
AVFrame::width
int width
Definition: frame.h:475
AVOption
AVOption.
Definition: opt.h:429
data
const char data[16]
Definition: mxf.c:149
AV_PIX_FMT_YUV440P
@ AV_PIX_FMT_YUV440P
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:106
FSPPContext::src
uint8_t * src
Definition: vf_fspp.h:67
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:203
ff_norm_qscale
static int ff_norm_qscale(int qscale, enum AVVideoEncParamsType type)
Normalize the qscale factor FIXME Add support for other values of enum AVVideoEncParamsType besides A...
Definition: qp_table.h:39
video.h
FIX_1_082392200
#define FIX_1_082392200
Definition: 4xm.c:160
FSPPContext::row_idct
void(* row_idct)(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
Definition: vf_fspp.h:86
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:424
av_image_copy_plane
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
Definition: imgutils.c:374
FIX_2_613125930
#define FIX_2_613125930
Definition: 4xm.c:163
row_fdct_c
static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
Definition: vf_fspp.c:437
finish
static void finish(void)
Definition: movenc.c:374
BLOCKSZ
#define BLOCKSZ
Definition: vf_fspp.h:29
FIX_0_382683433
#define FIX_0_382683433
Definition: jfdctfst.c:115
fspp_inputs
static const AVFilterPad fspp_inputs[]
Definition: vf_fspp.c:649
custom_threshold
static const short custom_threshold[64]
Definition: vf_fspp.c:73
mul_thrmat_c
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
Definition: vf_fspp.c:147
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:38
FSPPContext::qscale_type
enum AVVideoEncParamsType qscale_type
Definition: vf_fspp.h:65
av_cold
#define av_cold
Definition: attributes.h:90
ff_video_default_filterpad
const AVFilterPad ff_video_default_filterpad[1]
An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_VIDEO.
Definition: video.c:37
FFFilter
Definition: filters.h:265
column_fidct_c
static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
Definition: vf_fspp.c:252
AV_PIX_FMT_YUVJ422P
@ AV_PIX_FMT_YUVJ422P
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:86
emms_c
#define emms_c()
Definition: emms.h:63
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
FSPPContext::row_fdct
void(* row_fdct)(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
Definition: vf_fspp.h:89
DCTSIZE
#define DCTSIZE
Definition: jfdctfst.c:73
FSPPContext::non_b_qp_table
int8_t * non_b_qp_table
Definition: vf_fspp.h:69
filters.h
FSPPContext::non_b_qp_stride
int non_b_qp_stride
Definition: vf_fspp.h:70
ctx
AVFormatContext * ctx
Definition: movenc.c:49
config_input
static int config_input(AVFilterLink *inlink)
Definition: vf_fspp.c:508
store_slice_c
static void store_slice_c(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.c:88
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:263
FSPPContext::log2_count
int log2_count
Definition: vf_fspp.h:59
AV_PIX_FMT_YUVJ444P
@ AV_PIX_FMT_YUVJ444P
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
Definition: pixfmt.h:87
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:726
bias
static int bias(int x, int c)
Definition: vqcdec.c:115
FSPPContext::qp
int qp
Definition: vf_fspp.h:64
AV_PIX_FMT_YUVJ420P
@ AV_PIX_FMT_YUVJ420P
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:85
FIX_1_306562965
#define FIX_1_306562965
Definition: jfdctfst.c:118
STORE2
#define STORE2(pos)
AV_PIX_FMT_GRAY8
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:81
index
int index
Definition: gxfenc.c:90
pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: vf_fspp.c:498
MAX_LEVEL
#define MAX_LEVEL
Definition: rl.h:36
AVFrame::pict_type
enum AVPictureType pict_type
Picture type of the frame.
Definition: frame.h:505
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
height
#define height
Definition: dsp.h:85
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:104
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
FSPPContext::strength
int strength
Definition: vf_fspp.h:60
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_fspp.c:641
av_frame_is_writable
int av_frame_is_writable(AVFrame *frame)
Check if the frame data is writable.
Definition: frame.c:662
OFFSET
#define OFFSET(x)
Definition: vf_fspp.c:50
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
fspp_options
static const AVOption fspp_options[]
Definition: vf_fspp.c:52
row_idct_c
static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
Definition: vf_fspp.c:375
FSPPContext::threshold_mtx
uint64_t threshold_mtx[8 *2]
Definition: vf_fspp.h:57
FIX_1_847759065
#define FIX_1_847759065
Definition: 4xm.c:162
emms.h
FSPPContext::temp_stride
int temp_stride
Definition: vf_fspp.h:63
FSPPContext::use_bframe_qp
int use_bframe_qp
Definition: vf_fspp.h:71
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:31
AV_PIX_FMT_YUVJ440P
@ AV_PIX_FMT_YUVJ440P
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range
Definition: pixfmt.h:107
FIX_1_414213562
#define FIX_1_414213562
Definition: 4xm.c:161
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:44
DESCALE
#define DESCALE(x, n)
Definition: jfdctfst.c:134
stride
#define stride
Definition: h264pred_template.c:537
ret
ret
Definition: filter_design.txt:187
ff_qp_table_extract
int ff_qp_table_extract(AVFrame *frame, int8_t **table, int *table_w, int *table_h, enum AVVideoEncParamsType *qscale_type)
Extract a libpostproc-compatible QP table - an 8-bit QP value per 16x16 macroblock, stored in raster order - from AVVideoEncParams side data.
Definition: qp_table.c:27
AVFrame::height
int height
Definition: frame.h:475
FSPPContext
Definition: vf_fspp.h:54
ff_fspp_init_x86
void ff_fspp_init_x86(FSPPContext *fspp)
Definition: vf_fspp_init.c:37
FIX_1_414213562_A
static const int16_t FIX_1_414213562_A
Definition: vf_fspp.h:48
AV_PICTURE_TYPE_B
@ AV_PICTURE_TYPE_B
Bi-dir predicted.
Definition: avutil.h:281
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:259
filter
static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma)
Definition: vf_fspp.c:154
FSPPContext::mul_thrmat
void(* mul_thrmat)(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
Definition: vf_fspp.h:81
temp
else temp
Definition: vf_mcdeint.c:263
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
AVFilterContext
An instance of a filter.
Definition: avfilter.h:257
FSPPContext::store_slice2
void(* store_slice2)(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.h:77
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
desc
const char * desc
Definition: libsvtav1.c:79
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
FFFilter::p
AVFilter p
The public AVFilter.
Definition: filters.h:269
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
mem.h
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes of an image.
Definition: pixdesc.h:69
FSPPContext::temp
int16_t * temp
Definition: vf_fspp.h:68
filter_frame
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
Definition: vf_fspp.c:539
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Underlying C type is int.
Definition: opt.h:327
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
AV_PIX_FMT_YUV411P
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:80
FSPPContext::prev_q
int prev_q
Definition: vf_fspp.h:66
int32_t
int32_t
Definition: audioconvert.c:56
AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() callback(s) called as usual even when the enable expression is false.
Definition: avfilter.h:188
imgutils.h
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
For video, a positive or negative value, which is typically indicating the size in bytes of each picture line.
Definition: frame.h:448
AV_PIX_FMT_YUV410P
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:79
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2070
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(fspp)
int_simd16_t
int32_t int_simd16_t
Definition: vf_fspp.h:43
width
#define width
Definition: dsp.h:85
src
#define src
Definition: vp8dsp.c:248
THRESHOLD
#define THRESHOLD(r, x, t)
Definition: vf_fspp.h:38
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:62