FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
postprocess.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3  *
4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * postprocessing.
26  */
27 
28 /*
29  C MMX MMX2 3DNow AltiVec
30 isVertDC Ec Ec Ec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
34 isHorizDC Ec Ec Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
39 deRing E e e* Ecp
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
42 Vertical X1# a E E
43 Horizontal X1# a E E
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
49 
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
57 */
58 
59 /*
60 TODO:
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once
66  (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
68 split this huge file
69 optimize c versions
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 ...
72 */
73 
74 //Changelog: use git log
75 
76 #include "config.h"
77 #include "libavutil/avutil.h"
78 #include "libavutil/avassert.h"
79 #include <inttypes.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 //#undef HAVE_MMXEXT_INLINE
84 //#define HAVE_AMD3DNOW_INLINE
85 //#undef HAVE_MMX_INLINE
86 //#undef ARCH_X86
87 //#define DEBUG_BRIGHTNESS
88 #include "postprocess.h"
89 #include "postprocess_internal.h"
90 #include "libavutil/avstring.h"
91 
92 #include "libavutil/ffversion.h"
/* Version banner string: the literal "FFmpeg version " followed by FFMPEG_VERSION. */
93 const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
94 
95 unsigned postproc_version(void)
96 {
99 }
100 
101 const char *postproc_configuration(void)
102 {
103  return FFMPEG_CONFIGURATION;
104 }
105 
106 const char *postproc_license(void)
107 {
108 #define LICENSE_PREFIX "libpostproc license: "
109  return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
110 }
111 
112 #if HAVE_ALTIVEC_H
113 #include <altivec.h>
114 #endif
115 
/* Size in bytes of the scratch buffer the mode-string parser works in. */
116 #define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array of unrecognized option tokens (including the NULL terminator). */
117 #define OPTIONS_ARRAY_SIZE 10
/* Edge length, in pixels, of the blocks the filters below iterate over. */
118 #define BLOCK_SIZE 8
/* NOTE(review): not referenced in the visible part of this file; presumably used by the template code. */
119 #define TEMP_STRIDE 8
120 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
121 
/*
 * 64-bit packed constants, only declared for x86 builds with inline asm.
 * wNN: the 16-bit value 0xNN replicated in all four 16-bit lanes.
 * bNN: the byte 0xNN replicated in all eight bytes.
 * NOTE(review): none of these is referenced in the visible part of this file;
 * presumably they are consumed by the inline-asm template code - confirm.
 */
122 #if ARCH_X86 && HAVE_INLINE_ASM
123 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
124 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
125 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
126 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
127 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
128 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
129 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
130 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
131 #endif
132 
/* Threshold constant for the deringing filter (declared unconditionally, unlike the packed constants above).
 * NOTE(review): its consumer is not in the visible part of this file. */
133 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
134 
135 
/*
 * Table of the filters selectable through the mode string.
 *
 * pp_get_mode_by_name_and_quality() compares each filter name given by the
 * user against both shortName and longName of every entry and, on a match,
 * sets or clears the entry's mask in the luma/chroma mode words depending on
 * the requested quality and the chrom/luma options.
 *
 * Columns are positional; presumably
 *   { shortName, longName, chromDefault, minLumQuality, minChromQuality, mask }
 * - confirm against the declaration of struct PPFilter.
 *
 * The list is terminated by an entry whose shortName is NULL.
 */
136 static const struct PPFilter filters[]=
137 {
138  {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
139  {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
140 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
141  {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
142  {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
143  {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
144  {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
145  {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
146  {"dr", "dering", 1, 5, 6, DERING},
147  {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
148  {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
149  {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
150  {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
151  {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
152  {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
153  {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
154  {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
155  {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
156  {"be", "bitexact", 1, 0, 0, BITEXACT},
157  {"vi", "visualize", 1, 0, 0, VISUALIZE},
158  {NULL, NULL,0,0,0,0} //End Marker
159 };
160 
/*
 * Alias table for the mode-string parser, stored as consecutive
 * (alias, expansion) pairs: entry 2*i is the alias name, entry 2*i+1 is the
 * filter list that is spliced into the mode string when the alias is seen.
 * The list ends at the first NULL alias.
 */
161 static const char * const replaceTable[]=
162 {
163  "default", "hb:a,vb:a,dr:a",
164  "de", "hb:a,vb:a,dr:a",
165  "fast", "h1:a,v1:a,dr:a",
166  "fa", "h1:a,v1:a,dr:a",
167  "ac", "ha:a:128:7,va:a,dr:a",
168  NULL //End Marker
169 };
170 
171 
172 #if ARCH_X86 && HAVE_INLINE_ASM
/** Execute the x86 PREFETCHNTA instruction on the address p. */
static inline void prefetchnta(const void *p)
{
    __asm__ volatile(
        "prefetchnta (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
179 
/** Execute the x86 PREFETCHT0 instruction on the address p. */
static inline void prefetcht0(const void *p)
{
    __asm__ volatile(
        "prefetcht0 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
186 
/** Execute the x86 PREFETCHT1 instruction on the address p. */
static inline void prefetcht1(const void *p)
{
    __asm__ volatile(
        "prefetcht1 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
193 
/** Execute the x86 PREFETCHT2 instruction on the address p. */
static inline void prefetcht2(const void *p)
{
    __asm__ volatile(
        "prefetcht2 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
200 #endif
201 
202 /* The horizontal functions exist only in C because the MMX
203  * code is faster with vertical filters and transposing. */
204 
205 /**
206  * Check if the given 8x8 Block is mostly "flat"
207  */
208 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
209 {
210  int numEq= 0;
211  int y;
212  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
213  const int dcThreshold= dcOffset*2 + 1;
214 
215  for(y=0; y<BLOCK_SIZE; y++){
216  numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
217  numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
218  numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
219  numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
220  numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
221  numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
222  numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
223  src+= stride;
224  }
225  return numEq > c->ppMode.flatnessThreshold;
226 }
227 
228 /**
229  * Check if the middle 8x8 Block in the given 8x16 block is flat
230  */
231 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
232 {
233  int numEq= 0;
234  int y;
235  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
236  const int dcThreshold= dcOffset*2 + 1;
237 
238  src+= stride*4; // src points to begin of the 8x8 Block
239  for(y=0; y<BLOCK_SIZE-1; y++){
240  numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
241  numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
242  numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
243  numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
244  numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
245  numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
246  numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
247  numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
248  src+= stride;
249  }
250  return numEq > c->ppMode.flatnessThreshold;
251 }
252 
/**
 * Sparse range check over 8 rows of a block.
 *
 * One pixel pair is sampled per row, cycling through the column pairs
 * (0,5), (2,7), (4,1), (6,3). The check fails as soon as a sampled
 * difference lies outside [-2*QP, 2*QP].
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer the tolerance is derived from
 * @return 1 if every sampled pair is within tolerance, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    static const uint8_t colA[4] = { 0, 2, 4, 6 };
    static const uint8_t colB[4] = { 5, 7, 1, 3 };
    const uint8_t *row = src;
    int y;

    for (y = 0; y < 8; y++, row += stride) {
        const int k = y & 3;

        if ((unsigned)(row[colA[k]] - row[colB[k]] + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
268 
269 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
270 {
271  int x;
272  src+= stride*4;
273  for(x=0; x<BLOCK_SIZE; x+=4){
274  if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
275  if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
276  if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
277  if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
278  }
279  return 1;
280 }
281 
282 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
283 {
284  if( isHorizDC_C(src, stride, c) ){
285  return isHorizMinMaxOk_C(src, stride, c->QP);
286  }else{
287  return 2;
288  }
289 }
290 
291 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
292 {
293  if( isVertDC_C(src, stride, c) ){
294  return isVertMinMaxOk_C(src, stride, c->QP);
295  }else{
296  return 2;
297  }
298 }
299 
300 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
301 {
302  int y;
303  for(y=0; y<BLOCK_SIZE; y++){
304  const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
305 
306  if(FFABS(middleEnergy) < 8*c->QP){
307  const int q=(dst[3] - dst[4])/2;
308  const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
309  const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
310 
311  int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
312  d= FFMAX(d, 0);
313 
314  d= (5*d + 32) >> 6;
315  d*= FFSIGN(-middleEnergy);
316 
317  if(q>0)
318  {
319  d = FFMAX(d, 0);
320  d = FFMIN(d, q);
321  }
322  else
323  {
324  d = FFMIN(d, 0);
325  d = FFMAX(d, q);
326  }
327 
328  dst[3]-= d;
329  dst[4]+= d;
330  }
331  dst+= stride;
332  }
333 }
334 
335 /**
336  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
337  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
338  */
339 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
340 {
341  int y;
342  for(y=0; y<BLOCK_SIZE; y++){
343  const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
344  const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
345 
346  int sums[10];
347  sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
348  sums[1] = sums[0] - first + dst[3];
349  sums[2] = sums[1] - first + dst[4];
350  sums[3] = sums[2] - first + dst[5];
351  sums[4] = sums[3] - first + dst[6];
352  sums[5] = sums[4] - dst[0] + dst[7];
353  sums[6] = sums[5] - dst[1] + last;
354  sums[7] = sums[6] - dst[2] + last;
355  sums[8] = sums[7] - dst[3] + last;
356  sums[9] = sums[8] - dst[4] + last;
357 
358  dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
359  dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
360  dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
361  dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
362  dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
363  dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
364  dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
365  dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
366 
367  dst+= stride;
368  }
369 }
370 
371 /**
372  * Experimental Filter 1 (Horizontal)
373  * will not damage linear gradients
374  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
375  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
376  * MMX2 version does correct clipping C version does not
377  * not identical with the vertical one
378  */
379 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
380 {
381  int y;
382  static uint64_t lut[256];
383  if(!lut[255])
384  {
385  int i;
386  for(i=0; i<256; i++)
387  {
388  int v= i < 128 ? 2*i : 2*(i-256);
389 /*
390 //Simulate 112242211 9-Tap filter
391  uint64_t a= (v/16) & 0xFF;
392  uint64_t b= (v/8) & 0xFF;
393  uint64_t c= (v/4) & 0xFF;
394  uint64_t d= (3*v/8) & 0xFF;
395 */
396 //Simulate piecewise linear interpolation
397  uint64_t a= (v/16) & 0xFF;
398  uint64_t b= (v*3/16) & 0xFF;
399  uint64_t c= (v*5/16) & 0xFF;
400  uint64_t d= (7*v/16) & 0xFF;
401  uint64_t A= (0x100 - a)&0xFF;
402  uint64_t B= (0x100 - b)&0xFF;
403  uint64_t C= (0x100 - c)&0xFF;
404  uint64_t D= (0x100 - c)&0xFF;
405 
406  lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
407  (D<<24) | (C<<16) | (B<<8) | (A);
408  //lut[i] = (v<<32) | (v<<24);
409  }
410  }
411 
412  for(y=0; y<BLOCK_SIZE; y++){
413  int a= src[1] - src[2];
414  int b= src[3] - src[4];
415  int c= src[5] - src[6];
416 
417  int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
418 
419  if(d < QP){
420  int v = d * FFSIGN(-b);
421 
422  src[1] +=v/8;
423  src[2] +=v/4;
424  src[3] +=3*v/8;
425  src[4] -=3*v/8;
426  src[5] -=v/4;
427  src[6] -=v/8;
428  }
429  src+=stride;
430  }
431 }
432 
433 /**
434  * accurate deblock filter
435  */
437  int stride, const PPContext *c, int mode)
438 {
439  int y;
440  const int QP= c->QP;
441  const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
442  const int dcThreshold= dcOffset*2 + 1;
443 //START_TIMER
444  src+= step*4; // src points to begin of the 8x8 Block
445  for(y=0; y<8; y++){
446  int numEq= 0;
447 
448  numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
449  numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
450  numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
451  numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
452  numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
453  numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
454  numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
455  numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
456  numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
457  if(numEq > c->ppMode.flatnessThreshold){
458  int min, max, x;
459 
460  if(src[0] > src[step]){
461  max= src[0];
462  min= src[step];
463  }else{
464  max= src[step];
465  min= src[0];
466  }
467  for(x=2; x<8; x+=2){
468  if(src[x*step] > src[(x+1)*step]){
469  if(src[x *step] > max) max= src[ x *step];
470  if(src[(x+1)*step] < min) min= src[(x+1)*step];
471  }else{
472  if(src[(x+1)*step] > max) max= src[(x+1)*step];
473  if(src[ x *step] < min) min= src[ x *step];
474  }
475  }
476  if(max-min < 2*QP){
477  const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
478  const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
479 
480  int sums[10];
481  sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
482  sums[1] = sums[0] - first + src[3*step];
483  sums[2] = sums[1] - first + src[4*step];
484  sums[3] = sums[2] - first + src[5*step];
485  sums[4] = sums[3] - first + src[6*step];
486  sums[5] = sums[4] - src[0*step] + src[7*step];
487  sums[6] = sums[5] - src[1*step] + last;
488  sums[7] = sums[6] - src[2*step] + last;
489  sums[8] = sums[7] - src[3*step] + last;
490  sums[9] = sums[8] - src[4*step] + last;
491 
492  if (mode & VISUALIZE) {
493  src[0*step] =
494  src[1*step] =
495  src[2*step] =
496  src[3*step] =
497  src[4*step] =
498  src[5*step] =
499  src[6*step] =
500  src[7*step] = 128;
501  }
502  src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
503  src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
504  src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
505  src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
506  src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
507  src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
508  src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
509  src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
510  }
511  }else{
512  const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
513 
514  if(FFABS(middleEnergy) < 8*QP){
515  const int q=(src[3*step] - src[4*step])/2;
516  const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
517  const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
518 
519  int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
520  d= FFMAX(d, 0);
521 
522  d= (5*d + 32) >> 6;
523  d*= FFSIGN(-middleEnergy);
524 
525  if(q>0){
526  d = FFMAX(d, 0);
527  d = FFMIN(d, q);
528  }else{
529  d = FFMIN(d, 0);
530  d = FFMAX(d, q);
531  }
532 
533  if ((mode & VISUALIZE) && d) {
534  d= (d < 0) ? 32 : -32;
535  src[3*step]= av_clip_uint8(src[3*step] - d);
536  src[4*step]= av_clip_uint8(src[4*step] + d);
537  d = 0;
538  }
539 
540  src[3*step]-= d;
541  src[4*step]+= d;
542  }
543  }
544 
545  src += stride;
546  }
547 /*if(step==16){
548  STOP_TIMER("step16")
549 }else{
550  STOP_TIMER("stepX")
551 }*/
552 }
553 
554 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
555 //Plain C versions
556 //we always compile C for testing which needs bitexactness
557 #define TEMPLATE_PP_C 1
558 #include "postprocess_template.c"
559 
560 #if HAVE_ALTIVEC
561 # define TEMPLATE_PP_ALTIVEC 1
563 # include "postprocess_template.c"
564 #endif
565 
566 #if ARCH_X86 && HAVE_INLINE_ASM
567 # if CONFIG_RUNTIME_CPUDETECT
568 # define TEMPLATE_PP_MMX 1
569 # include "postprocess_template.c"
570 # define TEMPLATE_PP_MMXEXT 1
571 # include "postprocess_template.c"
572 # define TEMPLATE_PP_3DNOW 1
573 # include "postprocess_template.c"
574 # define TEMPLATE_PP_SSE2 1
575 # include "postprocess_template.c"
576 # else
577 # if HAVE_SSE2_INLINE
578 # define TEMPLATE_PP_SSE2 1
579 # include "postprocess_template.c"
580 # elif HAVE_MMXEXT_INLINE
581 # define TEMPLATE_PP_MMXEXT 1
582 # include "postprocess_template.c"
583 # elif HAVE_AMD3DNOW_INLINE
584 # define TEMPLATE_PP_3DNOW 1
585 # include "postprocess_template.c"
586 # elif HAVE_MMX_INLINE
587 # define TEMPLATE_PP_MMX 1
588 # include "postprocess_template.c"
589 # endif
590 # endif
591 #endif
592 
/* Common signature of the per-plane postprocessing routines (postProcess_C and
 * the CPU-specific variants) so that postProcess() can select one at run time. */
593 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
594  const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
595 
596 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
597  const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
598 {
599  pp_fn pp = postProcess_C;
600  PPContext *c= (PPContext *)vc;
601  PPMode *ppMode= (PPMode *)vm;
602  c->ppMode= *ppMode; //FIXME
603 
604  if (!(ppMode->lumMode & BITEXACT)) {
605 #if CONFIG_RUNTIME_CPUDETECT
606 #if ARCH_X86 && HAVE_INLINE_ASM
607  // ordered per speed fastest first
608  if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
609  else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
610  else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
611  else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
612 #elif HAVE_ALTIVEC
613  if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
614 #endif
615 #else /* CONFIG_RUNTIME_CPUDETECT */
616 #if HAVE_SSE2_INLINE
617  pp = postProcess_SSE2;
618 #elif HAVE_MMXEXT_INLINE
619  pp = postProcess_MMX2;
620 #elif HAVE_AMD3DNOW_INLINE
621  pp = postProcess_3DNow;
622 #elif HAVE_MMX_INLINE
623  pp = postProcess_MMX;
624 #elif HAVE_ALTIVEC
625  pp = postProcess_altivec;
626 #endif
627 #endif /* !CONFIG_RUNTIME_CPUDETECT */
628  }
629 
630  pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
631 }
632 
/* -pp Command line Help
 *
 * Printed line by line when the mode name "help" is requested.
 * The long names listed here must match the longName entries of the filter
 * table exactly, because the parser compares them with strcmp().
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
678 
679 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
680 {
682  char *p= temp;
683  static const char filterDelimiters[] = ",/";
684  static const char optionDelimiters[] = ":|";
685  struct PPMode *ppMode;
686  char *filterToken;
687 
688  if (!name) {
689  av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
690  return NULL;
691  }
692 
693  if (!strcmp(name, "help")) {
694  const char *p;
695  for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
696  av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
697  av_log(NULL, AV_LOG_INFO, "%s", temp);
698  }
699  return NULL;
700  }
701 
702  ppMode= av_malloc(sizeof(PPMode));
703  if (!ppMode)
704  return NULL;
705 
706  ppMode->lumMode= 0;
707  ppMode->chromMode= 0;
708  ppMode->maxTmpNoise[0]= 700;
709  ppMode->maxTmpNoise[1]= 1500;
710  ppMode->maxTmpNoise[2]= 3000;
711  ppMode->maxAllowedY= 234;
712  ppMode->minAllowedY= 16;
713  ppMode->baseDcDiff= 256/8;
714  ppMode->flatnessThreshold= 56-16-1;
715  ppMode->maxClippedThreshold= 0.01;
716  ppMode->error=0;
717 
718  memset(temp, 0, GET_MODE_BUFFER_SIZE);
719  av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
720 
721  av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
722 
723  for(;;){
724  const char *filterName;
725  int q= 1000000; //PP_QUALITY_MAX;
726  int chrom=-1;
727  int luma=-1;
728  const char *option;
729  const char *options[OPTIONS_ARRAY_SIZE];
730  int i;
731  int filterNameOk=0;
732  int numOfUnknownOptions=0;
733  int enable=1; //does the user want us to enabled or disabled the filter
734  char *tokstate;
735 
736  filterToken= av_strtok(p, filterDelimiters, &tokstate);
737  if(!filterToken) break;
738  p+= strlen(filterToken) + 1; // p points to next filterToken
739  filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
740  if (!filterName) {
741  ppMode->error++;
742  break;
743  }
744  av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
745 
746  if(*filterName == '-'){
747  enable=0;
748  filterName++;
749  }
750 
751  for(;;){ //for all options
752  option= av_strtok(NULL, optionDelimiters, &tokstate);
753  if(!option) break;
754 
755  av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
756  if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
757  else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
758  else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
759  else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
760  else{
761  options[numOfUnknownOptions] = option;
762  numOfUnknownOptions++;
763  }
764  if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
765  }
766  options[numOfUnknownOptions] = NULL;
767 
768  /* replace stuff from the replace Table */
769  for(i=0; replaceTable[2*i]; i++){
770  if(!strcmp(replaceTable[2*i], filterName)){
771  int newlen= strlen(replaceTable[2*i + 1]);
772  int plen;
773  int spaceLeft;
774 
775  p--, *p=',';
776 
777  plen= strlen(p);
778  spaceLeft= p - temp + plen;
779  if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
780  ppMode->error++;
781  break;
782  }
783  memmove(p + newlen, p, plen+1);
784  memcpy(p, replaceTable[2*i + 1], newlen);
785  filterNameOk=1;
786  }
787  }
788 
789  for(i=0; filters[i].shortName; i++){
790  if( !strcmp(filters[i].longName, filterName)
791  || !strcmp(filters[i].shortName, filterName)){
792  ppMode->lumMode &= ~filters[i].mask;
793  ppMode->chromMode &= ~filters[i].mask;
794 
795  filterNameOk=1;
796  if(!enable) break; // user wants to disable it
797 
798  if(q >= filters[i].minLumQuality && luma)
799  ppMode->lumMode|= filters[i].mask;
800  if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
801  if(q >= filters[i].minChromQuality)
802  ppMode->chromMode|= filters[i].mask;
803 
804  if(filters[i].mask == LEVEL_FIX){
805  int o;
806  ppMode->minAllowedY= 16;
807  ppMode->maxAllowedY= 234;
808  for(o=0; options[o]; o++){
809  if( !strcmp(options[o],"fullyrange")
810  ||!strcmp(options[o],"f")){
811  ppMode->minAllowedY= 0;
812  ppMode->maxAllowedY= 255;
813  numOfUnknownOptions--;
814  }
815  }
816  }
817  else if(filters[i].mask == TEMP_NOISE_FILTER)
818  {
819  int o;
820  int numOfNoises=0;
821 
822  for(o=0; options[o]; o++){
823  char *tail;
824  ppMode->maxTmpNoise[numOfNoises]=
825  strtol(options[o], &tail, 0);
826  if(tail!=options[o]){
827  numOfNoises++;
828  numOfUnknownOptions--;
829  if(numOfNoises >= 3) break;
830  }
831  }
832  }
833  else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
834  || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
835  int o;
836 
837  for(o=0; options[o] && o<2; o++){
838  char *tail;
839  int val= strtol(options[o], &tail, 0);
840  if(tail==options[o]) break;
841 
842  numOfUnknownOptions--;
843  if(o==0) ppMode->baseDcDiff= val;
844  else ppMode->flatnessThreshold= val;
845  }
846  }
847  else if(filters[i].mask == FORCE_QUANT){
848  int o;
849  ppMode->forcedQuant= 15;
850 
851  for(o=0; options[o] && o<1; o++){
852  char *tail;
853  int val= strtol(options[o], &tail, 0);
854  if(tail==options[o]) break;
855 
856  numOfUnknownOptions--;
857  ppMode->forcedQuant= val;
858  }
859  }
860  }
861  }
862  if(!filterNameOk) ppMode->error++;
863  ppMode->error += numOfUnknownOptions;
864  }
865 
866  av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
867  if(ppMode->error){
868  av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
869  av_free(ppMode);
870  return NULL;
871  }
872  return ppMode;
873 }
874 
876  av_free(mode);
877 }
878 
/**
 * Replace the buffer *p by a fresh zero-filled one of the given size.
 *
 * The old contents are discarded, not copied. After the call *p holds the
 * result of av_mallocz(), i.e. NULL if the allocation failed.
 */
static void reallocAlign(void **p, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
883 
884 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
885  int mbWidth = (width+15)>>4;
886  int mbHeight= (height+15)>>4;
887  int i;
888 
889  c->stride= stride;
890  c->qpStride= qpStride;
891 
892  reallocAlign((void **)&c->tempDst, stride*24+32);
893  reallocAlign((void **)&c->tempSrc, stride*24);
894  reallocAlign((void **)&c->tempBlocks, 2*16*8);
895  reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
896  for(i=0; i<256; i++)
897  c->yHistogram[i]= width*height/64*15/256;
898 
899  for(i=0; i<3; i++){
900  //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
901  reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
902  reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
903  }
904 
905  reallocAlign((void **)&c->deintTemp, 2*width+32);
906  reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
907  reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
908  reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
909 }
910 
/**
 * Name callback stored in the AVClass below.
 *
 * @param ptr unused
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char *const label = "postproc";

    return label;
}
914 
/* Logging class for postprocessing contexts: class name "Postproc", item
 * name supplied by context_to_name(), third member NULL. */
915 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
916 
917 av_cold pp_context *pp_get_context(int width, int height, int cpuCaps){
918  PPContext *c= av_mallocz(sizeof(PPContext));
919  int stride= FFALIGN(width, 16); //assumed / will realloc if needed
920  int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
921 
922  if (!c)
923  return NULL;
924 
926  if(cpuCaps&PP_FORMAT){
927  c->hChromaSubSample= cpuCaps&0x3;
928  c->vChromaSubSample= (cpuCaps>>4)&0x3;
929  }else{
930  c->hChromaSubSample= 1;
931  c->vChromaSubSample= 1;
932  }
933  if (cpuCaps & PP_CPU_CAPS_AUTO) {
934  c->cpuCaps = av_get_cpu_flags();
935  } else {
936  c->cpuCaps = 0;
937  if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
938  if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
939  if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
940  if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
941  }
942 
943  reallocBuffers(c, width, height, stride, qpStride);
944 
945  c->frameNum=-1;
946 
947  return c;
948 }
949 
950 av_cold void pp_free_context(void *vc){
951  PPContext *c = (PPContext*)vc;
952  int i;
953 
954  for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
955  av_free(c->tempBlurred[i]);
956  for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
957  av_free(c->tempBlurredPast[i]);
958 
959  av_free(c->tempBlocks);
960  av_free(c->yHistogram);
961  av_free(c->tempDst);
962  av_free(c->tempSrc);
963  av_free(c->deintTemp);
964  av_free(c->stdQPTable);
965  av_free(c->nonBQPTable);
967 
968  memset(c, 0, sizeof(PPContext));
969 
970  av_free(c);
971 }
972 
973 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
974  uint8_t * dst[3], const int dstStride[3],
975  int width, int height,
976  const QP_STORE_T *QP_store, int QPStride,
977  pp_mode *vm, void *vc, int pict_type)
978 {
979  int mbWidth = (width+15)>>4;
980  int mbHeight= (height+15)>>4;
981  PPMode *mode = vm;
982  PPContext *c = vc;
983  int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
984  int absQPStride = FFABS(QPStride);
985 
986  // c->stride and c->QPStride are always positive
987  if(c->stride < minStride || c->qpStride < absQPStride)
988  reallocBuffers(c, width, height,
989  FFMAX(minStride, c->stride),
990  FFMAX(c->qpStride, absQPStride));
991 
992  if(!QP_store || (mode->lumMode & FORCE_QUANT)){
993  int i;
994  QP_store= c->forcedQPTable;
995  absQPStride = QPStride = 0;
996  if(mode->lumMode & FORCE_QUANT)
997  for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
998  else
999  for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1000  }
1001 
1002  if(pict_type & PP_PICT_TYPE_QP2){
1003  int i;
1004  const int count= FFMAX(mbHeight * absQPStride, mbWidth);
1005  for(i=0; i<(count>>2); i++){
1006  ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1007  }
1008  for(i<<=2; i<count; i++){
1009  c->stdQPTable[i] = QP_store[i]>>1;
1010  }
1011  QP_store= c->stdQPTable;
1012  QPStride= absQPStride;
1013  }
1014 
1015  if(0){
1016  int x,y;
1017  for(y=0; y<mbHeight; y++){
1018  for(x=0; x<mbWidth; x++){
1019  av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1020  }
1021  av_log(c, AV_LOG_INFO, "\n");
1022  }
1023  av_log(c, AV_LOG_INFO, "\n");
1024  }
1025 
1026  if((pict_type&7)!=3){
1027  if (QPStride >= 0){
1028  int i;
1029  const int count= FFMAX(mbHeight * QPStride, mbWidth);
1030  for(i=0; i<(count>>2); i++){
1031  ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1032  }
1033  for(i<<=2; i<count; i++){
1034  c->nonBQPTable[i] = QP_store[i] & 0x3F;
1035  }
1036  } else {
1037  int i,j;
1038  for(i=0; i<mbHeight; i++) {
1039  for(j=0; j<absQPStride; j++) {
1040  c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1041  }
1042  }
1043  }
1044  }
1045 
1046  av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1047  mode->lumMode, mode->chromMode);
1048 
1049  postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1050  width, height, QP_store, QPStride, 0, mode, c);
1051 
1052  if (!(src[1] && src[2] && dst[1] && dst[2]))
1053  return;
1054 
1055  width = (width )>>c->hChromaSubSample;
1056  height = (height)>>c->vChromaSubSample;
1057 
1058  if(mode->chromMode){
1059  postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1060  width, height, QP_store, QPStride, 1, mode, c);
1061  postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1062  width, height, QP_store, QPStride, 2, mode, c);
1063  }
1064  else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1065  linecpy(dst[1], src[1], height, srcStride[1]);
1066  linecpy(dst[2], src[2], height, srcStride[2]);
1067  }else{
1068  int y;
1069  for(y=0; y<height; y++){
1070  memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1071  memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1072  }
1073  }
1074 }