92 #include "libavutil/ffversion.h"
103 return FFMPEG_CONFIGURATION;
/* Prefix string for the library's license text (the code that uses it is not
 * part of this excerpt). */
108 #define LICENSE_PREFIX "libpostproc license: "
/* NOTE(review): presumably the size of the scratch buffer used while parsing a
 * mode string -- confirm against the parser. */
116 #define GET_MODE_BUFFER_SIZE 500
/* NOTE(review): presumably the capacity of the parser's options[] array --
 * confirm against its declaration. */
117 #define OPTIONS_ARRAY_SIZE 10
119 #define TEMP_STRIDE 8
122 #if ARCH_X86 && HAVE_INLINE_ASM
/* Entries of the filter description table. Each entry gives a short name, a
 * long name, three integers and a mode-flag constant for one filter.
 * NOTE(review): the three integers presumably correspond to the chromDefault,
 * minLumQuality and minChromQuality fields that the parser reads from
 * filters[i]; the struct declaration is not in view -- confirm the order. */
146 {
"dr",
"dering", 1, 5, 6,
DERING},
147 {
"al",
"autolevels", 0, 1, 2,
LEVEL_FIX},
156 {
"be",
"bitexact", 1, 0, 0,
BITEXACT},
/* Alias pairs: a name followed by the filter string it stands for. The short
 * forms "de" and "fa" carry the same expansion as "default" and "fast".
 * NOTE(review): presumably substituted into the mode string by the parser;
 * the substitution code is only partly visible in this excerpt. */
163 "default",
"hb:a,vb:a,dr:a",
164 "de",
"hb:a,vb:a,dr:a",
165 "fast",
"h1:a,v1:a,dr:a",
166 "fa",
"h1:a,v1:a,dr:a",
167 "ac",
"ha:a:128:7,va:a,dr:a",
172 #if ARCH_X86 && HAVE_INLINE_ASM
/* Inline-asm helper that emits the prefetchnta instruction for the address in
 * p. NOTE(review): the operand/constraint list is not in view; presumably %0
 * is bound to p. */
173 static inline void prefetchnta(
const void *p)
175 __asm__
volatile(
"prefetchnta (%0)\n\t"
/* Same shape as above, emitting prefetcht0. */
180 static inline void prefetcht0(
const void *p)
182 __asm__
volatile(
"prefetcht0 (%0)\n\t"
/* Same shape as above, emitting prefetcht1. */
187 static inline void prefetcht1(
const void *p)
189 __asm__
volatile(
"prefetcht1 (%0)\n\t"
/* Same shape as above, emitting prefetcht2. */
194 static inline void prefetcht2(
const void *p)
196 __asm__
volatile(
"prefetcht2 (%0)\n\t"
/* dcThreshold turns the two-sided test -dcOffset <= diff <= dcOffset into one
 * unsigned compare: (unsigned)(diff + dcOffset) < 2*dcOffset + 1. A diff below
 * -dcOffset becomes a very large unsigned value and therefore fails. */
213 const int dcThreshold= dcOffset*2 + 1;
/* Count how many of the seven neighbouring pairs src[k], src[k+1] (k = 0..6)
 * differ by at most dcOffset. */
216 numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
217 numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
218 numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
219 numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
220 numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
221 numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
222 numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
/* Same unsigned range-check trick as the horizontal variant: the test
 * (unsigned)(diff + dcOffset) < 2*dcOffset + 1 holds exactly when
 * -dcOffset <= diff <= dcOffset. */
236 const int dcThreshold= dcOffset*2 + 1;
/* Count how many of the eight vertical pairs src[k], src[k+stride]
 * (k = 0..7) differ by at most dcOffset. */
240 numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
241 numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
242 numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
243 numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
244 numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
245 numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
246 numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
247 numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
/* Four spot checks on the sample pairs (0,5), (2,7), (4,1) and (6,3): return 0
 * as soon as one pair differs by more than 2*QP in either direction. The
 * unsigned cast folds both directions into a single compare, because a value
 * below -2*QP wraps to a very large unsigned number. */
257 if((
unsigned)(src[0] - src[5] + 2*QP) > 4*QP)
return 0;
259 if((
unsigned)(src[2] - src[7] + 2*QP) > 4*QP)
return 0;
261 if((
unsigned)(src[4] - src[1] + 2*QP) > 4*QP)
return 0;
263 if((
unsigned)(src[6] - src[3] + 2*QP) > 4*QP)
return 0;
/* Vertical counterpart of the spot check: for columns x, x+1, x+2 and x+3 the
 * row pairs (0,5), (2,7), (4,1) and (6,3) are compared, and 0 is returned as
 * soon as one pair differs by more than 2*QP in either direction (same
 * unsigned-wrap trick: values below -2*QP become very large when cast). */
274 if((
unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP)
return 0;
275 if((
unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP)
return 0;
276 if((
unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP)
return 0;
277 if((
unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP)
return 0;
/* Weighted difference around the dst[3]/dst[4] boundary: five times the inner
 * step plus twice the difference of the samples one further out. */
304 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
/* Only act when that measure is small relative to the quantiser (8 * c->QP). */
306 if(
FFABS(middleEnergy) < 8*c->
QP){
/* Half of the step across the boundary; integer division truncates. */
307 const int q=(dst[3] - dst[4])/2;
/* The same 4-tap pattern as middleEnergy, shifted two samples to the left
 * and to the right respectively. */
308 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
309 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
/* d is scaled by FFSIGN(-middleEnergy); presumably this gives d the sign
 * opposite to middleEnergy (how d is computed is not in view). */
315 d*=
FFSIGN(-middleEnergy);
/* Edge values used to pad the window: the outside neighbour (dst[-1] / dst[8])
 * is used only when it is within c->QP of the adjacent inside sample;
 * otherwise the inside sample itself is repeated. */
343 const int first=
FFABS(dst[-1] - dst[0]) < c->
QP ? dst[-1] : dst[0];
344 const int last=
FFABS(dst[8] - dst[7]) < c->
QP ? dst[8] : dst[7];
/* Running sums over a seven-sample window that slides one position per entry:
 * each step removes the sample leaving the window and adds the one entering.
 * The +4 in sums[0] is carried into every later sum. */
347 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
348 sums[1] = sums[0] - first + dst[3];
349 sums[2] = sums[1] - first + dst[4];
350 sums[3] = sums[2] - first + dst[5];
351 sums[4] = sums[3] - first + dst[6];
352 sums[5] = sums[4] - dst[0] + dst[7];
353 sums[6] = sums[5] - dst[1] + last;
354 sums[7] = sums[6] - dst[2] + last;
355 sums[8] = sums[7] - dst[3] + last;
356 sums[9] = sums[8] - dst[4] + last;
/* Each output combines two window sums (7 + 7 weights) with twice the current
 * sample, i.e. 16 weights in total, hence the >>4; the two +4 terms add up to
 * the +8 rounding offset. All sums were taken before any dst[] is rewritten. */
358 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
359 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
360 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
361 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
362 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
363 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
364 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
365 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
/* Lookup table holding one packed 64-bit entry per index (layout below). */
382 static uint64_t lut[256];
/* Treat index i as a signed 8-bit value and double it:
 * 0..127 -> 0..254, 128..255 -> -256..-2. */
388 int v= i < 128 ? 2*i : 2*(i-256);
/* a..d: v scaled by 1/16, 3/16, 5/16 and 7/16 (C integer division, so
 * truncated toward zero), each reduced to its low 8 bits. */
397 uint64_t
a= (v/16) & 0xFF;
398 uint64_t
b= (v*3/16) & 0xFF;
399 uint64_t
c= (v*5/16) & 0xFF;
400 uint64_t d= (7*v/16) & 0xFF;
/* A..C: 8-bit two's-complement negations of a..c. */
401 uint64_t
A= (0x100 -
a)&0xFF;
402 uint64_t
B= (0x100 -
b)&0xFF;
403 uint64_t
C= (0x100 -
c)&0xFF;
/* NOTE(review): D is computed from c, not d, so it duplicates C instead of
 * mirroring d the way A, B and C mirror a, b and c. Changing it would alter
 * the table contents -- confirm whether this is intentional before touching. */
404 uint64_t
D= (0x100 -
c)&0xFF;
/* Pack a, b, c, d into bytes 7..4 and D, C, B, A into bytes 3..0. */
406 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
407 (D<<24) | (C<<16) | (B<<8) | (A);
413 int a= src[1] - src[2];
414 int b= src[3] - src[4];
415 int c= src[5] - src[6];
/* Range check folded into one unsigned compare:
 * (unsigned)(diff + dcOffset) < 2*dcOffset + 1 holds exactly when
 * -dcOffset <= diff <= dcOffset. */
442 const int dcThreshold= dcOffset*2 + 1;
/* Count how many of the nine neighbouring pairs from src[-1*step] through
 * src[8*step] differ by at most dcOffset. Samples are addressed with a
 * caller-supplied step, so the same code can walk either direction. */
448 numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
449 numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
450 numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
451 numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
452 numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
453 numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
454 numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
455 numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
456 numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
/* The first pair is ordered here (the assignments it guards are not in view). */
460 if(src[0] > src[step]){
/* Min/max scan over sample pairs: order the pair first, then test only the
 * larger sample against max and only the smaller against min. The second
 * pair of tests presumably sits in the else branch (not in view). */
468 if(src[x*step] > src[(x+1)*step]){
469 if(src[x *step] > max) max= src[ x *step];
470 if(src[(x+1)*step] <
min) min= src[(x+1)*step];
472 if(src[(x+1)*step] > max) max= src[(x+1)*step];
473 if(src[ x *step] < min) min= src[ x *step];
/* Edge padding values: use the outside neighbour only when it is within QP of
 * the adjacent inside sample, otherwise repeat the inside sample. */
477 const int first=
FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
478 const int last=
FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
/* Running sums over a seven-sample window that slides one position per entry;
 * the +4 in sums[0] is carried into every later sum. */
481 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
482 sums[1] = sums[0] - first + src[3*step];
483 sums[2] = sums[1] - first + src[4*step];
484 sums[3] = sums[2] - first + src[5*step];
485 sums[4] = sums[3] - first + src[6*step];
486 sums[5] = sums[4] - src[0*step] + src[7*step];
487 sums[6] = sums[5] - src[1*step] + last;
488 sums[7] = sums[6] - src[2*step] + last;
489 sums[8] = sums[7] - src[3*step] + last;
490 sums[9] = sums[8] - src[4*step] + last;
/* Two window sums plus twice the current sample give 16 weights, hence >>4;
 * the two carried +4 terms form the +8 rounding offset. */
502 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
503 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
504 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
505 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
506 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
507 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
508 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
509 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
/* Weighted difference around the boundary between samples 3 and 4. */
512 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
/* Only act when that measure is small relative to the quantiser (8 * QP). */
514 if(
FFABS(middleEnergy) < 8*
QP){
/* Half of the step across the boundary; integer division truncates. */
515 const int q=(src[3*step] - src[4*step])/2;
/* Same 4-tap pattern shifted two samples to either side. */
516 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
517 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
/* Presumably gives d the sign opposite to middleEnergy (how d is computed is
 * not in view). */
523 d*=
FFSIGN(-middleEnergy);
/* NOTE(review): the condition guarding the next three lines is not in view.
 * They replace d by a fixed 32 with the opposite sign and apply it to the two
 * boundary samples through av_clip_uint8 -- presumably a debug
 * visualisation; confirm. */
534 d= (d < 0) ? 32 : -32;
535 src[3*step]= av_clip_uint8(src[3*step] - d);
536 src[4*step]= av_clip_uint8(src[4*step] + d);
/* TEMPLATE_PP_* switches: each define requests one variant of the
 * implementation (the code that consumes them is not in view). */
557 #define TEMPLATE_PP_C 1
561 # define TEMPLATE_PP_ALTIVEC 1
566 #if ARCH_X86 && HAVE_INLINE_ASM
/* With runtime CPU detection every x86 variant is requested. */
567 # if CONFIG_RUNTIME_CPUDETECT
568 # define TEMPLATE_PP_MMX 1
570 # define TEMPLATE_PP_MMXEXT 1
572 # define TEMPLATE_PP_3DNOW 1
574 # define TEMPLATE_PP_SSE2 1
/* This chain requests a single variant, trying SSE2, then MMXEXT, then 3DNow,
 * then MMX. NOTE(review): presumably the non-runtime-detect branch; the
 * #else separating it from the block above is not in view. */
577 # if HAVE_SSE2_INLINE
578 # define TEMPLATE_PP_SSE2 1
580 # elif HAVE_MMXEXT_INLINE
581 # define TEMPLATE_PP_MMXEXT 1
583 # elif HAVE_AMD3DNOW_INLINE
584 # define TEMPLATE_PP_3DNOW 1
586 # elif HAVE_MMX_INLINE
587 # define TEMPLATE_PP_MMX 1
/* Start from the plain C implementation; the preprocessor branches below may
 * replace it with an architecture-specific one. */
599 pp_fn pp = postProcess_C;
/* NOTE(review): several conditions and #else/#endif lines of this selection
 * are outside the excerpt, so the exact nesting cannot be read from here. */
605 #if CONFIG_RUNTIME_CPUDETECT
606 #if ARCH_X86 && HAVE_INLINE_ASM
617 pp = postProcess_SSE2;
618 #elif HAVE_MMXEXT_INLINE
619 pp = postProcess_MMX2;
620 #elif HAVE_AMD3DNOW_INLINE
621 pp = postProcess_3DNow;
622 #elif HAVE_MMX_INLINE
623 pp = postProcess_MMX;
625 pp = postProcess_altivec;
/* Run whichever implementation was selected, forwarding all arguments. */
630 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
636 "Available postprocessing filters:\n"
638 "short long name short long option Description\n"
639 "* * a autoq CPU power dependent enabler\n"
640 " c chrom chrominance filtering enabled\n"
641 " y nochrom chrominance filtering disabled\n"
642 " n noluma luma filtering disabled\n"
643 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
644 " 1. difference factor: default=32, higher -> more deblocking\n"
645 " 2. flatness threshold: default=39, lower -> more deblocking\n"
646 " the h & v deblocking filters share these\n"
647 " so you can't set different thresholds for h / v\n"
648 "vb vdeblock (2 threshold) vertical deblocking filter\n"
649 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
650 "va vadeblock (2 threshold) vertical deblocking filter\n"
651 "h1 x1hdeblock experimental h deblock filter 1\n"
652 "v1 x1vdeblock experimental v deblock filter 1\n"
653 "dr dering deringing filter\n"
654 "al autolevels automatic brightness / contrast\n"
655 " f fullyrange stretch luminance to (0..255)\n"
656 "lb linblenddeint linear blend deinterlacer\n"
657 "li linipoldeint linear interpolating deinterlace\n"
658 "ci cubicipoldeint cubic interpolating deinterlacer\n"
659 "md mediandeint median deinterlacer\n"
660 "fd ffmpegdeint ffmpeg deinterlacer\n"
661 "l5 lowpass5 FIR lowpass deinterlacer\n"
662 "de default hb:a,vb:a,dr:a\n"
663 "fa fast h1:a,v1:a,dr:a\n"
664 "ac ha:a:128:7,va:a,dr:a\n"
665 "tn tmpnoise (3 threshold) temporal noise reducer\n"
666 " 1. <= 2. <= 3. larger -> stronger filtering\n"
667 "fq forceQuant <quantizer> force quantizer\n"
669 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
670 "long form example:\n"
671 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
672 "short form example:\n"
673 "vb:a/hb:a/lb de,-vb\n"
/* Characters that separate one filter from the next in a mode string. */
683 static const char filterDelimiters[] =
",/";
/* Characters that separate a filter name from its options, and options from
 * each other. */
684 static const char optionDelimiters[] =
":|";
/* The special mode name "help": walk pp_help one newline-terminated line at a
 * time (what is done with each line is not in view). */
693 if (!strcmp(name,
"help")) {
695 for (p =
pp_help; strchr(p,
'\n'); p = strchr(p,
'\n') + 1) {
724 const char *filterName;
/* Options not yet recognised for the current filter; whatever is left at the
 * end is added to the error count. */
732 int numOfUnknownOptions=0;
/* Take the next filter token from p; stop when none is left. */
736 filterToken=
av_strtok(p, filterDelimiters, &tokstate);
737 if(!filterToken)
break;
/* Advance p past the token and its delimiter by hand: tokstate is reused by
 * the option-level av_strtok call below, and the filter-level call is given
 * p explicitly. */
738 p+= strlen(filterToken) + 1;
/* First option-level token is the filter name. */
739 filterName=
av_strtok(filterToken, optionDelimiters, &tokstate);
/* A leading '-' on the filter name is special-cased (handling not in view;
 * presumably it disables the filter). */
746 if(*filterName ==
'-'){
/* Generic options understood for every filter: autoq/a take the caller's
 * quality, nochrom/y and chrom/c set the chroma switch, noluma/n clears luma. */
756 if(!strcmp(
"autoq", option) || !strcmp(
"a", option)) q= quality;
757 else if(!strcmp(
"nochrom", option) || !strcmp(
"y", option)) chrom=0;
758 else if(!strcmp(
"chrom", option) || !strcmp(
"c", option)) chrom=1;
759 else if(!strcmp(
"noluma", option) || !strcmp(
"n", option)) luma=0;
/* Anything else is remembered for the filter-specific handling further down.
 * NOTE(review): no bound check against the options[] capacity is visible in
 * this excerpt -- confirm one exists. */
761 options[numOfUnknownOptions] =
option;
762 numOfUnknownOptions++;
/* NULL-terminate the list so the loops below can stop on it. */
766 options[numOfUnknownOptions] =
NULL;
/* Fragment of an in-place string substitution: the tail starting at p (plen
 * characters plus the terminator) is moved newlen bytes up to make room. */
778 spaceLeft= p - temp + plen;
783 memmove(p + newlen, p, plen+1);
/* A filter matches on either its long or its short name. */
790 if( !strcmp(filters[i].longName, filterName)
791 || !strcmp(filters[i].shortName, filterName)){
/* Luma needs sufficient quality and luma not switched off. */
798 if(q >= filters[i].minLumQuality && luma)
/* Chroma is used when explicitly requested (chrom==1) or, for chrom==-1
 * (presumably "not specified"), when the filter enables it by default;
 * it additionally needs sufficient quality. */
800 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
801 if(q >= filters[i].minChromQuality)
/* Filter-specific option: "fullyrange" / "f" is consumed here. */
808 for(o=0; options[o]; o++){
809 if( !strcmp(options[o],
"fullyrange")
810 ||!strcmp(options[o],
"f")){
813 numOfUnknownOptions--;
/* Numeric options: strtol with base 0, so decimal, octal (0...) and hex
 * (0x...) are accepted; an option counts as numeric when strtol consumed at
 * least one character. At most three are taken. */
822 for(o=0; options[o]; o++){
825 strtol(options[o], &tail, 0);
826 if(tail!=options[o]){
828 numOfUnknownOptions--;
829 if(numOfNoises >= 3)
break;
/* Up to two leading numeric options; stop at the first non-numeric one. */
837 for(o=0; options[o] && o<2; o++){
839 int val= strtol(options[o], &tail, 0);
840 if(tail==options[o])
break;
842 numOfUnknownOptions--;
/* At most one leading numeric option. */
851 for(o=0; options[o] && o<1; o++){
853 int val= strtol(options[o], &tail, 0);
854 if(tail==options[o])
break;
856 numOfUnknownOptions--;
/* Errors accumulate: one for an unknown filter name, plus one per option that
 * nothing consumed. */
862 if(!filterNameOk) ppMode->
error++;
863 ppMode->
error += numOfUnknownOptions;
885 int mbWidth = (width+15)>>4;
886 int mbHeight= (height+15)>>4;
920 int qpStride= (width+15)/16 + 2;
974 uint8_t * dst[3],
const int dstStride[3],
977 pp_mode *vm,
void *vc,
int pict_type)
/* Picture size in 16x16 blocks, rounded up. */
979 int mbWidth = (width+15)>>4;
980 int mbHeight= (height+15)>>4;
/* Magnitude of the QP stride, kept separately from the signed value. */
984 int absQPStride =
FFABS(QPStride);
/* Both strides are zeroed here (the guarding condition is not in view). */
995 absQPStride = QPStride = 0;
/* Number of QP bytes to convert: the whole table, but at least one row's
 * worth of entries. */
1004 const int count=
FFMAX(mbHeight * absQPStride, mbWidth);
/* Bulk pass, four QP bytes per iteration: the 32-bit word is shifted right by
 * one and masked with 0x7F7F7F7F, which halves every byte and discards the bit
 * shifted in from the neighbouring byte.
 * NOTE(review): the uint32_t casts assume QP_store and stdQPTable may be
 * accessed as 32-bit words (alignment / aliasing) -- confirm. */
1005 for(i=0; i<(count>>2); i++){
1006 ((uint32_t*)c->
stdQPTable)[i] = (((
const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
/* Tail pass: i<<=2 converts the word index back to a byte index, then the
 * remaining 0..3 entries are handled one at a time (body not in view). */
1008 for(i<<=2; i<
count; i++){
/* From here on QPStride holds the non-negative magnitude. */
1012 QPStride= absQPStride;
1017 for(y=0; y<mbHeight; y++){
1018 for(x=0; x<mbWidth; x++){
/* NOTE(review): presumably picture type 3 denotes a B picture, given the
 * table is called nonBQPTable -- confirm against the pict_type encoding. */
1026 if((pict_type&7)!=3){
/* NOTE(review): this count uses QPStride rather than absQPStride; the guard
 * that presumably makes this branch sign-safe is not in view. */
1029 const int count=
FFMAX(mbHeight * QPStride, mbWidth);
/* Same word-at-a-time scheme as above, but without the shift: only the low six
 * bits of every byte are kept (0x3F3F3F3F). */
1030 for(i=0; i<(count>>2); i++){
1031 ((uint32_t*)c->
nonBQPTable)[i] = ((
const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1033 for(i<<=2; i<
count; i++){
/* Byte-wise variant: the destination is laid out with absQPStride per row,
 * while the source is read with the signed QPStride. */
1038 for(i=0; i<mbHeight; i++) {
1039 for(j=0; j<absQPStride; j++) {
1040 c->
nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
/* First plane; the ninth argument (0 here, 1 and 2 below) identifies the
 * plane being processed. */
1049 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1050 width, height, QP_store, QPStride, 0, mode, c);
/* The remaining two planes are optional: all four pointers must be set
 * (what happens otherwise is not in view). */
1052 if (!(src[1] && src[2] && dst[1] && dst[2]))
1059 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1060 width, height, QP_store, QPStride, 1, mode, c);
1061 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1062 width, height, QP_store, QPStride, 2, mode, c);
/* Otherwise planes 1 and 2 are copied unfiltered: through linecpy when source
 * and destination strides match ... */
1064 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1065 linecpy(dst[1], src[1], height, srcStride[1]);
1066 linecpy(dst[2], src[2], height, srcStride[2]);
/* ... or row by row, width bytes per row, when they differ. */
1070 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1071 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);