/* XMIN(a,b): yields the smaller of the two arguments (b when a is not
 * smaller). Plain function-like macro, so the selected argument is evaluated
 * twice -- do not pass expressions with side effects. */
56 #define XMIN(a,b) ((a) < (b) ? (a) : (b))
60 { 0, 48, 12, 60, 3, 51, 15, 63, },
61 { 32, 16, 44, 28, 35, 19, 47, 31, },
62 { 8, 56, 4, 52, 11, 59, 7, 55, },
63 { 40, 24, 36, 20, 43, 27, 39, 23, },
64 { 2, 50, 14, 62, 1, 49, 13, 61, },
65 { 34, 18, 46, 30, 33, 17, 45, 29, },
66 { 10, 58, 6, 54, 9, 57, 5, 53, },
67 { 42, 26, 38, 22, 41, 25, 37, 21, },
73 {0,0}, {2,2}, {6,4}, {4,6},
74 {0,0}, {5,1}, {2,2}, {7,3}, {4,4}, {1,5}, {6,6}, {3,7},
76 {0,0}, {4,0}, {1,1}, {5,1}, {3,2}, {7,2}, {2,3}, {6,3},
77 {0,4}, {4,4}, {1,5}, {5,5}, {3,6}, {7,6}, {2,7}, {6,7},
79 {0,0}, {0,2}, {0,4}, {0,6}, {1,1}, {1,3}, {1,5}, {1,7},
80 {2,0}, {2,2}, {2,4}, {2,6}, {3,1}, {3,3}, {3,5}, {3,7},
81 {4,0}, {4,2}, {4,4}, {4,6}, {5,1}, {5,3}, {5,5}, {5,7},
82 {6,0}, {6,2}, {6,4}, {6,6}, {7,1}, {7,3}, {7,5}, {7,7},
84 {0,0}, {4,4}, {0,4}, {4,0}, {2,2}, {6,6}, {2,6}, {6,2},
85 {0,2}, {4,6}, {0,6}, {4,2}, {2,0}, {6,4}, {2,4}, {6,0},
86 {1,1}, {5,5}, {1,5}, {5,1}, {3,3}, {7,7}, {3,7}, {7,3},
87 {1,3}, {5,7}, {1,7}, {5,3}, {3,1}, {7,5}, {3,5}, {7,1},
88 {0,1}, {4,5}, {0,5}, {4,1}, {2,3}, {6,7}, {2,7}, {6,3},
89 {0,3}, {4,7}, {0,7}, {4,3}, {2,1}, {6,5}, {2,5}, {6,1},
90 {1,0}, {5,4}, {1,4}, {5,0}, {3,2}, {7,6}, {3,6}, {7,2},
91 {1,2}, {5,6}, {1,6}, {5,2}, {3,0}, {7,4}, {3,4}, {7,0},
/* Scalar hard-threshold requantisation of one block (the enclosing function
 * header and the coefficient loop are on lines not visible in this view).
 * threshold1 is the dead-zone limit; threshold2 is twice that and is only
 * used by the range test below. */
112 unsigned int threshold1, threshold2;
/* (1<<4) is 16, so this is qp*(16 - bias) - 1. */
114 threshold1= qp*((1<<4) - bias) - 1;
115 threshold2= (threshold1<<1);
/* Zero 64*sizeof(int16_t) bytes of dst; entries that fail the test below
 * are never written and therefore stay zero. */
117 memset(dst, 0, 64*
sizeof(int16_t));
/* Entry 0 bypasses the threshold: add 4, shift right by 3 (rounded /8). */
118 dst[0]= (src[0] + 4)>>3;
/* One unsigned compare covers both tails: true when level > threshold1 or
 * level < -threshold1 (the negative case wraps to a large unsigned value).
 * `level` is assigned on a line not shown -- presumably src[i]; confirm. */
122 if(((
unsigned)(level+threshold1))>threshold2){
/* The output position is remapped through permutation[]. */
123 const int j= permutation[i];
/* Hard threshold: a surviving value is kept unchanged, then rounded /8. */
124 dst[j]= (level + 4)>>3;
/* Scalar soft-threshold requantisation of one block (the enclosing function
 * header, the coefficient loop and the sign test between the two final
 * assignments are on lines not visible in this view). */
132 unsigned int threshold1, threshold2;
/* (1<<4) is 16, so this is qp*(16 - bias) - 1. */
134 threshold1= qp*((1<<4) - bias) - 1;
135 threshold2= (threshold1<<1);
/* Zero 64*sizeof(int16_t) bytes of dst; entries that fail the test below
 * are never written and therefore stay zero. */
137 memset(dst, 0, 64*
sizeof(int16_t));
/* Entry 0 bypasses the threshold: add 4, shift right by 3 (rounded /8). */
138 dst[0]= (src[0] + 4)>>3;
/* One unsigned compare covers both tails: true when level > threshold1 or
 * level < -threshold1 (the negative case wraps to a large unsigned value).
 * `level` is assigned on a line not shown -- presumably src[i]; confirm. */
142 if(((
unsigned)(level+threshold1))>threshold2){
/* The output position is remapped through permutation[]. */
143 const int j= permutation[i];
/* Soft threshold: the surviving value is moved towards zero by threshold1
 * before the rounded /8. Which of the two assignments runs is decided by a
 * condition on lines not shown -- presumably the first for positive level
 * and the second for negative level; confirm.
 * NOTE(review): threshold1 is unsigned, so if level is an int these sums are
 * evaluated in unsigned arithmetic and the shift is logical; the result is
 * only right after narrowing to dst's element type -- confirm the types. */
145 dst[j]= (level - threshold1 + 4)>>3;
147 dst[j]= (level + threshold1 + 4)>>3;
/* MMX requantisation with a hard threshold (the enclosing function header
 * and the asm statement opener are on lines not visible in this view).
 * threshold1 = qp*(16 - bias) - 1. */
155 unsigned int threshold1;
157 threshold1= qp*((1<<4) - bias) - 1;
/* REQUANT_CORE(dst0..dst3, src0..src3): assembly text handling one group of
 * four quadwords (16 words). Expects mm4, mm5 and mm6 to hold the constants
 * broadcast before the first expansion; uses mm7 as scratch.
 *  1. load src0..src3 into mm0..mm3
 *  2. psubw mm4, paddusw mm5, paddw mm6, psubusw mm6: with mm4 = t+1,
 *     mm5 = t+5 and mm6 = t-4 (t = threshold1) the two unsigned-saturating
 *     steps turn words in [-t, t] into 0 and leave every other word at x+4,
 *     without branches (holds while the intermediate sums fit in 16 bits)
 *  3. psraw $3: arithmetic shift right by 3
 *  4. punpcklwd/punpckhwd sequence: interleaves words across the four
 *     registers; the output order is exactly what this sequence produces --
 *     presumably chosen to match a coefficient permutation, verify before
 *     changing it
 *  5. store mm0, mm7, mm3, mm1 to dst0..dst3 */
160 #define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
161 "movq " #src0 ", %%mm0 \n\t"\
162 "movq " #src1 ", %%mm1 \n\t"\
163 "movq " #src2 ", %%mm2 \n\t"\
164 "movq " #src3 ", %%mm3 \n\t"\
165 "psubw %%mm4, %%mm0 \n\t"\
166 "psubw %%mm4, %%mm1 \n\t"\
167 "psubw %%mm4, %%mm2 \n\t"\
168 "psubw %%mm4, %%mm3 \n\t"\
169 "paddusw %%mm5, %%mm0 \n\t"\
170 "paddusw %%mm5, %%mm1 \n\t"\
171 "paddusw %%mm5, %%mm2 \n\t"\
172 "paddusw %%mm5, %%mm3 \n\t"\
173 "paddw %%mm6, %%mm0 \n\t"\
174 "paddw %%mm6, %%mm1 \n\t"\
175 "paddw %%mm6, %%mm2 \n\t"\
176 "paddw %%mm6, %%mm3 \n\t"\
177 "psubusw %%mm6, %%mm0 \n\t"\
178 "psubusw %%mm6, %%mm1 \n\t"\
179 "psubusw %%mm6, %%mm2 \n\t"\
180 "psubusw %%mm6, %%mm3 \n\t"\
181 "psraw $3, %%mm0 \n\t"\
182 "psraw $3, %%mm1 \n\t"\
183 "psraw $3, %%mm2 \n\t"\
184 "psraw $3, %%mm3 \n\t"\
186 "movq %%mm0, %%mm7 \n\t"\
187 "punpcklwd %%mm2, %%mm0 \n\t" \
188 "punpckhwd %%mm2, %%mm7 \n\t" \
189 "movq %%mm1, %%mm2 \n\t"\
190 "punpcklwd %%mm3, %%mm1 \n\t" \
191 "punpckhwd %%mm3, %%mm2 \n\t" \
192 "movq %%mm0, %%mm3 \n\t"\
193 "punpcklwd %%mm1, %%mm0 \n\t" \
194 "punpckhwd %%mm7, %%mm3 \n\t" \
195 "punpcklwd %%mm2, %%mm7 \n\t" \
196 "punpckhwd %%mm2, %%mm1 \n\t" \
198 "movq %%mm0, " #dst0 " \n\t"\
199 "movq %%mm7, " #dst1 " \n\t"\
200 "movq %%mm3, " #dst2 " \n\t"\
201 "movq %%mm1, " #dst3 " \n\t"
/* Load operands %2, %3, %4 into mm4, mm5, mm6 and replicate each into all
 * four 16-bit lanes: movd loads the 32-bit value, two rounds of packssdw
 * (signed saturation) copy it across. This is a plain broadcast only for
 * values that fit in a non-negative int16. */
203 "movd %2, %%mm4 \n\t"
204 "movd %3, %%mm5 \n\t"
205 "movd %4, %%mm6 \n\t"
206 "packssdw %%mm4, %%mm4 \n\t"
207 "packssdw %%mm5, %%mm5 \n\t"
208 "packssdw %%mm6, %%mm6 \n\t"
209 "packssdw %%mm4, %%mm4 \n\t"
210 "packssdw %%mm5, %%mm5 \n\t"
211 "packssdw %%mm6, %%mm6 \n\t"
/* Four expansions cover 4*16 = 64 words. Arguments are byte offsets from
 * %1 (stores) and %0 (loads); the store offsets are sequential, the load
 * offsets are not. */
212 REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0))
213 REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
214 REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
215 REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
/* No output operands: %0 = src, %1 = dst, %2 = threshold1+1,
 * %3 = threshold1+5, %4 = threshold1-4.
 * NOTE(review): memory at dst is written through an input-only operand and
 * no clobber list is visible in this fragment (the line closing the
 * statement is not shown) -- confirm whether a "memory" clobber and the
 * mm registers need to be declared. */
216 : :
"r" (src),
"r" (dst),
"g" (threshold1+1),
"g" (threshold1+5),
"g" (threshold1-4)
/* Entry 0 is then overwritten with the unthresholded value, rounded /8. */
218 dst[0]= (src[0] + 4)>>3;
/* MMX requantisation with a soft threshold (the enclosing function header
 * and the asm statement opener are on lines not visible in this view).
 * threshold1 = qp*(16 - bias) - 1. */
223 unsigned int threshold1;
225 threshold1= qp*((1<<4) - bias) - 1;
/* REQUANT_CORE(dst0..dst3, src0..src3): assembly text handling one group of
 * four quadwords (16 words), two registers at a time. Expects mm4 (the
 * threshold) and mm5 (the rounding constant) to be broadcast before the
 * first expansion; mm6 and mm7 are scratch.
 *  1. load src0/src1; build an all-ones mask in mm6/mm7 for negative words
 *     (pcmpgtw of zero against the data)
 *  2. pxor with the mask: negative words become their one's complement
 *  3. psubusw mm4: unsigned-saturating subtract of the threshold, so small
 *     magnitudes floor at 0
 *  4. pxor with the same mask: sign restored
 *  5. the same four steps for src2/src3 in mm2/mm3
 *  6. paddsw mm5, psraw $3: add the rounding constant with signed
 *     saturation, then arithmetic shift right by 3
 *  7. punpcklwd/punpckhwd sequence: interleaves words across the four
 *     registers; the output order is exactly what this sequence produces --
 *     presumably chosen to match a coefficient permutation, verify before
 *     changing it
 *  8. store mm0, mm7, mm3, mm1 to dst0..dst3 */
229 #define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
230 "movq " #src0 ", %%mm0 \n\t"\
231 "movq " #src1 ", %%mm1 \n\t"\
232 "pxor %%mm6, %%mm6 \n\t"\
233 "pxor %%mm7, %%mm7 \n\t"\
234 "pcmpgtw %%mm0, %%mm6 \n\t"\
235 "pcmpgtw %%mm1, %%mm7 \n\t"\
236 "pxor %%mm6, %%mm0 \n\t"\
237 "pxor %%mm7, %%mm1 \n\t"\
238 "psubusw %%mm4, %%mm0 \n\t"\
239 "psubusw %%mm4, %%mm1 \n\t"\
240 "pxor %%mm6, %%mm0 \n\t"\
241 "pxor %%mm7, %%mm1 \n\t"\
242 "movq " #src2 ", %%mm2 \n\t"\
243 "movq " #src3 ", %%mm3 \n\t"\
244 "pxor %%mm6, %%mm6 \n\t"\
245 "pxor %%mm7, %%mm7 \n\t"\
246 "pcmpgtw %%mm2, %%mm6 \n\t"\
247 "pcmpgtw %%mm3, %%mm7 \n\t"\
248 "pxor %%mm6, %%mm2 \n\t"\
249 "pxor %%mm7, %%mm3 \n\t"\
250 "psubusw %%mm4, %%mm2 \n\t"\
251 "psubusw %%mm4, %%mm3 \n\t"\
252 "pxor %%mm6, %%mm2 \n\t"\
253 "pxor %%mm7, %%mm3 \n\t"\
255 "paddsw %%mm5, %%mm0 \n\t"\
256 "paddsw %%mm5, %%mm1 \n\t"\
257 "paddsw %%mm5, %%mm2 \n\t"\
258 "paddsw %%mm5, %%mm3 \n\t"\
259 "psraw $3, %%mm0 \n\t"\
260 "psraw $3, %%mm1 \n\t"\
261 "psraw $3, %%mm2 \n\t"\
262 "psraw $3, %%mm3 \n\t"\
264 "movq %%mm0, %%mm7 \n\t"\
265 "punpcklwd %%mm2, %%mm0 \n\t" \
266 "punpckhwd %%mm2, %%mm7 \n\t" \
267 "movq %%mm1, %%mm2 \n\t"\
268 "punpcklwd %%mm3, %%mm1 \n\t" \
269 "punpckhwd %%mm3, %%mm2 \n\t" \
270 "movq %%mm0, %%mm3 \n\t"\
271 "punpcklwd %%mm1, %%mm0 \n\t" \
272 "punpckhwd %%mm7, %%mm3 \n\t" \
273 "punpcklwd %%mm2, %%mm7 \n\t" \
274 "punpckhwd %%mm2, %%mm1 \n\t" \
276 "movq %%mm0, " #dst0 " \n\t"\
277 "movq %%mm7, " #dst1 " \n\t"\
278 "movq %%mm3, " #dst2 " \n\t"\
279 "movq %%mm1, " #dst3 " \n\t"
/* Load operands %2 and %3 into mm4 and mm5 and replicate each into all four
 * 16-bit lanes: movd loads the 32-bit value, two rounds of packssdw (signed
 * saturation) copy it across. This is a plain broadcast only for values
 * that fit in a non-negative int16. */
281 "movd %2, %%mm4 \n\t"
282 "movd %3, %%mm5 \n\t"
283 "packssdw %%mm4, %%mm4 \n\t"
284 "packssdw %%mm5, %%mm5 \n\t"
285 "packssdw %%mm4, %%mm4 \n\t"
286 "packssdw %%mm5, %%mm5 \n\t"
/* Four expansions cover 4*16 = 64 words. Arguments are byte offsets from
 * %1 (stores) and %0 (loads); the store offsets are sequential, the load
 * offsets are not. */
287 REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0))
288 REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
289 REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
290 REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
/* No output operands: %0 = src, %1 = dst, %2 = threshold1, %3 = the
 * constant 4 used for rounding.
 * NOTE(review): memory at dst is written through an input-only operand and
 * no clobber list is visible in this fragment (the line closing the
 * statement is not shown) -- confirm whether a "memory" clobber and the
 * mm registers need to be declared. */
291 : :
"r" (src),
"r" (dst),
"g" (threshold1),
"rm" (4)
/* Entry 0 is then overwritten with the unthresholded value, rounded /8. */
294 dst[0]= (src[0] + 4)>>3;
/* Adds one row of block[] into dst[] using four 32-bit read-modify-write
 * operations at element offsets 0, 2, 4 and 6. Rows of block are 8 elements
 * apart, rows of dst are `stride` elements apart; the loop over y and the
 * function header are on lines not visible in this view.
 * NOTE(review): the element types are declared on lines not shown. If they
 * are 16-bit (presumably int16_t), each 32-bit add processes two elements at
 * once, so a carry out of the low element leaks into the high one, and the
 * uint32_t casts sidestep strict-aliasing rules -- confirm both are
 * acceptable for the value ranges and compiler flags in use. */
302 *(uint32_t*)&dst[0 + y*stride]+= *(uint32_t*)&block[0 + y*8];
303 *(uint32_t*)&dst[2 + y*stride]+= *(uint32_t*)&block[2 + y*8];
304 *(uint32_t*)&dst[4 + y*stride]+= *(uint32_t*)&block[4 + y*8];
305 *(uint32_t*)&dst[6 + y*stride]+= *(uint32_t*)&block[6 + y*8];
313 temp= ((src[x + y*src_stride + pos]<<log2_scale) + d[pos])>>6;\
314 if(temp & 0x100) temp= ~(temp>>31);\
315 dst[x + y*dst_stride + pos]= temp;
/* The three lines above are the tail of a backslash-continued macro whose
 * #define line is not visible in this view. For the sample at offset `pos`
 * they: shift the source value left by log2_scale, add d[pos], shift right
 * by 6 and, when bit 8 of the result is set, replace it with ~(temp>>31) --
 * all ones for a non-negative temp, 0 for a negative one (assuming temp is a
 * signed 32-bit int; presumably this acts as a clamp to 0..255 once the
 * value is stored in an 8-bit dst -- confirm the types).
 * The loop below walks one row in steps of 8 samples. */
319 for(x=0; x<
width; x+=8){
/* Inline-assembly row store (the asm statement opener, the loop label and
 * the pointer-advance / branch instructions are on lines not visible in this
 * view). Operands: %0 = src1 and %1 = dst1 (both read-write), %2 = dst +
 * width, %3 = dither[y], %4 = log2_scale, %5 = 6 - log2_scale.
 *
 * Setup: read 8 bytes from dither[y] twice, zero-extend the low four bytes
 * into mm3 and the high four into mm4 (mm0 is zeroed for the unpack), and
 * shift both right by log2_scale. mm2 is then reloaded with the final shift
 * count, 6 - log2_scale. */
341 "movq (%3), %%mm3 \n\t"
342 "movq (%3), %%mm4 \n\t"
343 "movd %4, %%mm2 \n\t"
344 "pxor %%mm0, %%mm0 \n\t"
345 "punpcklbw %%mm0, %%mm3 \n\t"
346 "punpckhbw %%mm0, %%mm4 \n\t"
347 "psraw %%mm2, %%mm3 \n\t"
348 "psraw %%mm2, %%mm4 \n\t"
349 "movd %5, %%mm2 \n\t"
/* Per 8 samples: load 8 words from src1, add the prepared dither words,
 * arithmetic shift right by mm2, pack to 8 bytes with unsigned saturation
 * and store them at dst1. */
351 "movq (%0), %%mm0 \n\t"
352 "movq 8(%0), %%mm1 \n\t"
353 "paddw %%mm3, %%mm0 \n\t"
354 "paddw %%mm4, %%mm1 \n\t"
355 "psraw %%mm2, %%mm0 \n\t"
356 "psraw %%mm2, %%mm1 \n\t"
357 "packuswb %%mm1, %%mm0 \n\t"
358 "movq %%mm0, (%1) \n\t"
/* Outputs: both pointers are read-write ("+r"), so the asm is expected to
 * advance them on the lines not shown. */
363 :
"+r" (src1),
"+r"(dst1)
/* Inputs: dst + width is presumably the end-of-row pointer that the hidden
 * loop-control instructions compare dst1 against -- confirm. */
364 :
"r"(dst +
width),
"r"(
dither[y]),
"g"(log2_scale),
"g"(6-log2_scale)
/* Fragment of the main filtering routine (function header, several loop
 * headers and the per-block transform calls are on lines not visible in this
 * view).
 *
 * Scratch storage: 32 uint64_t = 256 bytes, 16-byte aligned, viewed as two
 * int16_t areas of 64 entries each (block_align+16 is 128 bytes in). */
382 uint64_t __attribute__((aligned(16))) block_align[32];
383 int16_t *
block = (int16_t *)block_align;
384 int16_t *block2= (int16_t *)(block_align+16);
/* Nothing to do without both a source and a destination plane. */
386 if (!src || !dst)
return;
/* Horizontal edge padding by mirroring: the sample x positions to the left
 * of `index` copies the one x positions to its right, and likewise around
 * index + width. The loop supplying x and the definition of index are not
 * shown. */
391 p->
src[index - x - 1]= p->
src[index + x ];
392 p->
src[index + width + x ]= p->
src[index + width - x - 1];
/* Process the picture in bands of 8 rows, running 8 rows past `height`.
 * Before each band, the 8 rows of p->temp starting at row 8+y are zeroed. */
401 for(y=0; y<height+8; y+=8){
402 memset(p->
temp + (8+y)*stride, 0, 8*stride*
sizeof(int16_t));
403 for(x=0; x<width+8; x+=8){
/* Shift that maps pixel coordinates to qp_store cells: 3 + is_luma, i.e.
 * 16x16 cells when is_luma is 1 and 8x8 cells when it is 0 (assuming
 * is_luma is 0 or 1 -- confirm). */
404 const int qps= 3 + is_luma;
/* x and y run past the picture, so clamp them to the last column/row before
 * indexing the qp table. The condition selecting this lookup is on lines
 * not shown. */
410 qp= qp_store[ (
XMIN(x, width-1)>>qps) + (
XMIN(y, height-1)>>qps) * qp_stride];
/* Visit `count` shifted block positions; the shifts for a given count are
 * read from offset[] starting at index count-1. */
413 for(i=0; i<count; i++){
414 const int x1= x + offset[i+count-1][0];
415 const int y1= y + offset[i+count-1][1];
/* NOTE(review): the lines below overwrite dst with the unfiltered padded
 * source inside alternating 64x64 tiles and add 128 on every 64th row and
 * column. This looks like a debug visualisation; whether it is compiled in
 * depends on preprocessor lines not visible here -- confirm before relying
 * on it. The loop supplying y is also not shown. */
429 for(x=0; x<
width; x++){
430 if((((x>>6) ^ (y>>6)) & 1) == 0)
431 dst[x + y*dst_stride]= p->
src[8 + 8*stride + x + y*
stride];
432 if((x&63) == 0 || (y&63)==0)
433 dst[x + y*dst_stride] += 128;
/* config: sizes and allocates the per-instance work buffers for a picture of
 * width x height. Only the opening part of the function is visible in this
 * view; the return statement and any further setup are on lines not shown.
 * d_width, d_height, flags and outfmt are not used in the visible lines. */
440 static int config(
struct vf_instance *vf,
441 int width,
int height,
int d_width,
int d_height,
442 unsigned int flags,
unsigned int outfmt){
/* Buffer height: height + 16, rounded up to a multiple of 16. */
443 int h= (height+16+15)&(~15);
/* Row stride: width + 16, rounded up to a multiple of 16. */
445 vf->priv->temp_stride= (width+16+15)&(~15);
/* temp holds temp_stride*h int16_t entries, src the same number of bytes.
 * NOTE(review): neither malloc result is checked in the visible lines, and
 * the previous temp/src pointers are overwritten without a visible free --
 * confirm failure handling and whether config can run more than once per
 * instance. */
446 vf->priv->temp= malloc(vf->priv->temp_stride*h*
sizeof(int16_t));
447 vf->priv->src = malloc(vf->priv->temp_stride*h*
sizeof(
uint8_t));
/* Makes mpi alias the image held in vf->dmpi: plane 0 pointer, its stride
 * and the width are copied over (the enclosing function header and the code
 * that obtains vf->dmpi are on lines not visible in this view). */
457 mpi->
planes[0]=vf->dmpi->planes[0];
458 mpi->
stride[0]=vf->dmpi->stride[0];
459 mpi->
width=vf->dmpi->width;
/* Same for planes 1 and 2. These four assignments are presumably guarded by
 * a condition on a line not shown -- confirm. */
461 mpi->
planes[1]=vf->dmpi->planes[1];
462 mpi->
planes[2]=vf->dmpi->planes[2];
463 mpi->
stride[1]=vf->dmpi->stride[1];
464 mpi->
stride[2]=vf->dmpi->stride[2];
/* Quantiser-table preparation (the enclosing function header and the
 * surrounding conditions are on lines not visible in this view).
 * Picture height and width in units of 16 pixels, rounded up. The declaration
 * of w and any other assignment to it are not shown. */
486 int h = (mpi->
h + 15) >> 4;
488 w = (mpi->
w + 15) >> 4;
/* Allocate the non_b_qp table of w*h bytes on first use only.
 * NOTE(review): the malloc result is not checked here; the later tests on
 * qp_tab suggest a NULL table is tolerated -- confirm that every use on the
 * hidden lines is guarded the same way. */
491 if(!vf->priv->non_b_qp)
492 vf->priv->non_b_qp= malloc(w*h);
496 char *qp_tab= vf->priv->non_b_qp;
/* Taken when mode bit 2 (value 4) is set or no stored table exists; the
 * controlled statement is on a line not shown. */
497 if((vf->priv->mode&4) || !qp_tab)
/* Proceed only when a qp table or a fixed vf->priv->qp is available; the
 * body is on lines not shown. */
500 if(qp_tab || vf->priv->qp){
/* uninit: releases the buffers owned by vf->priv. Each pointer is set to
 * NULL after being freed. Does nothing when vf->priv is NULL. Lines between
 * the temp and avctx releases, and the end of the function, are not visible
 * in this view. */
521 static void uninit(
struct vf_instance *vf){
522 if(!vf->priv)
return;
524 free(vf->priv->temp);
525 vf->priv->temp=
NULL;
/* NOTE(review): avctx is released with plain free(); if it was obtained
 * from a library allocator rather than malloc, the matching deallocator is
 * required -- confirm against the allocation site. */
528 free(vf->priv->avctx);
529 vf->priv->avctx=
NULL;
530 free(vf->priv->non_b_qp);
531 vf->priv->non_b_qp=
NULL;
/* control: request handler for this filter instance. Only the header and
 * one assignment are visible in this view; the request dispatch and the
 * return value are on lines not shown. */
556 static int control(
struct vf_instance *vf,
int request,
void*
data){
/* data is read as a pointer to unsigned int and stored as the new
 * log2_count.
 * NOTE(review): no range check is visible on this path; confirm the value is
 * bounded before it is used, e.g. as a shift count or table index. */
561 vf->priv->log2_count= *((
unsigned int*)data);
/* Option parsing (the enclosing function header is not visible in this
 * view): up to three colon-separated integers are read into log2c,
 * priv->qp and priv->mode, in that order.
 * NOTE(review): the sscanf return value is ignored, so fields missing from
 * args keep whatever value they held before this line -- confirm they are
 * initialised on the lines not shown. */
587 if (args) sscanf(args,
"%d:%d:%d", &log2c, &vf->
priv->
qp, &vf->
priv->
mode);
/* log2c is only accepted inside 0..6; the controlled statement is on a line
 * not shown. */
589 if( log2c >=0 && log2c <=6 )
615 "simple postprocess",
617 "Michael Niedermayer",