105 #if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
107 int16_t *
block, int16_t *qmat);
109 static void ff_prores_idct_put_10_sse2_wrap(int16_t *
dst){
137 #if ARCH_X86_64 && HAVE_YASM
167 #define AANSCALE_BITS 12
170 #define NB_ITS_SPEED 50000
175 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
176 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
177 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
178 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
179 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
180 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
181 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
182 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
192 for (i = 0; i < 64; i++) {
193 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
204 memset(block, 0, 64 *
sizeof(*block));
208 for (i = 0; i < 64; i++)
209 block[i] = (
av_lfg_get(prng) % (2*vals)) -vals;
212 for (i = 0; i < 64; i++)
218 for (i = 0; i < j; i++) {
220 block[idx] =
av_lfg_get(prng) % (2*vals) -vals;
224 block[ 0] =
av_lfg_get(prng) % (16*vals) - (8*vals);
225 block[63] = (block[0] & 1) ^ 1;
230 static void permute(int16_t dst[64],
const int16_t src[64],
int perm)
235 for (i = 0; i < 64; i++)
236 dst[idct_mmx_perm[i]] = src[i];
238 for (i = 0; i < 64; i++)
239 dst[idct_simple_mmx_perm[i]] = src[i];
241 for (i = 0; i < 64; i++)
242 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
244 for (i = 0; i < 64; i++)
245 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
247 for (i = 0; i < 64; i++)
248 dst[(i>>3) | ((i<<3)&0x38)] = src[i];
250 for (i = 0; i < 64; i++)
260 int64_t err2, ti, ti1, it1, err_sum = 0;
261 int64_t sysErr[64], sysErrMax = 0;
263 int blockSumErrMax = 0, blockSumErr;
265 const int vals=1<<
bits;
273 for (i = 0; i < 64; i++)
275 for (it = 0; it <
NB_ITS; it++) {
283 for (i = 0; i < 64; i++) {
292 for (i = 0; i < 64; i++) {
299 sysErr[i] +=
block[i] - block1[i];
301 if (abs(
block[i]) > maxout)
302 maxout = abs(
block[i]);
304 if (blockSumErrMax < blockSumErr)
305 blockSumErrMax = blockSumErr;
307 for (i = 0; i < 64; i++)
308 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
310 for (i = 0; i < 64; i++) {
313 printf(
"%7d ", (
int) sysErr[i]);
317 omse = (double) err2 / NB_ITS / 64;
318 ome = (double) err_sum / NB_ITS / 64;
320 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
322 printf(
"%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
323 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
324 omse, ome, (
double) sysErrMax / NB_ITS,
325 maxout, blockSumErrMax);
348 }
while (ti1 < 1000000);
350 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
351 (
double) it1 * 1000.0 / (
double) ti1);
362 static double c8[8][8];
363 static double c4[4][4];
364 double block1[64], block2[64], block3[64];
371 for (i = 0; i < 8; i++) {
373 for (j = 0; j < 8; j++) {
374 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
375 c8[i][j] = s * cos(
M_PI * i * (j + 0.5) / 8.0);
376 sum += c8[i][j] * c8[i][j];
380 for (i = 0; i < 4; i++) {
382 for (j = 0; j < 4; j++) {
383 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
384 c4[i][j] = s * cos(
M_PI * i * (j + 0.5) / 4.0);
385 sum += c4[i][j] * c4[i][j];
392 for (i = 0; i < 4; i++) {
393 for (j = 0; j < 8; j++) {
394 block1[8 * (2 * i) + j] =
395 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
396 block1[8 * (2 * i + 1) + j] =
397 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
402 for (i = 0; i < 8; i++) {
403 for (j = 0; j < 8; j++) {
405 for (k = 0; k < 8; k++)
406 sum += c8[k][j] * block1[8 * i + k];
407 block2[8 * i + j] = sum;
412 for (i = 0; i < 8; i++) {
413 for (j = 0; j < 4; j++) {
416 for (k = 0; k < 4; k++)
417 sum += c4[k][j] * block2[8 * (2 * k) + i];
418 block3[8 * (2 * j) + i] = sum;
422 for (k = 0; k < 4; k++)
423 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
424 block3[8 * (2 * j + 1) + i] = sum;
429 for (i = 0; i < 8; i++) {
430 for (j = 0; j < 8; j++) {
431 v = block3[8 * i + j];
433 else if (v > 255) v = 255;
434 dest[i * linesize + j] = (int)
rint(v);
440 void (*idct248_put)(
uint8_t *dest,
int line_size,
444 int it, i, it1, ti, ti1, err_max,
v;
452 for (it = 0; it <
NB_ITS; it++) {
454 for (i = 0; i < 64; i++)
458 for (i = 0; i < 64; i++)
462 for (i = 0; i < 64; i++)
466 for (i = 0; i < 64; i++) {
493 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248", name, err_max);
502 for (i = 0; i < 64; i++)
509 }
while (ti1 < 1000000);
511 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248", name,
512 (
double) it1 * 1000.0 / (
double) ti1);
517 printf(
"dct-test [-i] [<test-number>] [<bits>]\n"
518 "test-number 0 -> test with random matrixes\n"
519 " 1 -> test with random sparse matrixes\n"
520 " 2 -> do 3. test from mpeg4 std\n"
521 "bits Number of time domain bits to use, 8 is default\n"
522 "-i test IDCT implementations\n"
523 "-4 test IDCT248 implementations\n"
531 int main(
int argc,
char **argv)
533 int test_idct = 0, test_248_dct = 0;
546 c =
getopt(argc, argv,
"ih4t");
567 test = atoi(argv[
optind]);
568 if(optind+1 < argc) bits= atoi(argv[optind+1]);
570 printf(
"ffmpeg DCT/IDCT test\n");
575 const struct algo *algos = test_idct ? idct_tab :
fdct_tab;
576 for (i = 0; algos[i].
name; i++)
578 err |=
dct_error(&algos[i], test, test_idct, speed, bits);