00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "libavutil/common.h"
00034 #include "libavutil/x86/asm.h"
00035 #include "libavcodec/dsputil.h"
00036
00037 #if HAVE_INLINE_ASM
00038
00040
00041
00042
00043
00044
00045
00046
00047
00049
/*
 * Fixed-point scaling parameters shared by the column and row passes.
 *   SHIFT_FRW_COL - left shift applied to the inputs in the column pass.
 *   SHIFT_FRW_ROW - arithmetic right shift applied to the row-pass sums.
 *   RND_FRW_ROW   - rounding bias added before that shift (half of
 *                   1 << SHIFT_FRW_ROW).
 * NOTE(review): SHIFT_FRW_COL and SHIFT_FRW_ROW are handed to S() inside the
 * asm templates, presumably to be stringified into the instruction text, so
 * keep them as plain constant expressions an assembler can evaluate.
 */
00050 #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy
00051 #define SHIFT_FRW_COL BITS_FRW_ACC
00052 #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3)
00053 #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1))
00054
00055
/* Expand to eight comma-separated copies of x; used to fill one 8-word
 * (16-byte) row of the constant vectors below with a single value. */
00056 #define X8(x) x,x,x,x,x,x,x,x
00057
00058
00059 DECLARE_ALIGNED(16, static const int16_t, fdct_tg_all_16)[24] = {
00060 X8(13036),
00061 X8(27146),
00062 X8(-21746)
00063 };
00064
00065 DECLARE_ALIGNED(16, static const int16_t, ocos_4_16)[8] = {
00066 X8(23170)
00067 };
00068
00069 DECLARE_ALIGNED(16, static const int16_t, fdct_one_corr)[8] = { X8(1) };
00070
00071 DECLARE_ALIGNED(8, static const int32_t, fdct_r_row)[2] = {RND_FRW_ROW, RND_FRW_ROW };
00072
00073 static const struct
00074 {
00075 DECLARE_ALIGNED(16, const int32_t, fdct_r_row_sse2)[4];
00076 } fdct_r_row_sse2 =
00077 {{
00078 RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW
00079 }};
00080
00081
00082 DECLARE_ALIGNED(8, static const int16_t, tab_frw_01234567)[] = {
00083 16384, 16384, 22725, 19266,
00084 16384, 16384, 12873, 4520,
00085 21407, 8867, 19266, -4520,
00086 -8867, -21407, -22725, -12873,
00087 16384, -16384, 12873, -22725,
00088 -16384, 16384, 4520, 19266,
00089 8867, -21407, 4520, -12873,
00090 21407, -8867, 19266, -22725,
00091
00092 22725, 22725, 31521, 26722,
00093 22725, 22725, 17855, 6270,
00094 29692, 12299, 26722, -6270,
00095 -12299, -29692, -31521, -17855,
00096 22725, -22725, 17855, -31521,
00097 -22725, 22725, 6270, 26722,
00098 12299, -29692, 6270, -17855,
00099 29692, -12299, 26722, -31521,
00100
00101 21407, 21407, 29692, 25172,
00102 21407, 21407, 16819, 5906,
00103 27969, 11585, 25172, -5906,
00104 -11585, -27969, -29692, -16819,
00105 21407, -21407, 16819, -29692,
00106 -21407, 21407, 5906, 25172,
00107 11585, -27969, 5906, -16819,
00108 27969, -11585, 25172, -29692,
00109
00110 19266, 19266, 26722, 22654,
00111 19266, 19266, 15137, 5315,
00112 25172, 10426, 22654, -5315,
00113 -10426, -25172, -26722, -15137,
00114 19266, -19266, 15137, -26722,
00115 -19266, 19266, 5315, 22654,
00116 10426, -25172, 5315, -15137,
00117 25172, -10426, 22654, -26722,
00118
00119 16384, 16384, 22725, 19266,
00120 16384, 16384, 12873, 4520,
00121 21407, 8867, 19266, -4520,
00122 -8867, -21407, -22725, -12873,
00123 16384, -16384, 12873, -22725,
00124 -16384, 16384, 4520, 19266,
00125 8867, -21407, 4520, -12873,
00126 21407, -8867, 19266, -22725,
00127
00128 19266, 19266, 26722, 22654,
00129 19266, 19266, 15137, 5315,
00130 25172, 10426, 22654, -5315,
00131 -10426, -25172, -26722, -15137,
00132 19266, -19266, 15137, -26722,
00133 -19266, 19266, 5315, 22654,
00134 10426, -25172, 5315, -15137,
00135 25172, -10426, 22654, -26722,
00136
00137 21407, 21407, 29692, 25172,
00138 21407, 21407, 16819, 5906,
00139 27969, 11585, 25172, -5906,
00140 -11585, -27969, -29692, -16819,
00141 21407, -21407, 16819, -29692,
00142 -21407, 21407, 5906, 25172,
00143 11585, -27969, 5906, -16819,
00144 27969, -11585, 25172, -29692,
00145
00146 22725, 22725, 31521, 26722,
00147 22725, 22725, 17855, 6270,
00148 29692, 12299, 26722, -6270,
00149 -12299, -29692, -31521, -17855,
00150 22725, -22725, 17855, -31521,
00151 -22725, 22725, 6270, 26722,
00152 12299, -29692, 6270, -17855,
00153 29692, -12299, 26722, -31521,
00154 };
00155
00156 static const struct
00157 {
00158 DECLARE_ALIGNED(16, const int16_t, tab_frw_01234567_sse2)[256];
00159 } tab_frw_01234567_sse2 =
00160 {{
00161
00162 #define TABLE_SSE2 C4, C4, C1, C3, -C6, -C2, -C1, -C5, \
00163 C4, C4, C5, C7, C2, C6, C3, -C7, \
00164 -C4, C4, C7, C3, C6, -C2, C7, -C5, \
00165 C4, -C4, C5, -C1, C2, -C6, C3, -C1,
00166
00167 #define C1 22725
00168 #define C2 21407
00169 #define C3 19266
00170 #define C4 16384
00171 #define C5 12873
00172 #define C6 8867
00173 #define C7 4520
00174 TABLE_SSE2
00175
00176 #undef C1
00177 #undef C2
00178 #undef C3
00179 #undef C4
00180 #undef C5
00181 #undef C6
00182 #undef C7
00183 #define C1 31521
00184 #define C2 29692
00185 #define C3 26722
00186 #define C4 22725
00187 #define C5 17855
00188 #define C6 12299
00189 #define C7 6270
00190 TABLE_SSE2
00191
00192 #undef C1
00193 #undef C2
00194 #undef C3
00195 #undef C4
00196 #undef C5
00197 #undef C6
00198 #undef C7
00199 #define C1 29692
00200 #define C2 27969
00201 #define C3 25172
00202 #define C4 21407
00203 #define C5 16819
00204 #define C6 11585
00205 #define C7 5906
00206 TABLE_SSE2
00207
00208 #undef C1
00209 #undef C2
00210 #undef C3
00211 #undef C4
00212 #undef C5
00213 #undef C6
00214 #undef C7
00215 #define C1 26722
00216 #define C2 25172
00217 #define C3 22654
00218 #define C4 19266
00219 #define C5 15137
00220 #define C6 10426
00221 #define C7 5315
00222 TABLE_SSE2
00223
00224 #undef C1
00225 #undef C2
00226 #undef C3
00227 #undef C4
00228 #undef C5
00229 #undef C6
00230 #undef C7
00231 #define C1 22725
00232 #define C2 21407
00233 #define C3 19266
00234 #define C4 16384
00235 #define C5 12873
00236 #define C6 8867
00237 #define C7 4520
00238 TABLE_SSE2
00239
00240 #undef C1
00241 #undef C2
00242 #undef C3
00243 #undef C4
00244 #undef C5
00245 #undef C6
00246 #undef C7
00247 #define C1 26722
00248 #define C2 25172
00249 #define C3 22654
00250 #define C4 19266
00251 #define C5 15137
00252 #define C6 10426
00253 #define C7 5315
00254 TABLE_SSE2
00255
00256 #undef C1
00257 #undef C2
00258 #undef C3
00259 #undef C4
00260 #undef C5
00261 #undef C6
00262 #undef C7
00263 #define C1 29692
00264 #define C2 27969
00265 #define C3 25172
00266 #define C4 21407
00267 #define C5 16819
00268 #define C6 11585
00269 #define C7 5906
00270 TABLE_SSE2
00271
00272 #undef C1
00273 #undef C2
00274 #undef C3
00275 #undef C4
00276 #undef C5
00277 #undef C6
00278 #undef C7
00279 #define C1 31521
00280 #define C2 29692
00281 #define C3 26722
00282 #define C4 22725
00283 #define C5 17855
00284 #define C6 12299
00285 #define C7 6270
00286 TABLE_SSE2
00287 }};
00288
/* Short alias for AV_TOSTRING, used to splice the shift-count macros into
 * the asm template strings below.
 * NOTE(review): presumably the extra macro level lets the argument expand to
 * its numeric value before it is stringified - confirm against AV_TOSTRING. */
00289 #define S(s) AV_TOSTRING(s) //AV_STRINGIFY is too long
00290
00291 #define FDCT_COL(cpu, mm, mov)\
00292 static av_always_inline void fdct_col_##cpu(const int16_t *in, int16_t *out, int offset)\
00293 {\
00294 __asm__ volatile (\
00295 #mov" 16(%0), %%"#mm"0 \n\t" \
00296 #mov" 96(%0), %%"#mm"1 \n\t" \
00297 #mov" %%"#mm"0, %%"#mm"2 \n\t" \
00298 #mov" 32(%0), %%"#mm"3 \n\t" \
00299 "paddsw %%"#mm"1, %%"#mm"0 \n\t" \
00300 #mov" 80(%0), %%"#mm"4 \n\t" \
00301 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"0 \n\t" \
00302 #mov" (%0), %%"#mm"5 \n\t" \
00303 "paddsw %%"#mm"3, %%"#mm"4 \n\t" \
00304 "paddsw 112(%0), %%"#mm"5 \n\t" \
00305 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"4 \n\t" \
00306 #mov" %%"#mm"0, %%"#mm"6 \n\t" \
00307 "psubsw %%"#mm"1, %%"#mm"2 \n\t" \
00308 #mov" 16(%1), %%"#mm"1 \n\t" \
00309 "psubsw %%"#mm"4, %%"#mm"0 \n\t" \
00310 #mov" 48(%0), %%"#mm"7 \n\t" \
00311 "pmulhw %%"#mm"0, %%"#mm"1 \n\t" \
00312 "paddsw 64(%0), %%"#mm"7 \n\t" \
00313 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"5 \n\t" \
00314 "paddsw %%"#mm"4, %%"#mm"6 \n\t" \
00315 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"7 \n\t" \
00316 #mov" %%"#mm"5, %%"#mm"4 \n\t" \
00317 "psubsw %%"#mm"7, %%"#mm"5 \n\t" \
00318 "paddsw %%"#mm"5, %%"#mm"1 \n\t" \
00319 "paddsw %%"#mm"7, %%"#mm"4 \n\t" \
00320 "por (%2), %%"#mm"1 \n\t" \
00321 "psllw $"S(SHIFT_FRW_COL)"+1, %%"#mm"2 \n\t" \
00322 "pmulhw 16(%1), %%"#mm"5 \n\t" \
00323 #mov" %%"#mm"4, %%"#mm"7 \n\t" \
00324 "psubsw 80(%0), %%"#mm"3 \n\t" \
00325 "psubsw %%"#mm"6, %%"#mm"4 \n\t" \
00326 #mov" %%"#mm"1, 32(%3) \n\t" \
00327 "paddsw %%"#mm"6, %%"#mm"7 \n\t" \
00328 #mov" 48(%0), %%"#mm"1 \n\t" \
00329 "psllw $"S(SHIFT_FRW_COL)"+1, %%"#mm"3 \n\t" \
00330 "psubsw 64(%0), %%"#mm"1 \n\t" \
00331 #mov" %%"#mm"2, %%"#mm"6 \n\t" \
00332 #mov" %%"#mm"4, 64(%3) \n\t" \
00333 "paddsw %%"#mm"3, %%"#mm"2 \n\t" \
00334 "pmulhw (%4), %%"#mm"2 \n\t" \
00335 "psubsw %%"#mm"3, %%"#mm"6 \n\t" \
00336 "pmulhw (%4), %%"#mm"6 \n\t" \
00337 "psubsw %%"#mm"0, %%"#mm"5 \n\t" \
00338 "por (%2), %%"#mm"5 \n\t" \
00339 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"1 \n\t" \
00340 "por (%2), %%"#mm"2 \n\t" \
00341 #mov" %%"#mm"1, %%"#mm"4 \n\t" \
00342 #mov" (%0), %%"#mm"3 \n\t" \
00343 "paddsw %%"#mm"6, %%"#mm"1 \n\t" \
00344 "psubsw 112(%0), %%"#mm"3 \n\t" \
00345 "psubsw %%"#mm"6, %%"#mm"4 \n\t" \
00346 #mov" (%1), %%"#mm"0 \n\t" \
00347 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"3 \n\t" \
00348 #mov" 32(%1), %%"#mm"6 \n\t" \
00349 "pmulhw %%"#mm"1, %%"#mm"0 \n\t" \
00350 #mov" %%"#mm"7, (%3) \n\t" \
00351 "pmulhw %%"#mm"4, %%"#mm"6 \n\t" \
00352 #mov" %%"#mm"5, 96(%3) \n\t" \
00353 #mov" %%"#mm"3, %%"#mm"7 \n\t" \
00354 #mov" 32(%1), %%"#mm"5 \n\t" \
00355 "psubsw %%"#mm"2, %%"#mm"7 \n\t" \
00356 "paddsw %%"#mm"2, %%"#mm"3 \n\t" \
00357 "pmulhw %%"#mm"7, %%"#mm"5 \n\t" \
00358 "paddsw %%"#mm"3, %%"#mm"0 \n\t" \
00359 "paddsw %%"#mm"4, %%"#mm"6 \n\t" \
00360 "pmulhw (%1), %%"#mm"3 \n\t" \
00361 "por (%2), %%"#mm"0 \n\t" \
00362 "paddsw %%"#mm"7, %%"#mm"5 \n\t" \
00363 "psubsw %%"#mm"6, %%"#mm"7 \n\t" \
00364 #mov" %%"#mm"0, 16(%3) \n\t" \
00365 "paddsw %%"#mm"4, %%"#mm"5 \n\t" \
00366 #mov" %%"#mm"7, 48(%3) \n\t" \
00367 "psubsw %%"#mm"1, %%"#mm"3 \n\t" \
00368 #mov" %%"#mm"5, 80(%3) \n\t" \
00369 #mov" %%"#mm"3, 112(%3) \n\t" \
00370 : \
00371 : "r" (in + offset), "r" (fdct_tg_all_16), "r" (fdct_one_corr), \
00372 "r" (out + offset), "r" (ocos_4_16)); \
00373 }
00374
00375 FDCT_COL(mmx, mm, movq)
00376 FDCT_COL(sse2, xmm, movdqa)
00377
00378 static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
00379 {
00380 __asm__ volatile(
00381 #define FDCT_ROW_SSE2_H1(i,t) \
00382 "movq " #i "(%0), %%xmm2 \n\t" \
00383 "movq " #i "+8(%0), %%xmm0 \n\t" \
00384 "movdqa " #t "+32(%1), %%xmm3 \n\t" \
00385 "movdqa " #t "+48(%1), %%xmm7 \n\t" \
00386 "movdqa " #t "(%1), %%xmm4 \n\t" \
00387 "movdqa " #t "+16(%1), %%xmm5 \n\t"
00388
00389 #define FDCT_ROW_SSE2_H2(i,t) \
00390 "movq " #i "(%0), %%xmm2 \n\t" \
00391 "movq " #i "+8(%0), %%xmm0 \n\t" \
00392 "movdqa " #t "+32(%1), %%xmm3 \n\t" \
00393 "movdqa " #t "+48(%1), %%xmm7 \n\t"
00394
00395 #define FDCT_ROW_SSE2(i) \
00396 "movq %%xmm2, %%xmm1 \n\t" \
00397 "pshuflw $27, %%xmm0, %%xmm0 \n\t" \
00398 "paddsw %%xmm0, %%xmm1 \n\t" \
00399 "psubsw %%xmm0, %%xmm2 \n\t" \
00400 "punpckldq %%xmm2, %%xmm1 \n\t" \
00401 "pshufd $78, %%xmm1, %%xmm2 \n\t" \
00402 "pmaddwd %%xmm2, %%xmm3 \n\t" \
00403 "pmaddwd %%xmm1, %%xmm7 \n\t" \
00404 "pmaddwd %%xmm5, %%xmm2 \n\t" \
00405 "pmaddwd %%xmm4, %%xmm1 \n\t" \
00406 "paddd %%xmm7, %%xmm3 \n\t" \
00407 "paddd %%xmm2, %%xmm1 \n\t" \
00408 "paddd %%xmm6, %%xmm3 \n\t" \
00409 "paddd %%xmm6, %%xmm1 \n\t" \
00410 "psrad %3, %%xmm3 \n\t" \
00411 "psrad %3, %%xmm1 \n\t" \
00412 "packssdw %%xmm3, %%xmm1 \n\t" \
00413 "movdqa %%xmm1, " #i "(%4) \n\t"
00414
00415 "movdqa (%2), %%xmm6 \n\t"
00416 FDCT_ROW_SSE2_H1(0,0)
00417 FDCT_ROW_SSE2(0)
00418 FDCT_ROW_SSE2_H2(64,0)
00419 FDCT_ROW_SSE2(64)
00420
00421 FDCT_ROW_SSE2_H1(16,64)
00422 FDCT_ROW_SSE2(16)
00423 FDCT_ROW_SSE2_H2(112,64)
00424 FDCT_ROW_SSE2(112)
00425
00426 FDCT_ROW_SSE2_H1(32,128)
00427 FDCT_ROW_SSE2(32)
00428 FDCT_ROW_SSE2_H2(96,128)
00429 FDCT_ROW_SSE2(96)
00430
00431 FDCT_ROW_SSE2_H1(48,192)
00432 FDCT_ROW_SSE2(48)
00433 FDCT_ROW_SSE2_H2(80,192)
00434 FDCT_ROW_SSE2(80)
00435 :
00436 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2),
00437 "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
00438 XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
00439 "%xmm4", "%xmm5", "%xmm6", "%xmm7")
00440 );
00441 }
00442
00443 static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
00444 {
00445 __asm__ volatile (
00446 "pshufw $0x1B, 8(%0), %%mm5 \n\t"
00447 "movq (%0), %%mm0 \n\t"
00448 "movq %%mm0, %%mm1 \n\t"
00449 "paddsw %%mm5, %%mm0 \n\t"
00450 "psubsw %%mm5, %%mm1 \n\t"
00451 "movq %%mm0, %%mm2 \n\t"
00452 "punpckldq %%mm1, %%mm0 \n\t"
00453 "punpckhdq %%mm1, %%mm2 \n\t"
00454 "movq (%1), %%mm1 \n\t"
00455 "movq 8(%1), %%mm3 \n\t"
00456 "movq 16(%1), %%mm4 \n\t"
00457 "movq 24(%1), %%mm5 \n\t"
00458 "movq 32(%1), %%mm6 \n\t"
00459 "movq 40(%1), %%mm7 \n\t"
00460 "pmaddwd %%mm0, %%mm1 \n\t"
00461 "pmaddwd %%mm2, %%mm3 \n\t"
00462 "pmaddwd %%mm0, %%mm4 \n\t"
00463 "pmaddwd %%mm2, %%mm5 \n\t"
00464 "pmaddwd %%mm0, %%mm6 \n\t"
00465 "pmaddwd %%mm2, %%mm7 \n\t"
00466 "pmaddwd 48(%1), %%mm0 \n\t"
00467 "pmaddwd 56(%1), %%mm2 \n\t"
00468 "paddd %%mm1, %%mm3 \n\t"
00469 "paddd %%mm4, %%mm5 \n\t"
00470 "paddd %%mm6, %%mm7 \n\t"
00471 "paddd %%mm0, %%mm2 \n\t"
00472 "movq (%2), %%mm0 \n\t"
00473 "paddd %%mm0, %%mm3 \n\t"
00474 "paddd %%mm0, %%mm5 \n\t"
00475 "paddd %%mm0, %%mm7 \n\t"
00476 "paddd %%mm0, %%mm2 \n\t"
00477 "psrad $"S(SHIFT_FRW_ROW)", %%mm3 \n\t"
00478 "psrad $"S(SHIFT_FRW_ROW)", %%mm5 \n\t"
00479 "psrad $"S(SHIFT_FRW_ROW)", %%mm7 \n\t"
00480 "psrad $"S(SHIFT_FRW_ROW)", %%mm2 \n\t"
00481 "packssdw %%mm5, %%mm3 \n\t"
00482 "packssdw %%mm2, %%mm7 \n\t"
00483 "movq %%mm3, (%3) \n\t"
00484 "movq %%mm7, 8(%3) \n\t"
00485 :
00486 : "r" (in), "r" (table), "r" (fdct_r_row), "r" (out));
00487 }
00488
00489 static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
00490 {
00491
00492 __asm__ volatile(
00493 "movd 12(%0), %%mm1 \n\t"
00494 "punpcklwd 8(%0), %%mm1 \n\t"
00495 "movq %%mm1, %%mm2 \n\t"
00496 "psrlq $0x20, %%mm1 \n\t"
00497 "movq 0(%0), %%mm0 \n\t"
00498 "punpcklwd %%mm2, %%mm1 \n\t"
00499 "movq %%mm0, %%mm5 \n\t"
00500 "paddsw %%mm1, %%mm0 \n\t"
00501 "psubsw %%mm1, %%mm5 \n\t"
00502 "movq %%mm0, %%mm2 \n\t"
00503 "punpckldq %%mm5, %%mm0 \n\t"
00504 "punpckhdq %%mm5, %%mm2 \n\t"
00505 "movq 0(%1), %%mm1 \n\t"
00506 "movq 8(%1), %%mm3 \n\t"
00507 "movq 16(%1), %%mm4 \n\t"
00508 "movq 24(%1), %%mm5 \n\t"
00509 "movq 32(%1), %%mm6 \n\t"
00510 "movq 40(%1), %%mm7 \n\t"
00511 "pmaddwd %%mm0, %%mm1 \n\t"
00512 "pmaddwd %%mm2, %%mm3 \n\t"
00513 "pmaddwd %%mm0, %%mm4 \n\t"
00514 "pmaddwd %%mm2, %%mm5 \n\t"
00515 "pmaddwd %%mm0, %%mm6 \n\t"
00516 "pmaddwd %%mm2, %%mm7 \n\t"
00517 "pmaddwd 48(%1), %%mm0 \n\t"
00518 "pmaddwd 56(%1), %%mm2 \n\t"
00519 "paddd %%mm1, %%mm3 \n\t"
00520 "paddd %%mm4, %%mm5 \n\t"
00521 "paddd %%mm6, %%mm7 \n\t"
00522 "paddd %%mm0, %%mm2 \n\t"
00523 "movq (%2), %%mm0 \n\t"
00524 "paddd %%mm0, %%mm3 \n\t"
00525 "paddd %%mm0, %%mm5 \n\t"
00526 "paddd %%mm0, %%mm7 \n\t"
00527 "paddd %%mm0, %%mm2 \n\t"
00528 "psrad $"S(SHIFT_FRW_ROW)", %%mm3 \n\t"
00529 "psrad $"S(SHIFT_FRW_ROW)", %%mm5 \n\t"
00530 "psrad $"S(SHIFT_FRW_ROW)", %%mm7 \n\t"
00531 "psrad $"S(SHIFT_FRW_ROW)", %%mm2 \n\t"
00532 "packssdw %%mm5, %%mm3 \n\t"
00533 "packssdw %%mm2, %%mm7 \n\t"
00534 "movq %%mm3, 0(%3) \n\t"
00535 "movq %%mm7, 8(%3) \n\t"
00536 :
00537 : "r" (in), "r" (table), "r" (fdct_r_row), "r" (out));
00538 }
00539
00540 void ff_fdct_mmx(int16_t *block)
00541 {
00542 DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
00543 int16_t * block1= (int16_t*)align_tmp;
00544 const int16_t *table= tab_frw_01234567;
00545 int i;
00546
00547 fdct_col_mmx(block, block1, 0);
00548 fdct_col_mmx(block, block1, 4);
00549
00550 for(i=8;i>0;i--) {
00551 fdct_row_mmx(block1, block, table);
00552 block1 += 8;
00553 table += 32;
00554 block += 8;
00555 }
00556 }
00557
00558 void ff_fdct_mmx2(int16_t *block)
00559 {
00560 DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
00561 int16_t *block1= (int16_t*)align_tmp;
00562 const int16_t *table= tab_frw_01234567;
00563 int i;
00564
00565 fdct_col_mmx(block, block1, 0);
00566 fdct_col_mmx(block, block1, 4);
00567
00568 for(i=8;i>0;i--) {
00569 fdct_row_mmx2(block1, block, table);
00570 block1 += 8;
00571 table += 32;
00572 block += 8;
00573 }
00574 }
00575
00576 void ff_fdct_sse2(int16_t *block)
00577 {
00578 DECLARE_ALIGNED(16, int64_t, align_tmp)[16];
00579 int16_t * const block1= (int16_t*)align_tmp;
00580
00581 fdct_col_sse2(block, block1, 0);
00582 fdct_row_sse2(block1, block);
00583 }
00584
00585 #endif