00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "libavutil/common.h"
00034 #include "libavcodec/dsputil.h"
00035
00037
00038
00039
00040
00041
00042
00043
00044
00046
/* Fixed-point scaling parameters shared by the column and row passes. */
00047 #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy
/* Left-shift count applied (psllw) to the inputs of the column pass in FDCT_COL. */
00048 #define SHIFT_FRW_COL BITS_FRW_ACC
/* Right-shift count applied (psrad) to the 32-bit sums of the row pass before packing. */
00049 #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3)
/* Rounding bias added before the SHIFT_FRW_ROW shift: half of 1 << SHIFT_FRW_ROW. */
00050 #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1))
00051
00052
/* Repeats its argument eight times, comma separated; fills 8-lane int16 constant vectors. */
00053 #define X8(x) x,x,x,x,x,x,x,x
00054
00055
/*
 * pmulhw multipliers for the column pass (FDCT_COL reads them at byte offsets
 * 0, 16 and 32), each replicated over eight 16-bit lanes.
 * Numerically: 13036 = round(tan(pi/16) * 2^16),
 *              27146 = round(tan(2*pi/16) * 2^16),
 *             -21746 = round(tan(3*pi/16) * 2^16) - 2^16.
 */
00056 DECLARE_ALIGNED(16, static const int16_t, fdct_tg_all_16)[24] = {
00057 X8(13036),
00058 X8(27146),
00059 X8(-21746)
00060 };
00061
/*
 * pmulhw multiplier used twice in FDCT_COL (operand %4), replicated over eight
 * 16-bit lanes.  Numerically 23170 = round(cos(pi/4) * 2^15).
 */
00062 DECLARE_ALIGNED(16, static const int16_t, ocos_4_16)[8] = {
00063 X8(23170)
00064 };
00065
/* Eight 16-bit ones; FDCT_COL ORs this vector (por) into several values, setting their lowest bit. */
00066 DECLARE_ALIGNED(16, static const int16_t, fdct_one_corr)[8] = { X8(1) };
00067
/* Rounding bias for the MMX row passes: two 32-bit copies of RND_FRW_ROW, loaded with movq and added with paddd. */
00068 DECLARE_ALIGNED(8, static const int32_t, fdct_r_row)[2] = {RND_FRW_ROW, RND_FRW_ROW };
00069
/*
 * Rounding bias for the SSE2 row pass: four 32-bit copies of RND_FRW_ROW,
 * loaded into xmm6 with movdqa by fdct_row_sse2.
 * NOTE(review): the array is wrapped in a struct, presumably as a workaround
 * for an alignment problem on some toolchain -- confirm before simplifying.
 */
00070 static struct
00071 {
00072 DECLARE_ALIGNED(16, const int32_t, fdct_r_row_sse2)[4];
00073 } fdct_r_row_sse2 =
00074 {{
00075 RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW
00076 }};
00077
00078
/*
 * Coefficients for the MMX row passes (fdct_row_mmx / fdct_row_mmx2).
 *
 * The array holds eight consecutive sub-tables of 32 int16 values (64 bytes),
 * one per row: ff_fdct_mmx and ff_fdct_mmx2 start at the beginning and advance
 * the table pointer by 32 entries for every row.  Within a sub-table the row
 * pass reads eight 4-word groups (byte offsets 0, 8, ..., 56) as pmaddwd
 * operands.  The sub-tables for rows 4, 5, 6 and 7 repeat those of rows
 * 0, 3, 2 and 1 respectively.
 */
00079 DECLARE_ALIGNED(8, static const int16_t, tab_frw_01234567)[] = {
/* row 0 */
00080 16384, 16384, 22725, 19266,
00081 16384, 16384, 12873, 4520,
00082 21407, 8867, 19266, -4520,
00083 -8867, -21407, -22725, -12873,
00084 16384, -16384, 12873, -22725,
00085 -16384, 16384, 4520, 19266,
00086 8867, -21407, 4520, -12873,
00087 21407, -8867, 19266, -22725,
00088
/* row 1 */
00089 22725, 22725, 31521, 26722,
00090 22725, 22725, 17855, 6270,
00091 29692, 12299, 26722, -6270,
00092 -12299, -29692, -31521, -17855,
00093 22725, -22725, 17855, -31521,
00094 -22725, 22725, 6270, 26722,
00095 12299, -29692, 6270, -17855,
00096 29692, -12299, 26722, -31521,
00097
/* row 2 */
00098 21407, 21407, 29692, 25172,
00099 21407, 21407, 16819, 5906,
00100 27969, 11585, 25172, -5906,
00101 -11585, -27969, -29692, -16819,
00102 21407, -21407, 16819, -29692,
00103 -21407, 21407, 5906, 25172,
00104 11585, -27969, 5906, -16819,
00105 27969, -11585, 25172, -29692,
00106
/* row 3 */
00107 19266, 19266, 26722, 22654,
00108 19266, 19266, 15137, 5315,
00109 25172, 10426, 22654, -5315,
00110 -10426, -25172, -26722, -15137,
00111 19266, -19266, 15137, -26722,
00112 -19266, 19266, 5315, 22654,
00113 10426, -25172, 5315, -15137,
00114 25172, -10426, 22654, -26722,
00115
/* row 4 (same values as row 0) */
00116 16384, 16384, 22725, 19266,
00117 16384, 16384, 12873, 4520,
00118 21407, 8867, 19266, -4520,
00119 -8867, -21407, -22725, -12873,
00120 16384, -16384, 12873, -22725,
00121 -16384, 16384, 4520, 19266,
00122 8867, -21407, 4520, -12873,
00123 21407, -8867, 19266, -22725,
00124
/* row 5 (same values as row 3) */
00125 19266, 19266, 26722, 22654,
00126 19266, 19266, 15137, 5315,
00127 25172, 10426, 22654, -5315,
00128 -10426, -25172, -26722, -15137,
00129 19266, -19266, 15137, -26722,
00130 -19266, 19266, 5315, 22654,
00131 10426, -25172, 5315, -15137,
00132 25172, -10426, 22654, -26722,
00133
/* row 6 (same values as row 2) */
00134 21407, 21407, 29692, 25172,
00135 21407, 21407, 16819, 5906,
00136 27969, 11585, 25172, -5906,
00137 -11585, -27969, -29692, -16819,
00138 21407, -21407, 16819, -29692,
00139 -21407, 21407, 5906, 25172,
00140 11585, -27969, 5906, -16819,
00141 27969, -11585, 25172, -29692,
00142
/* row 7 (same values as row 1) */
00143 22725, 22725, 31521, 26722,
00144 22725, 22725, 17855, 6270,
00145 29692, 12299, 26722, -6270,
00146 -12299, -29692, -31521, -17855,
00147 22725, -22725, 17855, -31521,
00148 -22725, 22725, 6270, 26722,
00149 12299, -29692, 6270, -17855,
00150 29692, -12299, 26722, -31521,
00151 };
00152
/*
 * Coefficients for the SSE2 row pass (fdct_row_sse2).
 *
 * The 256-entry array is built from eight expansions of TABLE_SSE2, each
 * producing 32 int16 values (64 bytes) from the current C1..C7 definitions.
 * fdct_row_sse2 addresses the table at byte offsets 0, 64, 128 and 192 only,
 * i.e. it reads the first four sub-tables; sub-tables five to eight repeat the
 * C1..C7 sets of sub-tables one, four, three and two.
 *
 * NOTE(review): TABLE_SSE2 and the last C1..C7 definitions are not #undef'd
 * after the initializer within the code shown here.
 */
00153 static struct
00154 {
00155 DECLARE_ALIGNED(16, const int16_t, tab_frw_01234567_sse2)[256];
00156 } tab_frw_01234567_sse2 =
00157 {{
00158
/* Layout of one 32-entry sub-table, expressed with the C1..C7 constants in effect at expansion time. */
00159 #define TABLE_SSE2 C4, C4, C1, C3, -C6, -C2, -C1, -C5, \
00160 C4, C4, C5, C7, C2, C6, C3, -C7, \
00161 -C4, C4, C7, C3, C6, -C2, C7, -C5, \
00162 C4, -C4, C5, -C1, C2, -C6, C3, -C1,
00163
/* sub-table 1 (byte offset 0): used by fdct_row_sse2 for the rows at byte offsets 0 and 64 */
00164 #define C1 22725
00165 #define C2 21407
00166 #define C3 19266
00167 #define C4 16384
00168 #define C5 12873
00169 #define C6 8867
00170 #define C7 4520
00171 TABLE_SSE2
00172
/* sub-table 2 (byte offset 64): used for the rows at byte offsets 16 and 112 */
00173 #undef C1
00174 #undef C2
00175 #undef C3
00176 #undef C4
00177 #undef C5
00178 #undef C6
00179 #undef C7
00180 #define C1 31521
00181 #define C2 29692
00182 #define C3 26722
00183 #define C4 22725
00184 #define C5 17855
00185 #define C6 12299
00186 #define C7 6270
00187 TABLE_SSE2
00188
/* sub-table 3 (byte offset 128): used for the rows at byte offsets 32 and 96 */
00189 #undef C1
00190 #undef C2
00191 #undef C3
00192 #undef C4
00193 #undef C5
00194 #undef C6
00195 #undef C7
00196 #define C1 29692
00197 #define C2 27969
00198 #define C3 25172
00199 #define C4 21407
00200 #define C5 16819
00201 #define C6 11585
00202 #define C7 5906
00203 TABLE_SSE2
00204
/* sub-table 4 (byte offset 192): used for the rows at byte offsets 48 and 80 */
00205 #undef C1
00206 #undef C2
00207 #undef C3
00208 #undef C4
00209 #undef C5
00210 #undef C6
00211 #undef C7
00212 #define C1 26722
00213 #define C2 25172
00214 #define C3 22654
00215 #define C4 19266
00216 #define C5 15137
00217 #define C6 10426
00218 #define C7 5315
00219 TABLE_SSE2
00220
/* sub-table 5: same constants as sub-table 1 */
00221 #undef C1
00222 #undef C2
00223 #undef C3
00224 #undef C4
00225 #undef C5
00226 #undef C6
00227 #undef C7
00228 #define C1 22725
00229 #define C2 21407
00230 #define C3 19266
00231 #define C4 16384
00232 #define C5 12873
00233 #define C6 8867
00234 #define C7 4520
00235 TABLE_SSE2
00236
/* sub-table 6: same constants as sub-table 4 */
00237 #undef C1
00238 #undef C2
00239 #undef C3
00240 #undef C4
00241 #undef C5
00242 #undef C6
00243 #undef C7
00244 #define C1 26722
00245 #define C2 25172
00246 #define C3 22654
00247 #define C4 19266
00248 #define C5 15137
00249 #define C6 10426
00250 #define C7 5315
00251 TABLE_SSE2
00252
/* sub-table 7: same constants as sub-table 3 */
00253 #undef C1
00254 #undef C2
00255 #undef C3
00256 #undef C4
00257 #undef C5
00258 #undef C6
00259 #undef C7
00260 #define C1 29692
00261 #define C2 27969
00262 #define C3 25172
00263 #define C4 21407
00264 #define C5 16819
00265 #define C6 11585
00266 #define C7 5906
00267 TABLE_SSE2
00268
/* sub-table 8: same constants as sub-table 2 */
00269 #undef C1
00270 #undef C2
00271 #undef C3
00272 #undef C4
00273 #undef C5
00274 #undef C6
00275 #undef C7
00276 #define C1 31521
00277 #define C2 29692
00278 #define C3 26722
00279 #define C4 22725
00280 #define C5 17855
00281 #define C6 12299
00282 #define C7 6270
00283 TABLE_SSE2
00284 }};
00285
/* Short alias for AV_TOSTRING; pastes the SHIFT_FRW_* constants into the asm template strings. */
00286 #define S(s) AV_TOSTRING(s) //AV_STRINGIFY is too long
00287
00288 #define FDCT_COL(cpu, mm, mov)\
00289 static av_always_inline void fdct_col_##cpu(const int16_t *in, int16_t *out, int offset)\
00290 {\
00291 __asm__ volatile (\
00292 #mov" 16(%0), %%"#mm"0 \n\t" \
00293 #mov" 96(%0), %%"#mm"1 \n\t" \
00294 #mov" %%"#mm"0, %%"#mm"2 \n\t" \
00295 #mov" 32(%0), %%"#mm"3 \n\t" \
00296 "paddsw %%"#mm"1, %%"#mm"0 \n\t" \
00297 #mov" 80(%0), %%"#mm"4 \n\t" \
00298 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"0 \n\t" \
00299 #mov" (%0), %%"#mm"5 \n\t" \
00300 "paddsw %%"#mm"3, %%"#mm"4 \n\t" \
00301 "paddsw 112(%0), %%"#mm"5 \n\t" \
00302 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"4 \n\t" \
00303 #mov" %%"#mm"0, %%"#mm"6 \n\t" \
00304 "psubsw %%"#mm"1, %%"#mm"2 \n\t" \
00305 #mov" 16(%1), %%"#mm"1 \n\t" \
00306 "psubsw %%"#mm"4, %%"#mm"0 \n\t" \
00307 #mov" 48(%0), %%"#mm"7 \n\t" \
00308 "pmulhw %%"#mm"0, %%"#mm"1 \n\t" \
00309 "paddsw 64(%0), %%"#mm"7 \n\t" \
00310 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"5 \n\t" \
00311 "paddsw %%"#mm"4, %%"#mm"6 \n\t" \
00312 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"7 \n\t" \
00313 #mov" %%"#mm"5, %%"#mm"4 \n\t" \
00314 "psubsw %%"#mm"7, %%"#mm"5 \n\t" \
00315 "paddsw %%"#mm"5, %%"#mm"1 \n\t" \
00316 "paddsw %%"#mm"7, %%"#mm"4 \n\t" \
00317 "por (%2), %%"#mm"1 \n\t" \
00318 "psllw $"S(SHIFT_FRW_COL)"+1, %%"#mm"2 \n\t" \
00319 "pmulhw 16(%1), %%"#mm"5 \n\t" \
00320 #mov" %%"#mm"4, %%"#mm"7 \n\t" \
00321 "psubsw 80(%0), %%"#mm"3 \n\t" \
00322 "psubsw %%"#mm"6, %%"#mm"4 \n\t" \
00323 #mov" %%"#mm"1, 32(%3) \n\t" \
00324 "paddsw %%"#mm"6, %%"#mm"7 \n\t" \
00325 #mov" 48(%0), %%"#mm"1 \n\t" \
00326 "psllw $"S(SHIFT_FRW_COL)"+1, %%"#mm"3 \n\t" \
00327 "psubsw 64(%0), %%"#mm"1 \n\t" \
00328 #mov" %%"#mm"2, %%"#mm"6 \n\t" \
00329 #mov" %%"#mm"4, 64(%3) \n\t" \
00330 "paddsw %%"#mm"3, %%"#mm"2 \n\t" \
00331 "pmulhw (%4), %%"#mm"2 \n\t" \
00332 "psubsw %%"#mm"3, %%"#mm"6 \n\t" \
00333 "pmulhw (%4), %%"#mm"6 \n\t" \
00334 "psubsw %%"#mm"0, %%"#mm"5 \n\t" \
00335 "por (%2), %%"#mm"5 \n\t" \
00336 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"1 \n\t" \
00337 "por (%2), %%"#mm"2 \n\t" \
00338 #mov" %%"#mm"1, %%"#mm"4 \n\t" \
00339 #mov" (%0), %%"#mm"3 \n\t" \
00340 "paddsw %%"#mm"6, %%"#mm"1 \n\t" \
00341 "psubsw 112(%0), %%"#mm"3 \n\t" \
00342 "psubsw %%"#mm"6, %%"#mm"4 \n\t" \
00343 #mov" (%1), %%"#mm"0 \n\t" \
00344 "psllw $"S(SHIFT_FRW_COL)", %%"#mm"3 \n\t" \
00345 #mov" 32(%1), %%"#mm"6 \n\t" \
00346 "pmulhw %%"#mm"1, %%"#mm"0 \n\t" \
00347 #mov" %%"#mm"7, (%3) \n\t" \
00348 "pmulhw %%"#mm"4, %%"#mm"6 \n\t" \
00349 #mov" %%"#mm"5, 96(%3) \n\t" \
00350 #mov" %%"#mm"3, %%"#mm"7 \n\t" \
00351 #mov" 32(%1), %%"#mm"5 \n\t" \
00352 "psubsw %%"#mm"2, %%"#mm"7 \n\t" \
00353 "paddsw %%"#mm"2, %%"#mm"3 \n\t" \
00354 "pmulhw %%"#mm"7, %%"#mm"5 \n\t" \
00355 "paddsw %%"#mm"3, %%"#mm"0 \n\t" \
00356 "paddsw %%"#mm"4, %%"#mm"6 \n\t" \
00357 "pmulhw (%1), %%"#mm"3 \n\t" \
00358 "por (%2), %%"#mm"0 \n\t" \
00359 "paddsw %%"#mm"7, %%"#mm"5 \n\t" \
00360 "psubsw %%"#mm"6, %%"#mm"7 \n\t" \
00361 #mov" %%"#mm"0, 16(%3) \n\t" \
00362 "paddsw %%"#mm"4, %%"#mm"5 \n\t" \
00363 #mov" %%"#mm"7, 48(%3) \n\t" \
00364 "psubsw %%"#mm"1, %%"#mm"3 \n\t" \
00365 #mov" %%"#mm"5, 80(%3) \n\t" \
00366 #mov" %%"#mm"3, 112(%3) \n\t" \
00367 : \
00368 : "r" (in + offset), "r" (fdct_tg_all_16), "r" (fdct_one_corr), \
00369 "r" (out + offset), "r" (ocos_4_16)); \
00370 }
00371
00372 FDCT_COL(mmx, mm, movq)
00373 FDCT_COL(sse2, xmm, movdqa)
00374
00375 static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
00376 {
00377 __asm__ volatile(
00378 #define FDCT_ROW_SSE2_H1(i,t) \
00379 "movq " #i "(%0), %%xmm2 \n\t" \
00380 "movq " #i "+8(%0), %%xmm0 \n\t" \
00381 "movdqa " #t "+32(%1), %%xmm3 \n\t" \
00382 "movdqa " #t "+48(%1), %%xmm7 \n\t" \
00383 "movdqa " #t "(%1), %%xmm4 \n\t" \
00384 "movdqa " #t "+16(%1), %%xmm5 \n\t"
00385
00386 #define FDCT_ROW_SSE2_H2(i,t) \
00387 "movq " #i "(%0), %%xmm2 \n\t" \
00388 "movq " #i "+8(%0), %%xmm0 \n\t" \
00389 "movdqa " #t "+32(%1), %%xmm3 \n\t" \
00390 "movdqa " #t "+48(%1), %%xmm7 \n\t"
00391
00392 #define FDCT_ROW_SSE2(i) \
00393 "movq %%xmm2, %%xmm1 \n\t" \
00394 "pshuflw $27, %%xmm0, %%xmm0 \n\t" \
00395 "paddsw %%xmm0, %%xmm1 \n\t" \
00396 "psubsw %%xmm0, %%xmm2 \n\t" \
00397 "punpckldq %%xmm2, %%xmm1 \n\t" \
00398 "pshufd $78, %%xmm1, %%xmm2 \n\t" \
00399 "pmaddwd %%xmm2, %%xmm3 \n\t" \
00400 "pmaddwd %%xmm1, %%xmm7 \n\t" \
00401 "pmaddwd %%xmm5, %%xmm2 \n\t" \
00402 "pmaddwd %%xmm4, %%xmm1 \n\t" \
00403 "paddd %%xmm7, %%xmm3 \n\t" \
00404 "paddd %%xmm2, %%xmm1 \n\t" \
00405 "paddd %%xmm6, %%xmm3 \n\t" \
00406 "paddd %%xmm6, %%xmm1 \n\t" \
00407 "psrad %3, %%xmm3 \n\t" \
00408 "psrad %3, %%xmm1 \n\t" \
00409 "packssdw %%xmm3, %%xmm1 \n\t" \
00410 "movdqa %%xmm1, " #i "(%4) \n\t"
00411
00412 "movdqa (%2), %%xmm6 \n\t"
00413 FDCT_ROW_SSE2_H1(0,0)
00414 FDCT_ROW_SSE2(0)
00415 FDCT_ROW_SSE2_H2(64,0)
00416 FDCT_ROW_SSE2(64)
00417
00418 FDCT_ROW_SSE2_H1(16,64)
00419 FDCT_ROW_SSE2(16)
00420 FDCT_ROW_SSE2_H2(112,64)
00421 FDCT_ROW_SSE2(112)
00422
00423 FDCT_ROW_SSE2_H1(32,128)
00424 FDCT_ROW_SSE2(32)
00425 FDCT_ROW_SSE2_H2(96,128)
00426 FDCT_ROW_SSE2(96)
00427
00428 FDCT_ROW_SSE2_H1(48,192)
00429 FDCT_ROW_SSE2(48)
00430 FDCT_ROW_SSE2_H2(80,192)
00431 FDCT_ROW_SSE2(80)
00432 :
00433 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
00434 );
00435 }
00436
00437 static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
00438 {
00439 __asm__ volatile (
00440 "pshufw $0x1B, 8(%0), %%mm5 \n\t"
00441 "movq (%0), %%mm0 \n\t"
00442 "movq %%mm0, %%mm1 \n\t"
00443 "paddsw %%mm5, %%mm0 \n\t"
00444 "psubsw %%mm5, %%mm1 \n\t"
00445 "movq %%mm0, %%mm2 \n\t"
00446 "punpckldq %%mm1, %%mm0 \n\t"
00447 "punpckhdq %%mm1, %%mm2 \n\t"
00448 "movq (%1), %%mm1 \n\t"
00449 "movq 8(%1), %%mm3 \n\t"
00450 "movq 16(%1), %%mm4 \n\t"
00451 "movq 24(%1), %%mm5 \n\t"
00452 "movq 32(%1), %%mm6 \n\t"
00453 "movq 40(%1), %%mm7 \n\t"
00454 "pmaddwd %%mm0, %%mm1 \n\t"
00455 "pmaddwd %%mm2, %%mm3 \n\t"
00456 "pmaddwd %%mm0, %%mm4 \n\t"
00457 "pmaddwd %%mm2, %%mm5 \n\t"
00458 "pmaddwd %%mm0, %%mm6 \n\t"
00459 "pmaddwd %%mm2, %%mm7 \n\t"
00460 "pmaddwd 48(%1), %%mm0 \n\t"
00461 "pmaddwd 56(%1), %%mm2 \n\t"
00462 "paddd %%mm1, %%mm3 \n\t"
00463 "paddd %%mm4, %%mm5 \n\t"
00464 "paddd %%mm6, %%mm7 \n\t"
00465 "paddd %%mm0, %%mm2 \n\t"
00466 "movq (%2), %%mm0 \n\t"
00467 "paddd %%mm0, %%mm3 \n\t"
00468 "paddd %%mm0, %%mm5 \n\t"
00469 "paddd %%mm0, %%mm7 \n\t"
00470 "paddd %%mm0, %%mm2 \n\t"
00471 "psrad $"S(SHIFT_FRW_ROW)", %%mm3 \n\t"
00472 "psrad $"S(SHIFT_FRW_ROW)", %%mm5 \n\t"
00473 "psrad $"S(SHIFT_FRW_ROW)", %%mm7 \n\t"
00474 "psrad $"S(SHIFT_FRW_ROW)", %%mm2 \n\t"
00475 "packssdw %%mm5, %%mm3 \n\t"
00476 "packssdw %%mm2, %%mm7 \n\t"
00477 "movq %%mm3, (%3) \n\t"
00478 "movq %%mm7, 8(%3) \n\t"
00479 :
00480 : "r" (in), "r" (table), "r" (fdct_r_row), "r" (out));
00481 }
00482
00483 static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
00484 {
00485
00486 __asm__ volatile(
00487 "movd 12(%0), %%mm1 \n\t"
00488 "punpcklwd 8(%0), %%mm1 \n\t"
00489 "movq %%mm1, %%mm2 \n\t"
00490 "psrlq $0x20, %%mm1 \n\t"
00491 "movq 0(%0), %%mm0 \n\t"
00492 "punpcklwd %%mm2, %%mm1 \n\t"
00493 "movq %%mm0, %%mm5 \n\t"
00494 "paddsw %%mm1, %%mm0 \n\t"
00495 "psubsw %%mm1, %%mm5 \n\t"
00496 "movq %%mm0, %%mm2 \n\t"
00497 "punpckldq %%mm5, %%mm0 \n\t"
00498 "punpckhdq %%mm5, %%mm2 \n\t"
00499 "movq 0(%1), %%mm1 \n\t"
00500 "movq 8(%1), %%mm3 \n\t"
00501 "movq 16(%1), %%mm4 \n\t"
00502 "movq 24(%1), %%mm5 \n\t"
00503 "movq 32(%1), %%mm6 \n\t"
00504 "movq 40(%1), %%mm7 \n\t"
00505 "pmaddwd %%mm0, %%mm1 \n\t"
00506 "pmaddwd %%mm2, %%mm3 \n\t"
00507 "pmaddwd %%mm0, %%mm4 \n\t"
00508 "pmaddwd %%mm2, %%mm5 \n\t"
00509 "pmaddwd %%mm0, %%mm6 \n\t"
00510 "pmaddwd %%mm2, %%mm7 \n\t"
00511 "pmaddwd 48(%1), %%mm0 \n\t"
00512 "pmaddwd 56(%1), %%mm2 \n\t"
00513 "paddd %%mm1, %%mm3 \n\t"
00514 "paddd %%mm4, %%mm5 \n\t"
00515 "paddd %%mm6, %%mm7 \n\t"
00516 "paddd %%mm0, %%mm2 \n\t"
00517 "movq (%2), %%mm0 \n\t"
00518 "paddd %%mm0, %%mm3 \n\t"
00519 "paddd %%mm0, %%mm5 \n\t"
00520 "paddd %%mm0, %%mm7 \n\t"
00521 "paddd %%mm0, %%mm2 \n\t"
00522 "psrad $"S(SHIFT_FRW_ROW)", %%mm3 \n\t"
00523 "psrad $"S(SHIFT_FRW_ROW)", %%mm5 \n\t"
00524 "psrad $"S(SHIFT_FRW_ROW)", %%mm7 \n\t"
00525 "psrad $"S(SHIFT_FRW_ROW)", %%mm2 \n\t"
00526 "packssdw %%mm5, %%mm3 \n\t"
00527 "packssdw %%mm2, %%mm7 \n\t"
00528 "movq %%mm3, 0(%3) \n\t"
00529 "movq %%mm7, 8(%3) \n\t"
00530 :
00531 : "r" (in), "r" (table), "r" (fdct_r_row), "r" (out));
00532 }
00533
00534 void ff_fdct_mmx(int16_t *block)
00535 {
00536 DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
00537 int16_t * block1= (int16_t*)align_tmp;
00538 const int16_t *table= tab_frw_01234567;
00539 int i;
00540
00541 fdct_col_mmx(block, block1, 0);
00542 fdct_col_mmx(block, block1, 4);
00543
00544 for(i=8;i>0;i--) {
00545 fdct_row_mmx(block1, block, table);
00546 block1 += 8;
00547 table += 32;
00548 block += 8;
00549 }
00550 }
00551
00552 void ff_fdct_mmx2(int16_t *block)
00553 {
00554 DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
00555 int16_t *block1= (int16_t*)align_tmp;
00556 const int16_t *table= tab_frw_01234567;
00557 int i;
00558
00559 fdct_col_mmx(block, block1, 0);
00560 fdct_col_mmx(block, block1, 4);
00561
00562 for(i=8;i>0;i--) {
00563 fdct_row_mmx2(block1, block, table);
00564 block1 += 8;
00565 table += 32;
00566 block += 8;
00567 }
00568 }
00569
00570 void ff_fdct_sse2(int16_t *block)
00571 {
00572 DECLARE_ALIGNED(16, int64_t, align_tmp)[16];
00573 int16_t * const block1= (int16_t*)align_tmp;
00574
00575 fdct_col_sse2(block, block1, 0);
00576 fdct_row_sse2(block1, block);
00577 }
00578