00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "libavcodec/dsputil.h"
00023 #include "libavcodec/simple_idct.h"
00024 #include "dsputil_mmx.h"
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
// Fixed-point cosine factors: Cn = cos(n*M_PI/16) * sqrt(2) * (1<<14), where
// n is the digit in the macro name ("i" in the per-line formulas below).
// All values are rounded to nearest (+ 0.5) except C4: its exact value is
// 1<<14 = 16384, and it is stored as 16383 (the "- 0.5" in its formula).
00036 #define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, i = 0
00037 #define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, i = 1
00038 #define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, i = 2
00039 #define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, i = 3
00040 #define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5, i = 4
00041 #define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, i = 5
00042 #define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, i = 6
00043 #define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, i = 7
00044
// Right-shift amounts of the two passes. ROW_SHIFT is used to build the
// rounder entries at the start of the coeffs table. The asm macro invocations
// in idct() pass the same values as the literals 11 and 20 rather than through
// these macros, so the literals must be kept in sync by hand.
00045 #define ROW_SHIFT 11
// NOTE(review): the trailing "6" is unexplained in the source; presumably
// 20 - 14, i.e. the shift left over once the 1<<14 coefficient scale is
// removed -- TODO confirm.
00046 #define COL_SHIFT 20 // 6
00047
// Word mask that keeps 16-bit words 1 and 3 of a quadword and clears words 0
// and 2. DC_COND_IDCT ANDs it with the first source quadword (src0) before
// OR-ing in src4/src1/src5, so its "everything else is zero" test ignores
// words 0 and 2 of src0.
00048 DECLARE_ASM_CONST(8, uint64_t, wm1010)= 0xFFFF0000FFFF0000ULL;
// Constant added with paddd in the shortcut branch of DC_COND_IDCT, between
// the "pslld $16" and the "psrad $13". Only the low 32-bit half is non-zero
// (0x40000 = 1<<18).
00049 DECLARE_ASM_CONST(8, uint64_t, d40000)= 0x0000000000040000ULL;
00050
// Constant table for the MMX IDCT: two rounder rows followed by pairs of
// cosine factors arranged for pmaddwd. Each row below is four int16_t values
// (8 bytes, one MMX quadword). The inline asm in idct() reads the rows by
// literal byte offset from operand %2 (operand list not visible in this part
// of the file -- presumably this table), so the order and size of the rows
// must not change.
00051 DECLARE_ALIGNED(8, static const int16_t, coeffs)[]= {
00052 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0, // offset 0: row-pass rounder, 1<<(ROW_SHIFT-1) in each 32-bit half ("paddd (%2)")
00053 // offset 8: same rounder, but the first 32-bit half has an extra 1 in its
00054 // upper word, i.e. an additional 1<<16. Used as "paddd 8(%2)" by DC_COND_IDCT.
00055 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0,
00056 // With the constants above, ((1<<(COL_SHIFT-1))/C4)<<ROW_SHIFT evaluates
00057 // to exactly 1<<16, which matches that extra 1.
00058 // NOTE(review): presumably this folds the column-pass rounding into the
00059 // first row group -- TODO confirm.
00060 C4, C4, C4, C4, // offset 16
00061 C4, -C4, C4, -C4, // offset 24
00062
00063 C2, C6, C2, C6, // offset 32
00064 C6, -C2, C6, -C2, // offset 40
00065
00066 C1, C3, C1, C3, // offset 48
00067 C5, C7, C5, C7, // offset 56
00068
00069 C3, -C7, C3, -C7, // offset 64
00070 -C1, -C5, -C1, -C5, // offset 72
00071
00072 C5, -C1, C5, -C1, // offset 80
00073 C7, C3, C7, C3, // offset 88
00074
00075 C7, -C5, C7, -C5, // offset 96
00076 C3, -C1, C3, -C1 // offset 104
00077 };
00078
00079 static inline void idct(int16_t *block)
00080 {
00081 DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
00082 int16_t * const temp= (int16_t*)align_tmp;
00083
00084 __asm__ volatile(
00085 #if 0 //Alternative, simpler variant
00086
00087 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00088 "movq " #src0 ", %%mm0 \n\t" \
00089 "movq " #src4 ", %%mm1 \n\t" \
00090 "movq " #src1 ", %%mm2 \n\t" \
00091 "movq " #src5 ", %%mm3 \n\t" \
00092 "movq 16(%2), %%mm4 \n\t" \
00093 "pmaddwd %%mm0, %%mm4 \n\t" \
00094 "movq 24(%2), %%mm5 \n\t" \
00095 "pmaddwd %%mm5, %%mm0 \n\t" \
00096 "movq 32(%2), %%mm5 \n\t" \
00097 "pmaddwd %%mm1, %%mm5 \n\t" \
00098 "movq 40(%2), %%mm6 \n\t" \
00099 "pmaddwd %%mm6, %%mm1 \n\t" \
00100 "movq 48(%2), %%mm7 \n\t" \
00101 "pmaddwd %%mm2, %%mm7 \n\t" \
00102 #rounder ", %%mm4 \n\t"\
00103 "movq %%mm4, %%mm6 \n\t" \
00104 "paddd %%mm5, %%mm4 \n\t" \
00105 "psubd %%mm5, %%mm6 \n\t" \
00106 "movq 56(%2), %%mm5 \n\t" \
00107 "pmaddwd %%mm3, %%mm5 \n\t" \
00108 #rounder ", %%mm0 \n\t"\
00109 "paddd %%mm0, %%mm1 \n\t" \
00110 "paddd %%mm0, %%mm0 \n\t" \
00111 "psubd %%mm1, %%mm0 \n\t" \
00112 "pmaddwd 64(%2), %%mm2 \n\t" \
00113 "paddd %%mm5, %%mm7 \n\t" \
00114 "movq 72(%2), %%mm5 \n\t" \
00115 "pmaddwd %%mm3, %%mm5 \n\t" \
00116 "paddd %%mm4, %%mm7 \n\t" \
00117 "paddd %%mm4, %%mm4 \n\t" \
00118 "psubd %%mm7, %%mm4 \n\t" \
00119 "paddd %%mm2, %%mm5 \n\t" \
00120 "psrad $" #shift ", %%mm7 \n\t"\
00121 "psrad $" #shift ", %%mm4 \n\t"\
00122 "movq %%mm1, %%mm2 \n\t" \
00123 "paddd %%mm5, %%mm1 \n\t" \
00124 "psubd %%mm5, %%mm2 \n\t" \
00125 "psrad $" #shift ", %%mm1 \n\t"\
00126 "psrad $" #shift ", %%mm2 \n\t"\
00127 "packssdw %%mm1, %%mm7 \n\t" \
00128 "packssdw %%mm4, %%mm2 \n\t" \
00129 "movq %%mm7, " #dst " \n\t"\
00130 "movq " #src1 ", %%mm1 \n\t" \
00131 "movq 80(%2), %%mm4 \n\t" \
00132 "movq %%mm2, 24+" #dst " \n\t"\
00133 "pmaddwd %%mm1, %%mm4 \n\t" \
00134 "movq 88(%2), %%mm7 \n\t" \
00135 "pmaddwd 96(%2), %%mm1 \n\t" \
00136 "pmaddwd %%mm3, %%mm7 \n\t" \
00137 "movq %%mm0, %%mm2 \n\t" \
00138 "pmaddwd 104(%2), %%mm3 \n\t" \
00139 "paddd %%mm7, %%mm4 \n\t" \
00140 "paddd %%mm4, %%mm2 \n\t" \
00141 "psubd %%mm4, %%mm0 \n\t" \
00142 "psrad $" #shift ", %%mm2 \n\t"\
00143 "psrad $" #shift ", %%mm0 \n\t"\
00144 "movq %%mm6, %%mm4 \n\t" \
00145 "paddd %%mm1, %%mm3 \n\t" \
00146 "paddd %%mm3, %%mm6 \n\t" \
00147 "psubd %%mm3, %%mm4 \n\t" \
00148 "psrad $" #shift ", %%mm6 \n\t"\
00149 "packssdw %%mm6, %%mm2 \n\t" \
00150 "movq %%mm2, 8+" #dst " \n\t"\
00151 "psrad $" #shift ", %%mm4 \n\t"\
00152 "packssdw %%mm0, %%mm4 \n\t" \
00153 "movq %%mm4, 16+" #dst " \n\t"\
00154
00155 #define COL_IDCT(src0, src4, src1, src5, dst, shift) \
00156 "movq " #src0 ", %%mm0 \n\t" \
00157 "movq " #src4 ", %%mm1 \n\t" \
00158 "movq " #src1 ", %%mm2 \n\t" \
00159 "movq " #src5 ", %%mm3 \n\t" \
00160 "movq 16(%2), %%mm4 \n\t" \
00161 "pmaddwd %%mm0, %%mm4 \n\t" \
00162 "movq 24(%2), %%mm5 \n\t" \
00163 "pmaddwd %%mm5, %%mm0 \n\t" \
00164 "movq 32(%2), %%mm5 \n\t" \
00165 "pmaddwd %%mm1, %%mm5 \n\t" \
00166 "movq 40(%2), %%mm6 \n\t" \
00167 "pmaddwd %%mm6, %%mm1 \n\t" \
00168 "movq %%mm4, %%mm6 \n\t" \
00169 "movq 48(%2), %%mm7 \n\t" \
00170 "pmaddwd %%mm2, %%mm7 \n\t" \
00171 "paddd %%mm5, %%mm4 \n\t" \
00172 "psubd %%mm5, %%mm6 \n\t" \
00173 "movq %%mm0, %%mm5 \n\t" \
00174 "paddd %%mm1, %%mm0 \n\t" \
00175 "psubd %%mm1, %%mm5 \n\t" \
00176 "movq 56(%2), %%mm1 \n\t" \
00177 "pmaddwd %%mm3, %%mm1 \n\t" \
00178 "pmaddwd 64(%2), %%mm2 \n\t" \
00179 "paddd %%mm1, %%mm7 \n\t" \
00180 "movq 72(%2), %%mm1 \n\t" \
00181 "pmaddwd %%mm3, %%mm1 \n\t" \
00182 "paddd %%mm4, %%mm7 \n\t" \
00183 "paddd %%mm4, %%mm4 \n\t" \
00184 "psubd %%mm7, %%mm4 \n\t" \
00185 "paddd %%mm2, %%mm1 \n\t" \
00186 "psrad $" #shift ", %%mm7 \n\t"\
00187 "psrad $" #shift ", %%mm4 \n\t"\
00188 "movq %%mm0, %%mm2 \n\t" \
00189 "paddd %%mm1, %%mm0 \n\t" \
00190 "psubd %%mm1, %%mm2 \n\t" \
00191 "psrad $" #shift ", %%mm0 \n\t"\
00192 "psrad $" #shift ", %%mm2 \n\t"\
00193 "packssdw %%mm7, %%mm7 \n\t" \
00194 "movd %%mm7, " #dst " \n\t"\
00195 "packssdw %%mm0, %%mm0 \n\t" \
00196 "movd %%mm0, 16+" #dst " \n\t"\
00197 "packssdw %%mm2, %%mm2 \n\t" \
00198 "movd %%mm2, 96+" #dst " \n\t"\
00199 "packssdw %%mm4, %%mm4 \n\t" \
00200 "movd %%mm4, 112+" #dst " \n\t"\
00201 "movq " #src1 ", %%mm0 \n\t" \
00202 "movq 80(%2), %%mm4 \n\t" \
00203 "pmaddwd %%mm0, %%mm4 \n\t" \
00204 "movq 88(%2), %%mm7 \n\t" \
00205 "pmaddwd 96(%2), %%mm0 \n\t" \
00206 "pmaddwd %%mm3, %%mm7 \n\t" \
00207 "movq %%mm5, %%mm2 \n\t" \
00208 "pmaddwd 104(%2), %%mm3 \n\t" \
00209 "paddd %%mm7, %%mm4 \n\t" \
00210 "paddd %%mm4, %%mm2 \n\t" \
00211 "psubd %%mm4, %%mm5 \n\t" \
00212 "psrad $" #shift ", %%mm2 \n\t"\
00213 "psrad $" #shift ", %%mm5 \n\t"\
00214 "movq %%mm6, %%mm4 \n\t" \
00215 "paddd %%mm0, %%mm3 \n\t" \
00216 "paddd %%mm3, %%mm6 \n\t" \
00217 "psubd %%mm3, %%mm4 \n\t" \
00218 "psrad $" #shift ", %%mm6 \n\t"\
00219 "psrad $" #shift ", %%mm4 \n\t"\
00220 "packssdw %%mm2, %%mm2 \n\t" \
00221 "packssdw %%mm6, %%mm6 \n\t" \
00222 "movd %%mm2, 32+" #dst " \n\t"\
00223 "packssdw %%mm4, %%mm4 \n\t" \
00224 "packssdw %%mm5, %%mm5 \n\t" \
00225 "movd %%mm6, 48+" #dst " \n\t"\
00226 "movd %%mm4, 64+" #dst " \n\t"\
00227 "movd %%mm5, 80+" #dst " \n\t"\
00228
00229
00230 #define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00231 "movq " #src0 ", %%mm0 \n\t" \
00232 "movq " #src4 ", %%mm1 \n\t" \
00233 "movq " #src1 ", %%mm2 \n\t" \
00234 "movq " #src5 ", %%mm3 \n\t" \
00235 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
00236 "pand %%mm0, %%mm4 \n\t"\
00237 "por %%mm1, %%mm4 \n\t"\
00238 "por %%mm2, %%mm4 \n\t"\
00239 "por %%mm3, %%mm4 \n\t"\
00240 "packssdw %%mm4,%%mm4 \n\t"\
00241 "movd %%mm4, %%eax \n\t"\
00242 "orl %%eax, %%eax \n\t"\
00243 "jz 1f \n\t"\
00244 "movq 16(%2), %%mm4 \n\t" \
00245 "pmaddwd %%mm0, %%mm4 \n\t" \
00246 "movq 24(%2), %%mm5 \n\t" \
00247 "pmaddwd %%mm5, %%mm0 \n\t" \
00248 "movq 32(%2), %%mm5 \n\t" \
00249 "pmaddwd %%mm1, %%mm5 \n\t" \
00250 "movq 40(%2), %%mm6 \n\t" \
00251 "pmaddwd %%mm6, %%mm1 \n\t" \
00252 "movq 48(%2), %%mm7 \n\t" \
00253 "pmaddwd %%mm2, %%mm7 \n\t" \
00254 #rounder ", %%mm4 \n\t"\
00255 "movq %%mm4, %%mm6 \n\t" \
00256 "paddd %%mm5, %%mm4 \n\t" \
00257 "psubd %%mm5, %%mm6 \n\t" \
00258 "movq 56(%2), %%mm5 \n\t" \
00259 "pmaddwd %%mm3, %%mm5 \n\t" \
00260 #rounder ", %%mm0 \n\t"\
00261 "paddd %%mm0, %%mm1 \n\t" \
00262 "paddd %%mm0, %%mm0 \n\t" \
00263 "psubd %%mm1, %%mm0 \n\t" \
00264 "pmaddwd 64(%2), %%mm2 \n\t" \
00265 "paddd %%mm5, %%mm7 \n\t" \
00266 "movq 72(%2), %%mm5 \n\t" \
00267 "pmaddwd %%mm3, %%mm5 \n\t" \
00268 "paddd %%mm4, %%mm7 \n\t" \
00269 "paddd %%mm4, %%mm4 \n\t" \
00270 "psubd %%mm7, %%mm4 \n\t" \
00271 "paddd %%mm2, %%mm5 \n\t" \
00272 "psrad $" #shift ", %%mm7 \n\t"\
00273 "psrad $" #shift ", %%mm4 \n\t"\
00274 "movq %%mm1, %%mm2 \n\t" \
00275 "paddd %%mm5, %%mm1 \n\t" \
00276 "psubd %%mm5, %%mm2 \n\t" \
00277 "psrad $" #shift ", %%mm1 \n\t"\
00278 "psrad $" #shift ", %%mm2 \n\t"\
00279 "packssdw %%mm1, %%mm7 \n\t" \
00280 "packssdw %%mm4, %%mm2 \n\t" \
00281 "movq %%mm7, " #dst " \n\t"\
00282 "movq " #src1 ", %%mm1 \n\t" \
00283 "movq 80(%2), %%mm4 \n\t" \
00284 "movq %%mm2, 24+" #dst " \n\t"\
00285 "pmaddwd %%mm1, %%mm4 \n\t" \
00286 "movq 88(%2), %%mm7 \n\t" \
00287 "pmaddwd 96(%2), %%mm1 \n\t" \
00288 "pmaddwd %%mm3, %%mm7 \n\t" \
00289 "movq %%mm0, %%mm2 \n\t" \
00290 "pmaddwd 104(%2), %%mm3 \n\t" \
00291 "paddd %%mm7, %%mm4 \n\t" \
00292 "paddd %%mm4, %%mm2 \n\t" \
00293 "psubd %%mm4, %%mm0 \n\t" \
00294 "psrad $" #shift ", %%mm2 \n\t"\
00295 "psrad $" #shift ", %%mm0 \n\t"\
00296 "movq %%mm6, %%mm4 \n\t" \
00297 "paddd %%mm1, %%mm3 \n\t" \
00298 "paddd %%mm3, %%mm6 \n\t" \
00299 "psubd %%mm3, %%mm4 \n\t" \
00300 "psrad $" #shift ", %%mm6 \n\t"\
00301 "packssdw %%mm6, %%mm2 \n\t" \
00302 "movq %%mm2, 8+" #dst " \n\t"\
00303 "psrad $" #shift ", %%mm4 \n\t"\
00304 "packssdw %%mm0, %%mm4 \n\t" \
00305 "movq %%mm4, 16+" #dst " \n\t"\
00306 "jmp 2f \n\t"\
00307 "1: \n\t"\
00308 "pslld $16, %%mm0 \n\t"\
00309 "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
00310 "psrad $13, %%mm0 \n\t"\
00311 "packssdw %%mm0, %%mm0 \n\t"\
00312 "movq %%mm0, " #dst " \n\t"\
00313 "movq %%mm0, 8+" #dst " \n\t"\
00314 "movq %%mm0, 16+" #dst " \n\t"\
00315 "movq %%mm0, 24+" #dst " \n\t"\
00316 "2: \n\t"
00317
00318
00319
00320 ROW_IDCT( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
00321
00322
00323
00324
00325 DC_COND_ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
00326 DC_COND_ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
00327 DC_COND_ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
00328
00329
00330
00331 COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00332 COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00333 COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00334 COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00335
00336 #else
00337
00338 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00339 "movq " #src0 ", %%mm0 \n\t" \
00340 "movq " #src4 ", %%mm1 \n\t" \
00341 "movq " #src1 ", %%mm2 \n\t" \
00342 "movq " #src5 ", %%mm3 \n\t" \
00343 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
00344 "pand %%mm0, %%mm4 \n\t"\
00345 "por %%mm1, %%mm4 \n\t"\
00346 "por %%mm2, %%mm4 \n\t"\
00347 "por %%mm3, %%mm4 \n\t"\
00348 "packssdw %%mm4,%%mm4 \n\t"\
00349 "movd %%mm4, %%eax \n\t"\
00350 "orl %%eax, %%eax \n\t"\
00351 "jz 1f \n\t"\
00352 "movq 16(%2), %%mm4 \n\t" \
00353 "pmaddwd %%mm0, %%mm4 \n\t" \
00354 "movq 24(%2), %%mm5 \n\t" \
00355 "pmaddwd %%mm5, %%mm0 \n\t" \
00356 "movq 32(%2), %%mm5 \n\t" \
00357 "pmaddwd %%mm1, %%mm5 \n\t" \
00358 "movq 40(%2), %%mm6 \n\t" \
00359 "pmaddwd %%mm6, %%mm1 \n\t" \
00360 "movq 48(%2), %%mm7 \n\t" \
00361 "pmaddwd %%mm2, %%mm7 \n\t" \
00362 #rounder ", %%mm4 \n\t"\
00363 "movq %%mm4, %%mm6 \n\t" \
00364 "paddd %%mm5, %%mm4 \n\t" \
00365 "psubd %%mm5, %%mm6 \n\t" \
00366 "movq 56(%2), %%mm5 \n\t" \
00367 "pmaddwd %%mm3, %%mm5 \n\t" \
00368 #rounder ", %%mm0 \n\t"\
00369 "paddd %%mm0, %%mm1 \n\t" \
00370 "paddd %%mm0, %%mm0 \n\t" \
00371 "psubd %%mm1, %%mm0 \n\t" \
00372 "pmaddwd 64(%2), %%mm2 \n\t" \
00373 "paddd %%mm5, %%mm7 \n\t" \
00374 "movq 72(%2), %%mm5 \n\t" \
00375 "pmaddwd %%mm3, %%mm5 \n\t" \
00376 "paddd %%mm4, %%mm7 \n\t" \
00377 "paddd %%mm4, %%mm4 \n\t" \
00378 "psubd %%mm7, %%mm4 \n\t" \
00379 "paddd %%mm2, %%mm5 \n\t" \
00380 "psrad $" #shift ", %%mm7 \n\t"\
00381 "psrad $" #shift ", %%mm4 \n\t"\
00382 "movq %%mm1, %%mm2 \n\t" \
00383 "paddd %%mm5, %%mm1 \n\t" \
00384 "psubd %%mm5, %%mm2 \n\t" \
00385 "psrad $" #shift ", %%mm1 \n\t"\
00386 "psrad $" #shift ", %%mm2 \n\t"\
00387 "packssdw %%mm1, %%mm7 \n\t" \
00388 "packssdw %%mm4, %%mm2 \n\t" \
00389 "movq %%mm7, " #dst " \n\t"\
00390 "movq " #src1 ", %%mm1 \n\t" \
00391 "movq 80(%2), %%mm4 \n\t" \
00392 "movq %%mm2, 24+" #dst " \n\t"\
00393 "pmaddwd %%mm1, %%mm4 \n\t" \
00394 "movq 88(%2), %%mm7 \n\t" \
00395 "pmaddwd 96(%2), %%mm1 \n\t" \
00396 "pmaddwd %%mm3, %%mm7 \n\t" \
00397 "movq %%mm0, %%mm2 \n\t" \
00398 "pmaddwd 104(%2), %%mm3 \n\t" \
00399 "paddd %%mm7, %%mm4 \n\t" \
00400 "paddd %%mm4, %%mm2 \n\t" \
00401 "psubd %%mm4, %%mm0 \n\t" \
00402 "psrad $" #shift ", %%mm2 \n\t"\
00403 "psrad $" #shift ", %%mm0 \n\t"\
00404 "movq %%mm6, %%mm4 \n\t" \
00405 "paddd %%mm1, %%mm3 \n\t" \
00406 "paddd %%mm3, %%mm6 \n\t" \
00407 "psubd %%mm3, %%mm4 \n\t" \
00408 "psrad $" #shift ", %%mm6 \n\t"\
00409 "packssdw %%mm6, %%mm2 \n\t" \
00410 "movq %%mm2, 8+" #dst " \n\t"\
00411 "psrad $" #shift ", %%mm4 \n\t"\
00412 "packssdw %%mm0, %%mm4 \n\t" \
00413 "movq %%mm4, 16+" #dst " \n\t"\
00414 "jmp 2f \n\t"\
00415 "1: \n\t"\
00416 "pslld $16, %%mm0 \n\t"\
00417 "paddd "MANGLE(d40000)", %%mm0 \n\t"\
00418 "psrad $13, %%mm0 \n\t"\
00419 "packssdw %%mm0, %%mm0 \n\t"\
00420 "movq %%mm0, " #dst " \n\t"\
00421 "movq %%mm0, 8+" #dst " \n\t"\
00422 "movq %%mm0, 16+" #dst " \n\t"\
00423 "movq %%mm0, 24+" #dst " \n\t"\
00424 "2: \n\t"
00425
00426 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
00427 "movq " #src0 ", %%mm0 \n\t" \
00428 "movq " #src4 ", %%mm1 \n\t" \
00429 "movq " #src1 ", %%mm2 \n\t" \
00430 "movq " #src5 ", %%mm3 \n\t" \
00431 "movq %%mm0, %%mm4 \n\t"\
00432 "por %%mm1, %%mm4 \n\t"\
00433 "por %%mm2, %%mm4 \n\t"\
00434 "por %%mm3, %%mm4 \n\t"\
00435 "packssdw %%mm4,%%mm4 \n\t"\
00436 "movd %%mm4, %%eax \n\t"\
00437 "orl %%eax, %%eax \n\t"\
00438 "jz " #bt " \n\t"\
00439 "movq 16(%2), %%mm4 \n\t" \
00440 "pmaddwd %%mm0, %%mm4 \n\t" \
00441 "movq 24(%2), %%mm5 \n\t" \
00442 "pmaddwd %%mm5, %%mm0 \n\t" \
00443 "movq 32(%2), %%mm5 \n\t" \
00444 "pmaddwd %%mm1, %%mm5 \n\t" \
00445 "movq 40(%2), %%mm6 \n\t" \
00446 "pmaddwd %%mm6, %%mm1 \n\t" \
00447 "movq 48(%2), %%mm7 \n\t" \
00448 "pmaddwd %%mm2, %%mm7 \n\t" \
00449 #rounder ", %%mm4 \n\t"\
00450 "movq %%mm4, %%mm6 \n\t" \
00451 "paddd %%mm5, %%mm4 \n\t" \
00452 "psubd %%mm5, %%mm6 \n\t" \
00453 "movq 56(%2), %%mm5 \n\t" \
00454 "pmaddwd %%mm3, %%mm5 \n\t" \
00455 #rounder ", %%mm0 \n\t"\
00456 "paddd %%mm0, %%mm1 \n\t" \
00457 "paddd %%mm0, %%mm0 \n\t" \
00458 "psubd %%mm1, %%mm0 \n\t" \
00459 "pmaddwd 64(%2), %%mm2 \n\t" \
00460 "paddd %%mm5, %%mm7 \n\t" \
00461 "movq 72(%2), %%mm5 \n\t" \
00462 "pmaddwd %%mm3, %%mm5 \n\t" \
00463 "paddd %%mm4, %%mm7 \n\t" \
00464 "paddd %%mm4, %%mm4 \n\t" \
00465 "psubd %%mm7, %%mm4 \n\t" \
00466 "paddd %%mm2, %%mm5 \n\t" \
00467 "psrad $" #shift ", %%mm7 \n\t"\
00468 "psrad $" #shift ", %%mm4 \n\t"\
00469 "movq %%mm1, %%mm2 \n\t" \
00470 "paddd %%mm5, %%mm1 \n\t" \
00471 "psubd %%mm5, %%mm2 \n\t" \
00472 "psrad $" #shift ", %%mm1 \n\t"\
00473 "psrad $" #shift ", %%mm2 \n\t"\
00474 "packssdw %%mm1, %%mm7 \n\t" \
00475 "packssdw %%mm4, %%mm2 \n\t" \
00476 "movq %%mm7, " #dst " \n\t"\
00477 "movq " #src1 ", %%mm1 \n\t" \
00478 "movq 80(%2), %%mm4 \n\t" \
00479 "movq %%mm2, 24+" #dst " \n\t"\
00480 "pmaddwd %%mm1, %%mm4 \n\t" \
00481 "movq 88(%2), %%mm7 \n\t" \
00482 "pmaddwd 96(%2), %%mm1 \n\t" \
00483 "pmaddwd %%mm3, %%mm7 \n\t" \
00484 "movq %%mm0, %%mm2 \n\t" \
00485 "pmaddwd 104(%2), %%mm3 \n\t" \
00486 "paddd %%mm7, %%mm4 \n\t" \
00487 "paddd %%mm4, %%mm2 \n\t" \
00488 "psubd %%mm4, %%mm0 \n\t" \
00489 "psrad $" #shift ", %%mm2 \n\t"\
00490 "psrad $" #shift ", %%mm0 \n\t"\
00491 "movq %%mm6, %%mm4 \n\t" \
00492 "paddd %%mm1, %%mm3 \n\t" \
00493 "paddd %%mm3, %%mm6 \n\t" \
00494 "psubd %%mm3, %%mm4 \n\t" \
00495 "psrad $" #shift ", %%mm6 \n\t"\
00496 "packssdw %%mm6, %%mm2 \n\t" \
00497 "movq %%mm2, 8+" #dst " \n\t"\
00498 "psrad $" #shift ", %%mm4 \n\t"\
00499 "packssdw %%mm0, %%mm4 \n\t" \
00500 "movq %%mm4, 16+" #dst " \n\t"\
00501
00502 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00503 "movq " #src0 ", %%mm0 \n\t" \
00504 "movq " #src4 ", %%mm1 \n\t" \
00505 "movq " #src1 ", %%mm2 \n\t" \
00506 "movq " #src5 ", %%mm3 \n\t" \
00507 "movq 16(%2), %%mm4 \n\t" \
00508 "pmaddwd %%mm0, %%mm4 \n\t" \
00509 "movq 24(%2), %%mm5 \n\t" \
00510 "pmaddwd %%mm5, %%mm0 \n\t" \
00511 "movq 32(%2), %%mm5 \n\t" \
00512 "pmaddwd %%mm1, %%mm5 \n\t" \
00513 "movq 40(%2), %%mm6 \n\t" \
00514 "pmaddwd %%mm6, %%mm1 \n\t" \
00515 "movq 48(%2), %%mm7 \n\t" \
00516 "pmaddwd %%mm2, %%mm7 \n\t" \
00517 #rounder ", %%mm4 \n\t"\
00518 "movq %%mm4, %%mm6 \n\t" \
00519 "paddd %%mm5, %%mm4 \n\t" \
00520 "psubd %%mm5, %%mm6 \n\t" \
00521 "movq 56(%2), %%mm5 \n\t" \
00522 "pmaddwd %%mm3, %%mm5 \n\t" \
00523 #rounder ", %%mm0 \n\t"\
00524 "paddd %%mm0, %%mm1 \n\t" \
00525 "paddd %%mm0, %%mm0 \n\t" \
00526 "psubd %%mm1, %%mm0 \n\t" \
00527 "pmaddwd 64(%2), %%mm2 \n\t" \
00528 "paddd %%mm5, %%mm7 \n\t" \
00529 "movq 72(%2), %%mm5 \n\t" \
00530 "pmaddwd %%mm3, %%mm5 \n\t" \
00531 "paddd %%mm4, %%mm7 \n\t" \
00532 "paddd %%mm4, %%mm4 \n\t" \
00533 "psubd %%mm7, %%mm4 \n\t" \
00534 "paddd %%mm2, %%mm5 \n\t" \
00535 "psrad $" #shift ", %%mm7 \n\t"\
00536 "psrad $" #shift ", %%mm4 \n\t"\
00537 "movq %%mm1, %%mm2 \n\t" \
00538 "paddd %%mm5, %%mm1 \n\t" \
00539 "psubd %%mm5, %%mm2 \n\t" \
00540 "psrad $" #shift ", %%mm1 \n\t"\
00541 "psrad $" #shift ", %%mm2 \n\t"\
00542 "packssdw %%mm1, %%mm7 \n\t" \
00543 "packssdw %%mm4, %%mm2 \n\t" \
00544 "movq %%mm7, " #dst " \n\t"\
00545 "movq " #src1 ", %%mm1 \n\t" \
00546 "movq 80(%2), %%mm4 \n\t" \
00547 "movq %%mm2, 24+" #dst " \n\t"\
00548 "pmaddwd %%mm1, %%mm4 \n\t" \
00549 "movq 88(%2), %%mm7 \n\t" \
00550 "pmaddwd 96(%2), %%mm1 \n\t" \
00551 "pmaddwd %%mm3, %%mm7 \n\t" \
00552 "movq %%mm0, %%mm2 \n\t" \
00553 "pmaddwd 104(%2), %%mm3 \n\t" \
00554 "paddd %%mm7, %%mm4 \n\t" \
00555 "paddd %%mm4, %%mm2 \n\t" \
00556 "psubd %%mm4, %%mm0 \n\t" \
00557 "psrad $" #shift ", %%mm2 \n\t"\
00558 "psrad $" #shift ", %%mm0 \n\t"\
00559 "movq %%mm6, %%mm4 \n\t" \
00560 "paddd %%mm1, %%mm3 \n\t" \
00561 "paddd %%mm3, %%mm6 \n\t" \
00562 "psubd %%mm3, %%mm4 \n\t" \
00563 "psrad $" #shift ", %%mm6 \n\t"\
00564 "packssdw %%mm6, %%mm2 \n\t" \
00565 "movq %%mm2, 8+" #dst " \n\t"\
00566 "psrad $" #shift ", %%mm4 \n\t"\
00567 "packssdw %%mm0, %%mm4 \n\t" \
00568 "movq %%mm4, 16+" #dst " \n\t"\
00569
00570
00571 DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
00572 Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
00573 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
00574 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
00575
00576 #undef IDCT
00577 #define IDCT(src0, src4, src1, src5, dst, shift) \
00578 "movq " #src0 ", %%mm0 \n\t" \
00579 "movq " #src4 ", %%mm1 \n\t" \
00580 "movq " #src1 ", %%mm2 \n\t" \
00581 "movq " #src5 ", %%mm3 \n\t" \
00582 "movq 16(%2), %%mm4 \n\t" \
00583 "pmaddwd %%mm0, %%mm4 \n\t" \
00584 "movq 24(%2), %%mm5 \n\t" \
00585 "pmaddwd %%mm5, %%mm0 \n\t" \
00586 "movq 32(%2), %%mm5 \n\t" \
00587 "pmaddwd %%mm1, %%mm5 \n\t" \
00588 "movq 40(%2), %%mm6 \n\t" \
00589 "pmaddwd %%mm6, %%mm1 \n\t" \
00590 "movq %%mm4, %%mm6 \n\t" \
00591 "movq 48(%2), %%mm7 \n\t" \
00592 "pmaddwd %%mm2, %%mm7 \n\t" \
00593 "paddd %%mm5, %%mm4 \n\t" \
00594 "psubd %%mm5, %%mm6 \n\t" \
00595 "movq %%mm0, %%mm5 \n\t" \
00596 "paddd %%mm1, %%mm0 \n\t" \
00597 "psubd %%mm1, %%mm5 \n\t" \
00598 "movq 56(%2), %%mm1 \n\t" \
00599 "pmaddwd %%mm3, %%mm1 \n\t" \
00600 "pmaddwd 64(%2), %%mm2 \n\t" \
00601 "paddd %%mm1, %%mm7 \n\t" \
00602 "movq 72(%2), %%mm1 \n\t" \
00603 "pmaddwd %%mm3, %%mm1 \n\t" \
00604 "paddd %%mm4, %%mm7 \n\t" \
00605 "paddd %%mm4, %%mm4 \n\t" \
00606 "psubd %%mm7, %%mm4 \n\t" \
00607 "paddd %%mm2, %%mm1 \n\t" \
00608 "psrad $" #shift ", %%mm7 \n\t"\
00609 "psrad $" #shift ", %%mm4 \n\t"\
00610 "movq %%mm0, %%mm2 \n\t" \
00611 "paddd %%mm1, %%mm0 \n\t" \
00612 "psubd %%mm1, %%mm2 \n\t" \
00613 "psrad $" #shift ", %%mm0 \n\t"\
00614 "psrad $" #shift ", %%mm2 \n\t"\
00615 "packssdw %%mm7, %%mm7 \n\t" \
00616 "movd %%mm7, " #dst " \n\t"\
00617 "packssdw %%mm0, %%mm0 \n\t" \
00618 "movd %%mm0, 16+" #dst " \n\t"\
00619 "packssdw %%mm2, %%mm2 \n\t" \
00620 "movd %%mm2, 96+" #dst " \n\t"\
00621 "packssdw %%mm4, %%mm4 \n\t" \
00622 "movd %%mm4, 112+" #dst " \n\t"\
00623 "movq " #src1 ", %%mm0 \n\t" \
00624 "movq 80(%2), %%mm4 \n\t" \
00625 "pmaddwd %%mm0, %%mm4 \n\t" \
00626 "movq 88(%2), %%mm7 \n\t" \
00627 "pmaddwd 96(%2), %%mm0 \n\t" \
00628 "pmaddwd %%mm3, %%mm7 \n\t" \
00629 "movq %%mm5, %%mm2 \n\t" \
00630 "pmaddwd 104(%2), %%mm3 \n\t" \
00631 "paddd %%mm7, %%mm4 \n\t" \
00632 "paddd %%mm4, %%mm2 \n\t" \
00633 "psubd %%mm4, %%mm5 \n\t" \
00634 "psrad $" #shift ", %%mm2 \n\t"\
00635 "psrad $" #shift ", %%mm5 \n\t"\
00636 "movq %%mm6, %%mm4 \n\t" \
00637 "paddd %%mm0, %%mm3 \n\t" \
00638 "paddd %%mm3, %%mm6 \n\t" \
00639 "psubd %%mm3, %%mm4 \n\t" \
00640 "psrad $" #shift ", %%mm6 \n\t"\
00641 "psrad $" #shift ", %%mm4 \n\t"\
00642 "packssdw %%mm2, %%mm2 \n\t" \
00643 "packssdw %%mm6, %%mm6 \n\t" \
00644 "movd %%mm2, 32+" #dst " \n\t"\
00645 "packssdw %%mm4, %%mm4 \n\t" \
00646 "packssdw %%mm5, %%mm5 \n\t" \
00647 "movd %%mm6, 48+" #dst " \n\t"\
00648 "movd %%mm4, 64+" #dst " \n\t"\
00649 "movd %%mm5, 80+" #dst " \n\t"
00650
00651
00652
00653 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00654 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00655 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00656 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00657 "jmp 9f \n\t"
00658
00659 "# .p2align 4 \n\t"\
00660 "4: \n\t"
00661 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
00662 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
00663
00664 #undef IDCT
00665 #define IDCT(src0, src4, src1, src5, dst, shift) \
00666 "movq " #src0 ", %%mm0 \n\t" \
00667 "movq " #src4 ", %%mm1 \n\t" \
00668 "movq " #src5 ", %%mm3 \n\t" \
00669 "movq 16(%2), %%mm4 \n\t" \
00670 "pmaddwd %%mm0, %%mm4 \n\t" \
00671 "movq 24(%2), %%mm5 \n\t" \
00672 "pmaddwd %%mm5, %%mm0 \n\t" \
00673 "movq 32(%2), %%mm5 \n\t" \
00674 "pmaddwd %%mm1, %%mm5 \n\t" \
00675 "movq 40(%2), %%mm6 \n\t" \
00676 "pmaddwd %%mm6, %%mm1 \n\t" \
00677 "movq %%mm4, %%mm6 \n\t" \
00678 "paddd %%mm5, %%mm4 \n\t" \
00679 "psubd %%mm5, %%mm6 \n\t" \
00680 "movq %%mm0, %%mm5 \n\t" \
00681 "paddd %%mm1, %%mm0 \n\t" \
00682 "psubd %%mm1, %%mm5 \n\t" \
00683 "movq 56(%2), %%mm1 \n\t" \
00684 "pmaddwd %%mm3, %%mm1 \n\t" \
00685 "movq 72(%2), %%mm7 \n\t" \
00686 "pmaddwd %%mm3, %%mm7 \n\t" \
00687 "paddd %%mm4, %%mm1 \n\t" \
00688 "paddd %%mm4, %%mm4 \n\t" \
00689 "psubd %%mm1, %%mm4 \n\t" \
00690 "psrad $" #shift ", %%mm1 \n\t"\
00691 "psrad $" #shift ", %%mm4 \n\t"\
00692 "movq %%mm0, %%mm2 \n\t" \
00693 "paddd %%mm7, %%mm0 \n\t" \
00694 "psubd %%mm7, %%mm2 \n\t" \
00695 "psrad $" #shift ", %%mm0 \n\t"\
00696 "psrad $" #shift ", %%mm2 \n\t"\
00697 "packssdw %%mm1, %%mm1 \n\t" \
00698 "movd %%mm1, " #dst " \n\t"\
00699 "packssdw %%mm0, %%mm0 \n\t" \
00700 "movd %%mm0, 16+" #dst " \n\t"\
00701 "packssdw %%mm2, %%mm2 \n\t" \
00702 "movd %%mm2, 96+" #dst " \n\t"\
00703 "packssdw %%mm4, %%mm4 \n\t" \
00704 "movd %%mm4, 112+" #dst " \n\t"\
00705 "movq 88(%2), %%mm1 \n\t" \
00706 "pmaddwd %%mm3, %%mm1 \n\t" \
00707 "movq %%mm5, %%mm2 \n\t" \
00708 "pmaddwd 104(%2), %%mm3 \n\t" \
00709 "paddd %%mm1, %%mm2 \n\t" \
00710 "psubd %%mm1, %%mm5 \n\t" \
00711 "psrad $" #shift ", %%mm2 \n\t"\
00712 "psrad $" #shift ", %%mm5 \n\t"\
00713 "movq %%mm6, %%mm1 \n\t" \
00714 "paddd %%mm3, %%mm6 \n\t" \
00715 "psubd %%mm3, %%mm1 \n\t" \
00716 "psrad $" #shift ", %%mm6 \n\t"\
00717 "psrad $" #shift ", %%mm1 \n\t"\
00718 "packssdw %%mm2, %%mm2 \n\t" \
00719 "packssdw %%mm6, %%mm6 \n\t" \
00720 "movd %%mm2, 32+" #dst " \n\t"\
00721 "packssdw %%mm1, %%mm1 \n\t" \
00722 "packssdw %%mm5, %%mm5 \n\t" \
00723 "movd %%mm6, 48+" #dst " \n\t"\
00724 "movd %%mm1, 64+" #dst " \n\t"\
00725 "movd %%mm5, 80+" #dst " \n\t"
00726
00727
00728 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00729 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00730 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00731 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00732 "jmp 9f \n\t"
00733
00734 "# .p2align 4 \n\t"\
00735 "6: \n\t"
00736 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
00737
00738 #undef IDCT
00739 #define IDCT(src0, src4, src1, src5, dst, shift) \
00740 "movq " #src0 ", %%mm0 \n\t" \
00741 "movq " #src5 ", %%mm3 \n\t" \
00742 "movq 16(%2), %%mm4 \n\t" \
00743 "pmaddwd %%mm0, %%mm4 \n\t" \
00744 "movq 24(%2), %%mm5 \n\t" \
00745 "pmaddwd %%mm5, %%mm0 \n\t" \
00746 "movq %%mm4, %%mm6 \n\t" \
00747 "movq %%mm0, %%mm5 \n\t" \
00748 "movq 56(%2), %%mm1 \n\t" \
00749 "pmaddwd %%mm3, %%mm1 \n\t" \
00750 "movq 72(%2), %%mm7 \n\t" \
00751 "pmaddwd %%mm3, %%mm7 \n\t" \
00752 "paddd %%mm4, %%mm1 \n\t" \
00753 "paddd %%mm4, %%mm4 \n\t" \
00754 "psubd %%mm1, %%mm4 \n\t" \
00755 "psrad $" #shift ", %%mm1 \n\t"\
00756 "psrad $" #shift ", %%mm4 \n\t"\
00757 "movq %%mm0, %%mm2 \n\t" \
00758 "paddd %%mm7, %%mm0 \n\t" \
00759 "psubd %%mm7, %%mm2 \n\t" \
00760 "psrad $" #shift ", %%mm0 \n\t"\
00761 "psrad $" #shift ", %%mm2 \n\t"\
00762 "packssdw %%mm1, %%mm1 \n\t" \
00763 "movd %%mm1, " #dst " \n\t"\
00764 "packssdw %%mm0, %%mm0 \n\t" \
00765 "movd %%mm0, 16+" #dst " \n\t"\
00766 "packssdw %%mm2, %%mm2 \n\t" \
00767 "movd %%mm2, 96+" #dst " \n\t"\
00768 "packssdw %%mm4, %%mm4 \n\t" \
00769 "movd %%mm4, 112+" #dst " \n\t"\
00770 "movq 88(%2), %%mm1 \n\t" \
00771 "pmaddwd %%mm3, %%mm1 \n\t" \
00772 "movq %%mm5, %%mm2 \n\t" \
00773 "pmaddwd 104(%2), %%mm3 \n\t" \
00774 "paddd %%mm1, %%mm2 \n\t" \
00775 "psubd %%mm1, %%mm5 \n\t" \
00776 "psrad $" #shift ", %%mm2 \n\t"\
00777 "psrad $" #shift ", %%mm5 \n\t"\
00778 "movq %%mm6, %%mm1 \n\t" \
00779 "paddd %%mm3, %%mm6 \n\t" \
00780 "psubd %%mm3, %%mm1 \n\t" \
00781 "psrad $" #shift ", %%mm6 \n\t"\
00782 "psrad $" #shift ", %%mm1 \n\t"\
00783 "packssdw %%mm2, %%mm2 \n\t" \
00784 "packssdw %%mm6, %%mm6 \n\t" \
00785 "movd %%mm2, 32+" #dst " \n\t"\
00786 "packssdw %%mm1, %%mm1 \n\t" \
00787 "packssdw %%mm5, %%mm5 \n\t" \
00788 "movd %%mm6, 48+" #dst " \n\t"\
00789 "movd %%mm1, 64+" #dst " \n\t"\
00790 "movd %%mm5, 80+" #dst " \n\t"
00791
00792
00793
00794 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00795 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00796 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00797 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00798 "jmp 9f \n\t"
00799
00800 "# .p2align 4 \n\t"\
00801 "2: \n\t"
00802 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
00803
00804 #undef IDCT
00805 #define IDCT(src0, src4, src1, src5, dst, shift) \
00806 "movq " #src0 ", %%mm0 \n\t" \
00807 "movq " #src1 ", %%mm2 \n\t" \
00808 "movq " #src5 ", %%mm3 \n\t" \
00809 "movq 16(%2), %%mm4 \n\t" \
00810 "pmaddwd %%mm0, %%mm4 \n\t" \
00811 "movq 24(%2), %%mm5 \n\t" \
00812 "pmaddwd %%mm5, %%mm0 \n\t" \
00813 "movq %%mm4, %%mm6 \n\t" \
00814 "movq 48(%2), %%mm7 \n\t" \
00815 "pmaddwd %%mm2, %%mm7 \n\t" \
00816 "movq %%mm0, %%mm5 \n\t" \
00817 "movq 56(%2), %%mm1 \n\t" \
00818 "pmaddwd %%mm3, %%mm1 \n\t" \
00819 "pmaddwd 64(%2), %%mm2 \n\t" \
00820 "paddd %%mm1, %%mm7 \n\t" \
00821 "movq 72(%2), %%mm1 \n\t" \
00822 "pmaddwd %%mm3, %%mm1 \n\t" \
00823 "paddd %%mm4, %%mm7 \n\t" \
00824 "paddd %%mm4, %%mm4 \n\t" \
00825 "psubd %%mm7, %%mm4 \n\t" \
00826 "paddd %%mm2, %%mm1 \n\t" \
00827 "psrad $" #shift ", %%mm7 \n\t"\
00828 "psrad $" #shift ", %%mm4 \n\t"\
00829 "movq %%mm0, %%mm2 \n\t" \
00830 "paddd %%mm1, %%mm0 \n\t" \
00831 "psubd %%mm1, %%mm2 \n\t" \
00832 "psrad $" #shift ", %%mm0 \n\t"\
00833 "psrad $" #shift ", %%mm2 \n\t"\
00834 "packssdw %%mm7, %%mm7 \n\t" \
00835 "movd %%mm7, " #dst " \n\t"\
00836 "packssdw %%mm0, %%mm0 \n\t" \
00837 "movd %%mm0, 16+" #dst " \n\t"\
00838 "packssdw %%mm2, %%mm2 \n\t" \
00839 "movd %%mm2, 96+" #dst " \n\t"\
00840 "packssdw %%mm4, %%mm4 \n\t" \
00841 "movd %%mm4, 112+" #dst " \n\t"\
00842 "movq " #src1 ", %%mm0 \n\t" \
00843 "movq 80(%2), %%mm4 \n\t" \
00844 "pmaddwd %%mm0, %%mm4 \n\t" \
00845 "movq 88(%2), %%mm7 \n\t" \
00846 "pmaddwd 96(%2), %%mm0 \n\t" \
00847 "pmaddwd %%mm3, %%mm7 \n\t" \
00848 "movq %%mm5, %%mm2 \n\t" \
00849 "pmaddwd 104(%2), %%mm3 \n\t" \
00850 "paddd %%mm7, %%mm4 \n\t" \
00851 "paddd %%mm4, %%mm2 \n\t" \
00852 "psubd %%mm4, %%mm5 \n\t" \
00853 "psrad $" #shift ", %%mm2 \n\t"\
00854 "psrad $" #shift ", %%mm5 \n\t"\
00855 "movq %%mm6, %%mm4 \n\t" \
00856 "paddd %%mm0, %%mm3 \n\t" \
00857 "paddd %%mm3, %%mm6 \n\t" \
00858 "psubd %%mm3, %%mm4 \n\t" \
00859 "psrad $" #shift ", %%mm6 \n\t"\
00860 "psrad $" #shift ", %%mm4 \n\t"\
00861 "packssdw %%mm2, %%mm2 \n\t" \
00862 "packssdw %%mm6, %%mm6 \n\t" \
00863 "movd %%mm2, 32+" #dst " \n\t"\
00864 "packssdw %%mm4, %%mm4 \n\t" \
00865 "packssdw %%mm5, %%mm5 \n\t" \
00866 "movd %%mm6, 48+" #dst " \n\t"\
00867 "movd %%mm4, 64+" #dst " \n\t"\
00868 "movd %%mm5, 80+" #dst " \n\t"
00869
00870
00871 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00872 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00873 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00874 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00875 "jmp 9f \n\t"
00876
00877 "# .p2align 4 \n\t"\
00878 "3: \n\t"
00879 #undef IDCT
00880 #define IDCT(src0, src4, src1, src5, dst, shift) \
00881 "movq " #src0 ", %%mm0 \n\t" \
00882 "movq " #src1 ", %%mm2 \n\t" \
00883 "movq 16(%2), %%mm4 \n\t" \
00884 "pmaddwd %%mm0, %%mm4 \n\t" \
00885 "movq 24(%2), %%mm5 \n\t" \
00886 "pmaddwd %%mm5, %%mm0 \n\t" \
00887 "movq %%mm4, %%mm6 \n\t" \
00888 "movq 48(%2), %%mm7 \n\t" \
00889 "pmaddwd %%mm2, %%mm7 \n\t" \
00890 "movq %%mm0, %%mm5 \n\t" \
00891 "movq 64(%2), %%mm3 \n\t"\
00892 "pmaddwd %%mm2, %%mm3 \n\t" \
00893 "paddd %%mm4, %%mm7 \n\t" \
00894 "paddd %%mm4, %%mm4 \n\t" \
00895 "psubd %%mm7, %%mm4 \n\t" \
00896 "psrad $" #shift ", %%mm7 \n\t"\
00897 "psrad $" #shift ", %%mm4 \n\t"\
00898 "movq %%mm0, %%mm1 \n\t" \
00899 "paddd %%mm3, %%mm0 \n\t" \
00900 "psubd %%mm3, %%mm1 \n\t" \
00901 "psrad $" #shift ", %%mm0 \n\t"\
00902 "psrad $" #shift ", %%mm1 \n\t"\
00903 "packssdw %%mm7, %%mm7 \n\t" \
00904 "movd %%mm7, " #dst " \n\t"\
00905 "packssdw %%mm0, %%mm0 \n\t" \
00906 "movd %%mm0, 16+" #dst " \n\t"\
00907 "packssdw %%mm1, %%mm1 \n\t" \
00908 "movd %%mm1, 96+" #dst " \n\t"\
00909 "packssdw %%mm4, %%mm4 \n\t" \
00910 "movd %%mm4, 112+" #dst " \n\t"\
00911 "movq 80(%2), %%mm4 \n\t" \
00912 "pmaddwd %%mm2, %%mm4 \n\t" \
00913 "pmaddwd 96(%2), %%mm2 \n\t" \
00914 "movq %%mm5, %%mm1 \n\t" \
00915 "paddd %%mm4, %%mm1 \n\t" \
00916 "psubd %%mm4, %%mm5 \n\t" \
00917 "psrad $" #shift ", %%mm1 \n\t"\
00918 "psrad $" #shift ", %%mm5 \n\t"\
00919 "movq %%mm6, %%mm4 \n\t" \
00920 "paddd %%mm2, %%mm6 \n\t" \
00921 "psubd %%mm2, %%mm4 \n\t" \
00922 "psrad $" #shift ", %%mm6 \n\t"\
00923 "psrad $" #shift ", %%mm4 \n\t"\
00924 "packssdw %%mm1, %%mm1 \n\t" \
00925 "packssdw %%mm6, %%mm6 \n\t" \
00926 "movd %%mm1, 32+" #dst " \n\t"\
00927 "packssdw %%mm4, %%mm4 \n\t" \
00928 "packssdw %%mm5, %%mm5 \n\t" \
00929 "movd %%mm6, 48+" #dst " \n\t"\
00930 "movd %%mm4, 64+" #dst " \n\t"\
00931 "movd %%mm5, 80+" #dst " \n\t"
00932
00933
00934
00935 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00936 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00937 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00938 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00939 "jmp 9f \n\t"
00940
00941 "# .p2align 4 \n\t"\
00942 "5: \n\t"
00943 #undef IDCT
00944 #define IDCT(src0, src4, src1, src5, dst, shift) \
00945 "movq " #src0 ", %%mm0 \n\t" \
00946 "movq " #src4 ", %%mm1 \n\t" \
00947 "movq 16(%2), %%mm4 \n\t" \
00948 "pmaddwd %%mm0, %%mm4 \n\t" \
00949 "movq 24(%2), %%mm5 \n\t" \
00950 "pmaddwd %%mm5, %%mm0 \n\t" \
00951 "movq 32(%2), %%mm5 \n\t" \
00952 "pmaddwd %%mm1, %%mm5 \n\t" \
00953 "movq 40(%2), %%mm6 \n\t" \
00954 "pmaddwd %%mm6, %%mm1 \n\t" \
00955 "movq %%mm4, %%mm6 \n\t" \
00956 "paddd %%mm5, %%mm4 \n\t" \
00957 "psubd %%mm5, %%mm6 \n\t" \
00958 "movq %%mm0, %%mm5 \n\t" \
00959 "paddd %%mm1, %%mm0 \n\t" \
00960 "psubd %%mm1, %%mm5 \n\t" \
00961 "movq 8+" #src0 ", %%mm2 \n\t" \
00962 "movq 8+" #src4 ", %%mm3 \n\t" \
00963 "movq 16(%2), %%mm1 \n\t" \
00964 "pmaddwd %%mm2, %%mm1 \n\t" \
00965 "movq 24(%2), %%mm7 \n\t" \
00966 "pmaddwd %%mm7, %%mm2 \n\t" \
00967 "movq 32(%2), %%mm7 \n\t" \
00968 "pmaddwd %%mm3, %%mm7 \n\t" \
00969 "pmaddwd 40(%2), %%mm3 \n\t" \
00970 "paddd %%mm1, %%mm7 \n\t" \
00971 "paddd %%mm1, %%mm1 \n\t" \
00972 "psubd %%mm7, %%mm1 \n\t" \
00973 "paddd %%mm2, %%mm3 \n\t" \
00974 "paddd %%mm2, %%mm2 \n\t" \
00975 "psubd %%mm3, %%mm2 \n\t" \
00976 "psrad $" #shift ", %%mm4 \n\t"\
00977 "psrad $" #shift ", %%mm7 \n\t"\
00978 "psrad $" #shift ", %%mm3 \n\t"\
00979 "packssdw %%mm7, %%mm4 \n\t" \
00980 "movq %%mm4, " #dst " \n\t"\
00981 "psrad $" #shift ", %%mm0 \n\t"\
00982 "packssdw %%mm3, %%mm0 \n\t" \
00983 "movq %%mm0, 16+" #dst " \n\t"\
00984 "movq %%mm0, 96+" #dst " \n\t"\
00985 "movq %%mm4, 112+" #dst " \n\t"\
00986 "psrad $" #shift ", %%mm5 \n\t"\
00987 "psrad $" #shift ", %%mm6 \n\t"\
00988 "psrad $" #shift ", %%mm2 \n\t"\
00989 "packssdw %%mm2, %%mm5 \n\t" \
00990 "movq %%mm5, 32+" #dst " \n\t"\
00991 "psrad $" #shift ", %%mm1 \n\t"\
00992 "packssdw %%mm1, %%mm6 \n\t" \
00993 "movq %%mm6, 48+" #dst " \n\t"\
00994 "movq %%mm6, 64+" #dst " \n\t"\
00995 "movq %%mm5, 80+" #dst " \n\t"
00996
00997
00998
00999 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
01000
01001 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
01002
01003 "jmp 9f \n\t"
01004
01005
01006 "# .p2align 4 \n\t"\
01007 "1: \n\t"
01008 #undef IDCT
01009 #define IDCT(src0, src4, src1, src5, dst, shift) \
01010 "movq " #src0 ", %%mm0 \n\t" \
01011 "movq " #src4 ", %%mm1 \n\t" \
01012 "movq " #src1 ", %%mm2 \n\t" \
01013 "movq 16(%2), %%mm4 \n\t" \
01014 "pmaddwd %%mm0, %%mm4 \n\t" \
01015 "movq 24(%2), %%mm5 \n\t" \
01016 "pmaddwd %%mm5, %%mm0 \n\t" \
01017 "movq 32(%2), %%mm5 \n\t" \
01018 "pmaddwd %%mm1, %%mm5 \n\t" \
01019 "movq 40(%2), %%mm6 \n\t" \
01020 "pmaddwd %%mm6, %%mm1 \n\t" \
01021 "movq %%mm4, %%mm6 \n\t" \
01022 "movq 48(%2), %%mm7 \n\t" \
01023 "pmaddwd %%mm2, %%mm7 \n\t" \
01024 "paddd %%mm5, %%mm4 \n\t" \
01025 "psubd %%mm5, %%mm6 \n\t" \
01026 "movq %%mm0, %%mm5 \n\t" \
01027 "paddd %%mm1, %%mm0 \n\t" \
01028 "psubd %%mm1, %%mm5 \n\t" \
01029 "movq 64(%2), %%mm1 \n\t"\
01030 "pmaddwd %%mm2, %%mm1 \n\t" \
01031 "paddd %%mm4, %%mm7 \n\t" \
01032 "paddd %%mm4, %%mm4 \n\t" \
01033 "psubd %%mm7, %%mm4 \n\t" \
01034 "psrad $" #shift ", %%mm7 \n\t"\
01035 "psrad $" #shift ", %%mm4 \n\t"\
01036 "movq %%mm0, %%mm3 \n\t" \
01037 "paddd %%mm1, %%mm0 \n\t" \
01038 "psubd %%mm1, %%mm3 \n\t" \
01039 "psrad $" #shift ", %%mm0 \n\t"\
01040 "psrad $" #shift ", %%mm3 \n\t"\
01041 "packssdw %%mm7, %%mm7 \n\t" \
01042 "movd %%mm7, " #dst " \n\t"\
01043 "packssdw %%mm0, %%mm0 \n\t" \
01044 "movd %%mm0, 16+" #dst " \n\t"\
01045 "packssdw %%mm3, %%mm3 \n\t" \
01046 "movd %%mm3, 96+" #dst " \n\t"\
01047 "packssdw %%mm4, %%mm4 \n\t" \
01048 "movd %%mm4, 112+" #dst " \n\t"\
01049 "movq 80(%2), %%mm4 \n\t" \
01050 "pmaddwd %%mm2, %%mm4 \n\t" \
01051 "pmaddwd 96(%2), %%mm2 \n\t" \
01052 "movq %%mm5, %%mm3 \n\t" \
01053 "paddd %%mm4, %%mm3 \n\t" \
01054 "psubd %%mm4, %%mm5 \n\t" \
01055 "psrad $" #shift ", %%mm3 \n\t"\
01056 "psrad $" #shift ", %%mm5 \n\t"\
01057 "movq %%mm6, %%mm4 \n\t" \
01058 "paddd %%mm2, %%mm6 \n\t" \
01059 "psubd %%mm2, %%mm4 \n\t" \
01060 "psrad $" #shift ", %%mm6 \n\t"\
01061 "packssdw %%mm3, %%mm3 \n\t" \
01062 "movd %%mm3, 32+" #dst " \n\t"\
01063 "psrad $" #shift ", %%mm4 \n\t"\
01064 "packssdw %%mm6, %%mm6 \n\t" \
01065 "movd %%mm6, 48+" #dst " \n\t"\
01066 "packssdw %%mm4, %%mm4 \n\t" \
01067 "packssdw %%mm5, %%mm5 \n\t" \
01068 "movd %%mm4, 64+" #dst " \n\t"\
01069 "movd %%mm5, 80+" #dst " \n\t"
01070
01071
01072
01073 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
01074 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
01075 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
01076 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
01077 "jmp 9f \n\t"
01078
01079
01080 "# .p2align 4 \n\t"
01081 "7: \n\t"
01082 #undef IDCT
01083 #define IDCT(src0, src4, src1, src5, dst, shift) \
01084 "movq " #src0 ", %%mm0 \n\t" \
01085 "movq 16(%2), %%mm4 \n\t" \
01086 "pmaddwd %%mm0, %%mm4 \n\t" \
01087 "movq 24(%2), %%mm5 \n\t" \
01088 "pmaddwd %%mm5, %%mm0 \n\t" \
01089 "psrad $" #shift ", %%mm4 \n\t"\
01090 "psrad $" #shift ", %%mm0 \n\t"\
01091 "movq 8+" #src0 ", %%mm2 \n\t" \
01092 "movq 16(%2), %%mm1 \n\t" \
01093 "pmaddwd %%mm2, %%mm1 \n\t" \
01094 "movq 24(%2), %%mm7 \n\t" \
01095 "pmaddwd %%mm7, %%mm2 \n\t" \
01096 "movq 32(%2), %%mm7 \n\t" \
01097 "psrad $" #shift ", %%mm1 \n\t"\
01098 "packssdw %%mm1, %%mm4 \n\t" \
01099 "movq %%mm4, " #dst " \n\t"\
01100 "psrad $" #shift ", %%mm2 \n\t"\
01101 "packssdw %%mm2, %%mm0 \n\t" \
01102 "movq %%mm0, 16+" #dst " \n\t"\
01103 "movq %%mm0, 96+" #dst " \n\t"\
01104 "movq %%mm4, 112+" #dst " \n\t"\
01105 "movq %%mm0, 32+" #dst " \n\t"\
01106 "movq %%mm4, 48+" #dst " \n\t"\
01107 "movq %%mm4, 64+" #dst " \n\t"\
01108 "movq %%mm0, 80+" #dst " \n\t"
01109
01110
01111 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
01112
01113 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
01114
01115
01116
01117 #endif
01118
01119
01120
01121
01122
01123
01124
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140
01141 "9: \n\t"
01142 :: "r" (block), "r" (temp), "r" (coeffs)
01143 : "%eax"
01144 );
01145 }
01146
/**
 * Public entry point for the MMX inverse DCT: forwards to the file-local
 * idct() helper.
 *
 * @param block coefficient buffer handed to idct(); the visible stores in
 *              idct() target this pointer, so the result overwrites the
 *              input (presumably 64 int16_t coefficients, i.e. one 8x8
 *              block -- TODO confirm against callers).
 */
01147 void ff_simple_idct_mmx(int16_t *block)
01148 {
01149 idct(block);
01150 }
01151
01152
01153
/**
 * Runs idct() on the block, then hands the transformed block to
 * ff_put_pixels_clamped_mmx() together with the destination and stride.
 *
 * @param dest      destination pixel buffer, passed through unchanged.
 * @param line_size passed through unchanged; presumably the byte stride
 *                  between destination rows -- TODO confirm.
 * @param block     coefficient buffer; overwritten by idct() before it is
 *                  forwarded.
 *
 * NOTE(review): going by its name, ff_put_pixels_clamped_mmx() presumably
 * clamps the samples to the 8-bit range and stores them to dest; its
 * definition is not in this file, so verify there.
 */
01154 void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
01155 {
// Transform first: the helper below consumes the block after idct() has rewritten it.
01156 idct(block);
01157 ff_put_pixels_clamped_mmx(block, dest, line_size);
01158 }
/**
 * Runs idct() on the block, then hands the transformed block to
 * ff_add_pixels_clamped_mmx() together with the destination and stride.
 *
 * @param dest      destination pixel buffer, passed through unchanged.
 * @param line_size passed through unchanged; presumably the byte stride
 *                  between destination rows -- TODO confirm.
 * @param block     coefficient buffer; overwritten by idct() before it is
 *                  forwarded.
 *
 * NOTE(review): going by its name, ff_add_pixels_clamped_mmx() presumably
 * adds the samples to the pixels already in dest and clamps the sums to the
 * 8-bit range; its definition is not in this file, so verify there.
 */
01159 void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
01160 {
// Transform first: the helper below consumes the block after idct() has rewritten it.
01161 idct(block);
01162 ff_add_pixels_clamped_mmx(block, dest, line_size);
01163 }