61 13036, 13036, 13036, 13036,
62 27146, 27146, 27146, 27146,
63 -21746, -21746, -21746, -21746,
64 23170, 23170, 23170, 23170
139 16384, 16384, 16384, -16384,
140 21407, 8867, 8867, -21407,
141 16384, -16384, 16384, 16384,
142 -8867, 21407, -21407, -8867,
143 22725, 12873, 19266, -22725,
144 19266, 4520, -4520, -12873,
145 12873, 4520, 4520, 19266,
146 -22725, 19266, -12873, -22725,
148 22725, 22725, 22725, -22725,
149 29692, 12299, 12299, -29692,
150 22725, -22725, 22725, 22725,
151 -12299, 29692, -29692, -12299,
152 31521, 17855, 26722, -31521,
153 26722, 6270, -6270, -17855,
154 17855, 6270, 6270, 26722,
155 -31521, 26722, -17855, -31521,
157 21407, 21407, 21407, -21407,
158 27969, 11585, 11585, -27969,
159 21407, -21407, 21407, 21407,
160 -11585, 27969, -27969, -11585,
161 29692, 16819, 25172, -29692,
162 25172, 5906, -5906, -16819,
163 16819, 5906, 5906, 25172,
164 -29692, 25172, -16819, -29692,
166 19266, 19266, 19266, -19266,
167 25172, 10426, 10426, -25172,
168 19266, -19266, 19266, 19266,
169 -10426, 25172, -25172, -10426,
170 26722, 15137, 22654, -26722,
171 22654, 5315, -5315, -15137,
172 15137, 5315, 5315, 22654,
173 -26722, 22654, -15137, -26722,
181 16384, 21407, 16384, 8867,
182 16384, 8867, -16384, -21407,
183 16384, -8867, 16384, -21407,
184 -16384, 21407, 16384, -8867,
185 22725, 19266, 19266, -4520,
186 12873, 4520, -22725, -12873,
187 12873, -22725, 4520, -12873,
188 4520, 19266, 19266, -22725,
190 22725, 29692, 22725, 12299,
191 22725, 12299, -22725, -29692,
192 22725, -12299, 22725, -29692,
193 -22725, 29692, 22725, -12299,
194 31521, 26722, 26722, -6270,
195 17855, 6270, -31521, -17855,
196 17855, -31521, 6270, -17855,
197 6270, 26722, 26722, -31521,
199 21407, 27969, 21407, 11585,
200 21407, 11585, -21407, -27969,
201 21407, -11585, 21407, -27969,
202 -21407, 27969, 21407, -11585,
203 29692, 25172, 25172, -5906,
204 16819, 5906, -29692, -16819,
205 16819, -29692, 5906, -16819,
206 5906, 25172, 25172, -29692,
208 19266, 25172, 19266, 10426,
209 19266, 10426, -19266, -25172,
210 19266, -10426, 19266, -25172,
211 -19266, 25172, 19266, -10426,
212 26722, 22654, 22654, -5315,
213 15137, 5315, -26722, -15137,
214 15137, -26722, 5315, -15137,
215 5315, 22654, 22654, -26722,
/*
 * DCT_8_INV_ROW_MMX -- one row pass of the 8-point inverse DCT (plain MMX).
 *
 *   A1 : source row, 16 bytes = eight int16 coefficients (loaded at 0+ and 8+)
 *   A2 : destination row, 16 bytes (the invocations below pass A2 == A1,
 *        i.e. the transform is done in place)
 *   A3 : 64-byte multiplier table for this row; pmaddwd reads it at offsets
 *        0, 8, 16, 24, 32, 40, 48, 56
 *   A4 : 8-byte rounder added to the dword accumulators before descaling
 *
 * The punpcklwd/punpckhwd/punpckldq/punpckhdq sequence interleaves the input
 * words so each pmaddwd against A3 produces partial dot products; these are
 * summed with paddd, biased by A4, arithmetic-shifted right by 11 (the row
 * descale), then packed back to int16 with packssdw.  The trailing
 * psrld $16 / pslld $16 / por on mm4/mm7 swaps the 16-bit halves of each
 * dword to put the packed results into the required output word order
 * (the MMXEXT variant below achieves the same with a single pshufw).
 * NOTE(review): the A3 offset layout must match the interleaving performed
 * here -- it is tied to the tab_i_* tables earlier in this file; confirm
 * against those tables before editing either side.
 */
225 #define DCT_8_INV_ROW_MMX(A1, A2, A3, A4) \
226 "movq "#A1", %%mm0 \n\t" \
227 "movq 8+"#A1", %%mm1 \n\t" \
228 "movq %%mm0, %%mm2 \n\t" \
229 "movq "#A3", %%mm3 \n\t" \
230 "punpcklwd %%mm1, %%mm0 \n\t" \
231 "movq %%mm0, %%mm5 \n\t" \
232 "punpckldq %%mm0, %%mm0 \n\t" \
233 "movq 8+"#A3", %%mm4 \n\t" \
234 "punpckhwd %%mm1, %%mm2 \n\t" \
235 "pmaddwd %%mm0, %%mm3 \n\t" \
236 "movq %%mm2, %%mm6 \n\t" \
237 "movq 32+"#A3", %%mm1 \n\t" \
238 "punpckldq %%mm2, %%mm2 \n\t" \
239 "pmaddwd %%mm2, %%mm4 \n\t" \
240 "punpckhdq %%mm5, %%mm5 \n\t" \
241 "pmaddwd 16+"#A3", %%mm0 \n\t" \
242 "punpckhdq %%mm6, %%mm6 \n\t" \
243 "movq 40+ "#A3", %%mm7 \n\t" \
244 "pmaddwd %%mm5, %%mm1 \n\t" \
245 "paddd "#A4", %%mm3 \n\t" \
246 "pmaddwd %%mm6, %%mm7 \n\t" \
247 "pmaddwd 24+"#A3", %%mm2 \n\t" \
248 "paddd %%mm4, %%mm3 \n\t" \
249 "pmaddwd 48+"#A3", %%mm5 \n\t" \
250 "movq %%mm3, %%mm4 \n\t" \
251 "pmaddwd 56+"#A3", %%mm6 \n\t" \
252 "paddd %%mm7, %%mm1 \n\t" \
253 "paddd "#A4", %%mm0 \n\t" \
254 "psubd %%mm1, %%mm3 \n\t" \
255 "psrad $11, %%mm3 \n\t" \
256 "paddd %%mm4, %%mm1 \n\t" \
257 "paddd %%mm2, %%mm0 \n\t" \
258 "psrad $11, %%mm1 \n\t" \
259 "paddd %%mm6, %%mm5 \n\t" \
260 "movq %%mm0, %%mm4 \n\t" \
261 "paddd %%mm5, %%mm0 \n\t" \
262 "psubd %%mm5, %%mm4 \n\t" \
263 "psrad $11, %%mm0 \n\t" \
264 "psrad $11, %%mm4 \n\t" \
265 "packssdw %%mm0, %%mm1 \n\t" \
266 "packssdw %%mm3, %%mm4 \n\t" \
267 "movq %%mm4, %%mm7 \n\t" \
268 "psrld $16, %%mm4 \n\t" \
269 "pslld $16, %%mm7 \n\t" \
270 "movq %%mm1, "#A2" \n\t" \
271 "por %%mm4, %%mm7 \n\t" \
272 "movq %%mm7, 8+"#A2" \n\t" \
/*
 * DCT_8_INV_ROW_XMM -- one row pass of the 8-point inverse DCT (MMXEXT).
 *
 * Same contract as DCT_8_INV_ROW_MMX:
 *   A1 : source row (eight int16 coefficients at 0+ and 8+)
 *   A2 : destination row (invocations below use A2 == A1, in place)
 *   A3 : 64-byte multiplier table (pmaddwd operands at 0..56 in steps of 8)
 *   A4 : rounder added before the psrad $11 descale
 *
 * Differences from the MMX version: the word interleaving is done with
 * pshufw ($0x88 selects the even words, $0xdd the odd words) instead of the
 * punpck* chain, and the final output-word reordering is a single
 * pshufw $0xb1 on mm7 instead of the psrld/pslld/por triple.
 * NOTE(review): this variant is paired with tab_i_04_xmm (see the invocation
 * below), whose word layout differs from the MMX tables to match the pshufw
 * shuffles -- the two must not be mixed.
 */
279 #define DCT_8_INV_ROW_XMM(A1, A2, A3, A4) \
280 "movq "#A1", %%mm0 \n\t" \
281 "movq 8+"#A1", %%mm1 \n\t" \
282 "movq %%mm0, %%mm2 \n\t" \
283 "movq "#A3", %%mm3 \n\t" \
284 "pshufw $0x88, %%mm0, %%mm0 \n\t" \
285 "movq 8+"#A3", %%mm4 \n\t" \
286 "movq %%mm1, %%mm5 \n\t" \
287 "pmaddwd %%mm0, %%mm3 \n\t" \
288 "movq 32+"#A3", %%mm6 \n\t" \
289 "pshufw $0x88, %%mm1, %%mm1 \n\t" \
290 "pmaddwd %%mm1, %%mm4 \n\t" \
291 "movq 40+"#A3", %%mm7 \n\t" \
292 "pshufw $0xdd, %%mm2, %%mm2 \n\t" \
293 "pmaddwd %%mm2, %%mm6 \n\t" \
294 "pshufw $0xdd, %%mm5, %%mm5 \n\t" \
295 "pmaddwd %%mm5, %%mm7 \n\t" \
296 "paddd "#A4", %%mm3 \n\t" \
297 "pmaddwd 16+"#A3", %%mm0 \n\t" \
298 "paddd %%mm4, %%mm3 \n\t" \
299 "pmaddwd 24+"#A3", %%mm1 \n\t" \
300 "movq %%mm3, %%mm4 \n\t" \
301 "pmaddwd 48+"#A3", %%mm2 \n\t" \
302 "paddd %%mm7, %%mm6 \n\t" \
303 "pmaddwd 56+"#A3", %%mm5 \n\t" \
304 "paddd %%mm6, %%mm3 \n\t" \
305 "paddd "#A4", %%mm0 \n\t" \
306 "psrad $11, %%mm3 \n\t" \
307 "paddd %%mm1, %%mm0 \n\t" \
308 "psubd %%mm6, %%mm4 \n\t" \
309 "movq %%mm0, %%mm7 \n\t" \
310 "paddd %%mm5, %%mm2 \n\t" \
311 "paddd %%mm2, %%mm0 \n\t" \
312 "psrad $11, %%mm4 \n\t" \
313 "psubd %%mm2, %%mm7 \n\t" \
314 "psrad $11, %%mm0 \n\t" \
315 "psrad $11, %%mm7 \n\t" \
316 "packssdw %%mm0, %%mm3 \n\t" \
317 "packssdw %%mm4, %%mm7 \n\t" \
318 "movq %%mm3, "#A2" \n\t" \
319 "pshufw $0xb1, %%mm7, %%mm7 \n\t" \
320 "movq %%mm7, 8+"#A2" \n\t" \
/*
 * DCT_8_INV_COL -- column pass of the 8x8 inverse DCT over four columns.
 *
 *   A1 : base of the source half-block; rows are read at 16-byte stride
 *        (16*0 .. 16*7 + A1), i.e. four int16 columns of all eight rows
 *   A2 : base of the destination half-block, same 16-byte row stride
 *        (the invocations below pass A2 == A1, so this pass is in place)
 *
 * Constants come in via asm operand %3: (%3), 1*8(%3), 2*8(%3), 3*8(%3)
 * are 8-byte vectors of multipliers used with pmulhw (the invocation below
 * binds %3 to tg_1_16, so these are presumably the tangent/cosine scale
 * factors of the AP-922 scheme -- confirm against the constant tables).
 * pmulhw keeps only the high 16 bits of each product, so each multiply is
 * by (constant / 65536); the paddsw after several pmulhw lines restores the
 * integer part where the stored constant is the fractional remainder.
 *
 * Structure: the odd-row inputs (rows 1,3,5,7) are combined first into the
 * odd butterfly outputs (two of which are spilled to 3*16+A2 / 5*16+A2 and
 * reloaded later), then the even rows (0,2,4,6) form the even butterflies,
 * and the final add/sub pairs with psraw $6 (column descale) write all
 * eight output rows.  Saturating paddsw/psubsw arithmetic is used
 * throughout.  Register pressure is the reason for the two temporary
 * stores; statement order is load-latency-scheduled -- do not reorder.
 * Call twice (offsets 0 and 8) to cover all eight columns, as done below.
 */
386 #define DCT_8_INV_COL(A1, A2) \
387 "movq 2*8(%3), %%mm0 \n\t" \
388 "movq 16*3+"#A1", %%mm3 \n\t" \
389 "movq %%mm0, %%mm1 \n\t" \
390 "movq 16*5+"#A1", %%mm5 \n\t" \
391 "pmulhw %%mm3, %%mm0 \n\t" \
392 "movq (%3), %%mm4 \n\t" \
393 "pmulhw %%mm5, %%mm1 \n\t" \
394 "movq 16*7+"#A1", %%mm7 \n\t" \
395 "movq %%mm4, %%mm2 \n\t" \
396 "movq 16*1+"#A1", %%mm6 \n\t" \
397 "pmulhw %%mm7, %%mm4 \n\t" \
398 "paddsw %%mm3, %%mm0 \n\t" \
399 "pmulhw %%mm6, %%mm2 \n\t" \
400 "paddsw %%mm3, %%mm1 \n\t" \
401 "psubsw %%mm5, %%mm0 \n\t" \
402 "movq 3*8(%3), %%mm3 \n\t" \
403 "paddsw %%mm5, %%mm1 \n\t" \
404 "paddsw %%mm6, %%mm4 \n\t" \
405 "psubsw %%mm7, %%mm2 \n\t" \
406 "movq %%mm4, %%mm5 \n\t" \
407 "movq %%mm2, %%mm6 \n\t" \
408 "paddsw %%mm1, %%mm5 \n\t" \
409 "psubsw %%mm0, %%mm6 \n\t" \
410 "psubsw %%mm1, %%mm4 \n\t" \
411 "paddsw %%mm0, %%mm2 \n\t" \
412 "movq 1*8(%3), %%mm7 \n\t" \
413 "movq %%mm4, %%mm1 \n\t" \
414 "movq %%mm5, 3*16+"#A2" \n\t" \
415 "paddsw %%mm2, %%mm1 \n\t" \
416 "movq %%mm6, 5*16+"#A2" \n\t" \
417 "psubsw %%mm2, %%mm4 \n\t" \
418 "movq 2*16+"#A1", %%mm5 \n\t" \
419 "movq %%mm7, %%mm0 \n\t" \
420 "movq 6*16+"#A1", %%mm6 \n\t" \
421 "pmulhw %%mm5, %%mm0 \n\t" \
422 "pmulhw %%mm6, %%mm7 \n\t" \
423 "pmulhw %%mm3, %%mm1 \n\t" \
424 "movq 0*16+"#A1", %%mm2 \n\t" \
425 "pmulhw %%mm3, %%mm4 \n\t" \
426 "psubsw %%mm6, %%mm0 \n\t" \
427 "movq %%mm2, %%mm3 \n\t" \
428 "movq 4*16+"#A1", %%mm6 \n\t" \
429 "paddsw %%mm5, %%mm7 \n\t" \
430 "paddsw %%mm6, %%mm2 \n\t" \
431 "psubsw %%mm6, %%mm3 \n\t" \
432 "movq %%mm2, %%mm5 \n\t" \
433 "movq %%mm3, %%mm6 \n\t" \
434 "psubsw %%mm7, %%mm2 \n\t" \
435 "paddsw %%mm0, %%mm3 \n\t" \
436 "paddsw %%mm1, %%mm1 \n\t" \
437 "paddsw %%mm4, %%mm4 \n\t" \
438 "paddsw %%mm7, %%mm5 \n\t" \
439 "psubsw %%mm0, %%mm6 \n\t" \
440 "movq %%mm3, %%mm7 \n\t" \
441 "movq %%mm6, %%mm0 \n\t" \
442 "paddsw %%mm1, %%mm3 \n\t" \
443 "paddsw %%mm4, %%mm6 \n\t" \
444 "psraw $6, %%mm3 \n\t" \
445 "psubsw %%mm1, %%mm7 \n\t" \
446 "psraw $6, %%mm6 \n\t" \
447 "psubsw %%mm4, %%mm0 \n\t" \
448 "movq 3*16+"#A2", %%mm1 \n\t" \
449 "psraw $6, %%mm7 \n\t" \
450 "movq %%mm5, %%mm4 \n\t" \
451 "psraw $6, %%mm0 \n\t" \
452 "movq %%mm3, 1*16+"#A2" \n\t" \
453 "paddsw %%mm1, %%mm5 \n\t" \
454 "movq %%mm6, 2*16+"#A2" \n\t" \
455 "psubsw %%mm1, %%mm4 \n\t" \
456 "movq 5*16+"#A2", %%mm3 \n\t" \
457 "psraw $6, %%mm5 \n\t" \
458 "movq %%mm2, %%mm6 \n\t" \
459 "psraw $6, %%mm4 \n\t" \
460 "movq %%mm0, 5*16+"#A2" \n\t" \
461 "paddsw %%mm3, %%mm2 \n\t" \
462 "movq %%mm7, 6*16+"#A2" \n\t" \
463 "psubsw %%mm3, %%mm6 \n\t" \
464 "movq %%mm5, 0*16+"#A2" \n\t" \
465 "psraw $6, %%mm2 \n\t" \
466 "movq %%mm4, 7*16+"#A2" \n\t" \
467 "psraw $6, %%mm6 \n\t" \
468 "movq %%mm2, 3*16+"#A2" \n\t" \
469 "movq %%mm6, 4*16+"#A2" \n\t" \
483 DCT_8_INV_ROW_MMX(0 * 16(%0), 0 * 16(%0), 64 * 0(%2), 8 * 0(%1))
484 DCT_8_INV_ROW_MMX(1 * 16(%0), 1 * 16(%0), 64 * 1(%2), 8 * 1(%1))
485 DCT_8_INV_ROW_MMX(2 * 16(%0), 2 * 16(%0), 64 * 2(%2), 8 * 2(%1))
486 DCT_8_INV_ROW_MMX(3 * 16(%0), 3 * 16(%0), 64 * 3(%2), 8 * 3(%1))
487 DCT_8_INV_ROW_MMX(4 * 16(%0), 4 * 16(%0), 64 * 0(%2), 8 * 4(%1))
488 DCT_8_INV_ROW_MMX(5 * 16(%0), 5 * 16(%0), 64 * 3(%2), 8 * 5(%1))
489 DCT_8_INV_ROW_MMX(6 * 16(%0), 6 * 16(%0), 64 * 2(%2), 8 * 6(%1))
490 DCT_8_INV_ROW_MMX(7 * 16(%0), 7 * 16(%0), 64 * 1(%2), 8 * 7(%1))
493 DCT_8_INV_COL(0(%0), 0(%0))
494 DCT_8_INV_COL(8(%0), 8(%0))
495 :: "
r" (block), "
r" (rounder_0), "
r" (tab_i_04_mmx), "
r" (tg_1_16));
512 #if HAVE_MMXEXT_INLINE
522 DCT_8_INV_ROW_XMM(0 * 16(%0), 0 * 16(%0), 64 * 0(%2), 8 * 0(%1))
523 DCT_8_INV_ROW_XMM(1 * 16(%0), 1 * 16(%0), 64 * 1(%2), 8 * 1(%1))
524 DCT_8_INV_ROW_XMM(2 * 16(%0), 2 * 16(%0), 64 * 2(%2), 8 * 2(%1))
525 DCT_8_INV_ROW_XMM(3 * 16(%0), 3 * 16(%0), 64 * 3(%2), 8 * 3(%1))
526 DCT_8_INV_ROW_XMM(4 * 16(%0), 4 * 16(%0), 64 * 0(%2), 8 * 4(%1))
527 DCT_8_INV_ROW_XMM(5 * 16(%0), 5 * 16(%0), 64 * 3(%2), 8 * 5(%1))
528 DCT_8_INV_ROW_XMM(6 * 16(%0), 6 * 16(%0), 64 * 2(%2), 8 * 6(%1))
529 DCT_8_INV_ROW_XMM(7 * 16(%0), 7 * 16(%0), 64 * 1(%2), 8 * 7(%1))
532 DCT_8_INV_COL(0(%0), 0(%0))
533 DCT_8_INV_COL(8(%0), 8(%0))
534 :: "
r" (block), "
r" (rounder_0), "
r" (tab_i_04_xmm), "
r" (tg_1_16));