00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
/**
 * Horizontal half-pel interpolation ("put" variant) of an 8-byte-wide block.
 *
 * For every row the 8 source bytes at offset 0 and the 8 source bytes at
 * offset 1 (the right-hand neighbours) are combined by PAVGBP and the result
 * is stored to the destination. PAVGBP and MOVQ_BFE are macros defined outside
 * this chunk; presumably PAVGBP is a packed byte average whose rounding is
 * chosen by the including file -- TODO confirm.
 *
 * @param block     destination; 8 bytes are written per row
 * @param pixels    source; 9 bytes are read per row
 * @param line_size byte stride, applied to both block and pixels
 * @param h         row count; four rows are handled per loop iteration and the
 *                  loop only terminates when h reaches exactly 0, so h has to
 *                  be a positive multiple of 4
 */
00028 static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00029 {
00030 MOVQ_BFE(mm6); // macro defined elsewhere; presumably preloads the constant PAVGBP expects in mm6 -- TODO confirm
00031 __asm__ volatile(
00032 "lea (%3, %3), %%"REG_a" \n\t" // REG_a = 2 * line_size (two-row step)
00033 ".p2align 3 \n\t"
00034 "1: \n\t"
00035 "movq (%1), %%mm0 \n\t" // row 0, bytes 0..7
00036 "movq 1(%1), %%mm1 \n\t" // row 0, bytes 1..8
00037 "movq (%1, %3), %%mm2 \n\t" // row 1, bytes 0..7
00038 "movq 1(%1, %3), %%mm3 \n\t" // row 1, bytes 1..8
00039 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) // outputs are mm4 (row 0) and mm5 (row 1), as the stores below show
00040 "movq %%mm4, (%2) \n\t"
00041 "movq %%mm5, (%2, %3) \n\t"
00042 "add %%"REG_a", %1 \n\t" // advance source by two rows
00043 "add %%"REG_a", %2 \n\t" // advance destination by two rows
00044 "movq (%1), %%mm0 \n\t" // same sequence again for rows 2 and 3
00045 "movq 1(%1), %%mm1 \n\t"
00046 "movq (%1, %3), %%mm2 \n\t"
00047 "movq 1(%1, %3), %%mm3 \n\t"
00048 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00049 "movq %%mm4, (%2) \n\t"
00050 "movq %%mm5, (%2, %3) \n\t"
00051 "add %%"REG_a", %1 \n\t"
00052 "add %%"REG_a", %2 \n\t"
00053 "subl $4, %0 \n\t" // four rows done
00054 "jnz 1b \n\t"
00055 :"+g"(h), "+S"(pixels), "+D"(block) // %0 = h, %1 = pixels, %2 = block (all updated by the asm)
00056 :"r"((x86_reg)line_size) // %3 = line_size widened to register width
00057 :REG_a, "memory");
00058 }
00059
/**
 * Combine two 8-byte-wide sources row by row and store the result ("put").
 *
 * src1 is walked with src1Stride, src2 is read as tightly packed rows of
 * 8 bytes (it is advanced by 8 per row), dst is walked with dstStride.
 * The per-byte combination is done by PAVGB / PAVGBP, macros defined outside
 * this chunk (presumably packed byte averages -- TODO confirm).
 *
 * @param dst        destination; 8 bytes written per row
 * @param src1       first source, stride src1Stride
 * @param src2       second source, fixed stride of 8 bytes
 * @param dstStride  byte stride of dst
 * @param src1Stride byte stride of src1
 * @param h          row count. If h is odd one row is handled up front; the
 *                   code then falls straight into a loop that handles four
 *                   rows per iteration and exits only on a count of exactly 0.
 *                   NOTE(review): this means the count left after the odd-row
 *                   step must be a positive multiple of 4 (h == 1 would enter
 *                   the loop with a count of 0) -- confirm callers respect it.
 */
00060 static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00061 {
00062 MOVQ_BFE(mm6); // macro defined elsewhere; mm6 is later handed to PAVGB as its 4th argument
00063 __asm__ volatile(
00064 "testl $1, %0 \n\t" // is h odd?
00065 " jz 1f \n\t" // even: go directly to the 4-row loop
00066 "movq (%1), %%mm0 \n\t" // odd: process a single row first
00067 "movq (%2), %%mm1 \n\t"
00068 "add %4, %1 \n\t" // src1 += src1Stride
00069 "add $8, %2 \n\t" // src2 += 8 (packed rows)
00070 PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
00071 "movq %%mm4, (%3) \n\t"
00072 "add %5, %3 \n\t" // dst += dstStride
00073 "decl %0 \n\t"
00074 ".p2align 3 \n\t"
00075 "1: \n\t"
00076 "movq (%1), %%mm0 \n\t" // rows 0 and 1 of this iteration
00077 "movq (%2), %%mm1 \n\t"
00078 "add %4, %1 \n\t"
00079 "movq (%1), %%mm2 \n\t"
00080 "movq 8(%2), %%mm3 \n\t"
00081 "add %4, %1 \n\t"
00082 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00083 "movq %%mm4, (%3) \n\t"
00084 "add %5, %3 \n\t"
00085 "movq %%mm5, (%3) \n\t"
00086 "add %5, %3 \n\t"
00087 "movq (%1), %%mm0 \n\t" // rows 2 and 3 of this iteration
00088 "movq 16(%2), %%mm1 \n\t"
00089 "add %4, %1 \n\t"
00090 "movq (%1), %%mm2 \n\t"
00091 "movq 24(%2), %%mm3 \n\t"
00092 "add %4, %1 \n\t"
00093 "add $32, %2 \n\t" // src2 += 4 rows * 8 bytes
00094 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00095 "movq %%mm4, (%3) \n\t"
00096 "add %5, %3 \n\t"
00097 "movq %%mm5, (%3) \n\t"
00098 "add %5, %3 \n\t"
00099 "subl $4, %0 \n\t"
00100 "jnz 1b \n\t"
00101 #if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
00102 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) // h kept in memory when ebx cannot be used
00103 #else
00104 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) // %0 = h, %1 = src1, %2 = src2, %3 = dst
00105 #endif
00106 :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride) // %4 = src1Stride, %5 = dstStride
00107 :"memory");
00108 }
00109
/**
 * Horizontal half-pel interpolation ("put" variant) of a 16-byte-wide block.
 *
 * Same scheme as the 8-wide version, applied to two 8-byte halves per row:
 * bytes at offsets 0/1 feed the left half and bytes at offsets 8/9 feed the
 * right half. PAVGBP and MOVQ_BFE are macros defined outside this chunk
 * (presumably a packed byte average and its constant setup -- TODO confirm).
 *
 * @param block     destination; 16 bytes are written per row
 * @param pixels    source; 17 bytes are read per row
 * @param line_size byte stride, applied to both block and pixels
 * @param h         row count; four rows are handled per loop iteration and the
 *                  loop only terminates when h reaches exactly 0, so h has to
 *                  be a positive multiple of 4
 */
00110 static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00111 {
00112 MOVQ_BFE(mm6);
00113 __asm__ volatile(
00114 "lea (%3, %3), %%"REG_a" \n\t" // REG_a = 2 * line_size
00115 ".p2align 3 \n\t"
00116 "1: \n\t"
00117 "movq (%1), %%mm0 \n\t" // rows 0/1, left half
00118 "movq 1(%1), %%mm1 \n\t"
00119 "movq (%1, %3), %%mm2 \n\t"
00120 "movq 1(%1, %3), %%mm3 \n\t"
00121 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00122 "movq %%mm4, (%2) \n\t"
00123 "movq %%mm5, (%2, %3) \n\t"
00124 "movq 8(%1), %%mm0 \n\t" // rows 0/1, right half
00125 "movq 9(%1), %%mm1 \n\t"
00126 "movq 8(%1, %3), %%mm2 \n\t"
00127 "movq 9(%1, %3), %%mm3 \n\t"
00128 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00129 "movq %%mm4, 8(%2) \n\t"
00130 "movq %%mm5, 8(%2, %3) \n\t"
00131 "add %%"REG_a", %1 \n\t" // step both pointers by two rows
00132 "add %%"REG_a", %2 \n\t"
00133 "movq (%1), %%mm0 \n\t" // rows 2/3, left half
00134 "movq 1(%1), %%mm1 \n\t"
00135 "movq (%1, %3), %%mm2 \n\t"
00136 "movq 1(%1, %3), %%mm3 \n\t"
00137 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00138 "movq %%mm4, (%2) \n\t"
00139 "movq %%mm5, (%2, %3) \n\t"
00140 "movq 8(%1), %%mm0 \n\t" // rows 2/3, right half
00141 "movq 9(%1), %%mm1 \n\t"
00142 "movq 8(%1, %3), %%mm2 \n\t"
00143 "movq 9(%1, %3), %%mm3 \n\t"
00144 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00145 "movq %%mm4, 8(%2) \n\t"
00146 "movq %%mm5, 8(%2, %3) \n\t"
00147 "add %%"REG_a", %1 \n\t"
00148 "add %%"REG_a", %2 \n\t"
00149 "subl $4, %0 \n\t" // four rows done
00150 "jnz 1b \n\t"
00151 :"+g"(h), "+S"(pixels), "+D"(block) // %0 = h, %1 = pixels, %2 = block
00152 :"r"((x86_reg)line_size) // %3 = line_size
00153 :REG_a, "memory");
00154 }
00155
/**
 * Combine two 16-byte-wide sources row by row and store the result ("put").
 *
 * src1 is walked with src1Stride, src2 is read as tightly packed rows of
 * 16 bytes, dst is walked with dstStride. Each row is processed as two 8-byte
 * halves through one PAVGBP invocation (macro defined outside this chunk;
 * presumably a paired packed byte average -- TODO confirm).
 *
 * @param dst        destination; 16 bytes written per row
 * @param src1       first source, stride src1Stride
 * @param src2       second source, fixed stride of 16 bytes
 * @param dstStride  byte stride of dst
 * @param src1Stride byte stride of src1
 * @param h          row count. If h is odd one row is handled up front; the
 *                   code then falls straight into a loop that handles two rows
 *                   per iteration and exits only on a count of exactly 0.
 *                   NOTE(review): h == 1 would therefore enter the loop with a
 *                   count of 0 -- confirm callers never pass it.
 */
00156 static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00157 {
00158 MOVQ_BFE(mm6);
00159 __asm__ volatile(
00160 "testl $1, %0 \n\t" // is h odd?
00161 " jz 1f \n\t" // even: go directly to the 2-row loop
00162 "movq (%1), %%mm0 \n\t" // odd: one full 16-byte row first
00163 "movq (%2), %%mm1 \n\t"
00164 "movq 8(%1), %%mm2 \n\t"
00165 "movq 8(%2), %%mm3 \n\t"
00166 "add %4, %1 \n\t" // src1 += src1Stride
00167 "add $16, %2 \n\t" // src2 += 16 (packed rows)
00168 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00169 "movq %%mm4, (%3) \n\t" // left 8 bytes
00170 "movq %%mm5, 8(%3) \n\t" // right 8 bytes
00171 "add %5, %3 \n\t" // dst += dstStride
00172 "decl %0 \n\t"
00173 ".p2align 3 \n\t"
00174 "1: \n\t"
00175 "movq (%1), %%mm0 \n\t" // first row of the pair
00176 "movq (%2), %%mm1 \n\t"
00177 "movq 8(%1), %%mm2 \n\t"
00178 "movq 8(%2), %%mm3 \n\t"
00179 "add %4, %1 \n\t"
00180 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00181 "movq %%mm4, (%3) \n\t"
00182 "movq %%mm5, 8(%3) \n\t"
00183 "add %5, %3 \n\t"
00184 "movq (%1), %%mm0 \n\t" // second row of the pair; src2 is 16 bytes further on
00185 "movq 16(%2), %%mm1 \n\t"
00186 "movq 8(%1), %%mm2 \n\t"
00187 "movq 24(%2), %%mm3 \n\t"
00188 "add %4, %1 \n\t"
00189 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00190 "movq %%mm4, (%3) \n\t"
00191 "movq %%mm5, 8(%3) \n\t"
00192 "add %5, %3 \n\t"
00193 "add $32, %2 \n\t" // src2 += 2 rows * 16 bytes
00194 "subl $2, %0 \n\t"
00195 "jnz 1b \n\t"
00196 #if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
00197 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) // h kept in memory when ebx cannot be used
00198 #else
00199 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) // %0 = h, %1 = src1, %2 = src2, %3 = dst
00200 #endif
00201 :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride) // %4 = src1Stride, %5 = dstStride
00202 :"memory");
00203 }
00204
/**
 * Vertical half-pel interpolation ("put" variant) of an 8-byte-wide block.
 *
 * Every output row is produced by PAVGBP from a source row and the row below
 * it. The most recently loaded row is kept in a register (mm0 or mm2) so each
 * source row is loaded only once. PAVGBP and MOVQ_BFE are macros defined
 * outside this chunk (presumably a paired packed byte average and its
 * constant setup -- TODO confirm).
 *
 * @param block     destination; 8 bytes are written per row
 * @param pixels    source; h + 1 rows of 8 bytes are read
 * @param line_size byte stride, applied to both block and pixels
 * @param h         row count; four rows are handled per loop iteration and the
 *                  loop only terminates when h reaches exactly 0, so h has to
 *                  be a positive multiple of 4
 */
00205 static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00206 {
00207 MOVQ_BFE(mm6);
00208 __asm__ volatile(
00209 "lea (%3, %3), %%"REG_a" \n\t" // REG_a = 2 * line_size
00210 "movq (%1), %%mm0 \n\t" // mm0 = source row 0 (primes the pipeline)
00211 ".p2align 3 \n\t"
00212 "1: \n\t"
00213 "movq (%1, %3), %%mm1 \n\t" // mm1 = row 1
00214 "movq (%1, %%"REG_a"),%%mm2 \n\t" // mm2 = row 2
00215 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) // mm4 from rows 0+1, mm5 from rows 1+2
00216 "movq %%mm4, (%2) \n\t"
00217 "movq %%mm5, (%2, %3) \n\t"
00218 "add %%"REG_a", %1 \n\t"
00219 "add %%"REG_a", %2 \n\t"
00220 "movq (%1, %3), %%mm1 \n\t" // mm1 = row 3
00221 "movq (%1, %%"REG_a"),%%mm0 \n\t" // mm0 = row 4, reused as "row 0" of the next iteration
00222 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) // mm4 from rows 2+3, mm5 from rows 3+4
00223 "movq %%mm4, (%2) \n\t"
00224 "movq %%mm5, (%2, %3) \n\t"
00225 "add %%"REG_a", %1 \n\t"
00226 "add %%"REG_a", %2 \n\t"
00227 "subl $4, %0 \n\t" // four rows done
00228 "jnz 1b \n\t"
00229 :"+g"(h), "+S"(pixels), "+D"(block) // %0 = h, %1 = pixels, %2 = block
00230 :"r"((x86_reg)line_size) // %3 = line_size
00231 :REG_a, "memory");
00232 }
00233
/**
 * Diagonal (x+y) half-pel interpolation ("put" variant) of an 8-byte-wide
 * block.
 *
 * Each output byte is (a + b + c + d + r) >> 2, where a/b are horizontally
 * adjacent bytes of one source row, c/d the same two columns of the next row,
 * and r is the per-word value SET_RND placed in mm6. The horizontal sum of a
 * row is kept in registers so it is shared by the output rows above and below.
 * MOVQ_ZERO and SET_RND are macros defined outside this chunk; the byte-to-word
 * unpacking below only works if mm7 is zero, which MOVQ_ZERO presumably
 * guarantees -- TODO confirm.
 *
 * @param block     destination; 8 bytes are written per row (pointer itself is
 *                  not advanced, REG_a is used as running offset)
 * @param pixels    source; h + 1 rows of 9 bytes are read
 * @param line_size byte stride, applied to both block and pixels
 * @param h         row count; two rows are handled per loop iteration and the
 *                  loop only terminates when h reaches exactly 0, so h has to
 *                  be a positive multiple of 2
 */
00234 static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00235 {
00236 MOVQ_ZERO(mm7);
00237 SET_RND(mm6);
00238 __asm__ volatile(
00239 "movq (%1), %%mm0 \n\t" // row 0, bytes 0..7
00240 "movq 1(%1), %%mm4 \n\t" // row 0, bytes 1..8
00241 "movq %%mm0, %%mm1 \n\t"
00242 "movq %%mm4, %%mm5 \n\t"
00243 "punpcklbw %%mm7, %%mm0 \n\t" // widen low 4 bytes to words
00244 "punpcklbw %%mm7, %%mm4 \n\t"
00245 "punpckhbw %%mm7, %%mm1 \n\t" // widen high 4 bytes to words
00246 "punpckhbw %%mm7, %%mm5 \n\t"
00247 "paddusw %%mm0, %%mm4 \n\t" // mm4/mm5 = horizontal pair sums of row 0 (low/high)
00248 "paddusw %%mm1, %%mm5 \n\t"
00249 "xor %%"REG_a", %%"REG_a" \n\t" // REG_a = running byte offset, starts at 0
00250 "add %3, %1 \n\t" // pixels now points at row 1
00251 ".p2align 3 \n\t"
00252 "1: \n\t"
00253 "movq (%1, %%"REG_a"), %%mm0 \n\t" // next source row
00254 "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
00255 "movq %%mm0, %%mm1 \n\t"
00256 "movq %%mm2, %%mm3 \n\t"
00257 "punpcklbw %%mm7, %%mm0 \n\t"
00258 "punpcklbw %%mm7, %%mm2 \n\t"
00259 "punpckhbw %%mm7, %%mm1 \n\t"
00260 "punpckhbw %%mm7, %%mm3 \n\t"
00261 "paddusw %%mm2, %%mm0 \n\t" // mm0/mm1 = horizontal pair sums of this row
00262 "paddusw %%mm3, %%mm1 \n\t"
00263 "paddusw %%mm6, %%mm4 \n\t" // previous row sums + rounder
00264 "paddusw %%mm6, %%mm5 \n\t"
00265 "paddusw %%mm0, %%mm4 \n\t" // + this row sums
00266 "paddusw %%mm1, %%mm5 \n\t"
00267 "psrlw $2, %%mm4 \n\t" // divide by 4
00268 "psrlw $2, %%mm5 \n\t"
00269 "packuswb %%mm5, %%mm4 \n\t" // back to 8 bytes
00270 "movq %%mm4, (%2, %%"REG_a") \n\t"
00271 "add %3, %%"REG_a" \n\t" // next row
00272
00273 "movq (%1, %%"REG_a"), %%mm2 \n\t" // second half: same steps with the register roles swapped
00274 "movq 1(%1, %%"REG_a"), %%mm4 \n\t"
00275 "movq %%mm2, %%mm3 \n\t"
00276 "movq %%mm4, %%mm5 \n\t"
00277 "punpcklbw %%mm7, %%mm2 \n\t"
00278 "punpcklbw %%mm7, %%mm4 \n\t"
00279 "punpckhbw %%mm7, %%mm3 \n\t"
00280 "punpckhbw %%mm7, %%mm5 \n\t"
00281 "paddusw %%mm2, %%mm4 \n\t" // mm4/mm5 = sums of the new row, carried into the next iteration
00282 "paddusw %%mm3, %%mm5 \n\t"
00283 "paddusw %%mm6, %%mm0 \n\t"
00284 "paddusw %%mm6, %%mm1 \n\t"
00285 "paddusw %%mm4, %%mm0 \n\t"
00286 "paddusw %%mm5, %%mm1 \n\t"
00287 "psrlw $2, %%mm0 \n\t"
00288 "psrlw $2, %%mm1 \n\t"
00289 "packuswb %%mm1, %%mm0 \n\t"
00290 "movq %%mm0, (%2, %%"REG_a") \n\t"
00291 "add %3, %%"REG_a" \n\t"
00292
00293 "subl $2, %0 \n\t" // two rows done
00294 "jnz 1b \n\t"
00295 :"+g"(h), "+S"(pixels) // %0 = h, %1 = pixels (advanced once, before the loop)
00296 :"D"(block), "r"((x86_reg)line_size) // %2 = block (read-only pointer), %3 = line_size
00297 :REG_a, "memory");
00298 }
00299
00300
00301 static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00302 {
00303 MOVQ_BFE(mm6);
00304 JUMPALIGN();
00305 do {
00306 __asm__ volatile(
00307 "movd %0, %%mm0 \n\t"
00308 "movd %1, %%mm1 \n\t"
00309 OP_AVG(%%mm0, %%mm1, %%mm2, %%mm6)
00310 "movd %%mm2, %0 \n\t"
00311 :"+m"(*block)
00312 :"m"(*pixels)
00313 :"memory");
00314 pixels += line_size;
00315 block += line_size;
00316 }
00317 while (--h);
00318 }
00319
00320
00321 static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00322 {
00323 MOVQ_BFE(mm6);
00324 JUMPALIGN();
00325 do {
00326 __asm__ volatile(
00327 "movq %0, %%mm0 \n\t"
00328 "movq %1, %%mm1 \n\t"
00329 OP_AVG(%%mm0, %%mm1, %%mm2, %%mm6)
00330 "movq %%mm2, %0 \n\t"
00331 :"+m"(*block)
00332 :"m"(*pixels)
00333 :"memory");
00334 pixels += line_size;
00335 block += line_size;
00336 }
00337 while (--h);
00338 }
00339
00340 static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00341 {
00342 MOVQ_BFE(mm6);
00343 JUMPALIGN();
00344 do {
00345 __asm__ volatile(
00346 "movq %0, %%mm0 \n\t"
00347 "movq %1, %%mm1 \n\t"
00348 OP_AVG(%%mm0, %%mm1, %%mm2, %%mm6)
00349 "movq %%mm2, %0 \n\t"
00350 "movq 8%0, %%mm0 \n\t"
00351 "movq 8%1, %%mm1 \n\t"
00352 OP_AVG(%%mm0, %%mm1, %%mm2, %%mm6)
00353 "movq %%mm2, 8%0 \n\t"
00354 :"+m"(*block)
00355 :"m"(*pixels)
00356 :"memory");
00357 pixels += line_size;
00358 block += line_size;
00359 }
00360 while (--h);
00361 }
00362
00363 static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00364 {
00365 MOVQ_BFE(mm6);
00366 JUMPALIGN();
00367 do {
00368 __asm__ volatile(
00369 "movq %1, %%mm0 \n\t"
00370 "movq 1%1, %%mm1 \n\t"
00371 "movq %0, %%mm3 \n\t"
00372 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00373 OP_AVG(%%mm3, %%mm2, %%mm0, %%mm6)
00374 "movq %%mm0, %0 \n\t"
00375 :"+m"(*block)
00376 :"m"(*pixels)
00377 :"memory");
00378 pixels += line_size;
00379 block += line_size;
00380 } while (--h);
00381 }
00382
00383 static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00384 {
00385 MOVQ_BFE(mm6);
00386 JUMPALIGN();
00387 do {
00388 __asm__ volatile(
00389 "movq %1, %%mm0 \n\t"
00390 "movq %2, %%mm1 \n\t"
00391 "movq %0, %%mm3 \n\t"
00392 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00393 OP_AVG(%%mm3, %%mm2, %%mm0, %%mm6)
00394 "movq %%mm0, %0 \n\t"
00395 :"+m"(*dst)
00396 :"m"(*src1), "m"(*src2)
00397 :"memory");
00398 dst += dstStride;
00399 src1 += src1Stride;
00400 src2 += 8;
00401 } while (--h);
00402 }
00403
00404 static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00405 {
00406 MOVQ_BFE(mm6);
00407 JUMPALIGN();
00408 do {
00409 __asm__ volatile(
00410 "movq %1, %%mm0 \n\t"
00411 "movq 1%1, %%mm1 \n\t"
00412 "movq %0, %%mm3 \n\t"
00413 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00414 OP_AVG(%%mm3, %%mm2, %%mm0, %%mm6)
00415 "movq %%mm0, %0 \n\t"
00416 "movq 8%1, %%mm0 \n\t"
00417 "movq 9%1, %%mm1 \n\t"
00418 "movq 8%0, %%mm3 \n\t"
00419 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00420 OP_AVG(%%mm3, %%mm2, %%mm0, %%mm6)
00421 "movq %%mm0, 8%0 \n\t"
00422 :"+m"(*block)
00423 :"m"(*pixels)
00424 :"memory");
00425 pixels += line_size;
00426 block += line_size;
00427 } while (--h);
00428 }
00429
00430 static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00431 {
00432 MOVQ_BFE(mm6);
00433 JUMPALIGN();
00434 do {
00435 __asm__ volatile(
00436 "movq %1, %%mm0 \n\t"
00437 "movq %2, %%mm1 \n\t"
00438 "movq %0, %%mm3 \n\t"
00439 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00440 OP_AVG(%%mm3, %%mm2, %%mm0, %%mm6)
00441 "movq %%mm0, %0 \n\t"
00442 "movq 8%1, %%mm0 \n\t"
00443 "movq 8%2, %%mm1 \n\t"
00444 "movq 8%0, %%mm3 \n\t"
00445 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00446 OP_AVG(%%mm3, %%mm2, %%mm0, %%mm6)
00447 "movq %%mm0, 8%0 \n\t"
00448 :"+m"(*dst)
00449 :"m"(*src1), "m"(*src2)
00450 :"memory");
00451 dst += dstStride;
00452 src1 += src1Stride;
00453 src2 += 16;
00454 } while (--h);
00455 }
00456
/**
 * Vertical half-pel interpolation of an 8-byte-wide block, blended into the
 * destination ("avg" variant).
 *
 * Every interpolated row is produced by PAVGBP from a source row and the row
 * below it, then combined with the bytes already in the destination by OP_AVG
 * and stored. The most recently loaded source row stays in a register (mm0 or
 * mm2) across the two halves of the loop. PAVGBP, OP_AVG and MOVQ_BFE are
 * macros defined outside this chunk.
 *
 * @param block     destination, read and rewritten (8 bytes per row)
 * @param pixels    source; h + 1 rows of 8 bytes are read
 * @param line_size byte stride, applied to both block and pixels
 * @param h         row count; four rows are handled per loop iteration and the
 *                  loop only terminates when h reaches exactly 0, so h has to
 *                  be a positive multiple of 4
 */
00457 static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00458 {
00459 MOVQ_BFE(mm6);
00460 __asm__ volatile(
00461 "lea (%3, %3), %%"REG_a" \n\t" // REG_a = 2 * line_size
00462 "movq (%1), %%mm0 \n\t" // mm0 = source row 0 (primes the pipeline)
00463 ".p2align 3 \n\t"
00464 "1: \n\t"
00465 "movq (%1, %3), %%mm1 \n\t" // mm1 = row 1
00466 "movq (%1, %%"REG_a"), %%mm2 \n\t" // mm2 = row 2
00467 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) // mm4 from rows 0+1, mm5 from rows 1+2
00468 "movq (%2), %%mm3 \n\t" // current destination row 0
00469 OP_AVG(%%mm3, %%mm4, %%mm0, %%mm6) // blend into mm0 (row 0 is no longer needed there)
00470 "movq (%2, %3), %%mm3 \n\t" // current destination row 1
00471 OP_AVG(%%mm3, %%mm5, %%mm1, %%mm6)
00472 "movq %%mm0, (%2) \n\t"
00473 "movq %%mm1, (%2, %3) \n\t"
00474 "add %%"REG_a", %1 \n\t"
00475 "add %%"REG_a", %2 \n\t"
00476
00477 "movq (%1, %3), %%mm1 \n\t" // mm1 = row 3
00478 "movq (%1, %%"REG_a"), %%mm0 \n\t" // mm0 = row 4, reused as "row 0" of the next iteration
00479 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) // mm4 from rows 2+3, mm5 from rows 3+4
00480 "movq (%2), %%mm3 \n\t"
00481 OP_AVG(%%mm3, %%mm4, %%mm2, %%mm6) // blend into mm2 so that mm0 (row 4) survives
00482 "movq (%2, %3), %%mm3 \n\t"
00483 OP_AVG(%%mm3, %%mm5, %%mm1, %%mm6)
00484 "movq %%mm2, (%2) \n\t"
00485 "movq %%mm1, (%2, %3) \n\t"
00486 "add %%"REG_a", %1 \n\t"
00487 "add %%"REG_a", %2 \n\t"
00488
00489 "subl $4, %0 \n\t" // four rows done
00490 "jnz 1b \n\t"
00491 :"+g"(h), "+S"(pixels), "+D"(block) // %0 = h, %1 = pixels, %2 = block
00492 :"r"((x86_reg)line_size) // %3 = line_size
00493 :REG_a, "memory");
00494 }
00495
00496
/**
 * Diagonal (x+y) half-pel interpolation of an 8-byte-wide block, blended into
 * the destination ("avg" variant).
 *
 * The interpolated byte is (a + b + c + d + r) >> 2, where a/b are
 * horizontally adjacent bytes of one source row, c/d the same two columns of
 * the next row, and r is the per-word value SET_RND placed in mm6. That value
 * is then combined with the byte already in the destination by OP_AVG and
 * stored. MOVQ_ZERO, SET_RND and OP_AVG are macros defined outside this chunk;
 * the byte-to-word unpacking only works if mm7 is zero, which MOVQ_ZERO
 * presumably guarantees -- TODO confirm.
 *
 * @param block     destination, read and rewritten (8 bytes per row; the
 *                  pointer itself is not advanced, REG_a is the running offset)
 * @param pixels    source; h + 1 rows of 9 bytes are read
 * @param line_size byte stride, applied to both block and pixels
 * @param h         row count; two rows are handled per loop iteration and the
 *                  loop only terminates when h reaches exactly 0, so h has to
 *                  be a positive multiple of 2
 */
00497 static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00498 {
00499 MOVQ_ZERO(mm7);
00500 SET_RND(mm6);
00501 __asm__ volatile(
00502 "movq (%1), %%mm0 \n\t" // row 0, bytes 0..7
00503 "movq 1(%1), %%mm4 \n\t" // row 0, bytes 1..8
00504 "movq %%mm0, %%mm1 \n\t"
00505 "movq %%mm4, %%mm5 \n\t"
00506 "punpcklbw %%mm7, %%mm0 \n\t" // widen low 4 bytes to words
00507 "punpcklbw %%mm7, %%mm4 \n\t"
00508 "punpckhbw %%mm7, %%mm1 \n\t" // widen high 4 bytes to words
00509 "punpckhbw %%mm7, %%mm5 \n\t"
00510 "paddusw %%mm0, %%mm4 \n\t" // mm4/mm5 = horizontal pair sums of row 0 (low/high)
00511 "paddusw %%mm1, %%mm5 \n\t"
00512 "xor %%"REG_a", %%"REG_a" \n\t" // REG_a = running byte offset, starts at 0
00513 "add %3, %1 \n\t" // pixels now points at row 1
00514 ".p2align 3 \n\t"
00515 "1: \n\t"
00516 "movq (%1, %%"REG_a"), %%mm0 \n\t" // next source row
00517 "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
00518 "movq %%mm0, %%mm1 \n\t"
00519 "movq %%mm2, %%mm3 \n\t"
00520 "punpcklbw %%mm7, %%mm0 \n\t"
00521 "punpcklbw %%mm7, %%mm2 \n\t"
00522 "punpckhbw %%mm7, %%mm1 \n\t"
00523 "punpckhbw %%mm7, %%mm3 \n\t"
00524 "paddusw %%mm2, %%mm0 \n\t" // mm0/mm1 = horizontal pair sums of this row
00525 "paddusw %%mm3, %%mm1 \n\t"
00526 "paddusw %%mm6, %%mm4 \n\t" // previous row sums + rounder
00527 "paddusw %%mm6, %%mm5 \n\t"
00528 "paddusw %%mm0, %%mm4 \n\t" // + this row sums
00529 "paddusw %%mm1, %%mm5 \n\t"
00530 "psrlw $2, %%mm4 \n\t" // divide by 4
00531 "psrlw $2, %%mm5 \n\t"
00532 "movq (%2, %%"REG_a"), %%mm3 \n\t" // current destination row
00533 "packuswb %%mm5, %%mm4 \n\t" // interpolated row back to 8 bytes
00534 "pcmpeqd %%mm2, %%mm2 \n\t" // mm2 = all ones
00535 "paddb %%mm2, %%mm2 \n\t" // 0xFF + 0xFF wraps to 0xFE in every byte; built here because mm6 holds the rounder
00536 OP_AVG(%%mm3, %%mm4, %%mm5, %%mm2) // blend with destination; mm2 takes the slot other functions fill with mm6
00537 "movq %%mm5, (%2, %%"REG_a") \n\t"
00538 "add %3, %%"REG_a" \n\t" // next row
00539
00540 "movq (%1, %%"REG_a"), %%mm2 \n\t" // second half: same steps with the register roles swapped
00541 "movq 1(%1, %%"REG_a"), %%mm4 \n\t"
00542 "movq %%mm2, %%mm3 \n\t"
00543 "movq %%mm4, %%mm5 \n\t"
00544 "punpcklbw %%mm7, %%mm2 \n\t"
00545 "punpcklbw %%mm7, %%mm4 \n\t"
00546 "punpckhbw %%mm7, %%mm3 \n\t"
00547 "punpckhbw %%mm7, %%mm5 \n\t"
00548 "paddusw %%mm2, %%mm4 \n\t" // mm4/mm5 = sums of the new row, carried into the next iteration
00549 "paddusw %%mm3, %%mm5 \n\t"
00550 "paddusw %%mm6, %%mm0 \n\t"
00551 "paddusw %%mm6, %%mm1 \n\t"
00552 "paddusw %%mm4, %%mm0 \n\t"
00553 "paddusw %%mm5, %%mm1 \n\t"
00554 "psrlw $2, %%mm0 \n\t"
00555 "psrlw $2, %%mm1 \n\t"
00556 "movq (%2, %%"REG_a"), %%mm3 \n\t" // current destination row
00557 "packuswb %%mm1, %%mm0 \n\t"
00558 "pcmpeqd %%mm2, %%mm2 \n\t" // rebuild the 0xFE byte mask (mm2 was reused above)
00559 "paddb %%mm2, %%mm2 \n\t"
00560 OP_AVG(%%mm3, %%mm0, %%mm1, %%mm2) // result goes to mm1 so mm4/mm5 stay intact for the next iteration
00561 "movq %%mm1, (%2, %%"REG_a") \n\t"
00562 "add %3, %%"REG_a" \n\t"
00563
00564 "subl $2, %0 \n\t" // two rows done
00565 "jnz 1b \n\t"
00566 :"+g"(h), "+S"(pixels) // %0 = h, %1 = pixels (advanced once, before the loop)
00567 :"D"(block), "r"((x86_reg)line_size) // %2 = block (read-only pointer), %3 = line_size
00568 :REG_a, "memory");
00569 }
00570
00571
00572 static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00573 DEF(put, pixels8_y2)(block , pixels , line_size, h);
00574 DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h);
00575 }
00576
00577 static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00578 DEF(put, pixels8_xy2)(block , pixels , line_size, h);
00579 DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h);
00580 }
00581
00582 static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00583 DEF(avg, pixels8_y2)(block , pixels , line_size, h);
00584 DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h);
00585 }
00586
00587 static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00588 DEF(avg, pixels8_xy2)(block , pixels , line_size, h);
00589 DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h);
00590 }