00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "libavcodec/dsputil.h"
00024 #include "dsputil_sh4.h"
00025 #include "sh4.h"
00026
/*
 * 1-D IDCT weights.  IDCT_Ck equals sqrt(2) * cos(k * pi / 16), so IDCT_C4
 * is exactly 1.  The names exist only while the two tables below are being
 * initialised and are removed again afterwards.
 */
#define IDCT_C1 1.38703984532214752434
#define IDCT_C2 1.30656296487637657577
#define IDCT_C3 1.17587560241935884520
#define IDCT_C4 1.00000000000000000000
#define IDCT_C5 0.78569495838710234903
#define IDCT_C6 0.54119610014619712324
#define IDCT_C7 0.27589937928294311353

/*
 * 4x4 matrix for the even-indexed inputs (0, 2, 4, 6).  Each line of four
 * values holds the weights one input contributes to outputs 0..3.
 * Kept 8-byte aligned; load_matrix() reads the table in eight moves.
 */
static const float even_table[16] __attribute__ ((aligned(8))) = {
     IDCT_C4,  IDCT_C4,  IDCT_C4,  IDCT_C4,
     IDCT_C2,  IDCT_C6, -IDCT_C6, -IDCT_C2,
     IDCT_C4, -IDCT_C4, -IDCT_C4,  IDCT_C4,
     IDCT_C6, -IDCT_C2,  IDCT_C2, -IDCT_C6
};

/*
 * 4x4 matrix for the odd-indexed inputs (1, 3, 5, 7), laid out the same way
 * as even_table.
 */
static const float odd_table[16] __attribute__ ((aligned(8))) = {
     IDCT_C1,  IDCT_C3,  IDCT_C5,  IDCT_C7,
     IDCT_C3, -IDCT_C7, -IDCT_C1, -IDCT_C5,
     IDCT_C5, -IDCT_C1,  IDCT_C7,  IDCT_C3,
     IDCT_C7, -IDCT_C5,  IDCT_C3, -IDCT_C1
};

#undef IDCT_C1
#undef IDCT_C2
#undef IDCT_C3
#undef IDCT_C4
#undef IDCT_C5
#undef IDCT_C6
#undef IDCT_C7
00056
/*
 * Copy the 16 floats starting at `table` into the register pairs
 * xd0, xd2, ..., xd14 (the matrix operand later named by "ftrv xmtrx,...").
 *
 * The loads are eight post-incrementing fmov instructions bracketed by a
 * pair of fschg instructions; on SH-4 fschg toggles the FPSCR transfer-size
 * bit, so each fmov inside the bracket moves one register pair and the mode
 * is back to its previous state afterwards.  A private copy of the pointer
 * is advanced, so the expression passed as `table` is not modified.
 */
#define load_matrix(table) \
    do { \
        const float *xmtrx_src = table; \
        __asm__ volatile( \
        " fschg\n" \
        " fmov @%[src]+,xd0\n" \
        " fmov @%[src]+,xd2\n" \
        " fmov @%[src]+,xd4\n" \
        " fmov @%[src]+,xd6\n" \
        " fmov @%[src]+,xd8\n" \
        " fmov @%[src]+,xd10\n" \
        " fmov @%[src]+,xd12\n" \
        " fmov @%[src]+,xd14\n" \
        " fschg\n" \
        : [src] "+r"(xmtrx_src) \
        ); \
    } while (0)
00074
/*
 * Issue "ftrv xmtrx,fv0": on SH-4 this replaces the 4-vector held in
 * fr0..fr3 with its product by the 4x4 matrix currently loaded in XMTRX
 * (see load_matrix()).  The macro expects locals named fr0..fr3 bound to
 * those registers to be in scope (DEFREG declares them); all four are
 * read-write operands of the asm statement.
 *
 * The expansion deliberately has no trailing semicolon, so `ftrv();` is a
 * single statement and stays safe inside an unbraced if/else.
 */
#define ftrv() \
    __asm__ volatile("ftrv xmtrx,fv0" \
        : "+f"(fr0),"+f"(fr1),"+f"(fr2),"+f"(fr3))
00078
/*
 * Declare the four float locals fr0..fr3, each tied to the FPU register of
 * the same name.  These are the operands the ftrv() macro refers to.
 * No trailing semicolon: write `DEFREG;` at the top of the function.
 */
#define DEFREG \
    register float fr0 __asm__("fr0"), \
                   fr1 __asm__("fr1"), \
                   fr2 __asm__("fr2"), \
                   fr3 __asm__("fr3")
00084
/*
 * Scale x down by 2^n using a single-precision multiply by 1/2^n.
 * The whole expansion is parenthesised so the macro can be used as an
 * operand of any surrounding expression (e.g. as a divisor).
 */
#define DESCALE(x,n) ((x)*(1.0f/(1<<(n))))
00086
00087
00088
00089
00090
00091
00092 void idct_sh4(DCTELEM *block)
00093 {
00094 DEFREG;
00095
00096 int i;
00097 float tblock[8*8],*fblock;
00098 int ofs1,ofs2,ofs3;
00099 int fpscr;
00100
00101 fp_single_enter(fpscr);
00102
00103
00104
00105
00106 load_matrix(even_table);
00107
00108 fblock = tblock+4;
00109 i = 8;
00110 do {
00111 fr0 = block[0];
00112 fr1 = block[2];
00113 fr2 = block[4];
00114 fr3 = block[6];
00115 block+=8;
00116 ftrv();
00117 *--fblock = fr3;
00118 *--fblock = fr2;
00119 *--fblock = fr1;
00120 *--fblock = fr0;
00121 fblock+=8+4;
00122 } while(--i);
00123 block-=8*8;
00124 fblock-=8*8+4;
00125
00126 load_matrix(odd_table);
00127
00128 i = 8;
00129
00130 do {
00131 float t0,t1,t2,t3;
00132 fr0 = block[1];
00133 fr1 = block[3];
00134 fr2 = block[5];
00135 fr3 = block[7];
00136 block+=8;
00137 ftrv();
00138 t0 = *fblock++;
00139 t1 = *fblock++;
00140 t2 = *fblock++;
00141 t3 = *fblock++;
00142 fblock+=4;
00143 *--fblock = t0 - fr0;
00144 *--fblock = t1 - fr1;
00145 *--fblock = t2 - fr2;
00146 *--fblock = t3 - fr3;
00147 *--fblock = t3 + fr3;
00148 *--fblock = t2 + fr2;
00149 *--fblock = t1 + fr1;
00150 *--fblock = t0 + fr0;
00151 fblock+=8;
00152 } while(--i);
00153 block-=8*8;
00154 fblock-=8*8;
00155
00156
00157
00158
00159 load_matrix(even_table);
00160
00161 ofs1 = sizeof(float)*2*8;
00162 ofs2 = sizeof(float)*4*8;
00163 ofs3 = sizeof(float)*6*8;
00164
00165 i = 8;
00166
00167 #define OA(fblock,ofs) *(float*)((char*)fblock + ofs)
00168
00169 do {
00170 fr0 = OA(fblock, 0);
00171 fr1 = OA(fblock,ofs1);
00172 fr2 = OA(fblock,ofs2);
00173 fr3 = OA(fblock,ofs3);
00174 ftrv();
00175 OA(fblock,0 ) = fr0;
00176 OA(fblock,ofs1) = fr1;
00177 OA(fblock,ofs2) = fr2;
00178 OA(fblock,ofs3) = fr3;
00179 fblock++;
00180 } while(--i);
00181 fblock-=8;
00182
00183 load_matrix(odd_table);
00184
00185 i=8;
00186 do {
00187 float t0,t1,t2,t3;
00188 t0 = OA(fblock, 0);
00189 t1 = OA(fblock,ofs1);
00190 t2 = OA(fblock,ofs2);
00191 t3 = OA(fblock,ofs3);
00192 fblock+=8;
00193 fr0 = OA(fblock, 0);
00194 fr1 = OA(fblock,ofs1);
00195 fr2 = OA(fblock,ofs2);
00196 fr3 = OA(fblock,ofs3);
00197 fblock+=-8+1;
00198 ftrv();
00199 block[8*0] = DESCALE(t0 + fr0,3);
00200 block[8*7] = DESCALE(t0 - fr0,3);
00201 block[8*1] = DESCALE(t1 + fr1,3);
00202 block[8*6] = DESCALE(t1 - fr1,3);
00203 block[8*2] = DESCALE(t2 + fr2,3);
00204 block[8*5] = DESCALE(t2 - fr2,3);
00205 block[8*3] = DESCALE(t3 + fr3,3);
00206 block[8*4] = DESCALE(t3 - fr3,3);
00207 block++;
00208 } while(--i);
00209
00210 fp_single_leave(fpscr);
00211 }