00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "libavutil/x86_cpu.h"
00024 #include "dsputil_mmx.h"
00025 #include "dwt.h"
00026
/*
 * COMPOSE_VERTICAL(ext, align)
 *
 * For one SIMD flavour (suffix `ext`, e.g. _mmx or _sse2) this macro emits
 *   - prototypes for the external ff_*##ext routines, and
 *   - static C wrappers with the same signatures (without the ff_ prefix).
 *
 * `align` is the group size used to split the work; the masks below only
 * round down correctly when it is a power of two.
 *
 * Vertical wrappers: the elements from (width rounded down to a multiple of
 * align) up to width are computed in C with the COMPOSE_* macros, then the
 * external routine is called with the rounded-down width.
 *
 * Horizontal Haar wrappers: the external routine is called with the full
 * width w first; afterwards the C loop writes the output pairs for the
 * indices between (w/2 rounded down to a multiple of align) and w/2.
 * NOTE(review): presumably the external routines only handle whole groups
 * of `align` elements - confirm against the assembly.
 */
#define COMPOSE_VERTICAL(ext, align) \
void ff_vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \
void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w); \
void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w); \
\
/* 5/3 low-pass step: updates b1 in place from b0, b1, b2. */ \
static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
{ \
    const int simd_width = width & ~((align) - 1); \
    int n; \
\
    for (n = simd_width; n < width; n++) \
        b1[n] = COMPOSE_53iL0(b0[n], b1[n], b2[n]); \
\
    ff_vertical_compose53iL0##ext(b0, b1, b2, simd_width); \
} \
\
/* Dirac 5/3 high-pass step: updates b1 in place from b0, b1, b2. */ \
static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
{ \
    const int simd_width = width & ~((align) - 1); \
    int n; \
\
    for (n = simd_width; n < width; n++) \
        b1[n] = COMPOSE_DIRAC53iH0(b0[n], b1[n], b2[n]); \
\
    ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, simd_width); \
} \
\
/* DD 13/7 low-pass step: updates b2 in place from b0..b4. */ \
static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
                                           IDWTELEM *b3, IDWTELEM *b4, int width) \
{ \
    const int simd_width = width & ~((align) - 1); \
    int n; \
\
    for (n = simd_width; n < width; n++) \
        b2[n] = COMPOSE_DD137iL0(b0[n], b1[n], b2[n], b3[n], b4[n]); \
\
    ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, simd_width); \
} \
\
/* DD 9/7 high-pass step: updates b2 in place from b0..b4. */ \
static void vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
                                          IDWTELEM *b3, IDWTELEM *b4, int width) \
{ \
    const int simd_width = width & ~((align) - 1); \
    int n; \
\
    for (n = simd_width; n < width; n++) \
        b2[n] = COMPOSE_DD97iH0(b0[n], b1[n], b2[n], b3[n], b4[n]); \
\
    ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, simd_width); \
} \
\
/* Haar step: b0 is updated first, and the new b0 feeds the b1 update. */ \
static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
{ \
    const int simd_width = width & ~((align) - 1); \
    int n; \
\
    for (n = simd_width; n < width; n++) { \
        b0[n] = COMPOSE_HAARiL0(b0[n], b1[n]); \
        b1[n] = COMPOSE_HAARiH0(b1[n], b0[n]); \
    } \
\
    ff_vertical_compose_haar##ext(b0, b1, simd_width); \
} \
\
/* Horizontal Haar, no final shift: external routine first, then the C tail. */ \
static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w) \
{ \
    const int half = w >> 1; \
    int k = half - (half & ((align) - 1)); \
\
    ff_horizontal_compose_haar0i##ext(b, tmp, w); \
\
    while (k < half) { \
        b[2 * k]     = tmp[k]; \
        b[2 * k + 1] = COMPOSE_HAARiH0(b[k + half], tmp[k]); \
        k++; \
    } \
} \
\
/* Horizontal Haar with a rounding shift by one on every output sample. */ \
static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w) \
{ \
    const int half = w >> 1; \
    int k = half - (half & ((align) - 1)); \
\
    ff_horizontal_compose_haar1i##ext(b, tmp, w); \
\
    while (k < half) { \
        b[2 * k]     = (tmp[k] + 1) >> 1; \
        b[2 * k + 1] = (COMPOSE_HAARiH0(b[k + half], tmp[k]) + 1) >> 1; \
        k++; \
    } \
}
00111
/*
 * Everything below wraps external routines that are declared here but
 * defined elsewhere, so it is all kept under HAVE_YASM: the wrappers are only
 * referenced from the HAVE_YASM section of ff_spatial_idwt_init_mmx().
 */
#if HAVE_YASM
/* The _mmx wrappers are only instantiated for non-x86-64 builds. */
#if !ARCH_X86_64
COMPOSE_VERTICAL(_mmx, 4)
#endif
COMPOSE_VERTICAL(_sse2, 8)

void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w);

/**
 * Horizontal DD 9/7 composition: external SSSE3 routine plus a C tail.
 *
 * The external routine is called with the full width first; the loop then
 * writes the output pairs for the indices between (w/2 rounded down to a
 * multiple of 8) and w/2.
 *
 * @param b   line buffer, read at b[x + w/2] and written at b[2x], b[2x+1]
 * @param tmp scratch buffer; the tail reads tmp[x-1] .. tmp[x+2]
 * @param w   line width in elements
 *
 * NOTE(review): the tail reads tmp[-1] when w/2 < 8 and tmp[w/2 + 1] at the
 * last index, so it presumably relies on the external routine (or the
 * caller) having filled edge padding in tmp - confirm against the assembly.
 */
static void horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w)
{
    int w2 = w >> 1;
    int x  = w2 - (w2 & 7);

    /* Must run before the tail: the tail loop consumes tmp afterwards. */
    ff_horizontal_compose_dd97i_ssse3(b, tmp, w);

    for (; x < w2; x++) {
        b[2*x  ] = (tmp[x] + 1) >> 1;
        b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1) >> 1;
    }
}
#endif /* HAVE_YASM */
00133
00134 void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
00135 {
00136 #if HAVE_YASM
00137 int mm_flags = av_get_cpu_flags();
00138
00139 #if !ARCH_X86_64
00140 if (!(mm_flags & AV_CPU_FLAG_MMX))
00141 return;
00142
00143 switch (type) {
00144 case DWT_DIRAC_DD9_7:
00145 d->vertical_compose_l0 = vertical_compose53iL0_mmx;
00146 d->vertical_compose_h0 = vertical_compose_dd97iH0_mmx;
00147 break;
00148 case DWT_DIRAC_LEGALL5_3:
00149 d->vertical_compose_l0 = vertical_compose53iL0_mmx;
00150 d->vertical_compose_h0 = vertical_compose_dirac53iH0_mmx;
00151 break;
00152 case DWT_DIRAC_DD13_7:
00153 d->vertical_compose_l0 = vertical_compose_dd137iL0_mmx;
00154 d->vertical_compose_h0 = vertical_compose_dd97iH0_mmx;
00155 break;
00156 case DWT_DIRAC_HAAR0:
00157 d->vertical_compose = vertical_compose_haar_mmx;
00158 d->horizontal_compose = horizontal_compose_haar0i_mmx;
00159 break;
00160 case DWT_DIRAC_HAAR1:
00161 d->vertical_compose = vertical_compose_haar_mmx;
00162 d->horizontal_compose = horizontal_compose_haar1i_mmx;
00163 break;
00164 }
00165 #endif
00166
00167 if (!(mm_flags & AV_CPU_FLAG_SSE2))
00168 return;
00169
00170 switch (type) {
00171 case DWT_DIRAC_DD9_7:
00172 d->vertical_compose_l0 = vertical_compose53iL0_sse2;
00173 d->vertical_compose_h0 = vertical_compose_dd97iH0_sse2;
00174 break;
00175 case DWT_DIRAC_LEGALL5_3:
00176 d->vertical_compose_l0 = vertical_compose53iL0_sse2;
00177 d->vertical_compose_h0 = vertical_compose_dirac53iH0_sse2;
00178 break;
00179 case DWT_DIRAC_DD13_7:
00180 d->vertical_compose_l0 = vertical_compose_dd137iL0_sse2;
00181 d->vertical_compose_h0 = vertical_compose_dd97iH0_sse2;
00182 break;
00183 case DWT_DIRAC_HAAR0:
00184 d->vertical_compose = vertical_compose_haar_sse2;
00185 d->horizontal_compose = horizontal_compose_haar0i_sse2;
00186 break;
00187 case DWT_DIRAC_HAAR1:
00188 d->vertical_compose = vertical_compose_haar_sse2;
00189 d->horizontal_compose = horizontal_compose_haar1i_sse2;
00190 break;
00191 }
00192
00193 if (!(mm_flags & AV_CPU_FLAG_SSSE3))
00194 return;
00195
00196 switch (type) {
00197 case DWT_DIRAC_DD9_7:
00198 d->horizontal_compose = horizontal_compose_dd97i_ssse3;
00199 break;
00200 }
00201 #endif // HAVE_YASM
00202 }