00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "libavcodec/dsputil.h"
00022
00023 #include "dsputil_altivec.h"
00024 #include "util_altivec.h"
00025
00026 static void vector_fmul_altivec(float *dst, const float *src0, const float *src1, int len)
00027 {
00028 int i;
00029 vector float d0, d1, s, zero = (vector float)vec_splat_u32(0);
00030 for(i=0; i<len-7; i+=8) {
00031 d0 = vec_ld(0, src0+i);
00032 s = vec_ld(0, src1+i);
00033 d1 = vec_ld(16, src0+i);
00034 d0 = vec_madd(d0, s, zero);
00035 d1 = vec_madd(d1, vec_ld(16,src1+i), zero);
00036 vec_st(d0, 0, dst+i);
00037 vec_st(d1, 16, dst+i);
00038 }
00039 }
00040
00041 static void vector_fmul_reverse_altivec(float *dst, const float *src0,
00042 const float *src1, int len)
00043 {
00044 int i;
00045 vector float d, s0, s1, h0, l0,
00046 s2, s3, zero = (vector float)vec_splat_u32(0);
00047 src1 += len-4;
00048 for(i=0; i<len-7; i+=8) {
00049 s1 = vec_ld(0, src1-i);
00050 s0 = vec_ld(0, src0+i);
00051 l0 = vec_mergel(s1, s1);
00052 s3 = vec_ld(-16, src1-i);
00053 h0 = vec_mergeh(s1, s1);
00054 s2 = vec_ld(16, src0+i);
00055 s1 = vec_mergeh(vec_mergel(l0,h0),
00056 vec_mergeh(l0,h0));
00057
00058 l0 = vec_mergel(s3, s3);
00059 d = vec_madd(s0, s1, zero);
00060 h0 = vec_mergeh(s3, s3);
00061 vec_st(d, 0, dst+i);
00062 s3 = vec_mergeh(vec_mergel(l0,h0),
00063 vec_mergeh(l0,h0));
00064 d = vec_madd(s2, s3, zero);
00065 vec_st(d, 16, dst+i);
00066 }
00067 }
00068
00069 static void vector_fmul_add_altivec(float *dst, const float *src0,
00070 const float *src1, const float *src2,
00071 int len)
00072 {
00073 int i;
00074 vector float d, s0, s1, s2, t0, t1, edges;
00075 vector unsigned char align = vec_lvsr(0,dst),
00076 mask = vec_lvsl(0, dst);
00077
00078 for (i=0; i<len-3; i+=4) {
00079 t0 = vec_ld(0, dst+i);
00080 t1 = vec_ld(15, dst+i);
00081 s0 = vec_ld(0, src0+i);
00082 s1 = vec_ld(0, src1+i);
00083 s2 = vec_ld(0, src2+i);
00084 edges = vec_perm(t1 ,t0, mask);
00085 d = vec_madd(s0,s1,s2);
00086 t1 = vec_perm(d, edges, align);
00087 t0 = vec_perm(edges, d, align);
00088 vec_st(t1, 15, dst+i);
00089 vec_st(t0, 0, dst+i);
00090 }
00091 }
00092
00093 static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, int len)
00094 {
00095 vector float zero, t0, t1, s0, s1, wi, wj;
00096 const vector unsigned char reverse = vcprm(3,2,1,0);
00097 int i,j;
00098
00099 dst += len;
00100 win += len;
00101 src0+= len;
00102
00103 zero = (vector float)vec_splat_u32(0);
00104
00105 for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {
00106 s0 = vec_ld(i, src0);
00107 s1 = vec_ld(j, src1);
00108 wi = vec_ld(i, win);
00109 wj = vec_ld(j, win);
00110
00111 s1 = vec_perm(s1, s1, reverse);
00112 wj = vec_perm(wj, wj, reverse);
00113
00114 t0 = vec_madd(s0, wj, zero);
00115 t0 = vec_nmsub(s1, wi, t0);
00116 t1 = vec_madd(s0, wi, zero);
00117 t1 = vec_madd(s1, wj, t1);
00118 t1 = vec_perm(t1, t1, reverse);
00119
00120 vec_st(t0, i, dst);
00121 vec_st(t1, j, dst);
00122 }
00123 }
00124
00125 void float_init_altivec(DSPContext* c, AVCodecContext *avctx)
00126 {
00127 c->vector_fmul = vector_fmul_altivec;
00128 c->vector_fmul_reverse = vector_fmul_reverse_altivec;
00129 c->vector_fmul_add = vector_fmul_add_altivec;
00130 if(!(avctx->flags & CODEC_FLAG_BITEXACT)) {
00131 c->vector_fmul_window = vector_fmul_window_altivec;
00132 }
00133 }