00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "libavutil/cpu.h"
00024 #include "libavcodec/dsputil.h"
00025 #include "dsputil_altivec.h"
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 static void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
00048 {
00049 register int misal = ((unsigned long)blocks & 0x00000010);
00050 register int i = 0;
00051 if (misal) {
00052 ((unsigned long*)blocks)[0] = 0L;
00053 ((unsigned long*)blocks)[1] = 0L;
00054 ((unsigned long*)blocks)[2] = 0L;
00055 ((unsigned long*)blocks)[3] = 0L;
00056 i += 16;
00057 }
00058 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
00059 __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
00060 }
00061 if (misal) {
00062 ((unsigned long*)blocks)[188] = 0L;
00063 ((unsigned long*)blocks)[189] = 0L;
00064 ((unsigned long*)blocks)[190] = 0L;
00065 ((unsigned long*)blocks)[191] = 0L;
00066 i += 16;
00067 }
00068 }
00069
00070
00071
00072 #if HAVE_DCBZL
00073 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00074 {
00075 register int misal = ((unsigned long)blocks & 0x0000007f);
00076 register int i = 0;
00077 if (misal) {
00078
00079
00080
00081 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00082 }
00083 else
00084 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
00085 __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
00086 }
00087 }
00088 #else
00089 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00090 {
00091 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00092 }
00093 #endif
00094
#if HAVE_DCBZL
/**
 * Measure how many bytes a single dcbzl instruction sets to zero.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, one dcbzl is issued on the
 * address in its middle, and the zero bytes in the whole buffer are counted.
 *
 * @return the number of zeroed bytes, or 0 if the scratch buffer could not
 *         be allocated
 */
static long check_dcbzl_effect(void)
{
    char *fakedata = av_malloc(1024);
    char *fakedata_middle;
    long zero = 0;
    long i;
    long count = 0;

    if (!fakedata) {
        return 0L;
    }

    fakedata_middle = fakedata + 512;

    memset(fakedata, 0xFF, 1024);

    /* "b" keeps the base address out of r0. The "memory" clobber tells the
     * compiler that the buffer contents change behind its back; without it
     * the loop below could be evaluated against the 0xFF fill that memset()
     * is known to have written. */
    __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < 1024; i++) {
        if (fakedata[i] == (char)0)
            count++;
    }

    av_free(fakedata);

    return count;
}
#else
/**
 * Variant used when the build has no dcbzl support (HAVE_DCBZL is 0).
 *
 * @return always 0
 */
static long check_dcbzl_effect(void)
{
    return 0;
}
#endif
00136
/**
 * Issue a dcbt cache-touch hint for each of h addresses spaced stride bytes
 * apart, starting at mem.
 *
 * The hint is issued before h is tested, so at least one is always emitted
 * and callers are expected to pass h >= 1.
 *
 * @param mem    first address to touch
 * @param stride byte distance between consecutive addresses
 * @param h      number of addresses to touch
 */
static void prefetch_ppc(void *mem, int stride, int h)
{
    const uint8_t *row = mem;

    for (;;) {
        __asm__ volatile ("dcbt 0,%0" : : "r" (row));
        row += stride;
        if (--h == 0)
            break;
    }
}
00145
00146 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
00147 {
00148 const int high_bit_depth = avctx->bits_per_raw_sample > 8;
00149 int mm_flags = av_get_cpu_flags();
00150
00151 if (avctx->dsp_mask) {
00152 if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
00153 mm_flags |= (avctx->dsp_mask & 0xffff);
00154 else
00155 mm_flags &= ~(avctx->dsp_mask & 0xffff);
00156 }
00157
00158
00159 c->prefetch = prefetch_ppc;
00160 if (!high_bit_depth) {
00161 switch (check_dcbzl_effect()) {
00162 case 32:
00163 c->clear_blocks = clear_blocks_dcbz32_ppc;
00164 break;
00165 case 128:
00166 c->clear_blocks = clear_blocks_dcbz128_ppc;
00167 break;
00168 default:
00169 break;
00170 }
00171 }
00172
00173 #if HAVE_ALTIVEC
00174 if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
00175
00176 if (mm_flags & AV_CPU_FLAG_ALTIVEC) {
00177 dsputil_init_altivec(c, avctx);
00178 float_init_altivec(c, avctx);
00179 int_init_altivec(c, avctx);
00180 c->gmc1 = gmc1_altivec;
00181
00182 #if CONFIG_ENCODERS
00183 if (avctx->bits_per_raw_sample <= 8 &&
00184 (avctx->dct_algo == FF_DCT_AUTO ||
00185 avctx->dct_algo == FF_DCT_ALTIVEC)) {
00186 c->fdct = fdct_altivec;
00187 }
00188 #endif //CONFIG_ENCODERS
00189
00190 if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) {
00191 if ((avctx->idct_algo == FF_IDCT_AUTO) ||
00192 (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
00193 c->idct_put = idct_put_altivec;
00194 c->idct_add = idct_add_altivec;
00195 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00196 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
00197 avctx->idct_algo==FF_IDCT_VP3){
00198 c->idct_put = ff_vp3_idct_put_altivec;
00199 c->idct_add = ff_vp3_idct_add_altivec;
00200 c->idct = ff_vp3_idct_altivec;
00201 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00202 }
00203 }
00204
00205 }
00206 #endif
00207 }