00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "libavcodec/dsputil.h"
00026 #include "mmi.h"
00027
00028 void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block);
00029 void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block);
00030 void ff_mmi_idct(DCTELEM *block);
00031
00032 static void clear_blocks_mmi(DCTELEM * blocks)
00033 {
00034 __asm__ volatile(
00035 ".set noreorder \n"
00036 "addiu $9, %0, 768 \n"
00037 "nop \n"
00038 "1: \n"
00039 "sq $0, 0(%0) \n"
00040 "move $8, %0 \n"
00041 "addi %0, %0, 64 \n"
00042 "sq $0, 16($8) \n"
00043 "slt $10, %0, $9 \n"
00044 "sq $0, 32($8) \n"
00045 "bnez $10, 1b \n"
00046 "sq $0, 48($8) \n"
00047 ".set reorder \n"
00048 : "+r" (blocks) :: "$8", "$9", "memory" );
00049 }
00050
00051
00052 static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
00053 {
00054 __asm__ volatile(
00055 ".set push \n\t"
00056 ".set mips3 \n\t"
00057 "ld $8, 0(%0) \n\t"
00058 "add %0, %0, %2 \n\t"
00059 "ld $9, 0(%0) \n\t"
00060 "add %0, %0, %2 \n\t"
00061 "ld $10, 0(%0) \n\t"
00062 "pextlb $8, $0, $8 \n\t"
00063 "sq $8, 0(%1) \n\t"
00064 "add %0, %0, %2 \n\t"
00065 "ld $8, 0(%0) \n\t"
00066 "pextlb $9, $0, $9 \n\t"
00067 "sq $9, 16(%1) \n\t"
00068 "add %0, %0, %2 \n\t"
00069 "ld $9, 0(%0) \n\t"
00070 "pextlb $10, $0, $10 \n\t"
00071 "sq $10, 32(%1) \n\t"
00072 "add %0, %0, %2 \n\t"
00073 "ld $10, 0(%0) \n\t"
00074 "pextlb $8, $0, $8 \n\t"
00075 "sq $8, 48(%1) \n\t"
00076 "add %0, %0, %2 \n\t"
00077 "ld $8, 0(%0) \n\t"
00078 "pextlb $9, $0, $9 \n\t"
00079 "sq $9, 64(%1) \n\t"
00080 "add %0, %0, %2 \n\t"
00081 "ld $9, 0(%0) \n\t"
00082 "pextlb $10, $0, $10 \n\t"
00083 "sq $10, 80(%1) \n\t"
00084 "pextlb $8, $0, $8 \n\t"
00085 "sq $8, 96(%1) \n\t"
00086 "pextlb $9, $0, $9 \n\t"
00087 "sq $9, 112(%1) \n\t"
00088 ".set pop \n\t"
00089 : "+r" (pixels) : "r" (block), "r" (line_size) : "$8", "$9", "$10", "memory" );
00090 }
00091
00092
00093 static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00094 {
00095 __asm__ volatile(
00096 ".set push \n\t"
00097 ".set mips3 \n\t"
00098 "1: \n\t"
00099 "ldr $8, 0(%1) \n\t"
00100 "addiu %2, %2, -1 \n\t"
00101 "ldl $8, 7(%1) \n\t"
00102 "add %1, %1, %3 \n\t"
00103 "sd $8, 0(%0) \n\t"
00104 "add %0, %0, %3 \n\t"
00105 "bgtz %2, 1b \n\t"
00106 ".set pop \n\t"
00107 : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
00108 : "$8", "memory" );
00109 }
00110
00111
00112 static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00113 {
00114 __asm__ volatile (
00115 ".set push \n\t"
00116 ".set mips3 \n\t"
00117 "1: \n\t"
00118 "ldr $8, 0(%1) \n\t"
00119 "add $11, %1, %3 \n\t"
00120 "ldl $8, 7(%1) \n\t"
00121 "add $10, %0, %3 \n\t"
00122 "ldr $9, 8(%1) \n\t"
00123 "ldl $9, 15(%1) \n\t"
00124 "ldr $12, 0($11) \n\t"
00125 "add %1, $11, %3 \n\t"
00126 "ldl $12, 7($11) \n\t"
00127 "pcpyld $8, $9, $8 \n\t"
00128 "sq $8, 0(%0) \n\t"
00129 "ldr $13, 8($11) \n\t"
00130 "addiu %2, %2, -2 \n\t"
00131 "ldl $13, 15($11) \n\t"
00132 "add %0, $10, %3 \n\t"
00133 "pcpyld $12, $13, $12 \n\t"
00134 "sq $12, 0($10) \n\t"
00135 "bgtz %2, 1b \n\t"
00136 ".set pop \n\t"
00137 : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
00138 : "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
00139 }
00140
00141
00142 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
00143 {
00144 const int idct_algo= avctx->idct_algo;
00145 const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
00146
00147 if (!high_bit_depth) {
00148 c->clear_blocks = clear_blocks_mmi;
00149
00150 c->put_pixels_tab[1][0] = put_pixels8_mmi;
00151 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmi;
00152
00153 c->put_pixels_tab[0][0] = put_pixels16_mmi;
00154 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
00155 }
00156
00157 c->get_pixels = get_pixels_mmi;
00158
00159 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){
00160 c->idct_put= ff_mmi_idct_put;
00161 c->idct_add= ff_mmi_idct_add;
00162 c->idct = ff_mmi_idct;
00163 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
00164 }
00165 }
00166