FFmpeg
rv30dsp.c
Go to the documentation of this file.
1 /*
2  * RV30 decoder motion compensation functions
3  * Copyright (c) 2007 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * RV30 decoder motion compensation functions
25  */
26 
27 #include <stddef.h>
28 #include <stdint.h>
29 #include "libavutil/attributes.h"
30 #include "h264chroma.h"
31 #include "h264qpel.h"
32 #include "mathops.h"
33 #include "qpeldsp.h"
34 #include "rv34dsp.h"
35 
/**
 * Generate the RV30 "tpel" luma interpolation kernels for one store operation.
 *
 * OPNAME is pasted in front of every generated function name.
 * OP(pixel, index) performs the store; the index is looked up in the local
 * crop table `cm`, so OP must expand to something that uses `cm`.
 *
 * 8x8 kernels:
 *  - _h_lowpass / _v_lowpass: 4-tap filter (-1, C1, C2, -1) along one axis,
 *    rounded with +8 and scaled with >>4. The callers in this file pass
 *    (C1, C2) = (12, 6) or (6, 12).
 *  - _hv_lowpass:   4x4 filter, (-1, 12, 6, -1) on both axes.
 *  - _hhv_lowpass:  4x4 filter, (-1, 6, 12, -1) horizontally and
 *                   (-1, 12, 6, -1) vertically.
 *  - _hvv_lowpass:  4x4 filter, (-1, 12, 6, -1) horizontally and
 *                   (-1, 6, 12, -1) vertically.
 *  - _hhvv_lowpass: 3x3 filter, (6, 9, 1) on both axes.
 *    All two-dimensional kernels round with +128 and scale with >>8.
 *
 * The 16x16 variants run the matching 8x8 kernel on the four quadrants
 * (top-left, top-right, bottom-left, bottom-right).
 *
 * The 4-tap kernels read one sample before and two samples after the block
 * along each filtered axis; the 3x3 kernel reads two samples after the block
 * on both axes. The caller must make those samples readable.
 *
 * Strides are ptrdiff_t so that the ptrdiff_t stride handed in by the mcXY
 * wrappers is not narrowed to int.
 */
#define RV30_LOWPASS(OPNAME, OP) \
static void OPNAME ## rv30_tpel8_h_lowpass(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, const int C1, const int C2)\
{\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    int x, y;\
    for (y = 0; y < 8; y++) {\
        for (x = 0; x < 8; x++) {\
            OP(dst[x], (-(src[x - 1] + src[x + 2]) + src[x] * C1 + src[x + 1] * C2 + 8) >> 4);\
        }\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
static void OPNAME ## rv30_tpel8_v_lowpass(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, const int C1, const int C2)\
{\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    int x, y;\
    for (x = 0; x < 8; x++) {\
        /* col[k] holds the sample of row k - 1; the whole column is loaded before any store. */\
        int col[11];\
        for (y = 0; y < 11; y++) {\
            col[y] = src[(y - 1) * srcStride];\
        }\
        for (y = 0; y < 8; y++) {\
            OP(dst[y * dstStride], (-(col[y] + col[y + 3]) + col[y + 1] * C1 + col[y + 2] * C2 + 8) >> 4);\
        }\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## rv30_tpel8_hv_lowpass(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    int x, y;\
    for (y = 0; y < 8; y++) {\
        const uint8_t *up = src - srcStride;\
        const uint8_t *r0 = src;\
        const uint8_t *r1 = src + srcStride;\
        const uint8_t *r2 = src + 2 * srcStride;\
        for (x = 0; x < 8; x++) {\
            OP(dst[x], (\
                       up[x - 1] -  12 * up[x] -  6 * up[x + 1] +      up[x + 2]\
                - 12 * r0[x - 1] + 144 * r0[x] + 72 * r0[x + 1] - 12 * r0[x + 2]\
                -  6 * r1[x - 1] +  72 * r1[x] + 36 * r1[x + 1] -  6 * r1[x + 2]\
                +      r2[x - 1] -  12 * r2[x] -  6 * r2[x + 1] +      r2[x + 2]\
                + 128) >> 8);\
        }\
        src += srcStride;\
        dst += dstStride;\
    }\
}\
\
static void OPNAME ## rv30_tpel8_hhv_lowpass(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    int x, y;\
    for (y = 0; y < 8; y++) {\
        const uint8_t *up = src - srcStride;\
        const uint8_t *r0 = src;\
        const uint8_t *r1 = src + srcStride;\
        const uint8_t *r2 = src + 2 * srcStride;\
        for (x = 0; x < 8; x++) {\
            OP(dst[x], (\
                       up[x - 1] -  6 * up[x] -  12 * up[x + 1] +      up[x + 2]\
                - 12 * r0[x - 1] + 72 * r0[x] + 144 * r0[x + 1] - 12 * r0[x + 2]\
                -  6 * r1[x - 1] + 36 * r1[x] +  72 * r1[x + 1] -  6 * r1[x + 2]\
                +      r2[x - 1] -  6 * r2[x] -  12 * r2[x + 1] +      r2[x + 2]\
                + 128) >> 8);\
        }\
        src += srcStride;\
        dst += dstStride;\
    }\
}\
\
static void OPNAME ## rv30_tpel8_hvv_lowpass(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    int x, y;\
    for (y = 0; y < 8; y++) {\
        const uint8_t *up = src - srcStride;\
        const uint8_t *r0 = src;\
        const uint8_t *r1 = src + srcStride;\
        const uint8_t *r2 = src + 2 * srcStride;\
        for (x = 0; x < 8; x++) {\
            OP(dst[x], (\
                       up[x - 1] -  12 * up[x] -  6 * up[x + 1] +      up[x + 2]\
                -  6 * r0[x - 1] +  72 * r0[x] + 36 * r0[x + 1] -  6 * r0[x + 2]\
                - 12 * r1[x - 1] + 144 * r1[x] + 72 * r1[x + 1] - 12 * r1[x + 2]\
                +      r2[x - 1] -  12 * r2[x] -  6 * r2[x + 1] +      r2[x + 2]\
                + 128) >> 8);\
        }\
        src += srcStride;\
        dst += dstStride;\
    }\
}\
\
static void OPNAME ## rv30_tpel8_hhvv_lowpass(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    int x, y;\
    for (y = 0; y < 8; y++) {\
        const uint8_t *r0 = src;\
        const uint8_t *r1 = src + srcStride;\
        const uint8_t *r2 = src + 2 * srcStride;\
        for (x = 0; x < 8; x++) {\
            OP(dst[x], (\
                  36 * r0[x] + 54 * r0[x + 1] + 6 * r0[x + 2]\
                + 54 * r1[x] + 81 * r1[x + 1] + 9 * r1[x + 2]\
                +  6 * r2[x] +  9 * r2[x + 1] +     r2[x + 2]\
                + 128) >> 8);\
        }\
        src += srcStride;\
        dst += dstStride;\
    }\
}\
\
static void OPNAME ## rv30_tpel16_v_lowpass(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, const int C1, const int C2)\
{\
    OPNAME ## rv30_tpel8_v_lowpass(dst,     src,     dstStride, srcStride, C1, C2);\
    OPNAME ## rv30_tpel8_v_lowpass(dst + 8, src + 8, dstStride, srcStride, C1, C2);\
    src += 8 * srcStride;\
    dst += 8 * dstStride;\
    OPNAME ## rv30_tpel8_v_lowpass(dst,     src,     dstStride, srcStride, C1, C2);\
    OPNAME ## rv30_tpel8_v_lowpass(dst + 8, src + 8, dstStride, srcStride, C1, C2);\
}\
\
static void OPNAME ## rv30_tpel16_h_lowpass(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, const int C1, const int C2)\
{\
    OPNAME ## rv30_tpel8_h_lowpass(dst,     src,     dstStride, srcStride, C1, C2);\
    OPNAME ## rv30_tpel8_h_lowpass(dst + 8, src + 8, dstStride, srcStride, C1, C2);\
    src += 8 * srcStride;\
    dst += 8 * dstStride;\
    OPNAME ## rv30_tpel8_h_lowpass(dst,     src,     dstStride, srcStride, C1, C2);\
    OPNAME ## rv30_tpel8_h_lowpass(dst + 8, src + 8, dstStride, srcStride, C1, C2);\
}\
\
static void OPNAME ## rv30_tpel16_hv_lowpass(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    OPNAME ## rv30_tpel8_hv_lowpass(dst,     src,     dstStride, srcStride);\
    OPNAME ## rv30_tpel8_hv_lowpass(dst + 8, src + 8, dstStride, srcStride);\
    src += 8 * srcStride;\
    dst += 8 * dstStride;\
    OPNAME ## rv30_tpel8_hv_lowpass(dst,     src,     dstStride, srcStride);\
    OPNAME ## rv30_tpel8_hv_lowpass(dst + 8, src + 8, dstStride, srcStride);\
}\
\
static void OPNAME ## rv30_tpel16_hhv_lowpass(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    OPNAME ## rv30_tpel8_hhv_lowpass(dst,     src,     dstStride, srcStride);\
    OPNAME ## rv30_tpel8_hhv_lowpass(dst + 8, src + 8, dstStride, srcStride);\
    src += 8 * srcStride;\
    dst += 8 * dstStride;\
    OPNAME ## rv30_tpel8_hhv_lowpass(dst,     src,     dstStride, srcStride);\
    OPNAME ## rv30_tpel8_hhv_lowpass(dst + 8, src + 8, dstStride, srcStride);\
}\
\
static void OPNAME ## rv30_tpel16_hvv_lowpass(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    OPNAME ## rv30_tpel8_hvv_lowpass(dst,     src,     dstStride, srcStride);\
    OPNAME ## rv30_tpel8_hvv_lowpass(dst + 8, src + 8, dstStride, srcStride);\
    src += 8 * srcStride;\
    dst += 8 * dstStride;\
    OPNAME ## rv30_tpel8_hvv_lowpass(dst,     src,     dstStride, srcStride);\
    OPNAME ## rv30_tpel8_hvv_lowpass(dst + 8, src + 8, dstStride, srcStride);\
}\
\
static void OPNAME ## rv30_tpel16_hhvv_lowpass(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    OPNAME ## rv30_tpel8_hhvv_lowpass(dst,     src,     dstStride, srcStride);\
    OPNAME ## rv30_tpel8_hhvv_lowpass(dst + 8, src + 8, dstStride, srcStride);\
    src += 8 * srcStride;\
    dst += 8 * dstStride;\
    OPNAME ## rv30_tpel8_hhvv_lowpass(dst,     src,     dstStride, srcStride);\
    OPNAME ## rv30_tpel8_hhvv_lowpass(dst + 8, src + 8, dstStride, srcStride);\
}
215 
/**
 * Generate the mcXY entry points for one store flavour and block size.
 *
 * OPNAME is the name prefix (put_ or avg_ in this file) and SIZE is the
 * block edge (8 or 16 in this file). Each wrapper takes (dst, src, stride),
 * uses the same stride for both buffers, and forwards to the matching
 * OPNAME rv30_tpel SIZE *_lowpass kernel:
 *
 *   mc10 -> h_lowpass (12, 6)     mc20 -> h_lowpass (6, 12)
 *   mc01 -> v_lowpass (12, 6)     mc02 -> v_lowpass (6, 12)
 *   mc11 -> hv_lowpass            mc21 -> hhv_lowpass
 *   mc12 -> hvv_lowpass           mc22 -> hhvv_lowpass
 */
#define RV30_MC(OPNAME, SIZE) \
static void OPNAME ## rv30_tpel ## SIZE ## _mc10_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv30_tpel ## SIZE ## _h_lowpass(dst, src, stride, stride, 12, 6);\
}\
\
static void OPNAME ## rv30_tpel ## SIZE ## _mc20_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv30_tpel ## SIZE ## _h_lowpass(dst, src, stride, stride, 6, 12);\
}\
\
static void OPNAME ## rv30_tpel ## SIZE ## _mc01_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv30_tpel ## SIZE ## _v_lowpass(dst, src, stride, stride, 12, 6);\
}\
\
static void OPNAME ## rv30_tpel ## SIZE ## _mc02_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv30_tpel ## SIZE ## _v_lowpass(dst, src, stride, stride, 6, 12);\
}\
\
static void OPNAME ## rv30_tpel ## SIZE ## _mc11_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv30_tpel ## SIZE ## _hv_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## rv30_tpel ## SIZE ## _mc12_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv30_tpel ## SIZE ## _hvv_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## rv30_tpel ## SIZE ## _mc21_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv30_tpel ## SIZE ## _hhv_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## rv30_tpel ## SIZE ## _mc22_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv30_tpel ## SIZE ## _hhvv_lowpass(dst, src, stride, stride);\
}
257 
/*
 * Store operators passed to RV30_LOWPASS as OP(a, b). Both look b up in a
 * table named `cm`, which must be in scope wherever the macro is expanded.
 *   op_put: overwrite a with cm[b].
 *   op_avg: replace a with the rounded-up average of a and cm[b].
 */
#define op_avg(a, b) a = (((a) + cm[b] + 1) >> 1)
#define op_put(a, b) a = cm[b]
260 
261 RV30_LOWPASS(put_ , op_put)
262 RV30_LOWPASS(avg_ , op_avg)
263 RV30_MC(put_, 8)
264 RV30_MC(put_, 16)
265 RV30_MC(avg_, 8)
266 RV30_MC(avg_, 16)
267 
269 {
270  H264ChromaContext h264chroma;
271  H264QpelContext qpel;
272 
274  ff_h264chroma_init(&h264chroma, 8);
275  ff_h264qpel_init(&qpel, 8);
276 
277  c->put_pixels_tab[0][ 0] = qpel.put_h264_qpel_pixels_tab[0][0];
278  c->put_pixels_tab[0][ 1] = put_rv30_tpel16_mc10_c;
279  c->put_pixels_tab[0][ 2] = put_rv30_tpel16_mc20_c;
280  c->put_pixels_tab[0][ 4] = put_rv30_tpel16_mc01_c;
281  c->put_pixels_tab[0][ 5] = put_rv30_tpel16_mc11_c;
282  c->put_pixels_tab[0][ 6] = put_rv30_tpel16_mc21_c;
283  c->put_pixels_tab[0][ 8] = put_rv30_tpel16_mc02_c;
284  c->put_pixels_tab[0][ 9] = put_rv30_tpel16_mc12_c;
285  c->put_pixels_tab[0][10] = put_rv30_tpel16_mc22_c;
286  c->avg_pixels_tab[0][ 0] = qpel.avg_h264_qpel_pixels_tab[0][0];
287  c->avg_pixels_tab[0][ 1] = avg_rv30_tpel16_mc10_c;
288  c->avg_pixels_tab[0][ 2] = avg_rv30_tpel16_mc20_c;
289  c->avg_pixels_tab[0][ 4] = avg_rv30_tpel16_mc01_c;
290  c->avg_pixels_tab[0][ 5] = avg_rv30_tpel16_mc11_c;
291  c->avg_pixels_tab[0][ 6] = avg_rv30_tpel16_mc21_c;
292  c->avg_pixels_tab[0][ 8] = avg_rv30_tpel16_mc02_c;
293  c->avg_pixels_tab[0][ 9] = avg_rv30_tpel16_mc12_c;
294  c->avg_pixels_tab[0][10] = avg_rv30_tpel16_mc22_c;
295  c->put_pixels_tab[1][ 0] = qpel.put_h264_qpel_pixels_tab[1][0];
296  c->put_pixels_tab[1][ 1] = put_rv30_tpel8_mc10_c;
297  c->put_pixels_tab[1][ 2] = put_rv30_tpel8_mc20_c;
298  c->put_pixels_tab[1][ 4] = put_rv30_tpel8_mc01_c;
299  c->put_pixels_tab[1][ 5] = put_rv30_tpel8_mc11_c;
300  c->put_pixels_tab[1][ 6] = put_rv30_tpel8_mc21_c;
301  c->put_pixels_tab[1][ 8] = put_rv30_tpel8_mc02_c;
302  c->put_pixels_tab[1][ 9] = put_rv30_tpel8_mc12_c;
303  c->put_pixels_tab[1][10] = put_rv30_tpel8_mc22_c;
304  c->avg_pixels_tab[1][ 0] = qpel.avg_h264_qpel_pixels_tab[1][0];
305  c->avg_pixels_tab[1][ 1] = avg_rv30_tpel8_mc10_c;
306  c->avg_pixels_tab[1][ 2] = avg_rv30_tpel8_mc20_c;
307  c->avg_pixels_tab[1][ 4] = avg_rv30_tpel8_mc01_c;
308  c->avg_pixels_tab[1][ 5] = avg_rv30_tpel8_mc11_c;
309  c->avg_pixels_tab[1][ 6] = avg_rv30_tpel8_mc21_c;
310  c->avg_pixels_tab[1][ 8] = avg_rv30_tpel8_mc02_c;
311  c->avg_pixels_tab[1][ 9] = avg_rv30_tpel8_mc12_c;
312  c->avg_pixels_tab[1][10] = avg_rv30_tpel8_mc22_c;
313 
314  c->put_chroma_pixels_tab[0] = h264chroma.put_h264_chroma_pixels_tab[0];
315  c->put_chroma_pixels_tab[1] = h264chroma.put_h264_chroma_pixels_tab[1];
316  c->avg_chroma_pixels_tab[0] = h264chroma.avg_h264_chroma_pixels_tab[0];
317  c->avg_chroma_pixels_tab[1] = h264chroma.avg_h264_chroma_pixels_tab[1];
318 }
H264ChromaContext::avg_h264_chroma_pixels_tab
h264_chroma_mc_func avg_h264_chroma_pixels_tab[4]
Definition: h264chroma.h:29
H264QpelContext::avg_h264_qpel_pixels_tab
qpel_mc_func avg_h264_qpel_pixels_tab[4][16]
Definition: h264qpel.h:29
h264qpel.h
ff_rv30dsp_init
av_cold void ff_rv30dsp_init(RV34DSPContext *c)
Definition: rv30dsp.c:268
op_put
#define op_put(a, b)
Definition: rv30dsp.c:259
op_avg
#define op_avg(a, b)
Definition: rv30dsp.c:258
ff_h264qpel_init
av_cold void ff_h264qpel_init(H264QpelContext *c, int bit_depth)
Definition: h264qpel.c:49
av_cold
#define av_cold
Definition: attributes.h:90
ff_rv34dsp_init
av_cold void ff_rv34dsp_init(RV34DSPContext *c)
Definition: rv34dsp.c:131
mathops.h
qpeldsp.h
H264ChromaContext::put_h264_chroma_pixels_tab
h264_chroma_mc_func put_h264_chroma_pixels_tab[4]
Definition: h264chroma.h:28
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, and accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem like nitpicking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c [...]
Definition: undefined.txt:32
RV34DSPContext
Definition: rv34dsp.h:57
RV30_MC
#define RV30_MC(OPNAME, SIZE)
Definition: rv30dsp.c:216
rv34dsp.h
h264chroma.h
attributes.h
H264QpelContext
Definition: h264qpel.h:27
H264QpelContext::put_h264_qpel_pixels_tab
qpel_mc_func put_h264_qpel_pixels_tab[4][16]
Definition: h264qpel.h:28
ff_h264chroma_init
av_cold void ff_h264chroma_init(H264ChromaContext *c, int bit_depth)
Definition: h264chroma.c:41
H264ChromaContext
Definition: h264chroma.h:27
RV30_LOWPASS
#define RV30_LOWPASS(OPNAME, OP)
Definition: rv30dsp.c:36