FFmpeg
h264qpel.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "config.h"
22 
23 #include "libavutil/attributes.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/intreadwrite.h"
26 #include "libavutil/mem_internal.h"
27 #include "libavutil/ppc/cpu.h"
29 
30 #include "libavcodec/h264qpel.h"
31 
32 #include "hpeldsp_altivec.h"
33 
34 #if HAVE_ALTIVEC
35 
/*
 * Store operators selectable through OP_U8_ALTIVEC:
 *   PUT: d receives s unchanged;
 *   AVG: d receives vec_avg(dst, s).
 * OP_U8_ALTIVEC and the PREFIX_* names are defined before each inclusion of
 * h264qpel_template.c and undefined right after it, so they are presumably
 * consumed by the template -- confirm in h264qpel_template.c.
 */
36 #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
37 #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
38 
/* First inclusion of the template: PREFIX_* names map to the put_ variants. */
39 #define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
40 #define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
41 #define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
42 #define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
43 #define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
44 #define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
45 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
46 #include "h264qpel_template.c"
47 #undef OP_U8_ALTIVEC
48 #undef PREFIX_h264_qpel16_h_lowpass_altivec
49 #undef PREFIX_h264_qpel16_h_lowpass_num
50 #undef PREFIX_h264_qpel16_v_lowpass_altivec
51 #undef PREFIX_h264_qpel16_v_lowpass_num
52 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
53 #undef PREFIX_h264_qpel16_hv_lowpass_num
54 
/* Second inclusion of the template: PREFIX_* names map to the avg_ variants. */
55 #define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
56 #define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
57 #define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
58 #define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
59 #define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
60 #define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
61 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
62 #include "h264qpel_template.c"
63 #undef OP_U8_ALTIVEC
64 #undef PREFIX_h264_qpel16_h_lowpass_altivec
65 #undef PREFIX_h264_qpel16_h_lowpass_num
66 #undef PREFIX_h264_qpel16_v_lowpass_altivec
67 #undef PREFIX_h264_qpel16_v_lowpass_num
68 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
69 #undef PREFIX_h264_qpel16_hv_lowpass_num
70 
/*
 * H264_MC(OPNAME, SIZE, CODETYPE): define the sixteen static functions
 * OPNAME##h264_qpel##SIZE##_mcXY_##CODETYPE(dst, src, stride), X and Y in 0..3.
 *
 *  - mc00 forwards to ff_<OPNAME>pixels<SIZE>_<CODETYPE>(dst, src, stride, SIZE).
 *  - mc20, mc02 and mc22 run the OPNAME h / v / hv lowpass filter straight
 *    into dst.
 *  - Every other variant fills one or two SIZE*SIZE scratch blocks with the
 *    put_ lowpass filters and merges two sources into dst with
 *    OPNAME##pixels##SIZE##_l2_##CODETYPE.
 *
 * X selects the horizontal case and Y the vertical one: mc10/mc30 pair the
 * h_lowpass output with src / src+1, mc01/mc03 pair the v_lowpass output with
 * src / src+stride. Presumably X and Y are the quarter-sample offsets of the
 * motion vector -- confirm against the H.264 interpolation definition.
 *
 * The variants that call the hv lowpass filter also pass it an int16_t
 * scratch buffer of SIZE*(SIZE+8) elements.
 */
71 #define H264_MC(OPNAME, SIZE, CODETYPE) \
72 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
73 {\
74  ff_ ## OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
75 }\
76 \
77 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
78 { \
79  DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
80  put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
81  OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
82 }\
83 \
84 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
85 {\
86  OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
87 }\
88 \
89 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
90 {\
91  DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
92  put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
93  OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
94 }\
95 \
96 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
97 {\
98  DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
99  put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
100  OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
101 }\
102 \
103 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
104 {\
105  OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
106 }\
107 \
108 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
109 {\
110  DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
111  put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
112  OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
113 }\
114 \
115 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
116 {\
117  DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
118  DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
119  put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
120  put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
121  OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
122 }\
123 \
124 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
125 {\
126  DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
127  DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
128  put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
129  put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
130  OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
131 }\
132 \
133 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
134 {\
135  DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
136  DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
137  put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
138  put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
139  OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
140 }\
141 \
142 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
143 {\
144  DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
145  DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
146  put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
147  put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
148  OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
149 }\
150 \
151 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
152 {\
153  DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
154  OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
155 }\
156 \
157 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
158 {\
159  DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
160  DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
161  DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
162  put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
163  put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
164  OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
165 }\
166 \
167 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
168 {\
169  DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
170  DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
171  DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
172  put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
173  put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
174  OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
175 }\
176 \
177 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
178 {\
179  DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
180  DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
181  DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
182  put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
183  put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
184  OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
185 }\
186 \
187 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
188 {\
189  DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
190  DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
191  DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
192  put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
193  put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
194  OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
195 }\
196 
/*
 * put_unligned_store(s, dest): write the vector s to dest.
 *
 * Big-endian: dest is covered by the two vec_ld()/vec_st() accesses at
 * offsets 0 and 15; the bytes around dest are read first ("edges") and merged
 * back with s via vec_perm() so that only the 16 bytes at dest change.
 * The expansion uses the vec_u8 variables tmp1, tmp2, mask, edges and align,
 * which must exist in the calling scope.
 *
 * Little-endian: a single vec_vsx_st().
 *
 * Both variants expand to exactly one statement without a trailing semicolon,
 * so "put_unligned_store(s, dest);" is safe in an unbraced if/else.
 */
#if HAVE_BIGENDIAN
#define put_unligned_store(s, dest) do {    \
    tmp1  = vec_ld(0, dest);                \
    mask  = vec_lvsl(0, dest);              \
    tmp2  = vec_ld(15, dest);               \
    edges = vec_perm(tmp2, tmp1, mask);     \
    align = vec_lvsr(0, dest);              \
    tmp2  = vec_perm(s, edges, align);      \
    tmp1  = vec_perm(edges, s, align);      \
    vec_st(tmp2, 15, dest);                 \
    vec_st(tmp1, 0 , dest);                 \
} while (0)
#else
#define put_unligned_store(s, dest) vec_vsx_st(s, 0, dest)
#endif /* HAVE_BIGENDIAN */
212 
213 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
214  const uint8_t * src2, int dst_stride,
215  int src_stride1, int h)
216 {
217  int i;
218  vec_u8 a, b, d, mask_;
219 #if HAVE_BIGENDIAN
220  vec_u8 tmp1, tmp2, mask, edges, align;
221  mask_ = vec_lvsl(0, src2);
222 #endif
223 
224  for (i = 0; i < h; i++) {
225  a = unaligned_load(i * src_stride1, src1);
226  b = load_with_perm_vec(i * 16, src2, mask_);
227  d = vec_avg(a, b);
228  put_unligned_store(d, dst);
229  dst += dst_stride;
230  }
231 }
232 
/*
 * avg_unligned_store(s, dest): average the vector s with the 16 bytes
 * currently stored at dest (vec_avg) and write the result back to dest.
 *
 * Big-endian: same edge-preserving vec_ld()/vec_perm()/vec_st() sequence as
 * the plain store; uses the vec_u8 variables tmp1, tmp2, mask, edges and
 * align from the calling scope.
 * Little-endian: vec_vsx_ld() + vec_vsx_st().
 *
 * Both variants overwrite the caller's vec_u8 variable "a" with the averaged
 * vector. The little-endian variant addresses memory through its "dest"
 * parameter (it previously hard-coded the caller's variable name "dst").
 * Both expand to a single statement without a trailing semicolon.
 */
#if HAVE_BIGENDIAN
#define avg_unligned_store(s, dest) do {            \
    tmp1  = vec_ld(0, dest);                        \
    mask  = vec_lvsl(0, dest);                      \
    tmp2  = vec_ld(15, dest);                       \
    a     = vec_avg(vec_perm(tmp1, tmp2, mask), s); \
    edges = vec_perm(tmp2, tmp1, mask);             \
    align = vec_lvsr(0, dest);                      \
    tmp2  = vec_perm(a, edges, align);              \
    tmp1  = vec_perm(edges, a, align);              \
    vec_st(tmp2, 15, dest);                         \
    vec_st(tmp1, 0 , dest);                         \
} while (0)
#else
#define avg_unligned_store(s, dest) do {            \
    a = vec_avg(vec_vsx_ld(0, dest), s);            \
    vec_vsx_st(a, 0, dest);                         \
} while (0)
#endif /* HAVE_BIGENDIAN */
252 
253 static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
254  const uint8_t * src2, int dst_stride,
255  int src_stride1, int h)
256 {
257  int i;
258  vec_u8 a, b, d, mask_;
259 
260 #if HAVE_BIGENDIAN
261  vec_u8 tmp1, tmp2, mask, edges, align;
262  mask_ = vec_lvsl(0, src2);
263 #endif
264 
265  for (i = 0; i < h; i++) {
266  a = unaligned_load(i * src_stride1, src1);
267  b = load_with_perm_vec(i * 16, src2, mask_);
268  d = vec_avg(a, b);
269  avg_unligned_store(d, dst);
270  dst += dst_stride;
271  }
272 }
273 
274 /* Implemented but could be faster
275 #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
276 #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
277  */
278 
/* Expand H264_MC for SIZE 16 and CODETYPE altivec, once with the put_ prefix
 * and once with avg_, producing the *_mcXY_altivec functions that
 * ff_h264qpel_init_ppc() installs. */
279 H264_MC(put_, 16, altivec)
280 H264_MC(avg_, 16, altivec)
281 #endif /* HAVE_ALTIVEC */
282 
284 {
285 #if HAVE_ALTIVEC
286  const int high_bit_depth = bit_depth > 8;
287 
289  return;
290 
291  if (!high_bit_depth) {
292 #define dspfunc(PFX, IDX, NUM) \
293  c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
294  c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
295  c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
296  c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
297  c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
298  c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
299  c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
300  c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
301  c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
302  c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
303  c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
304  c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
305  c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
306  c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
307  c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
308  c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
309 
310  dspfunc(put_h264_qpel, 0, 16);
311  dspfunc(avg_h264_qpel, 0, 16);
312 #undef dspfunc
313  }
314 #endif /* HAVE_ALTIVEC */
315 }
mem_internal.h
src1
const pixel * src1
Definition: h264pred_template.c:421
b
#define b
Definition: input.c:41
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:103
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:245
h264qpel.h
av_cold
#define av_cold
Definition: attributes.h:90
mask
static const uint16_t mask[17]
Definition: lzw.c:38
ff_h264qpel_init_ppc
av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth)
Definition: h264qpel.c:283
intreadwrite.h
H264_MC
#define H264_MC(OPNAME, SIZE)
Definition: h264qpel_template.c:380
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
Definition: undefined.txt:32
PPC_ALTIVEC
#define PPC_ALTIVEC(flags)
Definition: cpu.h:25
cpu.h
align
static const uint8_t *BS_FUNC() align(BSCTX *bc)
Skip bits to a byte boundary.
Definition: bitstream_template.h:411
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
vec_u8
#define vec_u8
Definition: util_altivec.h:34
attributes.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
dspfunc
#define dspfunc(PFX, IDX, NUM)
src2
const pixel * src2
Definition: h264pred_template.c:422
H264QpelContext
Definition: h264qpel.h:27
hpeldsp_altivec.h
util_altivec.h
d
d
Definition: ffmpeg_filter.c:368
cpu.h
h
h
Definition: vp9dsp_template.c:2038