/*
 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"

#include <stdint.h>
#include <string.h>

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem_internal.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/util_altivec.h"

#include "libavcodec/h264dec.h"
#include "libavcodec/h264dsp.h"

#if HAVE_ALTIVEC

/****************************************************************************
 * IDCT transform:
 ****************************************************************************/

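/*
 * The 4x4 H.264 inverse transform is two passes of the same 4-point
 * butterfly (rows, transpose, columns), followed by a >> 6 with the +32
 * rounding term folded into the DC coefficient before the first pass.
 * Scalar sketch of one 1-D pass (illustration only, not the reference
 * implementation used elsewhere in FFmpeg):
 *
 *     z0 = y0 + y2;          z1 = y0 - y2;
 *     z2 = (y1 >> 1) - y3;   z3 = y1 + (y3 >> 1);
 *     x0 = z0 + z3;          x1 = z1 + z2;
 *     x2 = z1 - z2;          x3 = z0 - z3;
 *
 * The macro below performs this pass on four rows at once.
 */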
#define VEC_1D_DCT(vb0,vb1,vb2,vb3,va0,va1,va2,va3)            \
    /* 1st stage */                                            \
    vz0 = vec_add(vb0,vb2);  /* temp[0] = Y[0] + Y[2] */       \
    vz1 = vec_sub(vb0,vb2);  /* temp[1] = Y[0] - Y[2] */       \
    vz2 = vec_sra(vb1,vec_splat_u16(1));                       \
    vz2 = vec_sub(vz2,vb3);  /* temp[2] = Y[1].1/2 - Y[3] */   \
    vz3 = vec_sra(vb3,vec_splat_u16(1));                       \
    vz3 = vec_add(vb1,vz3);  /* temp[3] = Y[1] + Y[3].1/2 */   \
    /* 2nd stage: output */                                    \
    va0 = vec_add(vz0,vz3);  /* x[0] = temp[0] + temp[3] */    \
    va1 = vec_add(vz1,vz2);  /* x[1] = temp[1] + temp[2] */    \
    va2 = vec_sub(vz1,vz2);  /* x[2] = temp[1] - temp[2] */    \
    va3 = vec_sub(vz0,vz3)   /* x[3] = temp[0] - temp[3] */

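/*
 * Transposes the 4x4 block of 16-bit coefficients held in the first four
 * elements of a0..a3 using three rounds of merge (interleave) operations;
 * the transposed rows end up in the first four elements of b0..b3.
 */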
#define VEC_TRANSPOSE_4(a0,a1,a2,a3,b0,b1,b2,b3) \
    b0 = vec_mergeh( a0, a0 ); \
    b1 = vec_mergeh( a1, a0 ); \
    b2 = vec_mergeh( a2, a0 ); \
    b3 = vec_mergeh( a3, a0 ); \
    a0 = vec_mergeh( b0, b2 ); \
    a1 = vec_mergel( b0, b2 ); \
    a2 = vec_mergeh( b1, b3 ); \
    a3 = vec_mergel( b1, b3 ); \
    b0 = vec_mergeh( a0, a2 ); \
    b1 = vec_mergel( a0, a2 ); \
    b2 = vec_mergeh( a1, a3 ); \
    b3 = vec_mergel( a1, a3 )

#if HAVE_BIGENDIAN
#define vdst_load(d)                \
    vdst_orig = vec_ld(0, dst);     \
    vdst = vec_perm(vdst_orig, zero_u8v, vdst_mask);
#else
#define vdst_load(d) vdst = vec_vsx_ld(0, dst)
#endif

#define VEC_LOAD_U8_ADD_S16_STORE_U8(va)             \
    vdst_load();                                     \
    vdst_ss = (vec_s16) VEC_MERGEH(zero_u8v, vdst);  \
    va = vec_add(va, vdst_ss);                       \
    va_u8 = vec_packsu(va, zero_s16v);               \
    va_u32 = vec_splat((vec_u32)va_u8, 0);           \
    vec_ste(va_u32, element, (uint32_t*)dst);

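/*
 * Inverse-transforms one 4x4 residual block and adds it to dst with
 * unsigned saturation, storing four bytes per row via vec_ste().  The
 * coefficient block is zeroed right after its values are loaded into
 * registers, as the h264dsp API requires.
 */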
static void h264_idct_add_altivec(uint8_t *dst, int16_t *block, int stride)
{
    vec_s16 va0, va1, va2, va3;
    vec_s16 vz0, vz1, vz2, vz3;
    vec_s16 vtmp0, vtmp1, vtmp2, vtmp3;
    vec_u8 va_u8;
    vec_u32 va_u32;
    vec_s16 vdst_ss;
    const vec_u16 v6us = vec_splat_u16(6);
    vec_u8 vdst, vdst_orig;
    vec_u8 vdst_mask = vec_lvsl(0, dst);
    int element = ((unsigned long)dst & 0xf) >> 2;
    LOAD_ZERO;

    block[0] += 32;  /* add 32 as a DC-level for rounding */

    vtmp0 = vec_ld(0,block);
    vtmp1 = vec_sld(vtmp0, vtmp0, 8);
    vtmp2 = vec_ld(16,block);
    vtmp3 = vec_sld(vtmp2, vtmp2, 8);
    memset(block, 0, 16 * sizeof(int16_t));

    VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);
    VEC_TRANSPOSE_4(va0,va1,va2,va3,vtmp0,vtmp1,vtmp2,vtmp3);
    VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);

    va0 = vec_sra(va0,v6us);
    va1 = vec_sra(va1,v6us);
    va2 = vec_sra(va2,v6us);
    va3 = vec_sra(va3,v6us);

    VEC_LOAD_U8_ADD_S16_STORE_U8(va0);
    dst += stride;
    VEC_LOAD_U8_ADD_S16_STORE_U8(va1);
    dst += stride;
    VEC_LOAD_U8_ADD_S16_STORE_U8(va2);
    dst += stride;
    VEC_LOAD_U8_ADD_S16_STORE_U8(va3);
}

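/*
 * One-dimensional 8-point H.264 inverse transform on eight coefficient
 * vectors; the scalar formula for each step is kept as a comment next to
 * the corresponding vector operation.  h264_idct8_add_altivec() applies
 * it to the rows, transposes, then applies it again to the columns.
 */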
#define IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7) {\
    /* a0 = SRC(0) + SRC(4); */ \
    vec_s16 a0v = vec_add(s0, s4); \
    /* a2 = SRC(0) - SRC(4); */ \
    vec_s16 a2v = vec_sub(s0, s4); \
    /* a4 = (SRC(2)>>1) - SRC(6); */ \
    vec_s16 a4v = vec_sub(vec_sra(s2, onev), s6); \
    /* a6 = (SRC(6)>>1) + SRC(2); */ \
    vec_s16 a6v = vec_add(vec_sra(s6, onev), s2); \
    /* b0 = a0 + a6; */ \
    vec_s16 b0v = vec_add(a0v, a6v); \
    /* b2 = a2 + a4; */ \
    vec_s16 b2v = vec_add(a2v, a4v); \
    /* b4 = a2 - a4; */ \
    vec_s16 b4v = vec_sub(a2v, a4v); \
    /* b6 = a0 - a6; */ \
    vec_s16 b6v = vec_sub(a0v, a6v); \
    /* a1 = SRC(5) - SRC(3) - SRC(7) - (SRC(7)>>1); */ \
    /* a1 = (SRC(5)-SRC(3)) - (SRC(7) + (SRC(7)>>1)); */ \
    vec_s16 a1v = vec_sub( vec_sub(s5, s3), vec_add(s7, vec_sra(s7, onev)) ); \
    /* a3 = SRC(7) + SRC(1) - SRC(3) - (SRC(3)>>1); */ \
    /* a3 = (SRC(7)+SRC(1)) - (SRC(3) + (SRC(3)>>1)); */ \
    vec_s16 a3v = vec_sub( vec_add(s7, s1), vec_add(s3, vec_sra(s3, onev)) );\
    /* a5 = SRC(7) - SRC(1) + SRC(5) + (SRC(5)>>1); */ \
    /* a5 = (SRC(7)-SRC(1)) + SRC(5) + (SRC(5)>>1); */ \
    vec_s16 a5v = vec_add( vec_sub(s7, s1), vec_add(s5, vec_sra(s5, onev)) );\
    /* a7 = SRC(5)+SRC(3) + SRC(1) + (SRC(1)>>1); */ \
    vec_s16 a7v = vec_add( vec_add(s5, s3), vec_add(s1, vec_sra(s1, onev)) );\
    /* b1 = (a7>>2) + a1; */ \
    vec_s16 b1v = vec_add( vec_sra(a7v, twov), a1v); \
    /* b3 = a3 + (a5>>2); */ \
    vec_s16 b3v = vec_add(a3v, vec_sra(a5v, twov)); \
    /* b5 = (a3>>2) - a5; */ \
    vec_s16 b5v = vec_sub( vec_sra(a3v, twov), a5v); \
    /* b7 = a7 - (a1>>2); */ \
    vec_s16 b7v = vec_sub( a7v, vec_sra(a1v, twov)); \
    /* DST(0, b0 + b7); */ \
    d0 = vec_add(b0v, b7v); \
    /* DST(1, b2 + b5); */ \
    d1 = vec_add(b2v, b5v); \
    /* DST(2, b4 + b3); */ \
    d2 = vec_add(b4v, b3v); \
    /* DST(3, b6 + b1); */ \
    d3 = vec_add(b6v, b1v); \
    /* DST(4, b6 - b1); */ \
    d4 = vec_sub(b6v, b1v); \
    /* DST(5, b4 - b3); */ \
    d5 = vec_sub(b4v, b3v); \
    /* DST(6, b2 - b5); */ \
    d6 = vec_sub(b2v, b5v); \
    /* DST(7, b0 - b7); */ \
    d7 = vec_sub(b0v, b7v); \
}

#if HAVE_BIGENDIAN
#define GET_2PERM(ldv, stv, d) \
    ldv = vec_lvsl(0, d); \
    stv = vec_lvsr(8, d);
#define dstv_load(d) \
    vec_u8 hv = vec_ld( 0, d ); \
    vec_u8 lv = vec_ld( 7, d); \
    vec_u8 dstv = vec_perm( hv, lv, (vec_u8)perm_ldv );
#define dest_unaligned_store(d) \
    vec_u8 edgehv; \
    vec_u8 bodyv = vec_perm( idstsum8, idstsum8, perm_stv ); \
    vec_u8 edgelv = vec_perm( sel, zero_u8v, perm_stv ); \
    lv = vec_sel( lv, bodyv, edgelv ); \
    vec_st( lv, 7, d ); \
    hv = vec_ld( 0, d ); \
    edgehv = vec_perm( zero_u8v, sel, perm_stv ); \
    hv = vec_sel( hv, bodyv, edgehv ); \
    vec_st( hv, 0, d );
#else

#define GET_2PERM(ldv, stv, d) {}
#define dstv_load(d) vec_u8 dstv = vec_vsx_ld(0, d)
#define dest_unaligned_store(d) \
    vec_u8 dst8 = vec_perm((vec_u8)idstsum8, dstv, vcprm(2,3,s2,s3));\
    vec_vsx_st(dst8, 0, d)
#endif /* HAVE_BIGENDIAN */

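/*
 * Adds one row of >>6-scaled 8x8 IDCT output to eight destination pixels
 * with unsigned saturation.  The surrounding load/store macros handle an
 * unaligned dst: big-endian uses the classic lvsl/lvsr permute-and-select
 * scheme, little-endian uses VSX unaligned loads/stores.
 */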
#define ALTIVEC_STORE_SUM_CLIP(dest, idctv, perm_ldv, perm_stv, sel) { \
    /* unaligned load */                                               \
    dstv_load(dest);                                                   \
    vec_s16 idct_sh6 = vec_sra(idctv, sixv);                           \
    vec_u16 dst16 = (vec_u16)VEC_MERGEH(zero_u8v, dstv);               \
    vec_s16 idstsum = vec_adds(idct_sh6, (vec_s16)dst16);              \
    vec_u8 idstsum8 = vec_packsu(zero_s16v, idstsum);                  \
    /* unaligned store */                                              \
    dest_unaligned_store(dest);                                        \
}

static void h264_idct8_add_altivec(uint8_t *dst, int16_t *dct, int stride)
{
    vec_s16 s0, s1, s2, s3, s4, s5, s6, s7;
    vec_s16 d0, d1, d2, d3, d4, d5, d6, d7;
    vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;

    vec_u8 perm_ldv, perm_stv;
    GET_2PERM(perm_ldv, perm_stv, dst);

    const vec_u16 onev = vec_splat_u16(1);
    const vec_u16 twov = vec_splat_u16(2);
    const vec_u16 sixv = vec_splat_u16(6);

    const vec_u8 sel = (vec_u8) {0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1};
    LOAD_ZERO;

    dct[0] += 32; // rounding for the >>6 at the end

    s0 = vec_ld(0x00, (int16_t*)dct);
    s1 = vec_ld(0x10, (int16_t*)dct);
    s2 = vec_ld(0x20, (int16_t*)dct);
    s3 = vec_ld(0x30, (int16_t*)dct);
    s4 = vec_ld(0x40, (int16_t*)dct);
    s5 = vec_ld(0x50, (int16_t*)dct);
    s6 = vec_ld(0x60, (int16_t*)dct);
    s7 = vec_ld(0x70, (int16_t*)dct);
    memset(dct, 0, 64 * sizeof(int16_t));

    IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7,
                     d0, d1, d2, d3, d4, d5, d6, d7);

    TRANSPOSE8( d0, d1, d2, d3, d4, d5, d6, d7 );

    IDCT8_1D_ALTIVEC(d0, d1, d2, d3, d4, d5, d6, d7,
                     idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7);

    ALTIVEC_STORE_SUM_CLIP(&dst[0*stride], idct0, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[1*stride], idct1, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[2*stride], idct2, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[3*stride], idct3, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[4*stride], idct4, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
}

#if HAVE_BIGENDIAN
#define DST_LD vec_ld
#else
#define DST_LD vec_vsx_ld
#endif
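/*
 * DC-only path: when a block carries nothing but a DC coefficient the
 * whole IDCT collapses to adding (dc + 32) >> 6 to every pixel.  The value
 * is splatted both as +dc and as -dc so that a saturating add followed by
 * a saturating subtract clips the result to the 0..255 range.
 */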
static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *block, int stride, int size)
{
    vec_s16 dc16;
    vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;
    vec_s32 v_dc32;
    LOAD_ZERO;
    DECLARE_ALIGNED(16, int, dc);
    int i;

    dc = (block[0] + 32) >> 6;
    block[0] = 0;
    v_dc32 = vec_lde(0, &dc);
    dc16 = VEC_SPLAT16((vec_s16)v_dc32, 1);

    if (size == 4)
        dc16 = VEC_SLD16(dc16, zero_s16v, 8);
    dcplus = vec_packsu(dc16, zero_s16v);
    dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);

#if HAVE_BIGENDIAN
    aligner = vec_lvsr(0, dst);
    dcplus = vec_perm(dcplus, dcplus, aligner);
    dcminus = vec_perm(dcminus, dcminus, aligner);
#endif

    for (i = 0; i < size; i += 4) {
        v0 = DST_LD(0, dst+0*stride);
        v1 = DST_LD(0, dst+1*stride);
        v2 = DST_LD(0, dst+2*stride);
        v3 = DST_LD(0, dst+3*stride);

        v0 = vec_adds(v0, dcplus);
        v1 = vec_adds(v1, dcplus);
        v2 = vec_adds(v2, dcplus);
        v3 = vec_adds(v3, dcplus);

        v0 = vec_subs(v0, dcminus);
        v1 = vec_subs(v1, dcminus);
        v2 = vec_subs(v2, dcminus);
        v3 = vec_subs(v3, dcminus);

        VEC_ST(v0, 0, dst+0*stride);
        VEC_ST(v1, 0, dst+1*stride);
        VEC_ST(v2, 0, dst+2*stride);
        VEC_ST(v3, 0, dst+3*stride);

        dst += 4*stride;
    }
}

static void h264_idct_dc_add_altivec(uint8_t *dst, int16_t *block, int stride)
{
    h264_idct_dc_add_internal(dst, block, stride, 4);
}

static void h264_idct8_dc_add_altivec(uint8_t *dst, int16_t *block, int stride)
{
    h264_idct_dc_add_internal(dst, block, stride, 8);
}

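/*
 * The add16/add16intra/add4/add8 wrappers walk the 4x4 (or 8x8) blocks of
 * a macroblock and, per block, pick either the full inverse transform or
 * the cheaper DC-only routine, based on the nonzero-coefficient count
 * looked up via nnzc[scan8[i]].
 */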
static void h264_idct_add16_altivec(uint8_t *dst, const int *block_offset,
                                    int16_t *block, int stride,
                                    const uint8_t nnzc[5 * 8])
{
    int i;
    for(i=0; i<16; i++){
        int nnz = nnzc[ scan8[i] ];
        if(nnz){
            if(nnz==1 && block[i*16]) h264_idct_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
            else                      h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride);
        }
    }
}

static void h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset,
                                         int16_t *block, int stride,
                                         const uint8_t nnzc[5 * 8])
{
    int i;
    for(i=0; i<16; i++){
        if(nnzc[ scan8[i] ]) h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride);
        else if(block[i*16]) h264_idct_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
    }
}

static void h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset,
                                    int16_t *block, int stride,
                                    const uint8_t nnzc[5 * 8])
{
    int i;
    for(i=0; i<16; i+=4){
        int nnz = nnzc[ scan8[i] ];
        if(nnz){
            if(nnz==1 && block[i*16]) h264_idct8_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
            else                      h264_idct8_add_altivec(dst + block_offset[i], block + i*16, stride);
        }
    }
}

static void h264_idct_add8_altivec(uint8_t **dest, const int *block_offset,
                                   int16_t *block, int stride,
                                   const uint8_t nnzc[15 * 8])
{
    int i, j;
    for (j = 1; j < 3; j++) {
        for(i = j * 16; i < j * 16 + 4; i++){
            if(nnzc[ scan8[i] ])
                h264_idct_add_altivec(dest[j-1] + block_offset[i], block + i*16, stride);
            else if(block[i*16])
                h264_idct_dc_add_altivec(dest[j-1] + block_offset[i], block + i*16, stride);
        }
    }
}

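/*
 * Deblocking filter helpers.  Luma edges are filtered sixteen pixels at a
 * time: the v_ variant works directly on whole rows loaded at +/- stride
 * offsets, while the h_ variant transposes a 16x6 pixel neighbourhood into
 * registers, filters it, and transposes the four modified registers back
 * to sixteen rows of four bytes.
 */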
#define transpose4x16(r0, r1, r2, r3) {      \
    register vec_u8 r4;                      \
    register vec_u8 r5;                      \
    register vec_u8 r6;                      \
    register vec_u8 r7;                      \
                                             \
    r4 = vec_mergeh(r0, r2);  /*0, 2 set 0*/ \
    r5 = vec_mergel(r0, r2);  /*0, 2 set 1*/ \
    r6 = vec_mergeh(r1, r3);  /*1, 3 set 0*/ \
    r7 = vec_mergel(r1, r3);  /*1, 3 set 1*/ \
                                             \
    r0 = vec_mergeh(r4, r6);  /*all set 0*/  \
    r1 = vec_mergel(r4, r6);  /*all set 1*/  \
    r2 = vec_mergeh(r5, r7);  /*all set 2*/  \
    r3 = vec_mergel(r5, r7);  /*all set 3*/  \
}

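/*
 * Writes four 16-byte registers out as sixteen rows of four bytes each,
 * going through a 16-byte-aligned scratch buffer so the vector stores stay
 * aligned and only 32-bit scalar writes touch the destination.
 */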
static inline void write16x4(uint8_t *dst, int dst_stride,
                             register vec_u8 r0, register vec_u8 r1,
                             register vec_u8 r2, register vec_u8 r3) {
    DECLARE_ALIGNED(16, unsigned char, result)[64];
    uint32_t *src_int = (uint32_t *)result;

    vec_st(r0,  0, result);
    vec_st(r1, 16, result);
    vec_st(r2, 32, result);
    vec_st(r3, 48, result);
    /* FIXME: there has to be a better way!!!! */
    AV_WN32(dst,                   AV_RN32A(src_int + 0));
    AV_WN32(dst +      dst_stride, AV_RN32A(src_int + 1));
    AV_WN32(dst +  2 * dst_stride, AV_RN32A(src_int + 2));
    AV_WN32(dst +  3 * dst_stride, AV_RN32A(src_int + 3));
    AV_WN32(dst +  4 * dst_stride, AV_RN32A(src_int + 4));
    AV_WN32(dst +  5 * dst_stride, AV_RN32A(src_int + 5));
    AV_WN32(dst +  6 * dst_stride, AV_RN32A(src_int + 6));
    AV_WN32(dst +  7 * dst_stride, AV_RN32A(src_int + 7));
    AV_WN32(dst +  8 * dst_stride, AV_RN32A(src_int + 8));
    AV_WN32(dst +  9 * dst_stride, AV_RN32A(src_int + 9));
    AV_WN32(dst + 10 * dst_stride, AV_RN32A(src_int + 10));
    AV_WN32(dst + 11 * dst_stride, AV_RN32A(src_int + 11));
    AV_WN32(dst + 12 * dst_stride, AV_RN32A(src_int + 12));
    AV_WN32(dst + 13 * dst_stride, AV_RN32A(src_int + 13));
    AV_WN32(dst + 14 * dst_stride, AV_RN32A(src_int + 14));
    AV_WN32(dst + 15 * dst_stride, AV_RN32A(src_int + 15));
}

/** @brief Transposes a 16x6 block of bytes read from src into the six
    registers r8..r13.
    @todo FIXME: see if we can't spare some vec_lvsl() by factorizing them
    out of unaligned_load() */
#define readAndTranspose16x6(src, src_stride, r8, r9, r10, r11, r12, r13) {\
    register vec_u8 r0  = unaligned_load(0,             src); \
    register vec_u8 r1  = unaligned_load(   src_stride, src); \
    register vec_u8 r2  = unaligned_load(2* src_stride, src); \
    register vec_u8 r3  = unaligned_load(3* src_stride, src); \
    register vec_u8 r4  = unaligned_load(4* src_stride, src); \
    register vec_u8 r5  = unaligned_load(5* src_stride, src); \
    register vec_u8 r6  = unaligned_load(6* src_stride, src); \
    register vec_u8 r7  = unaligned_load(7* src_stride, src); \
    register vec_u8 r14 = unaligned_load(14*src_stride, src); \
    register vec_u8 r15 = unaligned_load(15*src_stride, src); \
                                                              \
    r8  = unaligned_load( 8*src_stride, src);                 \
    r9  = unaligned_load( 9*src_stride, src);                 \
    r10 = unaligned_load(10*src_stride, src);                 \
    r11 = unaligned_load(11*src_stride, src);                 \
    r12 = unaligned_load(12*src_stride, src);                 \
    r13 = unaligned_load(13*src_stride, src);                 \
                                                              \
    /* Merge first pairs */                                   \
    r0 = vec_mergeh(r0, r8);  /*0, 8*/                        \
    r1 = vec_mergeh(r1, r9);  /*1, 9*/                        \
    r2 = vec_mergeh(r2, r10); /*2,10*/                        \
    r3 = vec_mergeh(r3, r11); /*3,11*/                        \
    r4 = vec_mergeh(r4, r12); /*4,12*/                        \
    r5 = vec_mergeh(r5, r13); /*5,13*/                        \
    r6 = vec_mergeh(r6, r14); /*6,14*/                        \
    r7 = vec_mergeh(r7, r15); /*7,15*/                        \
                                                              \
    /* Merge second pairs */                                  \
    r8  = vec_mergeh(r0, r4); /*0,4, 8,12 set 0*/             \
    r9  = vec_mergel(r0, r4); /*0,4, 8,12 set 1*/             \
    r10 = vec_mergeh(r1, r5); /*1,5, 9,13 set 0*/             \
    r11 = vec_mergel(r1, r5); /*1,5, 9,13 set 1*/             \
    r12 = vec_mergeh(r2, r6); /*2,6,10,14 set 0*/             \
    r13 = vec_mergel(r2, r6); /*2,6,10,14 set 1*/             \
    r14 = vec_mergeh(r3, r7); /*3,7,11,15 set 0*/             \
    r15 = vec_mergel(r3, r7); /*3,7,11,15 set 1*/             \
                                                              \
    /* Third merge */                                         \
    r0 = vec_mergeh(r8,  r12); /*0,2,4,6,8,10,12,14 set 0*/   \
    r1 = vec_mergel(r8,  r12); /*0,2,4,6,8,10,12,14 set 1*/   \
    r2 = vec_mergeh(r9,  r13); /*0,2,4,6,8,10,12,14 set 2*/   \
    r4 = vec_mergeh(r10, r14); /*1,3,5,7,9,11,13,15 set 0*/   \
    r5 = vec_mergel(r10, r14); /*1,3,5,7,9,11,13,15 set 1*/   \
    r6 = vec_mergeh(r11, r15); /*1,3,5,7,9,11,13,15 set 2*/   \
    /* Don't need to compute 3 and 7 */                       \
                                                              \
    /* Final merge */                                         \
    r8  = vec_mergeh(r0, r4); /*all set 0*/                   \
    r9  = vec_mergel(r0, r4); /*all set 1*/                   \
    r10 = vec_mergeh(r1, r5); /*all set 2*/                   \
    r11 = vec_mergel(r1, r5); /*all set 3*/                   \
    r12 = vec_mergeh(r2, r6); /*all set 4*/                   \
    r13 = vec_mergel(r2, r6); /*all set 5*/                   \
    /* Don't need to compute 14 and 15 */                     \
}

// out: o = |x-y| < a
static inline vec_u8 diff_lt_altivec ( register vec_u8 x,
                                       register vec_u8 y,
                                       register vec_u8 a) {

    register vec_u8 diff = vec_subs(x, y);
    register vec_u8 diffneg = vec_subs(y, x);
    register vec_u8 o = vec_or(diff, diffneg); /* |x-y| */
    o = (vec_u8)vec_cmplt(o, a);
    return o;
}

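/*
 * Builds the per-pixel "filter this edge" mask from the standard H.264
 * conditions: |p0 - q0| < alpha, |p1 - p0| < beta and |q1 - q0| < beta.
 */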
static inline vec_u8 h264_deblock_mask ( register vec_u8 p0,
                                         register vec_u8 p1,
                                         register vec_u8 q0,
                                         register vec_u8 q1,
                                         register vec_u8 alpha,
                                         register vec_u8 beta) {

    register vec_u8 mask;
    register vec_u8 tempmask;

    mask = diff_lt_altivec(p0, q0, alpha);
    tempmask = diff_lt_altivec(p1, p0, beta);
    mask = vec_and(mask, tempmask);
    tempmask = diff_lt_altivec(q1, q0, beta);
    mask = vec_and(mask, tempmask);

    return mask;
}

// out: newp1 = clip((p2 + ((p0 + q0 + 1) >> 1)) >> 1, p1-tc0, p1+tc0)
static inline vec_u8 h264_deblock_q1(register vec_u8 p0,
                                     register vec_u8 p1,
                                     register vec_u8 p2,
                                     register vec_u8 q0,
                                     register vec_u8 tc0) {

    register vec_u8 average = vec_avg(p0, q0);
    register vec_u8 temp;
    register vec_u8 unclipped;
    register vec_u8 ones;
    register vec_u8 max;
    register vec_u8 min;
    register vec_u8 newp1;

    temp = vec_xor(average, p2);
    average = vec_avg(average, p2);      /* avg(p2, avg(p0, q0)) */
    ones = vec_splat_u8(1);
    temp = vec_and(temp, ones);          /* (p2^avg(p0, q0)) & 1 */
    unclipped = vec_subs(average, temp); /* (p2+((p0+q0+1)>>1))>>1 */
    max = vec_adds(p1, tc0);
    min = vec_subs(p1, tc0);
    newp1 = vec_max(min, unclipped);
    newp1 = vec_min(max, newp1);
    return newp1;
}

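/*
 * p0/q0 update for the normal (bS < 4) filter.  The scalar form is
 *     delta = clip((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, -tc, tc);
 *     p0   += delta;  q0 -= delta;
 * Everything is done in unsigned 8-bit arithmetic: averages of values and
 * their complements stand in for the signed sums, and the constant 160
 * (10 << 4) removes the bias before clamping against tc0.
 */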
#define h264_deblock_p0_q0(p0, p1, q0, q1, tc0masked) {                                          \
                                                                                                 \
    const vec_u8 A0v = vec_sl(vec_splat_u8(10), vec_splat_u8(4));                                \
                                                                                                 \
    register vec_u8 pq0bit = vec_xor(p0,q0);                                                     \
    register vec_u8 q1minus;                                                                     \
    register vec_u8 p0minus;                                                                     \
    register vec_u8 stage1;                                                                      \
    register vec_u8 stage2;                                                                      \
    register vec_u8 vec160;                                                                      \
    register vec_u8 delta;                                                                       \
    register vec_u8 deltaneg;                                                                    \
                                                                                                 \
    q1minus = vec_nor(q1, q1);                /* 255 - q1 */                                     \
    stage1 = vec_avg(p1, q1minus);            /* (p1 - q1 + 256)>>1 */                           \
    stage2 = vec_sr(stage1, vec_splat_u8(1)); /* (p1 - q1 + 256)>>2 = 64 + (p1 - q1) >> 2 */     \
    p0minus = vec_nor(p0, p0);                /* 255 - p0 */                                     \
    stage1 = vec_avg(q0, p0minus);            /* (q0 - p0 + 256)>>1 */                           \
    pq0bit = vec_and(pq0bit, vec_splat_u8(1));                                                   \
    stage2 = vec_avg(stage2, pq0bit);         /* 32 + ((q0 - p0)&1 + (p1 - q1) >> 2 + 1) >> 1 */ \
    stage2 = vec_adds(stage2, stage1);        /* 160 + ((p0 - q0) + (p1 - q1) >> 2 + 1) >> 1 */  \
    vec160 = vec_ld(0, &A0v);                                                                    \
    deltaneg = vec_subs(vec160, stage2);      /* -d */                                           \
    delta = vec_subs(stage2, vec160);         /* d */                                            \
    deltaneg = vec_min(tc0masked, deltaneg);                                                     \
    delta = vec_min(tc0masked, delta);                                                           \
    p0 = vec_subs(p0, deltaneg);                                                                 \
    q0 = vec_subs(q0, delta);                                                                    \
    p0 = vec_adds(p0, delta);                                                                    \
    q0 = vec_adds(q0, deltaneg);                                                                 \
}

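/*
 * Full luma edge filter for bS < 4 on sixteen pixels at once: builds the
 * alpha/beta enable mask, expands the four tc0 values to one per pixel,
 * conditionally filters p1 and q1 (each such pixel also bumps its tc by
 * one, via the subtraction of the all-ones mask), then updates p0/q0.
 */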
#define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) {             \
    DECLARE_ALIGNED(16, unsigned char, temp)[16];                                             \
    register vec_u8 alphavec;                                                                 \
    register vec_u8 betavec;                                                                  \
    register vec_u8 mask;                                                                     \
    register vec_u8 p1mask;                                                                   \
    register vec_u8 q1mask;                                                                   \
    register vector signed char tc0vec;                                                       \
    register vec_u8 finaltc0;                                                                 \
    register vec_u8 tc0masked;                                                                \
    register vec_u8 newp1;                                                                    \
    register vec_u8 newq1;                                                                    \
                                                                                              \
    temp[0] = alpha;                                                                          \
    temp[1] = beta;                                                                           \
    alphavec = vec_ld(0, temp);                                                               \
    betavec = vec_splat(alphavec, 0x1);                                                       \
    alphavec = vec_splat(alphavec, 0x0);                                                      \
    mask = h264_deblock_mask(p0, p1, q0, q1, alphavec, betavec); /* if in block */            \
                                                                                              \
    AV_COPY32(temp, tc0);                                                                     \
    tc0vec = vec_ld(0, (signed char*)temp);                                                   \
    tc0vec = vec_mergeh(tc0vec, tc0vec);                                                      \
    tc0vec = vec_mergeh(tc0vec, tc0vec);                                                      \
    mask = vec_and(mask, vec_cmpgt(tc0vec, vec_splat_s8(-1)));  /* if tc0[i] >= 0 */          \
    finaltc0 = vec_and((vec_u8)tc0vec, mask);                   /* tc = tc0 */                \
                                                                                              \
    p1mask = diff_lt_altivec(p2, p0, betavec);                                                \
    p1mask = vec_and(p1mask, mask);                             /* if ( |p2 - p0| < beta) */  \
    tc0masked = vec_and(p1mask, (vec_u8)tc0vec);                                              \
    finaltc0 = vec_sub(finaltc0, p1mask);                       /* tc++ */                    \
    newp1 = h264_deblock_q1(p0, p1, p2, q0, tc0masked);                                       \
    /* end if */                                                                              \
                                                                                              \
    q1mask = diff_lt_altivec(q2, q0, betavec);                                                \
    q1mask = vec_and(q1mask, mask);                             /* if ( |q2 - q0| < beta ) */ \
    tc0masked = vec_and(q1mask, (vec_u8)tc0vec);                                              \
    finaltc0 = vec_sub(finaltc0, q1mask);                       /* tc++ */                    \
    newq1 = h264_deblock_q1(p0, q1, q2, q0, tc0masked);                                       \
    /* end if */                                                                              \
                                                                                              \
    h264_deblock_p0_q0(p0, p1, q0, q1, finaltc0);                                             \
    p1 = newp1;                                                                               \
    q1 = newq1;                                                                               \
}

static void h264_v_loop_filter_luma_altivec(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) {

    if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) >= 0) {
        register vec_u8 p2 = vec_ld(-3*stride, pix);
        register vec_u8 p1 = vec_ld(-2*stride, pix);
        register vec_u8 p0 = vec_ld(-1*stride, pix);
        register vec_u8 q0 = vec_ld(0, pix);
        register vec_u8 q1 = vec_ld(stride, pix);
        register vec_u8 q2 = vec_ld(2*stride, pix);
        h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0);
        vec_st(p1, -2*stride, pix);
        vec_st(p0, -1*stride, pix);
        vec_st(q0, 0, pix);
        vec_st(q1, stride, pix);
    }
}

static void h264_h_loop_filter_luma_altivec(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) {

    register vec_u8 line0, line1, line2, line3, line4, line5;
    if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) < 0)
        return;
    readAndTranspose16x6(pix-3, stride, line0, line1, line2, line3, line4, line5);
    h264_loop_filter_luma_altivec(line0, line1, line2, line3, line4, line5, alpha, beta, tc0);
    transpose4x16(line1, line2, line3, line4);
    write16x4(pix-2, stride, line1, line2, line3, line4);
}

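/*
 * Explicit weighted prediction (one reference).  Per pixel this computes
 *     ((pix * weight + 2^(log2_denom-1)) >> log2_denom) + offset
 * clamped to 0..255; the rounding term and the offset are pre-scaled and
 * merged before the loop so only one add is needed per vector.
 */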
static av_always_inline
void weight_h264_W_altivec(uint8_t *block, int stride, int height,
                           int log2_denom, int weight, int offset, int w)
{
    int y, aligned;
    vec_u8 vblock;
    vec_s16 vtemp, vweight, voffset, v0, v1;
    vec_u16 vlog2_denom;
    DECLARE_ALIGNED(16, int32_t, temp)[4];
    LOAD_ZERO;

    offset *= 1 << log2_denom;
    if(log2_denom) offset += 1<<(log2_denom-1);
    temp[0] = log2_denom;
    temp[1] = weight;
    temp[2] = offset;

    vtemp = (vec_s16)vec_ld(0, temp);
#if !HAVE_BIGENDIAN
    vtemp = (vec_s16)vec_perm(vtemp, vtemp, vcswapi2s(0,1,2,3));
#endif
    vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
    vweight = vec_splat(vtemp, 3);
    voffset = vec_splat(vtemp, 5);
    aligned = !((unsigned long)block & 0xf);

    for (y = 0; y < height; y++) {
        vblock = vec_ld(0, block);

        v0 = (vec_s16)VEC_MERGEH(zero_u8v, vblock);
        v1 = (vec_s16)VEC_MERGEL(zero_u8v, vblock);

        if (w == 16 || aligned) {
            v0 = vec_mladd(v0, vweight, zero_s16v);
            v0 = vec_adds(v0, voffset);
            v0 = vec_sra(v0, vlog2_denom);
        }
        if (w == 16 || !aligned) {
            v1 = vec_mladd(v1, vweight, zero_s16v);
            v1 = vec_adds(v1, voffset);
            v1 = vec_sra(v1, vlog2_denom);
        }
        vblock = vec_packsu(v0, v1);
        vec_st(vblock, 0, block);

        block += stride;
    }
}

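/*
 * Bidirectional weighted prediction.  Per pixel this computes
 *     (dst * weightd + src * weights + round) >> (log2_denom + 1)
 * clamped to 0..255, where "round" folds in the caller's combined offset
 * and the mandatory +1 rounding bit (hence the ((offset + 1) | 1) term).
 */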
static av_always_inline
void biweight_h264_W_altivec(uint8_t *dst, uint8_t *src, int stride, int height,
                             int log2_denom, int weightd, int weights, int offset, int w)
{
    int y, dst_aligned, src_aligned;
    vec_u8 vsrc, vdst;
    vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3;
    vec_u16 vlog2_denom;
    DECLARE_ALIGNED(16, int32_t, temp)[4];
    LOAD_ZERO;

    offset = ((offset + 1) | 1) * (1 << log2_denom);
    temp[0] = log2_denom+1;
    temp[1] = weights;
    temp[2] = weightd;
    temp[3] = offset;

    vtemp = (vec_s16)vec_ld(0, temp);
#if !HAVE_BIGENDIAN
    vtemp = (vec_s16)vec_perm(vtemp, vtemp, vcswapi2s(0,1,2,3));
#endif
    vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
    vweights = vec_splat(vtemp, 3);
    vweightd = vec_splat(vtemp, 5);
    voffset = vec_splat(vtemp, 7);
    dst_aligned = !((unsigned long)dst & 0xf);
    src_aligned = !((unsigned long)src & 0xf);

    for (y = 0; y < height; y++) {
        vdst = vec_ld(0, dst);
        vsrc = vec_ld(0, src);

        v0 = (vec_s16)VEC_MERGEH(zero_u8v, vdst);
        v1 = (vec_s16)VEC_MERGEL(zero_u8v, vdst);
        v2 = (vec_s16)VEC_MERGEH(zero_u8v, vsrc);
        v3 = (vec_s16)VEC_MERGEL(zero_u8v, vsrc);

        if (w == 8) {
            if (src_aligned)
                v3 = v2;
            else
                v2 = v3;
        }

        if (w == 16 || dst_aligned) {
            v0 = vec_mladd(v0, vweightd, zero_s16v);
            v2 = vec_mladd(v2, vweights, zero_s16v);

            v0 = vec_adds(v0, voffset);
            v0 = vec_adds(v0, v2);
            v0 = vec_sra(v0, vlog2_denom);
        }
        if (w == 16 || !dst_aligned) {
            v1 = vec_mladd(v1, vweightd, zero_s16v);
            v3 = vec_mladd(v3, vweights, zero_s16v);

            v1 = vec_adds(v1, voffset);
            v1 = vec_adds(v1, v3);
            v1 = vec_sra(v1, vlog2_denom);
        }
        vdst = vec_packsu(v0, v1);
        vec_st(vdst, 0, dst);

        dst += stride;
        src += stride;
    }
}

#define H264_WEIGHT(W) \
static void weight_h264_pixels ## W ## _altivec(uint8_t *block, ptrdiff_t stride, int height, \
                                                int log2_denom, int weight, int offset) \
{ \
    weight_h264_W_altivec(block, stride, height, log2_denom, weight, offset, W); \
}\
static void biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, \
                                                  int log2_denom, int weightd, int weights, int offset) \
{ \
    biweight_h264_W_altivec(dst, src, stride, height, log2_denom, weightd, weights, offset, W); \
}

H264_WEIGHT(16)
H264_WEIGHT( 8)
#endif /* HAVE_ALTIVEC */

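/*
 * Runtime initialization: install the AltiVec implementations for 8-bit
 * decoding when the CPU reports AltiVec support; h264_idct_add8 is only
 * hooked up for chroma_format_idc <= 1 (4:2:0).
 */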
av_cold void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth,
                                 const int chroma_format_idc)
{
#if HAVE_ALTIVEC
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

    if (bit_depth == 8) {
        c->h264_idct_add = h264_idct_add_altivec;
        if (chroma_format_idc <= 1)
            c->h264_idct_add8 = h264_idct_add8_altivec;
        c->h264_idct_add16 = h264_idct_add16_altivec;
        c->h264_idct_add16intra = h264_idct_add16intra_altivec;
        c->h264_idct_dc_add = h264_idct_dc_add_altivec;
        c->h264_idct8_dc_add = h264_idct8_dc_add_altivec;
        c->h264_idct8_add = h264_idct8_add_altivec;
        c->h264_idct8_add4 = h264_idct8_add4_altivec;
        c->h264_v_loop_filter_luma = h264_v_loop_filter_luma_altivec;
        c->h264_h_loop_filter_luma = h264_h_loop_filter_luma_altivec;

        c->weight_h264_pixels_tab[0] = weight_h264_pixels16_altivec;
        c->weight_h264_pixels_tab[1] = weight_h264_pixels8_altivec;
        c->biweight_h264_pixels_tab[0] = biweight_h264_pixels16_altivec;
        c->biweight_h264_pixels_tab[1] = biweight_h264_pixels8_altivec;
    }
#endif /* HAVE_ALTIVEC */
}