FFmpeg
swscale_altivec.c
Go to the documentation of this file.
1 /*
2  * AltiVec-enhanced yuv2yuvX
3  *
4  * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  * based on the equivalent C code in swscale.c
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include <inttypes.h>
25 
26 #include "config.h"
27 #include "libswscale/swscale.h"
29 #include "libavutil/attributes.h"
30 #include "libavutil/cpu.h"
31 #include "libavutil/intfloat.h"
32 #include "yuv2rgb_altivec.h"
34 
35 #if HAVE_ALTIVEC
36 #if HAVE_BIGENDIAN
/* Zero vector used by the template macros; it is #undef'd again after the
 * template has been included. */
#define vzero vec_splat_s32(0)

/*
 * Load the vector at byte offset ((b) << 1) + 16 from s, combine it with the
 * previously loaded vector `a` through the permute mask `c`, and keep the new
 * load in `a` for the next call.
 * NOTE: the result is written to `ls`, which is NOT a parameter; the
 * expansion site must have a variable of that name in scope.
 */
#define GET_LS(a,b,c,s) {\
 vector signed short l2 = vec_ld(((b) << 1) + 16, s);\
 ls = vec_perm(a, l2, c);\
 a = l2;\
 }

/*
 * One accumulation step: fetch the next source vector with GET_LS, multiply
 * it by `filter` (even lanes via vec_mule, odd lanes via vec_mulo),
 * re-interleave the 32-bit products with vec_mergeh/vec_mergel and add them
 * to the accumulators d1 and d2. `l1` carries the previous load between steps.
 */
#define yuv2planeX_8(d1, d2, l1, src, x, perm, filter) do {\
 vector signed short ls;\
 vector signed int vf1, vf2, i1, i2;\
 GET_LS(l1, x, perm, src);\
 i1 = vec_mule(filter, ls);\
 i2 = vec_mulo(filter, ls);\
 vf1 = vec_mergeh(i1, i2);\
 vf2 = vec_mergel(i1, i2);\
 d1 = vec_add(d1, vf1);\
 d2 = vec_add(d2, vf2);\
 } while (0)
56 
/*
 * Load a filter vector from f at byte offset `joffset` and permute it with
 * itself using the vec_lvsl(joffset, f) mask.
 * NOTE: `joffset` is not a parameter; it is taken from the expansion scope.
 */
#define LOAD_FILTER(vf,f) {\
 vector unsigned char perm0 = vec_lvsl(joffset, f);\
 vf = vec_ld(joffset, f);\
 vf = vec_perm(vf, vf, perm0);\
}
/*
 * Compute the vec_lvsl permute mask `p` for s at byte offset `xoffset` and
 * load the first source vector into ll1.
 * NOTE: `xoffset` is not a parameter; it is taken from the expansion scope.
 */
#define LOAD_L1(ll1,s,p){\
 p = vec_lvsl(xoffset, s);\
 ll1 = vec_ld(xoffset, s);\
}
66 
67 // The shift by 3 in GET_VF4 below is 2 (log2 of filterSize == 4) + 1 (log2 of sizeof(short) == 2).
68 
69 // The neat trick: We only care for half the elements,
70 // high or low depending on (i<<3)%16 (it's 0 or 8 here),
71 // and we're going to use vec_mule, so we choose
72 // carefully how to "unpack" the elements into the even slots.
/*
 * Load the filter coefficients for output index `a` when filterSize == 4.
 *
 * The byte offset is (a) << 3, so it is either 0 or 8 within a 16-byte line.
 * The wanted half of the loaded vector is interleaved with zeros (low half
 * via vec_mergel when the offset is 8 mod 16, high half via vec_mergeh
 * otherwise), which places the coefficients in the even slots.
 *
 * `a` is parenthesized so that arguments built from operators binding less
 * tightly than << (e.g. `i & 1`) are shifted as a whole.
 * Requires the `vzero` macro to be defined at the expansion site.
 */
#define GET_VF4(a, vf, f) {\
    vf = vec_ld((a) << 3, f);\
    if (((a) << 3) % 16)\
        vf = vec_mergel(vf, (vector signed short)vzero);\
    else\
        vf = vec_mergeh(vf, (vector signed short)vzero);\
}
/* Load the first source vector at byte offset pos from s into sv and compute
 * the matching vec_lvsl permute mask into per. */
#define FIRST_LOAD(sv, pos, s, per) {\
 sv = vec_ld(pos, s);\
 per = vec_lvsl(pos, s);\
}
/* Copy s0 into d0 and s1 into d1. */
#define UPDATE_PTR(s0, d0, s1, d1) {\
 d0 = s0;\
 d1 = s1;\
}
/* Load the following vector (byte offset pos + a + 16) into v1 and combine it
 * with the earlier load v0 through the permute mask per, giving vf. */
#define LOAD_SRCV(pos, a, s, per, v0, v1, vf) {\
 v1 = vec_ld(pos + a + 16, s);\
 vf = vec_perm(v0, v1, per);\
}
/*
 * Like LOAD_SRCV, but the second vector is only loaded when the address
 * s + pos lies more than 8 bytes into its 16-byte line, i.e. when an 8-byte
 * fetch starting there would run into the next line.
 *
 * The permute uses the macro's own `v1` parameter. The previous version
 * referenced a variable literally named `src_v1`, so it only worked when the
 * caller happened to pass a variable of that name.
 *
 * NOTE(review): when the second load is skipped, v1 keeps whatever value the
 * caller had; presumably the permute mask then selects nothing from it.
 */
#define LOAD_SRCV8(pos, a, s, per, v0, v1, vf) {\
    if ((((uintptr_t)(s) + (pos)) % 16) > 8) {\
        v1 = vec_ld((pos) + (a) + 16, s);\
    }\
    vf = vec_perm(v0, v1, per);\
}
/*
 * Load the next filter vector for output index `a`, tap index `b`, from f at
 * byte offset (a) * 2 * filterSize + (b) * 2 + 16 + (off) into vf1, and
 * combine it with the earlier load vf0 through the permute mask per.
 *
 * `a`, `b` and `off` are parenthesized so expression arguments such as
 * `i + 1` are scaled as a whole.
 * NOTE: `filterSize` is not a parameter; it is taken from the expansion scope.
 */
#define GET_VFD(a, b, f, vf0, vf1, per, vf, off) {\
    vf1 = vec_ld(((a) * 2 * filterSize) + ((b) * 2) + 16 + (off), f);\
    vf = vec_perm(vf0, vf1, per);\
}
102 
/* Include the shared PPC template; any name it builds through FUNC() gets an
 * "_altivec" suffix. The helper macros above are what the template expands. */
#define FUNC(name) name ## _altivec
#include "swscale_ppc_template.c"
#undef FUNC

/* vzero was only needed by the template; a local variable of the same name is
 * declared further down in this file. */
#undef vzero
108 
109 #endif /* HAVE_BIGENDIAN */
110 
/* Right shift applied to the 32-bit intermediate samples by the vector code. */
#define SHIFT 3

/*
 * Shift `val` right by `shift`, clip it with av_clip_<signedness>16() and add
 * `bias`.
 *
 * NOTE: `shift` (lower case) is not a parameter; the expansion site must have
 * a variable of that name in scope.
 * Arguments are parenthesized so that expression arguments are evaluated as
 * a whole.
 */
#define get_pixel(val, bias, signedness) \
    ((bias) + av_clip_ ## signedness ## 16((val) >> shift))
115 
116 static void
117 yuv2plane1_float_u(const int32_t *src, float *dest, int dstW, int start)
118 {
119  static const int shift = 3;
120  static const float float_mult = 1.0f / 65535.0f;
121  int i, val;
122  uint16_t val_uint;
123 
124  for (i = start; i < dstW; ++i){
125  val = src[i] + (1 << (shift - 1));
126  val_uint = get_pixel(val, 0, uint);
127  dest[i] = float_mult * (float)val_uint;
128  }
129 }
130 
131 static void
132 yuv2plane1_float_bswap_u(const int32_t *src, uint32_t *dest, int dstW, int start)
133 {
134  static const int shift = 3;
135  static const float float_mult = 1.0f / 65535.0f;
136  int i, val;
137  uint16_t val_uint;
138 
139  for (i = start; i < dstW; ++i){
140  val = src[i] + (1 << (shift - 1));
141  val_uint = get_pixel(val, 0, uint);
142  dest[i] = av_bswap32(av_float2int(float_mult * (float)val_uint));
143  }
144 }
145 
146 static void yuv2plane1_float_altivec(const int32_t *src, float *dest, int dstW)
147 {
148  const int dst_u = -(uintptr_t)dest & 3;
149  const int add = (1 << (SHIFT - 1));
150  const int clip = (1 << 16) - 1;
151  const float fmult = 1.0f / 65535.0f;
152  const vec_u32 vadd = (vec_u32) {add, add, add, add};
153  const vec_u32 vshift = (vec_u32) vec_splat_u32(SHIFT);
154  const vec_u32 vlargest = (vec_u32) {clip, clip, clip, clip};
155  const vec_f vmul = (vec_f) {fmult, fmult, fmult, fmult};
156  const vec_f vzero = (vec_f) {0, 0, 0, 0};
157  vec_u32 v;
158  vec_f vd;
159  int i;
160 
161  yuv2plane1_float_u(src, dest, dst_u, 0);
162 
163  for (i = dst_u; i < dstW - 3; i += 4) {
164  v = vec_ld(0, (const uint32_t *) &src[i]);
165  v = vec_add(v, vadd);
166  v = vec_sr(v, vshift);
167  v = vec_min(v, vlargest);
168 
169  vd = vec_ctf(v, 0);
170  vd = vec_madd(vd, vmul, vzero);
171 
172  vec_st(vd, 0, &dest[i]);
173  }
174 
175  yuv2plane1_float_u(src, dest, dstW, i);
176 }
177 
178 static void yuv2plane1_float_bswap_altivec(const int32_t *src, uint32_t *dest, int dstW)
179 {
180  const int dst_u = -(uintptr_t)dest & 3;
181  const int add = (1 << (SHIFT - 1));
182  const int clip = (1 << 16) - 1;
183  const float fmult = 1.0f / 65535.0f;
184  const vec_u32 vadd = (vec_u32) {add, add, add, add};
185  const vec_u32 vshift = (vec_u32) vec_splat_u32(SHIFT);
186  const vec_u32 vlargest = (vec_u32) {clip, clip, clip, clip};
187  const vec_f vmul = (vec_f) {fmult, fmult, fmult, fmult};
188  const vec_f vzero = (vec_f) {0, 0, 0, 0};
189  const vec_u32 vswapbig = (vec_u32) {16, 16, 16, 16};
190  const vec_u16 vswapsmall = vec_splat_u16(8);
191  vec_u32 v;
192  vec_f vd;
193  int i;
194 
195  yuv2plane1_float_bswap_u(src, dest, dst_u, 0);
196 
197  for (i = dst_u; i < dstW - 3; i += 4) {
198  v = vec_ld(0, (const uint32_t *) &src[i]);
199  v = vec_add(v, vadd);
200  v = vec_sr(v, vshift);
201  v = vec_min(v, vlargest);
202 
203  vd = vec_ctf(v, 0);
204  vd = vec_madd(vd, vmul, vzero);
205 
206  vd = (vec_f) vec_rl((vec_u32) vd, vswapbig);
207  vd = (vec_f) vec_rl((vec_u16) vd, vswapsmall);
208 
209  vec_st(vd, 0, (float *) &dest[i]);
210  }
211 
212  yuv2plane1_float_bswap_u(src, dest, dstW, i);
213 }
214 
/*
 * Generate yuv2plane1_float<suffix>_altivec(), a wrapper with the five-argument
 * signature used for the yuv2plane1 hook. It casts src to int32_t and dest to
 * out_type and forwards to impl; dither and offset are not used.
 */
#define yuv2plane1_float(impl, out_type, suffix)                              \
static void yuv2plane1_float ## suffix ## _altivec(const int16_t *src,        \
                                                   uint8_t *dest,             \
                                                   int dstW,                  \
                                                   const uint8_t *dither,     \
                                                   int offset)                \
{                                                                             \
    impl((const int32_t *)src, (out_type *)dest, dstW);                       \
}

/* The host's own byte order gets the plain variant, the other one the
 * byte-swapping variant. */
#if !HAVE_BIGENDIAN
yuv2plane1_float(yuv2plane1_float_altivec,       float,    LE)
yuv2plane1_float(yuv2plane1_float_bswap_altivec, uint32_t, BE)
#else
yuv2plane1_float(yuv2plane1_float_altivec,       float,    BE)
yuv2plane1_float(yuv2plane1_float_bswap_altivec, uint32_t, LE)
#endif
230 
231 #endif /* HAVE_ALTIVEC */
232 
234 {
235 #if HAVE_ALTIVEC
236  enum AVPixelFormat dstFormat = c->opts.dst_format;
237 
239  return;
240 
241 #if HAVE_BIGENDIAN
242  if (c->srcBpc == 8 && c->dstBpc <= 14) {
243  c->hyScale = c->hcScale = hScale_real_altivec;
244  }
245  if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat) &&
246  dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE &&
247  !c->needAlpha) {
248  c->yuv2planeX = yuv2planeX_altivec;
249  }
250 #endif
251 
252  if (dstFormat == AV_PIX_FMT_GRAYF32BE) {
253  c->yuv2plane1 = yuv2plane1_floatBE_altivec;
254  } else if (dstFormat == AV_PIX_FMT_GRAYF32LE) {
255  c->yuv2plane1 = yuv2plane1_floatLE_altivec;
256  }
257 
258  /* The following list of supported dstFormat values should
259  * match what's found in the body of ff_yuv2packedX_altivec() */
260  if (!(c->opts.flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->needAlpha) {
261  switch (c->opts.dst_format) {
262  case AV_PIX_FMT_ABGR:
263  c->yuv2packedX = ff_yuv2abgr_X_altivec;
264  break;
265  case AV_PIX_FMT_BGRA:
266  c->yuv2packedX = ff_yuv2bgra_X_altivec;
267  break;
268  case AV_PIX_FMT_ARGB:
269  c->yuv2packedX = ff_yuv2argb_X_altivec;
270  break;
271  case AV_PIX_FMT_RGBA:
272  c->yuv2packedX = ff_yuv2rgba_X_altivec;
273  break;
274  case AV_PIX_FMT_BGR24:
275  c->yuv2packedX = ff_yuv2bgr24_X_altivec;
276  break;
277  case AV_PIX_FMT_RGB24:
278  c->yuv2packedX = ff_yuv2rgb24_X_altivec;
279  break;
280  }
281  }
282 #endif /* HAVE_ALTIVEC */
283 
285 }
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:156
SHIFT
#define SHIFT
Definition: median_template.c:40
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
AV_PIX_FMT_BGRA
@ AV_PIX_FMT_BGRA
packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
Definition: pixfmt.h:102
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
av_float2int
static av_always_inline uint32_t av_float2int(float f)
Reinterpret a float as a 32-bit integer.
Definition: intfloat.h:50
intfloat.h
AV_PIX_FMT_GRAYF32LE
@ AV_PIX_FMT_GRAYF32LE
IEEE-754 single precision Y, 32bpp, little-endian.
Definition: pixfmt.h:364
is16BPS
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:727
val
static double val(void *priv, double ch)
Definition: aeval.c:77
isNBPS
static av_always_inline int isNBPS(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:741
av_cold
#define av_cold
Definition: attributes.h:90
clip
clip
Definition: af_crystalizer.c:122
float
float
Definition: af_crystalizer.c:122
AV_PIX_FMT_RGBA
@ AV_PIX_FMT_RGBA
packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
Definition: pixfmt.h:100
isSemiPlanarYUV
static av_always_inline int isSemiPlanarYUV(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:773
yuv2plane1_float
yuv2plane1_float(yuv2plane1_float_c_template, yuv2plane1_float(float, LE)
Definition: output.c:312
AV_CPU_FLAG_ALTIVEC
#define AV_CPU_FLAG_ALTIVEC
standard
Definition: cpu.h:61
AV_PIX_FMT_ABGR
@ AV_PIX_FMT_ABGR
packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
Definition: pixfmt.h:101
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
AV_PIX_FMT_RGB24
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:75
vec_u32
#define vec_u32
Definition: util_altivec.h:38
ff_sws_init_swscale_vsx
av_cold void ff_sws_init_swscale_vsx(SwsInternal *c)
Definition: swscale_vsx.c:2019
shift
static int shift(int a, int b)
Definition: bonk.c:261
cpu.h
av_bswap32
#define av_bswap32
Definition: bswap.h:47
yuv2rgb_altivec.h
attributes.h
ff_sws_init_swscale_ppc
av_cold void ff_sws_init_swscale_ppc(SwsInternal *c)
Definition: swscale_altivec.c:233
AV_PIX_FMT_ARGB
@ AV_PIX_FMT_ARGB
packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
Definition: pixfmt.h:99
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
swscale_internal.h
vshift
static int vshift(enum AVPixelFormat fmt, int plane)
Definition: graph.c:99
SwsInternal
Definition: swscale_internal.h:317
AV_PIX_FMT_GRAYF32BE
@ AV_PIX_FMT_GRAYF32BE
IEEE-754 single precision Y, 32bpp, big-endian.
Definition: pixfmt.h:363
SWS_FULL_CHR_H_INT
@ SWS_FULL_CHR_H_INT
Perform full chroma upsampling when upscaling to RGB.
Definition: swscale.h:132
swscale_ppc_template.c
util_altivec.h
int32_t
int32_t
Definition: audioconvert.c:56
vec_f
#define vec_f
Definition: util_altivec.h:40
src
#define src
Definition: vp8dsp.c:248
vec_u16
#define vec_u16
Definition: util_altivec.h:36
swscale.h