FFmpeg
pixblockdsp_alpha.c
Go to the documentation of this file.
1 /*
2  * SIMD-optimized pixel operations
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/attributes.h"
22 #include "libavcodec/pixblockdsp.h"
23 #include "asm.h"
24 
/**
 * Copy an 8x8 block of 8-bit pixels into a block of 16-bit values.
 *
 * Eight rows are processed. For each row one 64-bit word is loaded from
 * the source and two 64-bit words (eight int16_t slots) are stored to the
 * destination.
 *
 * @param block  destination; advanced by 8 int16_t elements per row
 * @param pixels source; 8 bytes are loaded per row through ldq()
 * @param stride byte offset from one source row to the next
 */
static void get_pixels_mvi(int16_t *restrict block,
                           const uint8_t *restrict pixels, ptrdiff_t stride)
{
    int row;

    for (row = 0; row < 8; row++) {
        /* One load fetches all eight pixels of the current row. */
        const uint64_t packed = ldq(pixels);

        /* NOTE(review): presumably unpkbw() widens the four low bytes of
         * its argument into four 16-bit lanes (Alpha MVI UNPKBW) -- confirm
         * against asm.h. The low half of the word goes to block[0..3], the
         * high half to block[4..7]. */
        stq(unpkbw(packed),       block);
        stq(unpkbw(packed >> 32), block + 4);

        block  += 8;
        pixels += stride;
    }
}
41 
/**
 * Store the per-pixel difference s1 - s2 of two 8x8 8-bit blocks as
 * 16-bit values.
 *
 * Eight rows are processed. For each row one 64-bit word is loaded from
 * each source and two 64-bit words (eight int16_t slots) are stored.
 *
 * @param block  destination; advanced by 8 int16_t elements per row
 * @param s1     first source (minuend); 8 bytes loaded per row
 * @param s2     second source (subtrahend); 8 bytes loaded per row
 * @param stride byte offset from one row to the next, applied to both sources
 */
static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                            ptrdiff_t stride)
{
    /* 0x40 in every byte. Scaled by 4 in the loop, each byte that is kept
     * becomes 0x100, i.e. a +1 carried into the next higher byte. */
    const uint64_t borrow_seed = 0x4040404040404040ULL;
    int row;

    for (row = 0; row < 8; row++) {
        uint64_t lhs, rhs, ge_bits, diff, fixup, neg_bytes;

        lhs = ldq(s1);
        rhs = ldq(s2);

        /* NOTE(review): presumably cmpbge() yields one bit per byte lane,
         * set where the lhs byte is >= the rhs byte (unsigned), and
         * zap(v, m) clears the bytes of v selected by the set bits of m
         * (Alpha CMPBGE / ZAP) -- confirm against asm.h. */
        ge_bits = cmpbge(lhs, rhs);

        /* Full-width subtraction; under the assumption above, the fixup
         * cancels the borrow each lane with lhs < rhs took from the lane
         * above it, leaving the byte-wise difference modulo 256. */
        diff  = lhs - rhs;
        fixup = zap(borrow_seed, ge_bits);
        diff += 4 * fixup; /* the factor of 4 allows a scaled add (s4addq) */

        /* Under the same assumption: 0xff in every lane where lhs < rhs,
         * 0 elsewhere; it becomes the high byte of the 16-bit result. */
        neg_bytes = zap(-1, ge_bits);

        stq(unpkbw(diff)       | (unpkbw(neg_bytes)       << 8), block);
        stq(unpkbw(diff >> 32) | (unpkbw(neg_bytes >> 32) << 8), block + 4);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
70 
72  unsigned high_bit_depth)
73 {
74  if (amask(AMASK_MVI) == 0) {
75  if (!high_bit_depth)
76  c->get_pixels = get_pixels_mvi;
77  c->diff_pixels = diff_pixels_mvi;
78  }
79 }
get_pixels_mvi
static void get_pixels_mvi(int16_t *restrict block, const uint8_t *restrict pixels, ptrdiff_t stride)
Definition: pixblockdsp_alpha.c:25
diff_pixels_mvi
static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride)
Definition: pixblockdsp_alpha.c:42
ldq
#define ldq(p)
Definition: asm.h:59
AMASK_MVI
#define AMASK_MVI
Definition: asm.h:40
asm.h
av_cold
#define av_cold
Definition: attributes.h:90
mask
static const uint16_t mask[17]
Definition: lzw.c:38
s1
#define s1
Definition: regdef.h:38
zap
#define zap(a, b)
Definition: asm.h:111
PixblockDSPContext
Definition: pixblockdsp.h:26
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
s2
#define s2
Definition: regdef.h:39
cmpbge
#define cmpbge(a, b)
Definition: asm.h:107
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
attributes.h
ff_pixblockdsp_init_alpha
av_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth)
Definition: pixblockdsp_alpha.c:71
amask
#define amask(a)
Definition: asm.h:113
stride
#define stride
Definition: h264pred_template.c:537
stq
#define stq(l, p)
Definition: asm.h:69
unpkbw
#define unpkbw(a)
Definition: asm.h:146
AVCodecContext
main external API structure.
Definition: avcodec.h:445
d
d
Definition: ffmpeg_filter.c:409
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038
pixblockdsp.h