FFmpeg
hevcdsp_init_aarch64.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Reimar Döffinger
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <stdint.h>
22 
23 #include "libavutil/attributes.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/aarch64/cpu.h"
26 #include "libavcodec/hevcdsp.h"
27 
/*
 * Residual-add kernels (NEON).
 *
 * Naming: ff_hevc_add_residual_<block size>_<bit depth>_neon.
 * These are declarations only; the definitions are not in this file
 * (presumably AArch64 assembly -- confirm).
 */
void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, const int16_t *coeffs,
        ptrdiff_t stride);
void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, const int16_t *coeffs,
        ptrdiff_t stride);
void ff_hevc_add_residual_4x4_12_neon(uint8_t *_dst, const int16_t *coeffs,
        ptrdiff_t stride);
void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, const int16_t *coeffs,
        ptrdiff_t stride);
void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, const int16_t *coeffs,
        ptrdiff_t stride);
void ff_hevc_add_residual_8x8_12_neon(uint8_t *_dst, const int16_t *coeffs,
        ptrdiff_t stride);
void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, const int16_t *coeffs,
        ptrdiff_t stride);
void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, const int16_t *coeffs,
        ptrdiff_t stride);
void ff_hevc_add_residual_16x16_12_neon(uint8_t *_dst, const int16_t *coeffs,
        ptrdiff_t stride);
void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, const int16_t *coeffs,
        ptrdiff_t stride);
void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, const int16_t *coeffs,
        ptrdiff_t stride);
void ff_hevc_add_residual_32x32_12_neon(uint8_t *_dst, const int16_t *coeffs,
        ptrdiff_t stride);
/*
 * Inverse-transform kernels (NEON).
 *
 * The full IDCT variants take the coefficient buffer plus a col_limit
 * argument; the *_dc_* variants take only the coefficient buffer.
 * The trailing number before "_neon" is the bit depth the kernel is
 * registered for. Declarations only; definitions are not in this file.
 */
void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);

void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
/*
 * SAO band/edge filter kernels (NEON), 8-bit variants.
 *
 * Note the asymmetry: the band filter takes separate destination and
 * source strides, while the edge filters take only stride_dst.
 * Declarations only; definitions are not in this file.
 */
void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src,
        ptrdiff_t stride_dst, ptrdiff_t stride_src,
        const int16_t *sao_offset_val, int sao_left_class,
        int width, int height);
void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo,
        int width, int height);
void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo,
        int width, int height);
/*
 * Horizontal qpel kernels (NEON), 8-bit variants.
 *
 * Three families, each provided as h4/h6/h8/h12/h16:
 *   - plain: writes int16_t output to dst;
 *   - uni:   writes uint8_t output to _dst with its own stride;
 *   - bi:    like uni, with an additional int16_t input src2.
 * Declarations only; definitions are not in this file.
 */
void ff_hevc_put_hevc_qpel_h4_8_neon(int16_t *dst, const uint8_t *_src,
        ptrdiff_t _srcstride, int height,
        intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_h6_8_neon(int16_t *dst, const uint8_t *_src,
        ptrdiff_t _srcstride, int height,
        intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_h8_8_neon(int16_t *dst, const uint8_t *_src,
        ptrdiff_t _srcstride, int height,
        intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_h12_8_neon(int16_t *dst, const uint8_t *_src,
        ptrdiff_t _srcstride, int height,
        intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_h16_8_neon(int16_t *dst, const uint8_t *_src,
        ptrdiff_t _srcstride, int height,
        intptr_t mx, intptr_t my, int width);

void ff_hevc_put_hevc_qpel_uni_h4_8_neon(uint8_t *_dst, ptrdiff_t _dststride,
        const uint8_t *_src, ptrdiff_t _srcstride, int height,
        intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_uni_h6_8_neon(uint8_t *_dst, ptrdiff_t _dststride,
        const uint8_t *_src, ptrdiff_t _srcstride, int height,
        intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_uni_h8_8_neon(uint8_t *_dst, ptrdiff_t _dststride,
        const uint8_t *_src, ptrdiff_t _srcstride, int height,
        intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_uni_h12_8_neon(uint8_t *_dst, ptrdiff_t _dststride,
        const uint8_t *_src, ptrdiff_t _srcstride, int height,
        intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_uni_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride,
        const uint8_t *_src, ptrdiff_t _srcstride, int height,
        intptr_t mx, intptr_t my, int width);

void ff_hevc_put_hevc_qpel_bi_h4_8_neon(uint8_t *_dst, ptrdiff_t _dststride,
        const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
        int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_bi_h6_8_neon(uint8_t *_dst, ptrdiff_t _dststride,
        const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
        int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_bi_h8_8_neon(uint8_t *_dst, ptrdiff_t _dststride,
        const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
        int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_bi_h12_8_neon(uint8_t *_dst, ptrdiff_t _dststride,
        const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
        int height, intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_bi_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride,
        const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
        int height, intptr_t mx, intptr_t my, int width);
112 
114 {
115  if (!have_neon(av_get_cpu_flags())) return;
116 
117  if (bit_depth == 8) {
118  c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon;
119  c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon;
120  c->add_residual[2] = ff_hevc_add_residual_16x16_8_neon;
121  c->add_residual[3] = ff_hevc_add_residual_32x32_8_neon;
122  c->idct[1] = ff_hevc_idct_8x8_8_neon;
123  c->idct[2] = ff_hevc_idct_16x16_8_neon;
124  c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon;
125  c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
126  c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
127  c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
128  c->sao_band_filter[0] =
129  c->sao_band_filter[1] =
130  c->sao_band_filter[2] =
131  c->sao_band_filter[3] =
132  c->sao_band_filter[4] = ff_hevc_sao_band_filter_8x8_8_neon;
133  c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8x8_8_neon;
134  c->sao_edge_filter[1] =
135  c->sao_edge_filter[2] =
136  c->sao_edge_filter[3] =
137  c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_16x16_8_neon;
138  c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_neon;
139  c->put_hevc_qpel[2][0][1] = ff_hevc_put_hevc_qpel_h6_8_neon;
140  c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_neon;
141  c->put_hevc_qpel[4][0][1] =
142  c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h12_8_neon;
143  c->put_hevc_qpel[5][0][1] =
144  c->put_hevc_qpel[7][0][1] =
145  c->put_hevc_qpel[8][0][1] =
146  c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h16_8_neon;
147  c->put_hevc_qpel_uni[1][0][1] = ff_hevc_put_hevc_qpel_uni_h4_8_neon;
148  c->put_hevc_qpel_uni[2][0][1] = ff_hevc_put_hevc_qpel_uni_h6_8_neon;
149  c->put_hevc_qpel_uni[3][0][1] = ff_hevc_put_hevc_qpel_uni_h8_8_neon;
150  c->put_hevc_qpel_uni[4][0][1] =
151  c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_qpel_uni_h12_8_neon;
152  c->put_hevc_qpel_uni[5][0][1] =
153  c->put_hevc_qpel_uni[7][0][1] =
154  c->put_hevc_qpel_uni[8][0][1] =
155  c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_qpel_uni_h16_8_neon;
156  c->put_hevc_qpel_bi[1][0][1] = ff_hevc_put_hevc_qpel_bi_h4_8_neon;
157  c->put_hevc_qpel_bi[2][0][1] = ff_hevc_put_hevc_qpel_bi_h6_8_neon;
158  c->put_hevc_qpel_bi[3][0][1] = ff_hevc_put_hevc_qpel_bi_h8_8_neon;
159  c->put_hevc_qpel_bi[4][0][1] =
160  c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_qpel_bi_h12_8_neon;
161  c->put_hevc_qpel_bi[5][0][1] =
162  c->put_hevc_qpel_bi[7][0][1] =
163  c->put_hevc_qpel_bi[8][0][1] =
164  c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_qpel_bi_h16_8_neon;
165  }
166  if (bit_depth == 10) {
167  c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
168  c->add_residual[1] = ff_hevc_add_residual_8x8_10_neon;
169  c->add_residual[2] = ff_hevc_add_residual_16x16_10_neon;
170  c->add_residual[3] = ff_hevc_add_residual_32x32_10_neon;
171  c->idct[1] = ff_hevc_idct_8x8_10_neon;
172  c->idct[2] = ff_hevc_idct_16x16_10_neon;
173  c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_neon;
174  c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_neon;
175  c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_neon;
176  c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_neon;
177  }
178  if (bit_depth == 12) {
179  c->add_residual[0] = ff_hevc_add_residual_4x4_12_neon;
180  c->add_residual[1] = ff_hevc_add_residual_8x8_12_neon;
181  c->add_residual[2] = ff_hevc_add_residual_16x16_12_neon;
182  c->add_residual[3] = ff_hevc_add_residual_32x32_12_neon;
183  }
184 }
bit_depth
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:227
ff_hevc_idct_4x4_dc_8_neon
void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs)
ff_hevc_put_hevc_qpel_h4_8_neon
void ff_hevc_put_hevc_qpel_h4_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_bi_h6_8_neon
void ff_hevc_put_hevc_qpel_bi_h6_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_sao_edge_filter_8x8_8_neon
void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
ff_hevc_idct_16x16_8_neon
void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit)
ff_hevc_put_hevc_qpel_bi_h12_8_neon
void ff_hevc_put_hevc_qpel_bi_h12_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_h8_8_neon
void ff_hevc_put_hevc_qpel_h8_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:103
ff_hevc_add_residual_16x16_10_neon
void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_add_residual_4x4_10_neon
void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_8x8_8_neon
void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit)
ff_hevc_idct_16x16_dc_10_neon
void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs)
av_cold
#define av_cold
Definition: attributes.h:90
width
#define width
ff_hevc_add_residual_32x32_12_neon
void ff_hevc_add_residual_32x32_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_put_hevc_qpel_uni_h8_8_neon
void ff_hevc_put_hevc_qpel_uni_h8_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_uni_h12_8_neon
void ff_hevc_put_hevc_qpel_uni_h12_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_bi_h8_8_neon
void ff_hevc_put_hevc_qpel_bi_h8_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
hevcdsp.h
ff_hevc_put_hevc_qpel_bi_h4_8_neon
void ff_hevc_put_hevc_qpel_bi_h4_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_idct_16x16_dc_8_neon
void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs)
ff_hevc_idct_32x32_dc_10_neon
void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs)
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no Undefined Behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not [...] (excerpt truncated at the linked symbol `c`)
Definition: undefined.txt:32
ff_hevc_put_hevc_qpel_uni_h4_8_neon
void ff_hevc_put_hevc_qpel_uni_h4_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_sao_band_filter_8x8_8_neon
void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height)
ff_hevc_add_residual_8x8_8_neon
void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
cpu.h
ff_hevc_add_residual_16x16_12_neon
void ff_hevc_add_residual_16x16_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_add_residual_8x8_12_neon
void ff_hevc_add_residual_8x8_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_put_hevc_qpel_h6_8_neon
void ff_hevc_put_hevc_qpel_h6_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_idct_4x4_dc_10_neon
void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs)
have_neon
#define have_neon(flags)
Definition: cpu.h:26
height
#define height
ff_hevc_idct_8x8_10_neon
void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit)
HEVCDSPContext
Definition: hevcdsp.h:47
attributes.h
ff_hevc_idct_32x32_dc_8_neon
void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs)
src2
const pixel * src2
Definition: h264pred_template.c:422
ff_hevc_idct_8x8_dc_8_neon
void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs)
ff_hevc_put_hevc_qpel_uni_h6_8_neon
void ff_hevc_put_hevc_qpel_uni_h6_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_h12_8_neon
void ff_hevc_put_hevc_qpel_h12_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_uni_h16_8_neon
void ff_hevc_put_hevc_qpel_uni_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_idct_8x8_dc_10_neon
void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs)
ff_hevc_dsp_init_aarch64
av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
Definition: hevcdsp_init_aarch64.c:113
stride
#define stride
Definition: h264pred_template.c:537
ff_hevc_sao_edge_filter_16x16_8_neon
void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
ff_hevc_add_residual_32x32_8_neon
void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_put_hevc_qpel_h16_8_neon
void ff_hevc_put_hevc_qpel_h16_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_idct_16x16_10_neon
void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit)
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
ff_hevc_add_residual_4x4_8_neon
void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
cpu.h
ff_hevc_add_residual_32x32_10_neon
void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_add_residual_8x8_10_neon
void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_add_residual_16x16_8_neon
void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_put_hevc_qpel_bi_h16_8_neon
void ff_hevc_put_hevc_qpel_bi_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_add_residual_4x4_12_neon
void ff_hevc_add_residual_4x4_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)