h264dsp_lasx.c
/*
 * Loongson LASX optimized h264dsp
 *
 * Copyright (c) 2021 Loongson Technology Corporation Limited
 * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
 *                Xiwei Gu   <guxiwei-hf@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/loongarch/loongson_intrinsics.h"
#include "h264dsp_loongarch.h"

#define AVC_LPF_P1_OR_Q1(p0_or_q0_org_in, q0_or_p0_org_in,   \
                         p1_or_q1_org_in, p2_or_q2_org_in,   \
                         neg_tc_in, tc_in, p1_or_q1_out)     \
{                                                             \
    __m256i clip3, temp;                                      \
                                                              \
    clip3 = __lasx_xvavgr_hu(p0_or_q0_org_in,                 \
                             q0_or_p0_org_in);                \
    temp  = __lasx_xvslli_h(p1_or_q1_org_in, 1);              \
    clip3 = __lasx_xvsub_h(clip3, temp);                      \
    clip3 = __lasx_xvavg_h(p2_or_q2_org_in, clip3);           \
    clip3 = __lasx_xvclip_h(clip3, neg_tc_in, tc_in);         \
    p1_or_q1_out = __lasx_xvadd_h(p1_or_q1_org_in, clip3);    \
}

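/*
 * Rough scalar equivalent of AVC_LPF_P1_OR_Q1 (reference sketch only, not
 * part of the build): the normal-strength p1/q1 update of the H.264
 * deblocking filter, applied here per pixel on the widened 16-bit lanes.
 *
 *     int avg   = (p0 + q0 + 1) >> 1;
 *     int delta = av_clip((p2 + avg - 2 * p1) >> 1, -tc, tc);
 *     p1_out    = p1 + delta;
 *
 * xvavgr_hu supplies the rounded average, xvavg_h the plain >>1, and
 * xvclip_h the [-tc, tc] clamp.
 */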

#define AVC_LPF_P0Q0(q0_or_p0_org_in, p0_or_q0_org_in,           \
                     p1_or_q1_org_in, q1_or_p1_org_in,           \
                     neg_threshold_in, threshold_in,             \
                     p0_or_q0_out, q0_or_p0_out)                 \
{                                                                 \
    __m256i q0_sub_p0, p1_sub_q1, delta;                          \
                                                                  \
    q0_sub_p0 = __lasx_xvsub_h(q0_or_p0_org_in,                   \
                               p0_or_q0_org_in);                  \
    p1_sub_q1 = __lasx_xvsub_h(p1_or_q1_org_in,                   \
                               q1_or_p1_org_in);                  \
    q0_sub_p0 = __lasx_xvslli_h(q0_sub_p0, 2);                    \
    p1_sub_q1 = __lasx_xvaddi_hu(p1_sub_q1, 4);                   \
    delta     = __lasx_xvadd_h(q0_sub_p0, p1_sub_q1);             \
    delta     = __lasx_xvsrai_h(delta, 3);                        \
    delta     = __lasx_xvclip_h(delta, neg_threshold_in,          \
                                threshold_in);                    \
    p0_or_q0_out = __lasx_xvadd_h(p0_or_q0_org_in, delta);        \
    q0_or_p0_out = __lasx_xvsub_h(q0_or_p0_org_in, delta);        \
                                                                  \
    p0_or_q0_out = __lasx_xvclip255_h(p0_or_q0_out);              \
    q0_or_p0_out = __lasx_xvclip255_h(q0_or_p0_out);              \
}

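/*
 * Rough scalar equivalent of AVC_LPF_P0Q0 (reference sketch only): the
 * normal-strength p0/q0 update of the H.264 deblocking filter.
 *
 *     int delta = av_clip((((q0 - p0) * 4) + (p1 - q1) + 4) >> 3, -tc, tc);
 *     p0_out    = av_clip_uint8(p0 + delta);
 *     q0_out    = av_clip_uint8(q0 - delta);
 *
 * The vector code keeps everything in 16-bit lanes and uses xvclip255_h for
 * the final clamp back to the 8-bit range.
 */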

void ff_h264_h_lpf_luma_8_lasx(uint8_t *data, ptrdiff_t img_width,
                               int alpha_in, int beta_in, int8_t *tc)
{
    int img_width_2x = img_width << 1;
    int img_width_4x = img_width << 2;
    int img_width_8x = img_width << 3;
    int img_width_3x = img_width_2x + img_width;
    __m256i tmp_vec0, bs_vec;
    __m256i tc_vec = {0x0101010100000000, 0x0303030302020202,
                      0x0101010100000000, 0x0303030302020202};

    tmp_vec0 = __lasx_xvldrepl_w((uint32_t*)tc, 0);
    tc_vec   = __lasx_xvshuf_b(tmp_vec0, tmp_vec0, tc_vec);
    bs_vec   = __lasx_xvslti_b(tc_vec, 0);
    bs_vec   = __lasx_xvxori_b(bs_vec, 255);
    bs_vec   = __lasx_xvandi_b(bs_vec, 1);

    if (__lasx_xbnz_v(bs_vec)) {
        uint8_t *src = data - 4;
        __m256i p3_org, p2_org, p1_org, p0_org, q0_org, q1_org, q2_org, q3_org;
        __m256i p0_asub_q0, p1_asub_p0, q1_asub_q0, alpha, beta;
        __m256i is_less_than, is_less_than_beta, is_less_than_alpha;
        __m256i is_bs_greater_than0;
        __m256i zero = __lasx_xvldi(0);

        is_bs_greater_than0 = __lasx_xvslt_bu(zero, bs_vec);

        {
            uint8_t *src_tmp = src + img_width_8x;
            __m256i row0, row1, row2, row3, row4, row5, row6, row7;
            __m256i row8, row9, row10, row11, row12, row13, row14, row15;

            DUP4_ARG2(__lasx_xvldx, src, 0, src, img_width, src, img_width_2x,
                      src, img_width_3x, row0, row1, row2, row3);
            src += img_width_4x;
            DUP4_ARG2(__lasx_xvldx, src, 0, src, img_width, src, img_width_2x,
                      src, img_width_3x, row4, row5, row6, row7);
            src -= img_width_4x;
            DUP4_ARG2(__lasx_xvldx, src_tmp, 0, src_tmp, img_width, src_tmp,
                      img_width_2x, src_tmp, img_width_3x,
                      row8, row9, row10, row11);
            src_tmp += img_width_4x;
            DUP4_ARG2(__lasx_xvldx, src_tmp, 0, src_tmp, img_width, src_tmp,
                      img_width_2x, src_tmp, img_width_3x,
                      row12, row13, row14, row15);
            src_tmp -= img_width_4x;

            LASX_TRANSPOSE16x8_B(row0, row1, row2, row3, row4, row5, row6,
                                 row7, row8, row9, row10, row11,
                                 row12, row13, row14, row15,
                                 p3_org, p2_org, p1_org, p0_org,
                                 q0_org, q1_org, q2_org, q3_org);
        }

        p0_asub_q0 = __lasx_xvabsd_bu(p0_org, q0_org);
        p1_asub_p0 = __lasx_xvabsd_bu(p1_org, p0_org);
        q1_asub_q0 = __lasx_xvabsd_bu(q1_org, q0_org);

        alpha = __lasx_xvreplgr2vr_b(alpha_in);
        beta  = __lasx_xvreplgr2vr_b(beta_in);

        is_less_than_alpha = __lasx_xvslt_bu(p0_asub_q0, alpha);
        is_less_than_beta  = __lasx_xvslt_bu(p1_asub_p0, beta);
        is_less_than       = is_less_than_alpha & is_less_than_beta;
        is_less_than_beta  = __lasx_xvslt_bu(q1_asub_q0, beta);
        is_less_than       = is_less_than_beta & is_less_than;
        is_less_than       = is_less_than & is_bs_greater_than0;

        if (__lasx_xbnz_v(is_less_than)) {
            __m256i neg_tc_h, tc_h, p1_org_h, p0_org_h, q0_org_h, q1_org_h;
            __m256i p2_asub_p0, q2_asub_q0;

            neg_tc_h = __lasx_xvneg_b(tc_vec);
            neg_tc_h = __lasx_vext2xv_h_b(neg_tc_h);
            tc_h     = __lasx_vext2xv_hu_bu(tc_vec);
            p1_org_h = __lasx_vext2xv_hu_bu(p1_org);
            p0_org_h = __lasx_vext2xv_hu_bu(p0_org);
            q0_org_h = __lasx_vext2xv_hu_bu(q0_org);

            p2_asub_p0        = __lasx_xvabsd_bu(p2_org, p0_org);
            is_less_than_beta = __lasx_xvslt_bu(p2_asub_p0, beta);
            is_less_than_beta = is_less_than_beta & is_less_than;

            if (__lasx_xbnz_v(is_less_than_beta)) {
                __m256i p2_org_h, p1_h;

                p2_org_h = __lasx_vext2xv_hu_bu(p2_org);
                AVC_LPF_P1_OR_Q1(p0_org_h, q0_org_h, p1_org_h, p2_org_h,
                                 neg_tc_h, tc_h, p1_h);
                p1_h   = __lasx_xvpickev_b(p1_h, p1_h);
                p1_h   = __lasx_xvpermi_d(p1_h, 0xd8);
                p1_org = __lasx_xvbitsel_v(p1_org, p1_h, is_less_than_beta);
                is_less_than_beta = __lasx_xvandi_b(is_less_than_beta, 1);
                tc_vec = __lasx_xvadd_b(tc_vec, is_less_than_beta);
            }

            q2_asub_q0        = __lasx_xvabsd_bu(q2_org, q0_org);
            is_less_than_beta = __lasx_xvslt_bu(q2_asub_q0, beta);
            is_less_than_beta = is_less_than_beta & is_less_than;

            q1_org_h = __lasx_vext2xv_hu_bu(q1_org);

            if (__lasx_xbnz_v(is_less_than_beta)) {
                __m256i q2_org_h, q1_h;

                q2_org_h = __lasx_vext2xv_hu_bu(q2_org);
                AVC_LPF_P1_OR_Q1(p0_org_h, q0_org_h, q1_org_h, q2_org_h,
                                 neg_tc_h, tc_h, q1_h);
                q1_h   = __lasx_xvpickev_b(q1_h, q1_h);
                q1_h   = __lasx_xvpermi_d(q1_h, 0xd8);
                q1_org = __lasx_xvbitsel_v(q1_org, q1_h, is_less_than_beta);

                is_less_than_beta = __lasx_xvandi_b(is_less_than_beta, 1);
                tc_vec = __lasx_xvadd_b(tc_vec, is_less_than_beta);
            }

            {
                __m256i neg_thresh_h, p0_h, q0_h;

                neg_thresh_h = __lasx_xvneg_b(tc_vec);
                neg_thresh_h = __lasx_vext2xv_h_b(neg_thresh_h);
                tc_h         = __lasx_vext2xv_hu_bu(tc_vec);

                AVC_LPF_P0Q0(q0_org_h, p0_org_h, p1_org_h, q1_org_h,
                             neg_thresh_h, tc_h, p0_h, q0_h);
                DUP2_ARG2(__lasx_xvpickev_b, p0_h, p0_h, q0_h, q0_h,
                          p0_h, q0_h);
                DUP2_ARG2(__lasx_xvpermi_d, p0_h, 0xd8, q0_h, 0xd8,
                          p0_h, q0_h);
                p0_org = __lasx_xvbitsel_v(p0_org, p0_h, is_less_than);
                q0_org = __lasx_xvbitsel_v(q0_org, q0_h, is_less_than);
            }

            {
                __m256i row0, row1, row2, row3, row4, row5, row6, row7;
                __m256i control = {0x0000000400000000, 0x0000000500000001,
                                   0x0000000600000002, 0x0000000700000003};

                DUP4_ARG3(__lasx_xvpermi_q, p0_org, q3_org, 0x02, p1_org,
                          q2_org, 0x02, p2_org, q1_org, 0x02, p3_org,
                          q0_org, 0x02, p0_org, p1_org, p2_org, p3_org);
                DUP2_ARG2(__lasx_xvilvl_b, p1_org, p3_org, p0_org, p2_org,
                          row0, row2);
                DUP2_ARG2(__lasx_xvilvh_b, p1_org, p3_org, p0_org, p2_org,
                          row1, row3);
                DUP2_ARG2(__lasx_xvilvl_b, row2, row0, row3, row1, row4, row6);
                DUP2_ARG2(__lasx_xvilvh_b, row2, row0, row3, row1, row5, row7);
                DUP4_ARG2(__lasx_xvperm_w, row4, control, row5, control, row6,
                          control, row7, control, row4, row5, row6, row7);
                __lasx_xvstelm_d(row4, src, 0, 0);
                __lasx_xvstelm_d(row4, src + img_width, 0, 1);
                src += img_width_2x;
                __lasx_xvstelm_d(row4, src, 0, 2);
                __lasx_xvstelm_d(row4, src + img_width, 0, 3);
                src += img_width_2x;
                __lasx_xvstelm_d(row5, src, 0, 0);
                __lasx_xvstelm_d(row5, src + img_width, 0, 1);
                src += img_width_2x;
                __lasx_xvstelm_d(row5, src, 0, 2);
                __lasx_xvstelm_d(row5, src + img_width, 0, 3);
                src += img_width_2x;
                __lasx_xvstelm_d(row6, src, 0, 0);
                __lasx_xvstelm_d(row6, src + img_width, 0, 1);
                src += img_width_2x;
                __lasx_xvstelm_d(row6, src, 0, 2);
                __lasx_xvstelm_d(row6, src + img_width, 0, 3);
                src += img_width_2x;
                __lasx_xvstelm_d(row7, src, 0, 0);
                __lasx_xvstelm_d(row7, src + img_width, 0, 1);
                src += img_width_2x;
                __lasx_xvstelm_d(row7, src, 0, 2);
                __lasx_xvstelm_d(row7, src + img_width, 0, 3);
            }
        }
    }
}

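/*
 * A note on the tc handling above (explanatory, following the H.264
 * loop-filter convention used throughout FFmpeg): tc points at four int8_t
 * strengths, one per group of four pixels along the edge, and a value of -1
 * marks bS == 0, i.e. "do not filter" for that group. xvldrepl_w plus the
 * xvshuf_b pattern broadcast the four bytes so that each byte lane of tc_vec
 * carries the tc of its own row/column group, and bs_vec ends up with 1 in
 * exactly the lanes whose tc >= 0. The later xvandi_b/xvadd_b pair bumps tc
 * by one in the lanes where p1 (or q1) was also filtered, which is what the
 * p0/q0 update expects.
 */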

void ff_h264_v_lpf_luma_8_lasx(uint8_t *data, ptrdiff_t img_width,
                               int alpha_in, int beta_in, int8_t *tc)
{
    int img_width_2x = img_width << 1;
    int img_width_3x = img_width + img_width_2x;
    __m256i tmp_vec0, bs_vec;
    __m256i tc_vec = {0x0101010100000000, 0x0303030302020202,
                      0x0101010100000000, 0x0303030302020202};

    tmp_vec0 = __lasx_xvldrepl_w((uint32_t*)tc, 0);
    tc_vec   = __lasx_xvshuf_b(tmp_vec0, tmp_vec0, tc_vec);
    bs_vec   = __lasx_xvslti_b(tc_vec, 0);
    bs_vec   = __lasx_xvxori_b(bs_vec, 255);
    bs_vec   = __lasx_xvandi_b(bs_vec, 1);

    if (__lasx_xbnz_v(bs_vec)) {
        __m256i p2_org, p1_org, p0_org, q0_org, q1_org, q2_org;
        __m256i p0_asub_q0, p1_asub_p0, q1_asub_q0, alpha, beta;
        __m256i is_less_than, is_less_than_beta, is_less_than_alpha;
        __m256i p1_org_h, p0_org_h, q0_org_h, q1_org_h;
        __m256i is_bs_greater_than0;
        __m256i zero = __lasx_xvldi(0);

        alpha = __lasx_xvreplgr2vr_b(alpha_in);
        beta  = __lasx_xvreplgr2vr_b(beta_in);

        DUP2_ARG2(__lasx_xvldx, data, -img_width_3x, data, -img_width_2x,
                  p2_org, p1_org);
        p0_org = __lasx_xvldx(data, -img_width);
        DUP2_ARG2(__lasx_xvldx, data, 0, data, img_width, q0_org, q1_org);

        is_bs_greater_than0 = __lasx_xvslt_bu(zero, bs_vec);
        p0_asub_q0 = __lasx_xvabsd_bu(p0_org, q0_org);
        p1_asub_p0 = __lasx_xvabsd_bu(p1_org, p0_org);
        q1_asub_q0 = __lasx_xvabsd_bu(q1_org, q0_org);

        is_less_than_alpha = __lasx_xvslt_bu(p0_asub_q0, alpha);
        is_less_than_beta  = __lasx_xvslt_bu(p1_asub_p0, beta);
        is_less_than       = is_less_than_alpha & is_less_than_beta;
        is_less_than_beta  = __lasx_xvslt_bu(q1_asub_q0, beta);
        is_less_than       = is_less_than_beta & is_less_than;
        is_less_than       = is_less_than & is_bs_greater_than0;

        if (__lasx_xbnz_v(is_less_than)) {
            __m256i neg_tc_h, tc_h, p2_asub_p0, q2_asub_q0;

            q2_org = __lasx_xvldx(data, img_width_2x);

            neg_tc_h = __lasx_xvneg_b(tc_vec);
            neg_tc_h = __lasx_vext2xv_h_b(neg_tc_h);
            tc_h     = __lasx_vext2xv_hu_bu(tc_vec);
            p1_org_h = __lasx_vext2xv_hu_bu(p1_org);
            p0_org_h = __lasx_vext2xv_hu_bu(p0_org);
            q0_org_h = __lasx_vext2xv_hu_bu(q0_org);

            p2_asub_p0        = __lasx_xvabsd_bu(p2_org, p0_org);
            is_less_than_beta = __lasx_xvslt_bu(p2_asub_p0, beta);
            is_less_than_beta = is_less_than_beta & is_less_than;

            if (__lasx_xbnz_v(is_less_than_beta)) {
                __m256i p1_h, p2_org_h;

                p2_org_h = __lasx_vext2xv_hu_bu(p2_org);
                AVC_LPF_P1_OR_Q1(p0_org_h, q0_org_h, p1_org_h, p2_org_h,
                                 neg_tc_h, tc_h, p1_h);
                p1_h   = __lasx_xvpickev_b(p1_h, p1_h);
                p1_h   = __lasx_xvpermi_d(p1_h, 0xd8);
                p1_h   = __lasx_xvbitsel_v(p1_org, p1_h, is_less_than_beta);
                p1_org = __lasx_xvpermi_q(p1_org, p1_h, 0x30);
                __lasx_xvst(p1_org, data - img_width_2x, 0);

                is_less_than_beta = __lasx_xvandi_b(is_less_than_beta, 1);
                tc_vec = __lasx_xvadd_b(tc_vec, is_less_than_beta);
            }

            q2_asub_q0        = __lasx_xvabsd_bu(q2_org, q0_org);
            is_less_than_beta = __lasx_xvslt_bu(q2_asub_q0, beta);
            is_less_than_beta = is_less_than_beta & is_less_than;

            q1_org_h = __lasx_vext2xv_hu_bu(q1_org);

            if (__lasx_xbnz_v(is_less_than_beta)) {
                __m256i q1_h, q2_org_h;

                q2_org_h = __lasx_vext2xv_hu_bu(q2_org);
                AVC_LPF_P1_OR_Q1(p0_org_h, q0_org_h, q1_org_h, q2_org_h,
                                 neg_tc_h, tc_h, q1_h);
                q1_h   = __lasx_xvpickev_b(q1_h, q1_h);
                q1_h   = __lasx_xvpermi_d(q1_h, 0xd8);
                q1_h   = __lasx_xvbitsel_v(q1_org, q1_h, is_less_than_beta);
                q1_org = __lasx_xvpermi_q(q1_org, q1_h, 0x30);
                __lasx_xvst(q1_org, data + img_width, 0);

                is_less_than_beta = __lasx_xvandi_b(is_less_than_beta, 1);
                tc_vec = __lasx_xvadd_b(tc_vec, is_less_than_beta);
            }

            {
                __m256i neg_thresh_h, p0_h, q0_h;

                neg_thresh_h = __lasx_xvneg_b(tc_vec);
                neg_thresh_h = __lasx_vext2xv_h_b(neg_thresh_h);
                tc_h         = __lasx_vext2xv_hu_bu(tc_vec);

                AVC_LPF_P0Q0(q0_org_h, p0_org_h, p1_org_h, q1_org_h,
                             neg_thresh_h, tc_h, p0_h, q0_h);
                DUP2_ARG2(__lasx_xvpickev_b, p0_h, p0_h, q0_h, q0_h,
                          p0_h, q0_h);
                DUP2_ARG2(__lasx_xvpermi_d, p0_h, 0xd8, q0_h, 0xd8,
                          p0_h, q0_h);
                p0_h   = __lasx_xvbitsel_v(p0_org, p0_h, is_less_than);
                q0_h   = __lasx_xvbitsel_v(q0_org, q0_h, is_less_than);
                p0_org = __lasx_xvpermi_q(p0_org, p0_h, 0x30);
                q0_org = __lasx_xvpermi_q(q0_org, q0_h, 0x30);
                __lasx_xvst(p0_org, data - img_width, 0);
                __lasx_xvst(q0_org, data, 0);
            }
        }
    }
}

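/*
 * Illustrative only (not part of this file): the loop-filter entry points in
 * this file are meant to be plugged into the shared H264DSPContext by the
 * LoongArch init code, roughly along the lines of the sketch below. Field
 * and helper names follow FFmpeg's usual h264dsp layout; check the LoongArch
 * h264dsp init file for the real wiring.
 *
 *     if (have_lasx(cpu_flags) && bit_depth == 8) {
 *         c->h264_h_loop_filter_luma       = ff_h264_h_lpf_luma_8_lasx;
 *         c->h264_v_loop_filter_luma       = ff_h264_v_lpf_luma_8_lasx;
 *         c->h264_h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_8_lasx;
 *         c->h264_v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_8_lasx;
 *     }
 */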

#define AVC_LPF_P0P1P2_OR_Q0Q1Q2(p3_or_q3_org_in, p0_or_q0_org_in,          \
                                 q3_or_p3_org_in, p1_or_q1_org_in,          \
                                 p2_or_q2_org_in, q1_or_p1_org_in,          \
                                 p0_or_q0_out, p1_or_q1_out, p2_or_q2_out)  \
{                                                                            \
    __m256i threshold;                                                       \
    __m256i const2, const3 = __lasx_xvldi(0);                                \
                                                                             \
    const2 = __lasx_xvaddi_hu(const3, 2);                                    \
    const3 = __lasx_xvaddi_hu(const3, 3);                                    \
    threshold = __lasx_xvadd_h(p0_or_q0_org_in, q3_or_p3_org_in);            \
    threshold = __lasx_xvadd_h(p1_or_q1_org_in, threshold);                  \
                                                                             \
    p0_or_q0_out = __lasx_xvslli_h(threshold, 1);                            \
    p0_or_q0_out = __lasx_xvadd_h(p0_or_q0_out, p2_or_q2_org_in);            \
    p0_or_q0_out = __lasx_xvadd_h(p0_or_q0_out, q1_or_p1_org_in);            \
    p0_or_q0_out = __lasx_xvsrar_h(p0_or_q0_out, const3);                    \
                                                                             \
    p1_or_q1_out = __lasx_xvadd_h(p2_or_q2_org_in, threshold);               \
    p1_or_q1_out = __lasx_xvsrar_h(p1_or_q1_out, const2);                    \
                                                                             \
    p2_or_q2_out = __lasx_xvmul_h(p2_or_q2_org_in, const3);                  \
    p2_or_q2_out = __lasx_xvadd_h(p2_or_q2_out, p3_or_q3_org_in);            \
    p2_or_q2_out = __lasx_xvadd_h(p2_or_q2_out, p3_or_q3_org_in);            \
    p2_or_q2_out = __lasx_xvadd_h(p2_or_q2_out, threshold);                  \
    p2_or_q2_out = __lasx_xvsrar_h(p2_or_q2_out, const3);                    \
}

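/*
 * Rough scalar equivalent (reference sketch) of the strong-filter macro
 * above, written for the p side; the q side is symmetric. xvsrar_h is a
 * rounding arithmetic shift, which supplies the "+4"/"+2" rounding terms.
 *
 *     p0_out = (p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3;
 *     p1_out = (p2 + p1 + p0 + q0 + 2) >> 2;
 *     p2_out = (2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3;
 */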

/* data[-img_width] = (uint8_t)((2 * p1 + p0 + q1 + 2) >> 2); */
#define AVC_LPF_P0_OR_Q0(p0_or_q0_org_in, q1_or_p1_org_in,               \
                         p1_or_q1_org_in, p0_or_q0_out)                  \
{                                                                         \
    __m256i const2 = __lasx_xvldi(0);                                     \
    const2 = __lasx_xvaddi_hu(const2, 2);                                 \
    p0_or_q0_out = __lasx_xvadd_h(p0_or_q0_org_in, q1_or_p1_org_in);      \
    p0_or_q0_out = __lasx_xvadd_h(p0_or_q0_out, p1_or_q1_org_in);         \
    p0_or_q0_out = __lasx_xvadd_h(p0_or_q0_out, p1_or_q1_org_in);         \
    p0_or_q0_out = __lasx_xvsrar_h(p0_or_q0_out, const2);                 \
}

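/*
 * Explanatory note for the two *_intra functions below: the H.264 intra
 * (bS == 4) filter chooses per pixel between the strong smoothing of
 * AVC_LPF_P0P1P2_OR_Q0Q1Q2 and the weak 3-tap fallback above. The extra gate
 * computed there as less_alpha_shift2_add2 corresponds to the scalar
 * condition
 *
 *     abs(p0 - q0) < (alpha >> 2) + 2
 *
 * and only where it holds (together with abs(p2 - p0) < beta, respectively
 * abs(q2 - q0) < beta) is the strong filter applied; otherwise p0/q0 get the
 * (2 * p1 + p0 + q1 + 2) >> 2 style update from AVC_LPF_P0_OR_Q0.
 */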

void ff_h264_h_lpf_luma_intra_8_lasx(uint8_t *data, ptrdiff_t img_width,
                                     int alpha_in, int beta_in)
{
    int img_width_2x = img_width << 1;
    int img_width_4x = img_width << 2;
    int img_width_3x = img_width_2x + img_width;
    uint8_t *src = data - 4;
    __m256i p0_asub_q0, p1_asub_p0, q1_asub_q0, alpha, beta;
    __m256i is_less_than, is_less_than_beta, is_less_than_alpha;
    __m256i p3_org, p2_org, p1_org, p0_org, q0_org, q1_org, q2_org, q3_org;
    __m256i zero = __lasx_xvldi(0);

    {
        __m256i row0, row1, row2, row3, row4, row5, row6, row7;
        __m256i row8, row9, row10, row11, row12, row13, row14, row15;

        DUP4_ARG2(__lasx_xvldx, src, 0, src, img_width, src, img_width_2x,
                  src, img_width_3x, row0, row1, row2, row3);
        src += img_width_4x;
        DUP4_ARG2(__lasx_xvldx, src, 0, src, img_width, src, img_width_2x,
                  src, img_width_3x, row4, row5, row6, row7);
        src += img_width_4x;
        DUP4_ARG2(__lasx_xvldx, src, 0, src, img_width, src, img_width_2x,
                  src, img_width_3x, row8, row9, row10, row11);
        src += img_width_4x;
        DUP4_ARG2(__lasx_xvldx, src, 0, src, img_width, src, img_width_2x,
                  src, img_width_3x, row12, row13, row14, row15);
        src += img_width_4x;

        LASX_TRANSPOSE16x8_B(row0, row1, row2, row3,
                             row4, row5, row6, row7,
                             row8, row9, row10, row11,
                             row12, row13, row14, row15,
                             p3_org, p2_org, p1_org, p0_org,
                             q0_org, q1_org, q2_org, q3_org);
    }

    alpha = __lasx_xvreplgr2vr_b(alpha_in);
    beta  = __lasx_xvreplgr2vr_b(beta_in);
    p0_asub_q0 = __lasx_xvabsd_bu(p0_org, q0_org);
    p1_asub_p0 = __lasx_xvabsd_bu(p1_org, p0_org);
    q1_asub_q0 = __lasx_xvabsd_bu(q1_org, q0_org);

    is_less_than_alpha = __lasx_xvslt_bu(p0_asub_q0, alpha);
    is_less_than_beta  = __lasx_xvslt_bu(p1_asub_p0, beta);
    is_less_than       = is_less_than_beta & is_less_than_alpha;
    is_less_than_beta  = __lasx_xvslt_bu(q1_asub_q0, beta);
    is_less_than       = is_less_than_beta & is_less_than;
    is_less_than       = __lasx_xvpermi_q(zero, is_less_than, 0x30);

    if (__lasx_xbnz_v(is_less_than)) {
        __m256i p2_asub_p0, q2_asub_q0, p0_h, q0_h, negate_is_less_than_beta;
        __m256i p1_org_h, p0_org_h, q0_org_h, q1_org_h;
        __m256i less_alpha_shift2_add2 = __lasx_xvsrli_b(alpha, 2);

        less_alpha_shift2_add2 = __lasx_xvaddi_bu(less_alpha_shift2_add2, 2);
        less_alpha_shift2_add2 = __lasx_xvslt_bu(p0_asub_q0,
                                                 less_alpha_shift2_add2);

        p1_org_h = __lasx_vext2xv_hu_bu(p1_org);
        p0_org_h = __lasx_vext2xv_hu_bu(p0_org);
        q0_org_h = __lasx_vext2xv_hu_bu(q0_org);
        q1_org_h = __lasx_vext2xv_hu_bu(q1_org);

        p2_asub_p0        = __lasx_xvabsd_bu(p2_org, p0_org);
        is_less_than_beta = __lasx_xvslt_bu(p2_asub_p0, beta);
        is_less_than_beta = is_less_than_beta & less_alpha_shift2_add2;
        negate_is_less_than_beta = __lasx_xvxori_b(is_less_than_beta, 0xff);
        is_less_than_beta = is_less_than_beta & is_less_than;
        negate_is_less_than_beta = negate_is_less_than_beta & is_less_than;

        /* combine and store */
        if (__lasx_xbnz_v(is_less_than_beta)) {
            __m256i p2_org_h, p3_org_h, p1_h, p2_h;

            p2_org_h = __lasx_vext2xv_hu_bu(p2_org);
            p3_org_h = __lasx_vext2xv_hu_bu(p3_org);

            AVC_LPF_P0P1P2_OR_Q0Q1Q2(p3_org_h, p0_org_h, q0_org_h, p1_org_h,
                                     p2_org_h, q1_org_h, p0_h, p1_h, p2_h);

            p0_h = __lasx_xvpickev_b(p0_h, p0_h);
            p0_h = __lasx_xvpermi_d(p0_h, 0xd8);
            DUP2_ARG2(__lasx_xvpickev_b, p1_h, p1_h, p2_h, p2_h, p1_h, p2_h);
            DUP2_ARG2(__lasx_xvpermi_d, p1_h, 0xd8, p2_h, 0xd8, p1_h, p2_h);
            p0_org = __lasx_xvbitsel_v(p0_org, p0_h, is_less_than_beta);
            p1_org = __lasx_xvbitsel_v(p1_org, p1_h, is_less_than_beta);
            p2_org = __lasx_xvbitsel_v(p2_org, p2_h, is_less_than_beta);
        }

        AVC_LPF_P0_OR_Q0(p0_org_h, q1_org_h, p1_org_h, p0_h);
        /* combine */
        p0_h = __lasx_xvpickev_b(p0_h, p0_h);
        p0_h = __lasx_xvpermi_d(p0_h, 0xd8);
        p0_org = __lasx_xvbitsel_v(p0_org, p0_h, negate_is_less_than_beta);

        /* if (tmpFlag && (unsigned)ABS(q2-q0) < thresholds->beta_in) */
        q2_asub_q0        = __lasx_xvabsd_bu(q2_org, q0_org);
        is_less_than_beta = __lasx_xvslt_bu(q2_asub_q0, beta);
        is_less_than_beta = is_less_than_beta & less_alpha_shift2_add2;
        negate_is_less_than_beta = __lasx_xvxori_b(is_less_than_beta, 0xff);
        is_less_than_beta = is_less_than_beta & is_less_than;
        negate_is_less_than_beta = negate_is_less_than_beta & is_less_than;

        /* combine and store */
        if (__lasx_xbnz_v(is_less_than_beta)) {
            __m256i q2_org_h, q3_org_h, q1_h, q2_h;

            q2_org_h = __lasx_vext2xv_hu_bu(q2_org);
            q3_org_h = __lasx_vext2xv_hu_bu(q3_org);

            AVC_LPF_P0P1P2_OR_Q0Q1Q2(q3_org_h, q0_org_h, p0_org_h, q1_org_h,
                                     q2_org_h, p1_org_h, q0_h, q1_h, q2_h);

            q0_h = __lasx_xvpickev_b(q0_h, q0_h);
            q0_h = __lasx_xvpermi_d(q0_h, 0xd8);
            DUP2_ARG2(__lasx_xvpickev_b, q1_h, q1_h, q2_h, q2_h, q1_h, q2_h);
            DUP2_ARG2(__lasx_xvpermi_d, q1_h, 0xd8, q2_h, 0xd8, q1_h, q2_h);
            q0_org = __lasx_xvbitsel_v(q0_org, q0_h, is_less_than_beta);
            q1_org = __lasx_xvbitsel_v(q1_org, q1_h, is_less_than_beta);
            q2_org = __lasx_xvbitsel_v(q2_org, q2_h, is_less_than_beta);
        }

        AVC_LPF_P0_OR_Q0(q0_org_h, p1_org_h, q1_org_h, q0_h);

        /* combine */
        q0_h = __lasx_xvpickev_b(q0_h, q0_h);
        q0_h = __lasx_xvpermi_d(q0_h, 0xd8);
        q0_org = __lasx_xvbitsel_v(q0_org, q0_h, negate_is_less_than_beta);

        /* transpose and store */
        {
            __m256i row0, row1, row2, row3, row4, row5, row6, row7;
            __m256i control = {0x0000000400000000, 0x0000000500000001,
                               0x0000000600000002, 0x0000000700000003};

            DUP4_ARG3(__lasx_xvpermi_q, p0_org, q3_org, 0x02, p1_org, q2_org,
                      0x02, p2_org, q1_org, 0x02, p3_org, q0_org, 0x02,
                      p0_org, p1_org, p2_org, p3_org);
            DUP2_ARG2(__lasx_xvilvl_b, p1_org, p3_org, p0_org, p2_org,
                      row0, row2);
            DUP2_ARG2(__lasx_xvilvh_b, p1_org, p3_org, p0_org, p2_org,
                      row1, row3);
            DUP2_ARG2(__lasx_xvilvl_b, row2, row0, row3, row1, row4, row6);
            DUP2_ARG2(__lasx_xvilvh_b, row2, row0, row3, row1, row5, row7);
            DUP4_ARG2(__lasx_xvperm_w, row4, control, row5, control, row6,
                      control, row7, control, row4, row5, row6, row7);
            src = data - 4;
            __lasx_xvstelm_d(row4, src, 0, 0);
            __lasx_xvstelm_d(row4, src + img_width, 0, 1);
            src += img_width_2x;
            __lasx_xvstelm_d(row4, src, 0, 2);
            __lasx_xvstelm_d(row4, src + img_width, 0, 3);
            src += img_width_2x;
            __lasx_xvstelm_d(row5, src, 0, 0);
            __lasx_xvstelm_d(row5, src + img_width, 0, 1);
            src += img_width_2x;
            __lasx_xvstelm_d(row5, src, 0, 2);
            __lasx_xvstelm_d(row5, src + img_width, 0, 3);
            src += img_width_2x;
            __lasx_xvstelm_d(row6, src, 0, 0);
            __lasx_xvstelm_d(row6, src + img_width, 0, 1);
            src += img_width_2x;
            __lasx_xvstelm_d(row6, src, 0, 2);
            __lasx_xvstelm_d(row6, src + img_width, 0, 3);
            src += img_width_2x;
            __lasx_xvstelm_d(row7, src, 0, 0);
            __lasx_xvstelm_d(row7, src + img_width, 0, 1);
            src += img_width_2x;
            __lasx_xvstelm_d(row7, src, 0, 2);
            __lasx_xvstelm_d(row7, src + img_width, 0, 3);
        }
    }
}

void ff_h264_v_lpf_luma_intra_8_lasx(uint8_t *data, ptrdiff_t img_width,
                                     int alpha_in, int beta_in)
{
    int img_width_2x = img_width << 1;
    int img_width_3x = img_width_2x + img_width;
    uint8_t *src = data - img_width_2x;
    __m256i p0_asub_q0, p1_asub_p0, q1_asub_q0, alpha, beta;
    __m256i is_less_than, is_less_than_beta, is_less_than_alpha;
    __m256i p1_org, p0_org, q0_org, q1_org;
    __m256i zero = __lasx_xvldi(0);

    DUP4_ARG2(__lasx_xvldx, src, 0, src, img_width, src, img_width_2x,
              src, img_width_3x, p1_org, p0_org, q0_org, q1_org);
    alpha = __lasx_xvreplgr2vr_b(alpha_in);
    beta  = __lasx_xvreplgr2vr_b(beta_in);
    p0_asub_q0 = __lasx_xvabsd_bu(p0_org, q0_org);
    p1_asub_p0 = __lasx_xvabsd_bu(p1_org, p0_org);
    q1_asub_q0 = __lasx_xvabsd_bu(q1_org, q0_org);

    is_less_than_alpha = __lasx_xvslt_bu(p0_asub_q0, alpha);
    is_less_than_beta  = __lasx_xvslt_bu(p1_asub_p0, beta);
    is_less_than       = is_less_than_beta & is_less_than_alpha;
    is_less_than_beta  = __lasx_xvslt_bu(q1_asub_q0, beta);
    is_less_than       = is_less_than_beta & is_less_than;
    is_less_than       = __lasx_xvpermi_q(zero, is_less_than, 0x30);

    if (__lasx_xbnz_v(is_less_than)) {
        __m256i p2_asub_p0, q2_asub_q0, p0_h, q0_h, negate_is_less_than_beta;
        __m256i p1_org_h, p0_org_h, q0_org_h, q1_org_h;
        __m256i p2_org = __lasx_xvldx(src, -img_width);
        __m256i q2_org = __lasx_xvldx(data, img_width_2x);
        __m256i less_alpha_shift2_add2 = __lasx_xvsrli_b(alpha, 2);

        less_alpha_shift2_add2 = __lasx_xvaddi_bu(less_alpha_shift2_add2, 2);
        less_alpha_shift2_add2 = __lasx_xvslt_bu(p0_asub_q0,
                                                 less_alpha_shift2_add2);

        p1_org_h = __lasx_vext2xv_hu_bu(p1_org);
        p0_org_h = __lasx_vext2xv_hu_bu(p0_org);
        q0_org_h = __lasx_vext2xv_hu_bu(q0_org);
        q1_org_h = __lasx_vext2xv_hu_bu(q1_org);

        p2_asub_p0        = __lasx_xvabsd_bu(p2_org, p0_org);
        is_less_than_beta = __lasx_xvslt_bu(p2_asub_p0, beta);
        is_less_than_beta = is_less_than_beta & less_alpha_shift2_add2;
        negate_is_less_than_beta = __lasx_xvxori_b(is_less_than_beta, 0xff);
        is_less_than_beta = is_less_than_beta & is_less_than;
        negate_is_less_than_beta = negate_is_less_than_beta & is_less_than;

        /* combine and store */
        if (__lasx_xbnz_v(is_less_than_beta)) {
            __m256i p2_org_h, p3_org_h, p1_h, p2_h;
            __m256i p3_org = __lasx_xvldx(src, -img_width_2x);

            p2_org_h = __lasx_vext2xv_hu_bu(p2_org);
            p3_org_h = __lasx_vext2xv_hu_bu(p3_org);

            AVC_LPF_P0P1P2_OR_Q0Q1Q2(p3_org_h, p0_org_h, q0_org_h, p1_org_h,
                                     p2_org_h, q1_org_h, p0_h, p1_h, p2_h);

            p0_h = __lasx_xvpickev_b(p0_h, p0_h);
            p0_h = __lasx_xvpermi_d(p0_h, 0xd8);
            DUP2_ARG2(__lasx_xvpickev_b, p1_h, p1_h, p2_h, p2_h, p1_h, p2_h);
            DUP2_ARG2(__lasx_xvpermi_d, p1_h, 0xd8, p2_h, 0xd8, p1_h, p2_h);
            p0_org = __lasx_xvbitsel_v(p0_org, p0_h, is_less_than_beta);
            p1_org = __lasx_xvbitsel_v(p1_org, p1_h, is_less_than_beta);
            p2_org = __lasx_xvbitsel_v(p2_org, p2_h, is_less_than_beta);

            __lasx_xvst(p1_org, src, 0);
            __lasx_xvst(p2_org, src - img_width, 0);
        }

        AVC_LPF_P0_OR_Q0(p0_org_h, q1_org_h, p1_org_h, p0_h);
        /* combine */
        p0_h = __lasx_xvpickev_b(p0_h, p0_h);
        p0_h = __lasx_xvpermi_d(p0_h, 0xd8);
        p0_org = __lasx_xvbitsel_v(p0_org, p0_h, negate_is_less_than_beta);
        __lasx_xvst(p0_org, data - img_width, 0);

        /* if (tmpFlag && (unsigned)ABS(q2-q0) < thresholds->beta_in) */
        q2_asub_q0        = __lasx_xvabsd_bu(q2_org, q0_org);
        is_less_than_beta = __lasx_xvslt_bu(q2_asub_q0, beta);
        is_less_than_beta = is_less_than_beta & less_alpha_shift2_add2;
        negate_is_less_than_beta = __lasx_xvxori_b(is_less_than_beta, 0xff);
        is_less_than_beta = is_less_than_beta & is_less_than;
        negate_is_less_than_beta = negate_is_less_than_beta & is_less_than;

        /* combine and store */
        if (__lasx_xbnz_v(is_less_than_beta)) {
            __m256i q2_org_h, q3_org_h, q1_h, q2_h;
            __m256i q3_org = __lasx_xvldx(data, img_width_2x + img_width);

            q2_org_h = __lasx_vext2xv_hu_bu(q2_org);
            q3_org_h = __lasx_vext2xv_hu_bu(q3_org);

            AVC_LPF_P0P1P2_OR_Q0Q1Q2(q3_org_h, q0_org_h, p0_org_h, q1_org_h,
                                     q2_org_h, p1_org_h, q0_h, q1_h, q2_h);

            q0_h = __lasx_xvpickev_b(q0_h, q0_h);
            q0_h = __lasx_xvpermi_d(q0_h, 0xd8);
            DUP2_ARG2(__lasx_xvpickev_b, q1_h, q1_h, q2_h, q2_h, q1_h, q2_h);
            DUP2_ARG2(__lasx_xvpermi_d, q1_h, 0xd8, q2_h, 0xd8, q1_h, q2_h);
            q0_org = __lasx_xvbitsel_v(q0_org, q0_h, is_less_than_beta);
            q1_org = __lasx_xvbitsel_v(q1_org, q1_h, is_less_than_beta);
            q2_org = __lasx_xvbitsel_v(q2_org, q2_h, is_less_than_beta);

            __lasx_xvst(q1_org, data + img_width, 0);
            __lasx_xvst(q2_org, data + img_width_2x, 0);
        }

        AVC_LPF_P0_OR_Q0(q0_org_h, p1_org_h, q1_org_h, q0_h);

        /* combine */
        q0_h = __lasx_xvpickev_b(q0_h, q0_h);
        q0_h = __lasx_xvpermi_d(q0_h, 0xd8);
        q0_org = __lasx_xvbitsel_v(q0_org, q0_h, negate_is_less_than_beta);

        __lasx_xvst(q0_org, data, 0);
    }
}

void ff_h264_add_pixels4_8_lasx(uint8_t *_dst, int16_t *_src, int stride)
{
    __m256i src0, dst0, dst1, dst2, dst3, zero;
    __m256i tmp0, tmp1;
    uint8_t *_dst1 = _dst + stride;
    uint8_t *_dst2 = _dst1 + stride;
    uint8_t *_dst3 = _dst2 + stride;

    src0 = __lasx_xvld(_src, 0);
    dst0 = __lasx_xvldrepl_w(_dst, 0);
    dst1 = __lasx_xvldrepl_w(_dst1, 0);
    dst2 = __lasx_xvldrepl_w(_dst2, 0);
    dst3 = __lasx_xvldrepl_w(_dst3, 0);
    tmp0 = __lasx_xvilvl_w(dst1, dst0);
    tmp1 = __lasx_xvilvl_w(dst3, dst2);
    dst0 = __lasx_xvilvl_d(tmp1, tmp0);
    tmp0 = __lasx_vext2xv_hu_bu(dst0);
    zero = __lasx_xvldi(0);
    tmp1 = __lasx_xvadd_h(src0, tmp0);
    dst0 = __lasx_xvpickev_b(tmp1, tmp1);
    __lasx_xvstelm_w(dst0, _dst, 0, 0);
    __lasx_xvstelm_w(dst0, _dst1, 0, 1);
    __lasx_xvstelm_w(dst0, _dst2, 0, 4);
    __lasx_xvstelm_w(dst0, _dst3, 0, 5);
    __lasx_xvst(zero, _src, 0);
}

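/*
 * Rough scalar equivalent (reference sketch) of ff_h264_add_pixels4_8_lasx:
 * add a 4x4 block of 16-bit residuals to the destination and clear the
 * residual buffer afterwards. As in the vector code, only the low 8 bits of
 * each sum are kept (no saturating clip at this point).
 *
 *     for (int i = 0; i < 4; i++) {
 *         for (int j = 0; j < 4; j++)
 *             _dst[j] = (uint8_t)(_dst[j] + _src[i * 4 + j]);
 *         _dst += stride;
 *     }
 *     memset(_src, 0, 16 * sizeof(int16_t));
 *
 * ff_h264_add_pixels8_8_lasx below does the same for an 8x8 block, i.e. 64
 * residuals (128 bytes) added and then cleared.
 */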

void ff_h264_add_pixels8_8_lasx(uint8_t *_dst, int16_t *_src, int stride)
{
    __m256i src0, src1, src2, src3;
    __m256i dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
    __m256i tmp0, tmp1, tmp2, tmp3;
    __m256i zero = __lasx_xvldi(0);
    uint8_t *_dst1 = _dst + stride;
    uint8_t *_dst2 = _dst1 + stride;
    uint8_t *_dst3 = _dst2 + stride;
    uint8_t *_dst4 = _dst3 + stride;
    uint8_t *_dst5 = _dst4 + stride;
    uint8_t *_dst6 = _dst5 + stride;
    uint8_t *_dst7 = _dst6 + stride;

    src0 = __lasx_xvld(_src, 0);
    src1 = __lasx_xvld(_src, 32);
    src2 = __lasx_xvld(_src, 64);
    src3 = __lasx_xvld(_src, 96);
    dst0 = __lasx_xvldrepl_d(_dst, 0);
    dst1 = __lasx_xvldrepl_d(_dst1, 0);
    dst2 = __lasx_xvldrepl_d(_dst2, 0);
    dst3 = __lasx_xvldrepl_d(_dst3, 0);
    dst4 = __lasx_xvldrepl_d(_dst4, 0);
    dst5 = __lasx_xvldrepl_d(_dst5, 0);
    dst6 = __lasx_xvldrepl_d(_dst6, 0);
    dst7 = __lasx_xvldrepl_d(_dst7, 0);
    tmp0 = __lasx_xvilvl_d(dst1, dst0);
    tmp1 = __lasx_xvilvl_d(dst3, dst2);
    tmp2 = __lasx_xvilvl_d(dst5, dst4);
    tmp3 = __lasx_xvilvl_d(dst7, dst6);
    dst0 = __lasx_vext2xv_hu_bu(tmp0);
    dst1 = __lasx_vext2xv_hu_bu(tmp1);
    dst2 = __lasx_vext2xv_hu_bu(tmp2);
    dst3 = __lasx_vext2xv_hu_bu(tmp3);
    tmp0 = __lasx_xvadd_h(src0, dst0);
    tmp1 = __lasx_xvadd_h(src1, dst1);
    tmp2 = __lasx_xvadd_h(src2, dst2);
    tmp3 = __lasx_xvadd_h(src3, dst3);
    dst1 = __lasx_xvpickev_b(tmp1, tmp0);
    dst2 = __lasx_xvpickev_b(tmp3, tmp2);
    __lasx_xvst(zero, _src, 0);
    __lasx_xvst(zero, _src, 32);
    __lasx_xvst(zero, _src, 64);
    __lasx_xvst(zero, _src, 96);
    __lasx_xvstelm_d(dst1, _dst, 0, 0);
    __lasx_xvstelm_d(dst1, _dst1, 0, 2);
    __lasx_xvstelm_d(dst1, _dst2, 0, 1);
    __lasx_xvstelm_d(dst1, _dst3, 0, 3);
    __lasx_xvstelm_d(dst2, _dst4, 0, 0);
    __lasx_xvstelm_d(dst2, _dst5, 0, 2);
    __lasx_xvstelm_d(dst2, _dst6, 0, 1);
    __lasx_xvstelm_d(dst2, _dst7, 0, 3);
}