#include "libavcodec/wasm/hevc/idct.h"
#include <wasm_simd128.h>
#include "libavutil/mem_internal.h"

Macros
#define	tr_4x4_8(in0, in1, in2, in3, dst0, dst1, dst2, dst3, trans, half0, half1)

#define	tr_8x4(src0, src1, half0, half1, trans, shift)

#define	load16(x1, x3, x2, in0, in1, in2, in3)

#define	bufferfly(e, o, p, m)

#define	sum_sub(out, in0, in1, operation, half) out = wasm_i32x4_ ## operation (out, wasm_i32x4_extmul_ ## half ## _i16x8(in0, in1));

#define	add_member(in, t0, t1, t2, t3, t4, t5, t6, t7, op0, op1, op2, op3, op4, op5, op6, op7, half)

#define	butterfly16(in0, in1, in2, in3, in4, in5, in6, in7)

#define	add_member32(in, t0, t1, t2, t3, op0, op1, op2, op3, half)

#define	butterfly32(in0, in1, in2, in3, out)

Functions
static void	transpose_4x8h (v128_t *src)

static void	transpose_8x8h (v128_t *src)

static void	tr_4x4 (v128_t *src, v128_t *trans, int shift)

static void	idct_4x4 (int16_t *coeffs, int bit_depth)

void	ff_hevc_idct_4x4_8_simd128 (int16_t *coeffs, int col_limit)

void	ff_hevc_idct_4x4_10_simd128 (int16_t *coeffs, int col_limit)

static void	shift_narrow_low (v128_t src, v128_t *dst, v128_t add, int shift)

static void	shift_narrow_high (v128_t src, v128_t *dst, v128_t add, int shift)

static void	idct_8x8 (int16_t *coeffs, int bit_depth)

void	ff_hevc_idct_8x8_8_simd128 (int16_t *coeffs, int col_limit)

void	ff_hevc_idct_8x8_10_simd128 (int16_t *coeffs, int col_limit)

static void	tr16_8x4 (v128_t in0, v128_t in1, v128_t in2, v128_t in3, const v128_t *trans, char *sp, int offset)

static void	scale (v128_t *out0, v128_t *out1, v128_t *out2, v128_t *out3, v128_t in0, v128_t in1, v128_t in2, v128_t in3, v128_t in4, v128_t in5, v128_t in6, v128_t in7, int shift)

static void	transpose16_4x4_2 (v128_t *r0, v128_t *r1, v128_t *r2, v128_t *r3)

static void	store16 (v128_t in0, v128_t in1, v128_t in2, v128_t in3, char *x1, char *x3, int x1_step, int x3_step)

static void	store_to_stack (char *sp, int off1, int off2, v128_t in0, v128_t in2, v128_t in4, v128_t in6, v128_t in7, v128_t in5, v128_t in3, v128_t in1)

static void	tr_16x4 (char *src, char *buf, char *sp, int shift, int offset, int step)

static void	idct_16x16 (char *coeffs, int bit_depth)

void	ff_hevc_idct_16x16_8_simd128 (int16_t *coeffs, int col_limit)

void	ff_hevc_idct_16x16_10_simd128 (int16_t *coeffs, int col_limit)

static void	tr_32x4 (char *x5, char *x11, char *sp, int shift)

static void	idct_32x32 (char *coeffs, int bit_depth)

void	ff_hevc_idct_32x32_8_simd128 (int16_t *coeffs, int col_limit)

void	ff_hevc_idct_32x32_10_simd128 (int16_t *coeffs, int col_limit)

Variables
static const int8_t	transform []

Macro Definition Documentation

◆ tr_4x4_8

#define tr_4x4_8	(	in0,
		in1,
		in2,
		in3,
		dst0,
		dst1,
		dst2,
		dst3,
		trans,
		half0,
		half1
	)

Value:

    do {                                                                            \
        v128_t e0, e1, o0, o1;                                                      \
        v128_t tmp[4];                                                              \
                                                                                    \
        e0 = wasm_i32x4_extmul_ ## half0 ## _i16x8(in0, trans[0]);                  \
        e1 = e0;                                                                    \
        o0 = wasm_i32x4_extmul_ ## half0 ## _i16x8(in1, trans[1]);                  \
        o1 = wasm_i32x4_extmul_ ## half0 ## _i16x8(in1, trans[3]);                  \
                                                                                    \
        tmp[0] = wasm_i32x4_extmul_ ## half1 ## _i16x8(in2, trans[0]);              \
        tmp[1] = wasm_i32x4_extmul_ ## half1 ## _i16x8(in2, trans[0]);              \
        tmp[2] = wasm_i32x4_extmul_ ## half1 ## _i16x8(in3, trans[3]);              \
        tmp[3] = wasm_i32x4_extmul_ ## half1 ## _i16x8(in3, trans[1]);              \
        e0 = wasm_i32x4_add(e0, tmp[0]);                                            \
        e1 = wasm_i32x4_sub(e1, tmp[1]);                                            \
        o0 = wasm_i32x4_add(o0, tmp[2]);                                            \
        o1 = wasm_i32x4_sub(o1, tmp[3]);                                            \
        dst0 = wasm_i32x4_add(e0, o0);                                              \
        dst1 = wasm_i32x4_add(e1, o1);                                              \
        dst2 = wasm_i32x4_sub(e1, o1);                                              \
        dst3 = wasm_i32x4_sub(e0, o0);                                              \
    } while (0)

Definition at line 142 of file idct.c.

◆ tr_8x4

#define tr_8x4	(	src0,
		src1,
		half0,
		half1,
		trans,
		shift
	)

Definition at line 166 of file idct.c.

◆ load16

#define load16	(	x1,
		x3,
		x2,
		in0,
		in1,
		in2,
		in3
	)

Value:

    in0 = wasm_v128_load64_zero(x1);            \
    in0 = wasm_v128_load64_lane(x3, in0, 1);    \
    x1 += x2;                                   \
    x3 += x2;                                   \
    in1 = wasm_v128_load64_zero(x1);            \
    in1 = wasm_v128_load64_lane(x3, in1, 1);    \
    x1 += x2;                                   \
    x3 += x2;                                   \
    in2 = wasm_v128_load64_zero(x1);            \
    in2 = wasm_v128_load64_lane(x3, in2, 1);    \
    x1 += x2;                                   \
    x3 += x2;                                   \
    in3 = wasm_v128_load64_zero(x1);            \
    in3 = wasm_v128_load64_lane(x3, in3, 1);    \
    x1 += x2;                                   \
    x3 += x2;                                   \

Definition at line 264 of file idct.c.

◆ bufferfly

#define bufferfly	(	e,
		o,
		p,
		m
	)

Value:

    p = wasm_i32x4_add(e, o); \
    m = wasm_i32x4_sub(e, o); \

Definition at line 282 of file idct.c.

◆ sum_sub

#define sum_sub	(	out,
		in0,
		in1,
		operation,
		half
	)	out = wasm_i32x4_ ## operation (out, wasm_i32x4_extmul_ ## half ## _i16x8(in0, in1));

Definition at line 436 of file idct.c.

◆ add_member

#define add_member	(	in,
		t0,
		t1,
		t2,
		t3,
		t4,
		t5,
		t6,
		t7,
		op0,
		op1,
		op2,
		op3,
		op4,
		op5,
		op6,
		op7,
		half
	)

Value:

    do {                                \
        sum_sub(v21, in, t0, op0, half) \
        sum_sub(v22, in, t1, op1, half) \
        sum_sub(v23, in, t2, op2, half) \
        sum_sub(v24, in, t3, op3, half) \
        sum_sub(v25, in, t4, op4, half) \
        sum_sub(v26, in, t5, op5, half) \
        sum_sub(v27, in, t6, op6, half) \
        sum_sub(v28, in, t7, op7, half) \
    } while (0)

Definition at line 439 of file idct.c.

◆ butterfly16

#define butterfly16	(	in0,
		in1,
		in2,
		in3,
		in4,
		in5,
		in6,
		in7
	)

Value:

    do {                                                    \
        v20 = wasm_i32x4_add(in0, in1);                     \
        in0 = wasm_i32x4_sub(in0, in1);                     \
        in1 = wasm_i32x4_add(in2, in3);                     \
        in2 = wasm_i32x4_sub(in2, in3);                     \
        in3 = wasm_i32x4_add(in4, in5);                     \
        in4 = wasm_i32x4_sub(in4, in5);                     \
        in5 = wasm_i32x4_add(in6, in7);                     \
        in6 = wasm_i32x4_sub(in6, in7);                     \
    } while (0)

Definition at line 451 of file idct.c.

◆ add_member32

#define add_member32	(	in,
		t0,
		t1,
		t2,
		t3,
		op0,
		op1,
		op2,
		op3,
		half
	)

Value:

    do { \
        sum_sub(v24, in, t0, op0, half) \
        sum_sub(v25, in, t1, op1, half) \
        sum_sub(v26, in, t2, op2, half) \
        sum_sub(v27, in, t3, op3, half) \
    } while (0)

Definition at line 606 of file idct.c.

◆ butterfly32

#define butterfly32	(	in0,
		in1,
		in2,
		in3,
		out
	)

Value:

    do {                                     \
        out = wasm_i32x4_add(in0, in1);      \
        in0 = wasm_i32x4_sub(in0, in1);      \
        in1 = wasm_i32x4_add(in2, in3);      \
        in2 = wasm_i32x4_sub(in2, in3);      \
    } while (0)

Definition at line 614 of file idct.c.

Function Documentation

◆ transpose_4x8h()

static void transpose_4x8h ( v128_t * src )

inline static

Definition at line 34 of file idct.c.

Referenced by idct_4x4(), and transpose_8x8h().

◆ transpose_8x8h()

static void transpose_8x8h ( v128_t * src )

inline static

Definition at line 47 of file idct.c.

Referenced by idct_8x8().

◆ tr_4x4()

static void tr_4x4	(	v128_t *	src,
		v128_t *	trans,
		int	shift
	)

inline static

Definition at line 53 of file idct.c.

Referenced by idct_4x4().

◆ idct_4x4()

static void idct_4x4	(	int16_t *	coeffs,
		int	bit_depth
	)

static

Definition at line 91 of file idct.c.

Referenced by ff_hevc_idct_4x4_10_simd128(), and ff_hevc_idct_4x4_8_simd128().

◆ ff_hevc_idct_4x4_8_simd128()

void ff_hevc_idct_4x4_8_simd128	(	int16_t *	coeffs,
		int	col_limit
	)

Definition at line 118 of file idct.c.

Referenced by ff_hevc_dsp_init_wasm().

◆ ff_hevc_idct_4x4_10_simd128()

void ff_hevc_idct_4x4_10_simd128	(	int16_t *	coeffs,
		int	col_limit
	)

Definition at line 123 of file idct.c.

Referenced by ff_hevc_dsp_init_wasm().

◆ shift_narrow_low()

static void shift_narrow_low	(	v128_t	src,
		v128_t *	dst,
		v128_t	add,
		int	shift
	)

inline static

Definition at line 128 of file idct.c.

◆ shift_narrow_high()

static void shift_narrow_high	(	v128_t	src,
		v128_t *	dst,
		v128_t	add,
		int	shift
	)

inline static

Definition at line 135 of file idct.c.

◆ idct_8x8()

static void idct_8x8	(	int16_t *	coeffs,
		int	bit_depth
	)

static

Definition at line 210 of file idct.c.

Referenced by ff_hevc_idct_8x8_10_simd128(), and ff_hevc_idct_8x8_8_simd128().

◆ ff_hevc_idct_8x8_8_simd128()

void ff_hevc_idct_8x8_8_simd128	(	int16_t *	coeffs,
		int	col_limit
	)

Definition at line 254 of file idct.c.

Referenced by ff_hevc_dsp_init_wasm().

◆ ff_hevc_idct_8x8_10_simd128()

void ff_hevc_idct_8x8_10_simd128	(	int16_t *	coeffs,
		int	col_limit
	)

Definition at line 259 of file idct.c.

Referenced by ff_hevc_dsp_init_wasm().

◆ tr16_8x4()

static void tr16_8x4	(	v128_t	in0,
		v128_t	in1,
		v128_t	in2,
		v128_t	in3,
		const v128_t *	trans,
		char *	sp,
		int	offset
	)

static

Definition at line 286 of file idct.c.

Referenced by tr_16x4().

◆ scale()

static void scale	(	v128_t *	out0,
		v128_t *	out1,
		v128_t *	out2,
		v128_t *	out3,
		v128_t	in0,
		v128_t	in1,
		v128_t	in2,
		v128_t	in3,
		v128_t	in4,
		v128_t	in5,
		v128_t	in6,
		v128_t	in7,
		int	shift
	)

static

Definition at line 328 of file idct.c.

Referenced by tr_16x4(), and tr_32x4().

◆ transpose16_4x4_2()

static void transpose16_4x4_2	(	v128_t *	r0,
		v128_t *	r1,
		v128_t *	r2,
		v128_t *	r3
	)

static

Definition at line 359 of file idct.c.

Referenced by tr_16x4(), and tr_32x4().

◆ store16()

static void store16	(	v128_t	in0,
		v128_t	in1,
		v128_t	in2,
		v128_t	in3,
		char *	x1,
		char *	x3,
		int	x1_step,
		int	x3_step
	)

static

Definition at line 390 of file idct.c.

Referenced by tr_16x4(), and tr_32x4().

◆ store_to_stack()

static void store_to_stack	(	char *	sp,
		int	off1,
		int	off2,
		v128_t	in0,
		v128_t	in2,
		v128_t	in4,
		v128_t	in6,
		v128_t	in7,
		v128_t	in5,
		v128_t	in3,
		v128_t	in1
	)

static

Definition at line 413 of file idct.c.

Referenced by tr_16x4().

◆ tr_16x4()

static void tr_16x4	(	char *	src,
		char *	buf,
		char *	sp,
		int	shift,
		int	offset,
		int	step
	)

static

Definition at line 463 of file idct.c.

Referenced by idct_16x16(), and tr_32x4().

◆ idct_16x16()

static void idct_16x16	(	char *	coeffs,
		int	bit_depth
	)

static

Definition at line 579 of file idct.c.

Referenced by ff_hevc_idct_16x16_10_simd128(), and ff_hevc_idct_16x16_8_simd128().

◆ ff_hevc_idct_16x16_8_simd128()

void ff_hevc_idct_16x16_8_simd128	(	int16_t *	coeffs,
		int	col_limit
	)

Definition at line 596 of file idct.c.

Referenced by ff_hevc_dsp_init_wasm().

◆ ff_hevc_idct_16x16_10_simd128()

void ff_hevc_idct_16x16_10_simd128	(	int16_t *	coeffs,
		int	col_limit
	)

Definition at line 601 of file idct.c.

Referenced by ff_hevc_dsp_init_wasm().

◆ tr_32x4()

static void tr_32x4	(	char *	x5,
		char *	x11,
		char *	sp,
		int	shift
	)

static

Definition at line 622 of file idct.c.

Referenced by idct_32x32().

◆ idct_32x32()

static void idct_32x32	(	char *	coeffs,
		int	bit_depth
	)

static

Definition at line 843 of file idct.c.

Referenced by ff_hevc_idct_32x32_10_simd128(), and ff_hevc_idct_32x32_8_simd128().

◆ ff_hevc_idct_32x32_8_simd128()

void ff_hevc_idct_32x32_8_simd128	(	int16_t *	coeffs,
		int	col_limit
	)

Definition at line 861 of file idct.c.

Referenced by ff_hevc_dsp_init_wasm().

◆ ff_hevc_idct_32x32_10_simd128()

void ff_hevc_idct_32x32_10_simd128	(	int16_t *	coeffs,
		int	col_limit
	)

Definition at line 866 of file idct.c.

Referenced by ff_hevc_dsp_init_wasm().

Variable Documentation

◆ transform

const int8_t transform[]

static

Initial value:

= {
    64, 83, 64, 36, 89, 75, 50, 18,
    90, 87, 80, 70, 57, 43, 25, 9,
    90, 90, 88, 85, 82, 78, 73, 67,
    61, 54, 46, 38, 31, 22, 13, 4,
}

Definition at line 27 of file idct.c.

Referenced by idct_4x4(), idct_8x8(), tr_16x4(), and tr_32x4().

Macros

Functions

Variables

Macro Definition Documentation

◆ tr_4x4_8

◆ tr_8x4

◆ load16

◆ bufferfly

◆ sum_sub

◆ add_member

◆ butterfly16

◆ add_member32

◆ butterfly32

Function Documentation

◆ transpose_4x8h()

◆ transpose_8x8h()

◆ tr_4x4()

◆ idct_4x4()

◆ ff_hevc_idct_4x4_8_simd128()

◆ ff_hevc_idct_4x4_10_simd128()

◆ shift_narrow_low()

◆ shift_narrow_high()

◆ idct_8x8()

◆ ff_hevc_idct_8x8_8_simd128()

◆ ff_hevc_idct_8x8_10_simd128()

◆ tr16_8x4()

◆ scale()

◆ transpose16_4x4_2()

◆ store16()

◆ store_to_stack()

◆ tr_16x4()

◆ idct_16x16()

◆ ff_hevc_idct_16x16_8_simd128()

◆ ff_hevc_idct_16x16_10_simd128()

◆ tr_32x4()

◆ idct_32x32()

◆ ff_hevc_idct_32x32_8_simd128()

◆ ff_hevc_idct_32x32_10_simd128()

Variable Documentation

◆ transform