35 #define SAMPLE_FORMAT float
38 #define ctype AVComplexFloat
40 #define TX_TYPE AV_TX_FLOAT_RDFT
44 #define SAMPLE_FORMAT double
47 #define ctype AVComplexDouble
49 #define TX_TYPE AV_TX_DOUBLE_RDFT
54 #define fn3(a,b) a##_##b
55 #define fn2(a,b) fn3(a,b)
56 #define fn(a) fn2(a, SAMPLE_FORMAT)
59 int cur_nb_taps,
const ftype *time)
61 ftype ch_gain, sum = 0;
63 if (
s->ir_norm < 0.f) {
65 }
else if (
s->ir_norm == 0.f) {
66 for (
int i = 0;
i < cur_nb_taps;
i++)
70 ftype ir_norm =
s->ir_norm;
72 for (
int i = 0;
i < cur_nb_taps;
i++)
74 ch_gain = 1. /
POW(sum, 1. / ir_norm);
81 int cur_nb_taps,
int ch,
84 if (ch_gain != 1. ||
s->ir_gain != 1.) {
85 ftype gain = ch_gain *
s->ir_gain;
89 s->fdsp->vector_fmul_scalar(time, time, gain,
FFALIGN(cur_nb_taps, 4));
91 s->fdsp->vector_dmul_scalar(time, time, gain,
FFALIGN(cur_nb_taps, 8));
99 const int coffset = coeff_partition * seg->coeff_size;
100 const int nb_taps =
s->nb_taps[selir];
101 ftype *time = (
ftype *)
s->norm_ir[selir]->extended_data[ch];
102 ftype *tempin = (
ftype *)seg->tempin->extended_data[ch];
103 ftype *tempout = (
ftype *)seg->tempout->extended_data[ch];
105 const int remaining = nb_taps - (seg->input_offset + coeff_partition * seg->part_size);
106 const int size = remaining >= seg->part_size ? seg->part_size : remaining;
108 memset(tempin +
size, 0,
sizeof(*tempin) * (seg->block_size -
size));
109 memcpy(tempin, time + seg->input_offset + coeff_partition * seg->part_size,
110 size *
sizeof(*tempin));
111 seg->ctx_fn(seg->ctx[ch], tempout, tempin,
sizeof(*tempin));
112 memcpy(
coeff + coffset, tempout, seg->coeff_size *
sizeof(*
coeff));
126 if ((nb_samples & 15) == 0 && nb_samples >= 8) {
128 s->fdsp->vector_fmac_scalar(dst,
src, 1.
f, nb_samples);
130 s->fdsp->vector_dmac_scalar(dst,
src, 1.0, nb_samples);
133 for (
int n = 0; n < nb_samples; n++)
141 const ftype *in = (
const ftype *)
s->in->extended_data[ch] + ioffset;
143 const int min_part_size =
s->min_part_size;
144 const int nb_samples =
FFMIN(min_part_size,
out->nb_samples -
offset);
145 const int nb_segments =
s->nb_segments[selir];
146 const float dry_gain =
s->dry_gain;
147 const float wet_gain =
s->wet_gain;
164 if (dry_gain == 1.
f) {
165 memcpy(
src + input_offset, in, nb_samples *
sizeof(*
src));
166 }
else if (min_part_size >= 8) {
168 s->fdsp->vector_fmul_scalar(
src + input_offset, in, dry_gain,
FFALIGN(nb_samples, 4));
170 s->fdsp->vector_dmul_scalar(
src + input_offset, in, dry_gain,
FFALIGN(nb_samples, 8));
174 for (
int n = 0; n < nb_samples; n++)
175 src2[n] = in[n] * dry_gain;
178 output_offset[0] += min_part_size;
179 if (output_offset[0] >= part_size) {
180 output_offset[0] = 0;
184 dst += output_offset[0];
189 memset(sumin, 0,
sizeof(*sumin) * seg->
fft_length);
192 memset(tempin + part_size, 0,
sizeof(*tempin) * (seg->
block_size - part_size));
193 memcpy(tempin,
src,
sizeof(*
src) * part_size);
194 seg->
tx_fn(seg->
tx[ch], blockout, tempin,
sizeof(
ftype));
197 for (
int i = 0;
i < nb_partitions;
i++) {
198 const int input_partition = j;
199 const int coeff_partition =
i;
200 const int coffset = coeff_partition * seg->
coeff_size;
209 s->afirdsp.fcmul_add(sumin, blockout, (
const ftype *)
coeff, part_size);
211 s->afirdsp.dcmul_add(sumin, blockout, (
const ftype *)
coeff, part_size);
218 memcpy(dst, buf, part_size *
sizeof(*dst));
219 memcpy(buf, sumout + part_size, part_size *
sizeof(*buf));
223 if (part_size != min_part_size)
232 if (min_part_size >= 8) {
234 s->fdsp->vector_fmul_scalar(ptr, ptr, wet_gain,
FFALIGN(nb_samples, 4));
236 s->fdsp->vector_dmul_scalar(ptr, ptr, wet_gain,
FFALIGN(nb_samples, 8));
239 for (
int n = 0; n < nb_samples; n++)
247 int min_part_size,
int ch,
int offset,
248 int prev_selir,
int selir)
250 if (
ctx->is_disabled ||
s->prev_is_disabled) {
252 const ftype *xfade0 = (
const ftype *)
s->xfade[0]->extended_data[ch];
253 const ftype *xfade1 = (
const ftype *)
s->xfade[1]->extended_data[ch];
258 if (
ctx->is_disabled && !
s->prev_is_disabled) {
259 memset(
src0, 0, min_part_size *
sizeof(
ftype));
261 for (
int n = 0; n < min_part_size; n++)
262 dst[n] = xfade1[n] *
src0[n] + xfade0[n] * in[n];
263 }
else if (!
ctx->is_disabled &&
s->prev_is_disabled) {
264 memset(
src1, 0, min_part_size *
sizeof(
ftype));
266 for (
int n = 0; n < min_part_size; n++)
267 dst[n] = xfade1[n] * in[n] + xfade0[n] *
src1[n];
269 memcpy(dst, in,
sizeof(
ftype) * min_part_size);
271 }
else if (prev_selir != selir &&
s->loading[ch] != 0) {
272 const ftype *xfade0 = (
const ftype *)
s->xfade[0]->extended_data[ch];
273 const ftype *xfade1 = (
const ftype *)
s->xfade[1]->extended_data[ch];
278 memset(
src0, 0, min_part_size *
sizeof(
ftype));
279 memset(
src1, 0, min_part_size *
sizeof(
ftype));
284 if (
s->loading[ch] >
s->max_offset[selir]) {
285 for (
int n = 0; n < min_part_size; n++)
286 dst[n] = xfade1[n] *
src0[n] + xfade0[n] *
src1[n];
289 memcpy(dst,
src0, min_part_size *
sizeof(
ftype));