From 355e980101d68f0381b1858a7ba418afd0b6818a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9l=C3=A8ne=20Corbineau?= Date: Tue, 11 Feb 2025 14:14:22 +0100 Subject: [PATCH] BNLMS/VAD works with fixed step size --- pw_plugin_II/aec.cc | 29 +++++++++++++++++--- pw_plugin_II/aec.h | 15 ++--------- pw_plugin_II/fdaf.cc | 64 +++++++++++++++++++++++++++++++++----------- pw_plugin_II/fdaf.h | 17 +++++------- pw_plugin_II/main.cc | 4 +-- 5 files changed, 86 insertions(+), 43 deletions(-) diff --git a/pw_plugin_II/aec.cc b/pw_plugin_II/aec.cc index f717d0f..3ad2281 100644 --- a/pw_plugin_II/aec.cc +++ b/pw_plugin_II/aec.cc @@ -37,12 +37,35 @@ void AEC::do_buffer(float* res, float* mic, float* hp, bool maybevoice) { std::memcpy(fd.input_buffer, mic, 1024*sizeof(float)); - fd.do_step(true); + if(status == LEARNING) { + + fd.do_step(true, 0.1f); // mu_max = 0.5f + if(fd.echoratio > .5f && fd.costheta < -.75f) { + status = ONLINE; + std::puts("Switched to online status"); + } + + } else { + fd.do_step(!maybevoice || fd.echoratio > .8f); + // Learn if there is no near-end activity OR we have mic activity + // but it is explained by the echo + + // We could have more stringent parameters... + if(fd.costheta > -.5f) { + if(cnt++ == 5) { + cnt = 0; status = LEARNING; + std::puts("Fell back to LEARNING"); + } + } else {cnt = 0;} + } + + std::printf("echoratio = %f, adj. cos(theta) = %f\n", + 10*std::log10(fd.echoratio), fd.costheta); std::memcpy(res, fd.output_buffer, 1024*sizeof(float)); } bool AEC::inject_noise() { - return true;//status == LOST; //|| status == LEARNING; + return status == LOST || status == LEARNING; } void AEC::find_delay(float mic, float hp) { @@ -85,7 +108,7 @@ void AEC::find_delay(float mic, float hp) { if(max_xcor > 4.0f * low_max_xcor) { std::printf("Found delay at %d chunks\n", argmax_xcor); status = LEARNING; - current_delay = argmax_xcor * 8 ; // Safety margin + current_delay = argmax_xcor * 8; hp_acc = 0.0f; mic_acc = 0.0f; diff --git a/pw_plugin_II/aec.h b/pw_plugin_II/aec.h index b67fa14..0a74cc5 100644 --- a/pw_plugin_II/aec.h +++ b/pw_plugin_II/aec.h @@ -28,19 +28,8 @@ public: int current_delay = 0; // We expect sudden drifts - enum {LOST, LEARNING, LOCKED} status = LOST; - - // 20ms active echo length should be more than enough - // The back-transform should have length 1024 - alignas(16) float h_DFT[4096]; // 2048 complex coefficients - - alignas(16) float hp_DFT[4096]; // 2048 complex coefficients - alignas(16) float hp_work_buffer[2048]; // 2048 real coefficients - - alignas(16) float mic_DFT[4096]; - - float mu = .5f/1024; - float delta = 1.0e-8f; + enum {LOST, LEARNING, ONLINE} status = LOST; + int cnt = 0; FDAF fd = FDAF(1024); diff --git a/pw_plugin_II/fdaf.cc b/pw_plugin_II/fdaf.cc index f6e425a..684d382 100644 --- a/pw_plugin_II/fdaf.cc +++ b/pw_plugin_II/fdaf.cc @@ -1,4 +1,5 @@ #include +#include #include "fdaf.h" FDAF::FDAF(int b) : block_len(b), buffer(new float[6*2*b + b]) { @@ -18,45 +19,78 @@ FDAF::FDAF(int b) : block_len(b), buffer(new float[6*2*b + b]) { setup = pffft_new_setup(2*block_len, PFFFT_REAL); // Rn we leak memory.. } -void FDAF::do_step(bool adapt) { +void FDAF::do_step(bool adapt, float forcemu) { + /* When convolving, we look at the normalization constant which makes + the normalized dirac impulse have gain 1. It is 1/(FFT size). + */ + const float normalize = 1.0f/(2*block_len); + pffft_transform(setup, internal_aux, scratch[0], scratch[1], PFFFT_FORWARD); pffft_transform(setup, impulse, scratch[1], scratch[2], PFFFT_FORWARD); std::memset(scratch[2], 0, sizeof(float)*(2*block_len)); - pffft_zconvolve_accumulate(setup, scratch[0], scratch[1], scratch[2], -1.0f); + pffft_zconvolve_accumulate(setup, scratch[0], scratch[1], scratch[2], -normalize); pffft_transform(setup, scratch[2], internal_out, scratch[1], PFFFT_BACKWARD); + float echopower = 0.0f; + float inputpower = 0.0f; + float interfpower = 0.0f; + for(int i = 0; i < block_len; ++i) { + echopower += output_buffer[i] * output_buffer[i]; + inputpower += input_buffer[i] * input_buffer[i]; output_buffer[i] += input_buffer[i]; + + interfpower += output_buffer[i] * output_buffer[i]; } + // We have Fresnel relation : interfpower = echopower + inputpower + 2*cos(theta) * echopower * inputpower + + // With E = (ipow - echopow - inputpow) / (2 * sqrt(echopower * inputpower)) + // We fudge cos(theta) like + // E * sqrt(I1I2)/[sqrt(I1I2) + I2 * .5] + (-1) * I2 * .5/[I2 * .5 + sqrt(I1I2)] + // = (E * sqrt(I1/I2) - .5)/(sqrt(I1/I2) + .5) + echoratio = echopower/inputpower; + float sqrt = std::sqrt(echoratio); + float E = (interfpower - inputpower - echopower)/ (2 * std::sqrt(inputpower * echopower) + 1.0e-16); + + costheta = (E * sqrt - .5f) / (sqrt + .5f); + if(adapt) { // Reset the first part of the output buffer std::memset(internal_out, 0, sizeof(float)*block_len); - // zreorder here? + pffft_transform(setup, internal_out, scratch[1], scratch[2], PFFFT_FORWARD); // Take the conjugate of the DFT of the input -> prepare for convolution - pffft_transform(setup, internal_out, scratch[1], scratch[2], PFFFT_FORWARD); - for(int i = 0; i < 2*block_len; i+=8) { - scratch[0][i+4] = -scratch[0][i+4]; - scratch[0][i+5] = -scratch[0][i+5]; - scratch[0][i+6] = -scratch[0][i+6]; - scratch[0][i+7] = -scratch[0][i+7]; + pffft_zreorder(setup, scratch[0], scratch[2], PFFFT_FORWARD); + for(int i = 2; i < 2*block_len; i+=2) { + scratch[2][i+1] = -scratch[2][i+1]; } + /* Right now, we do not do frequency binning */ + float power = 0.0f; + for(int i = 0; i < 2*block_len; ++i) { + power += internal_aux[i]*internal_aux[i]; + } + power /= 2*block_len; + pffft_zreorder(setup, scratch[2], scratch[0], PFFFT_BACKWARD); + + /* Compute (normalized) block gradient estimate */ std::memset(scratch[2], 0, sizeof(float)*(2*block_len)); - pffft_zconvolve_accumulate(setup, scratch[0], scratch[1], scratch[2], 1.0f); + pffft_zconvolve_accumulate(setup, scratch[0], scratch[1], scratch[2], + normalize/(block_len * (power + delta))); pffft_transform(setup, scratch[2], scratch[0], scratch[1], PFFFT_BACKWARD); + /* The data we want to convolve is the _beginning_ of the (reversed) - input buffer and the _end_ of the output buffer. Therefore the correlation result - is at the _end_ of scratch[0] + input buffer and the _end_ of the output buffer. + Therefore the correlation result is at the _end_ of scratch[0] */ + float act_mu = forcemu > 0.0f ? forcemu : mu; for(int i = 0; i < block_len; ++i) { - impulse_test[i] += mu * (scratch[0][i+block_len] - impulse_test[i]); - //impulse[i] -= mu * scratch[0][i+block_len]; - //std::printf("%e\n", impulse[i]); + // mu is constant... for now + impulse[i+block_len] += act_mu * scratch[0][i+block_len]; } } diff --git a/pw_plugin_II/fdaf.h b/pw_plugin_II/fdaf.h index 0efb073..682fb9d 100644 --- a/pw_plugin_II/fdaf.h +++ b/pw_plugin_II/fdaf.h @@ -7,7 +7,7 @@ class FDAF { public: FDAF(int blen = 1024); - void do_step(bool adapt=false); + void do_step(bool adapt=false, float forcemu=-1.0f); const int block_len; float* aux_in; // HP command in @@ -15,9 +15,12 @@ public: float* input_buffer; float* output_buffer; - float* impulse; // real, 2*block_len, zero padded float* impulse_test; + + float costheta = 0.0f; + float echoratio = 0.0f; // Power ratio, echo vs input + private: void shift_buffers(); // Prepare next step void do_learn(); @@ -25,17 +28,11 @@ private: PFFFT_Setup* setup; std::unique_ptr buffer; - float* internal_aux; // real, 2*block_len float* internal_out; // real, 2*block_len float* scratch[3]; // FFT results, 2*block_len length each, times 3 - float mu = 1.0f/1024; - float delta = 1e-15; - - /* - float power_decay = 1.0f/1024; - float* power_bins; // real, 2*block_len, imag/real counted separatly - */ + float mu = 0.001f; + float delta = 1e-8f; }; diff --git a/pw_plugin_II/main.cc b/pw_plugin_II/main.cc index 8cf7a12..c9c974a 100644 --- a/pw_plugin_II/main.cc +++ b/pw_plugin_II/main.cc @@ -77,7 +77,7 @@ static void on_process(void* pro, spa_io_position* position) { } power /= n_samples; - if(power > 1e-8) {p->early_lock_cnt--;} + if(power > 1e-6) {p->early_lock_cnt--;} else {p->early_lock_cnt = 10;} if(!p->early_lock_cnt) { @@ -183,7 +183,7 @@ static void do_report(void* pro, int) { */ std::printf("Impulse response : L = [\n"); for(int i = 0; i < 1024; ++i) { - std::printf("%e,", p->aec.fd.impulse_test[i]); + std::printf("%e,", p->aec.fd.impulse[i+p->aec.fd.block_len]); } std::printf("]\n");