From 355e980101d68f0381b1858a7ba418afd0b6818a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9l=C3=A8ne=20Corbineau?= <selene.corbineau@ens.fr>
Date: Tue, 11 Feb 2025 14:14:22 +0100
Subject: [PATCH] BNLMS/VAD works with fixed step size

---
 pw_plugin_II/aec.cc  | 29 +++++++++++++++++---
 pw_plugin_II/aec.h   | 15 ++---------
 pw_plugin_II/fdaf.cc | 64 +++++++++++++++++++++++++++++++++-----------
 pw_plugin_II/fdaf.h  | 17 +++++-------
 pw_plugin_II/main.cc |  4 +--
 5 files changed, 86 insertions(+), 43 deletions(-)
diff --git a/pw_plugin_II/aec.cc b/pw_plugin_II/aec.cc
index f717d0f..3ad2281 100644
--- a/pw_plugin_II/aec.cc
+++ b/pw_plugin_II/aec.cc
@@ -37,12 +37,35 @@ void AEC::do_buffer(float* res, float* mic, float* hp, bool maybevoice) {
 
 	std::memcpy(fd.input_buffer, mic, 1024*sizeof(float));
 
-	fd.do_step(true);
+	if(status == LEARNING) {
+
+		fd.do_step(true, 0.1f); // mu_max = 0.5f
+		if(fd.echoratio > .5f && fd.costheta < -.75f) {
+			status = ONLINE;
+			std::puts("Switched to online status");
+		}
+
+	} else {
+		fd.do_step(!maybevoice || fd.echoratio > .8f);
+		// Learn if there is no near-end activity OR we have mic activity
+		// but it is explained by the echo
+
+		// We could have more stringent parameters...
+		if(fd.costheta > -.5f) {
+			if(cnt++ == 5) {
+				cnt = 0; status = LEARNING;
+				std::puts("Fell back to LEARNING");
+			}
+		} else {cnt = 0;}
+	}
+
+	std::printf("echoratio = %f, adj. cos(theta) = %f\n",
+		10*std::log10(fd.echoratio), fd.costheta);
 	std::memcpy(res, fd.output_buffer, 1024*sizeof(float));
 }
 
 bool AEC::inject_noise() {
-	return true;//status == LOST; //|| status == LEARNING;
+	return status == LOST || status == LEARNING;
 }
 
 void AEC::find_delay(float mic, float hp) {
@@ -85,7 +108,7 @@ void AEC::find_delay(float mic, float hp) {
 		if(max_xcor > 4.0f * low_max_xcor) {
 			std::printf("Found delay at %d chunks\n", argmax_xcor);
 			status = LEARNING;
-			current_delay = argmax_xcor * 8 ; // Safety margin
+			current_delay = argmax_xcor * 8;
 
 			hp_acc = 0.0f;
 			mic_acc = 0.0f;
diff --git a/pw_plugin_II/aec.h b/pw_plugin_II/aec.h
index b67fa14..0a74cc5 100644
--- a/pw_plugin_II/aec.h
+++ b/pw_plugin_II/aec.h
@@ -28,19 +28,8 @@ public:
 
 	int current_delay = 0; // We expect sudden drifts
 
-	enum {LOST, LEARNING, LOCKED} status = LOST; 
-
-	// 20ms active echo length should be more than enough
-	// The back-transform should have length 1024 
-	alignas(16) float h_DFT[4096]; // 2048 complex coefficients
-
-	alignas(16) float hp_DFT[4096]; // 2048 complex coefficients
-	alignas(16) float hp_work_buffer[2048]; // 2048 real coefficients
-
-	alignas(16) float mic_DFT[4096];
-
-	float mu = .5f/1024;
-	float delta = 1.0e-8f;
+	enum {LOST, LEARNING, ONLINE} status = LOST; 
+	int cnt = 0;
 
 	FDAF fd = FDAF(1024);
 
diff --git a/pw_plugin_II/fdaf.cc b/pw_plugin_II/fdaf.cc
index f6e425a..684d382 100644
--- a/pw_plugin_II/fdaf.cc
+++ b/pw_plugin_II/fdaf.cc
@@ -1,4 +1,5 @@
 #include <cstring>
+#include <cmath>
 #include "fdaf.h"
 
 FDAF::FDAF(int b) : block_len(b), buffer(new float[6*2*b + b]) {
@@ -18,45 +19,78 @@ FDAF::FDAF(int b) : block_len(b), buffer(new float[6*2*b + b]) {
 	setup = pffft_new_setup(2*block_len, PFFFT_REAL); // Rn we leak memory..
 }
 
-void FDAF::do_step(bool adapt) {
+void FDAF::do_step(bool adapt, float forcemu) {
+	/* When convolving, we look at the normalization constant which makes
+	the normalized dirac impulse have gain 1. It is 1/(FFT size).
+	*/
+	const float normalize = 1.0f/(2*block_len);
+
 	pffft_transform(setup, internal_aux, scratch[0], scratch[1], PFFFT_FORWARD);
 	pffft_transform(setup, impulse, scratch[1], scratch[2], PFFFT_FORWARD);
 	std::memset(scratch[2], 0, sizeof(float)*(2*block_len));
 
-	pffft_zconvolve_accumulate(setup, scratch[0], scratch[1], scratch[2], -1.0f);
+	pffft_zconvolve_accumulate(setup, scratch[0], scratch[1], scratch[2], -normalize);
 	pffft_transform(setup, scratch[2], internal_out, scratch[1], PFFFT_BACKWARD);
 
 
+	float echopower = 0.0f;
+	float inputpower = 0.0f;
+	float interfpower = 0.0f;
+
 	for(int i = 0; i < block_len; ++i) {
+		echopower  += output_buffer[i] * output_buffer[i];
+		inputpower += input_buffer[i] * input_buffer[i];
 		output_buffer[i] += input_buffer[i];
+
+		interfpower += output_buffer[i] * output_buffer[i];
 	}
 
+	// We have Fresnel relation : interfpower = echopower + inputpower + 2*cos(theta) * echopower * inputpower
+
+	//  With E = (ipow - echopow - inputpow) / (2 * sqrt(echopower * inputpower))
+	//  We fudge cos(theta) like 
+	//  E * sqrt(I1I2)/[sqrt(I1I2) + I2 * .5] + (-1) * I2 * .5/[I2 * .5 + sqrt(I1I2)]
+	// = (E * sqrt(I1/I2) - .5)/(sqrt(I1/I2) + .5)
+	echoratio = echopower/inputpower;
+	float sqrt = std::sqrt(echoratio);
+	float E = (interfpower - inputpower - echopower)/ (2 * std::sqrt(inputpower * echopower) + 1.0e-16);
+
+	costheta = (E * sqrt - .5f) / (sqrt + .5f);
+
 	if(adapt) {
 		// Reset the first part of the output buffer
 		std::memset(internal_out, 0, sizeof(float)*block_len);
 
-		// zreorder here?
+		pffft_transform(setup, internal_out, scratch[1], scratch[2], PFFFT_FORWARD);
 
 		// Take the conjugate of the DFT of the input -> prepare for convolution
-		pffft_transform(setup, internal_out, scratch[1], scratch[2], PFFFT_FORWARD);
-		for(int i = 0; i < 2*block_len; i+=8) {
-			scratch[0][i+4] = -scratch[0][i+4];
-			scratch[0][i+5] = -scratch[0][i+5];
-			scratch[0][i+6] = -scratch[0][i+6];
-			scratch[0][i+7] = -scratch[0][i+7];
+		pffft_zreorder(setup, scratch[0], scratch[2], PFFFT_FORWARD);
+		for(int i = 2; i < 2*block_len; i+=2) {
+			scratch[2][i+1] = -scratch[2][i+1];
 		}
 
+		/* Right now, we do not do frequency binning */
+		float power = 0.0f;
+		for(int i = 0; i < 2*block_len; ++i) {
+			power += internal_aux[i]*internal_aux[i];
+		}
+		power /= 2*block_len;
+		pffft_zreorder(setup, scratch[2], scratch[0], PFFFT_BACKWARD);
+
+		/* Compute (normalized) block gradient estimate */
 		std::memset(scratch[2], 0, sizeof(float)*(2*block_len));
-		pffft_zconvolve_accumulate(setup, scratch[0], scratch[1], scratch[2], 1.0f);
+		pffft_zconvolve_accumulate(setup, scratch[0], scratch[1], scratch[2],
+			normalize/(block_len * (power + delta)));
 		pffft_transform(setup, scratch[2], scratch[0], scratch[1], PFFFT_BACKWARD);
+
 		/* The data we want to convolve is the _beginning_ of the (reversed) 
-			 input buffer and the _end_ of the output buffer. Therefore the correlation result
-		   is at the _end_ of scratch[0]
+			 input buffer and the _end_ of the output buffer.
+			 Therefore the correlation result is at the _end_ of scratch[0]
 		*/
+		float act_mu = forcemu > 0.0f ? forcemu : mu;
 		for(int i = 0; i < block_len; ++i) {
-			impulse_test[i] += mu * (scratch[0][i+block_len] - impulse_test[i]);
-			//impulse[i] -= mu * scratch[0][i+block_len];
-			//std::printf("%e\n", impulse[i]);
+			// mu is constant... for now
+			impulse[i+block_len] += act_mu * scratch[0][i+block_len];
 		}
 	}	
 
diff --git a/pw_plugin_II/fdaf.h b/pw_plugin_II/fdaf.h
index 0efb073..682fb9d 100644
--- a/pw_plugin_II/fdaf.h
+++ b/pw_plugin_II/fdaf.h
@@ -7,7 +7,7 @@ class FDAF {
 public:
 	FDAF(int blen = 1024);
 
-	void do_step(bool adapt=false);
+	void do_step(bool adapt=false, float forcemu=-1.0f);
 
 	const int block_len;
 	float* aux_in; // HP command in
@@ -15,9 +15,12 @@ public:
 	float* input_buffer;
 	float* output_buffer;
 
-
 	float* impulse; // real, 2*block_len, zero padded
 	float* impulse_test;
+
+	float costheta = 0.0f;
+	float echoratio = 0.0f; // Power ratio, echo vs input
+
 private:
 	void shift_buffers(); // Prepare next step
 	void do_learn();
@@ -25,17 +28,11 @@ private:
 	PFFFT_Setup* setup;
 	std::unique_ptr<float[]> buffer;
 
-
 	float* internal_aux;  // real, 2*block_len
 	float* internal_out; // real, 2*block_len
 
 	float* scratch[3]; // FFT results, 2*block_len length each, times 3
 
-	float mu = 1.0f/1024;
-	float delta = 1e-15;
-
-	/*
-	float  power_decay = 1.0f/1024;
-	float* power_bins; // real, 2*block_len, imag/real counted separatly
-	*/
+	float mu = 0.001f;
+	float delta = 1e-8f;
 };
diff --git a/pw_plugin_II/main.cc b/pw_plugin_II/main.cc
index 8cf7a12..c9c974a 100644
--- a/pw_plugin_II/main.cc
+++ b/pw_plugin_II/main.cc
@@ -77,7 +77,7 @@ static void on_process(void* pro, spa_io_position* position) {
 		}
 		power /= n_samples;
 
-		if(power > 1e-8) {p->early_lock_cnt--;}
+		if(power > 1e-6) {p->early_lock_cnt--;}
 		else {p->early_lock_cnt = 10;}
 
 		if(!p->early_lock_cnt) {
@@ -183,7 +183,7 @@ static void do_report(void* pro, int) {
 	*/
 	std::printf("Impulse response : L = [\n");
 	for(int i = 0; i < 1024; ++i) {
-		std::printf("%e,", p->aec.fd.impulse_test[i]);
+		std::printf("%e,", p->aec.fd.impulse[i+p->aec.fd.block_len]);
 	}
 	std::printf("]\n");