On-line delay finding works

Now to choose between NMLS and FDAF for pw_plugin_II
2025-02-05 10:51:49 +01:00 · 2025-02-05 10:51:49 +01:00 · eaad54dcd8
commit eaad54dcd8
parent b9f678c52a
10 changed files with 361 additions and 35 deletions
--- a/pw_plugin_II/Makefile
+++ b/pw_plugin_II/Makefile
@ -4,7 +4,10 @@ CXXFLAGS += -O3 -ffast-math -march=native -std=c++20 -Wall -Wextra \
 .PHONY: build
 build: plug2
-plug2: pickup_cancel.o main.o
+plug2: pickup_cancel.o equalizer.o main.o aec.o
 	$(CXX) $(LDFLAGS) $^ -o $@
-#micplug.out: pickup_cancel.o
+
 main.o: main.cc pickup_cancel.h equalizer.h aec.h
 aec.o: aec.cc aec.h
 pickup_cancel.o: pickup_cancel.cc pickup_cancel.h
 equalizer.o: equalizer.cc equalizer.h
--- a/pw_plugin_II/aec.cc
+++ b/pw_plugin_II/aec.cc
@ -0,0 +1,77 @@
 #include "aec.h"
 #include <cmath>
 #include <cstdio>
 float AEC::do_sample(float mic, float hp, bool maybevoice) {
 	lookback[(lb_index++)&4095u] = hp;
 	if(status == LOST) {
 		find_delay(mic, hp); return 0.0f;
 	} else if(status == LEARNING) {
 		if(!maybevoice) {
 			do_learn(mic, hp);
 		}
 		return 0.0f;
 	}
 }
 bool AEC::inject_noise() {
 	return status == LOST; //|| status == LEARNING;
 }
 void AEC::find_delay(float mic, float hp) {
 	hp_acc += hp;
 	mic_acc += mic;
 	if(++lookback_phase % 8 == 0) {
 		lookback_power[lookback_phase/8 % lookback_power.size()] = hp_acc;
 		for(auto i = 0u; i < weighted_xcorr.size(); ++i) {
 			float y = lookback_power[(lookback_phase / 8  - i) % lookback_power.size()]
 				* mic_acc;
 			if(__builtin_expect(lookback_phase <= 8 * 512, 0)) {
 				weighted_xcorr[i] = y;
 			} else {
 				weighted_xcorr[i]	+= xcorr_decay * (y - weighted_xcorr[i]);
 			}
 		}
 		hp_acc = 0.0f;
 		mic_acc = 0.0f;
 	}
 	if(lookback_phase % (512*8) == 0) { // 4096 samples
 		float max_xcor = 0.0f;
 		int argmax_xcor = 0;
 		float low_max_xcor = 0.0f;
 		for(auto i = 0u; i < weighted_xcorr.size(); ++i) {
 			if(max_xcor < weighted_xcorr[i]*weighted_xcorr[i]) {
 				max_xcor = weighted_xcorr[i]*weighted_xcorr[i];
 				argmax_xcor = i;
 			}
 			if(i < 128 && weighted_xcorr[i]*weighted_xcorr[i] > low_max_xcor) {
 				low_max_xcor = weighted_xcorr[i]*weighted_xcorr[i];
 			}
 		}
 		if(max_xcor > 4.0f * low_max_xcor) {
 			std::printf("Found delay at %d chunks\n", argmax_xcor);
 			status = LEARNING;
 			current_delay = argmax_xcor * 8 + 32; // Safety margin
 			hp_acc = 0.0f;
 			mic_acc = 0.0f;
 			lookback_phase = 0;
 			for(auto i = lookback_power.size(); i >= 1; --i) {
 				lookback_power[lookback_power.size() - i] = 0.0f;
 			}	
 		}
 	}
 }
 void AEC::do_learn(float mic, float hp) {
 }
--- a/pw_plugin_II/aec.h
+++ b/pw_plugin_II/aec.h
@ -0,0 +1,37 @@
 #pragma once
 #include <array>
 class AEC {
 public:
 	float do_sample(float mic, float hp, bool maybevoice);
 	bool  inject_noise();
 	std::array<float, 4096> lookback;
 	int lb_index;
 	/*
 		 By chunks of 8 samples
 		 We expect to have >= 1024 samples delay, or 128 chunks delay.
 	*/
 	std::array<float, 512> lookback_power;
 	float mic_acc = 0.0f;
 	float hp_acc = 0.0f; 
 	int lookback_phase = 0;
 	std::array<float, 512> weighted_xcorr;
 	static constexpr float xcorr_decay = (1.0f)/(48000/8); // 100ms 
 	int current_delay = 0; // We expect sudden drifts
 	enum {LOST, LEARNING, LOCKED} status = LOST; 
 	// 20ms active echo length
 	// Should be more than enough
 	std::array<float, 1024> impulse_response;
 	float mu = .5/1024;
 private:
 	void find_delay(float mic, float hp);
 	void do_learn(float mic, float hp);
 };
--- a/pw_plugin_II/equalizer.cc
+++ b/pw_plugin_II/equalizer.cc
@ -0,0 +1,33 @@
 #include "equalizer.h"
 #include <numbers>
 #include <algorithm>
 #include <cmath>
 static constexpr float pi = std::numbers::pi_v<float>;
 float equalizer::do_sample(float x) {
 /*
 	constexpr float alpha = std::tan(shelf_trans_start * pi / 48000.0f);
 	constexpr float beta  = std::tan(shelf_trans_end * pi / 48000.0f);
 */
 	/* Following the bilinear transform method, we implement
 		H(z) = (1 + z^-1 + alpha * (1 - z^-1))/(1 + z^-1 + beta*(1 - z^-1))
 		which gives the difference relation
 		y = 1/(1+beta) * [ x * (1 + alpha) + x^-1 * (1 - alpha) - y^-1 (1 - beta) ]
 	*/
 /*
 	float y = 1.0f/(1.0f + beta) * (
 		x * (1.0f + alpha) + last_x * (1.0f - alpha) - last_y * (1.0f - beta) );
 */
 	constexpr float cut = 1.0f/std::tan(cut_start * pi / 48000.0f);
 	float y = ( cut * (x - last_x) + last_y * (cut - 1.0f))/(cut + 1.0f);
 	last_y = y;
 	last_x = x;
 	return y;
 }
--- a/pw_plugin_II/equalizer.h
+++ b/pw_plugin_II/equalizer.h
@ -0,0 +1,19 @@
 #pragma once
 /*
 	Implements a simple high pass filter.
 */
 class equalizer {
 public:
 	float do_sample(float x);
 	float last_x = 0.0f;
 	float last_y = 0.0f;
 	/*
 	static constexpr float shelf_trans_start = 400.0f;
 	static constexpr float shelf_trans_end   = 1000.0f;
 	*/
 	static constexpr float cut_start = 200.0f;
 };
--- a/pw_plugin_II/exemple
+++ b/pw_plugin_II/exemple
--- a/pw_plugin_II/main.cc
+++ b/pw_plugin_II/main.cc
@ -1,8 +1,12 @@
 #include "pickup_cancel.h"
 #include "equalizer.h"
 #include "aec.h"
 #include <unistd.h>
 #include <cstdio>
 #include <cstring>
-
+#include <algorithm>
 #include <random>
 #include <spa/pod/builder.h>
 //#include "spa/param/audio/format-utils.h"
 #include "spa/param/latency-utils.h"
@ -15,15 +19,33 @@ struct port {
 };
 struct processing {
 	mains_PLL pll;
 	pickup_cancel pc;
 	equalizer eq1;
 	equalizer eq2;
 	AEC aec;
 	pw_main_loop* lp;
 	pw_filter* filter;
 	port* mic_in_port;
 	port* mic_out_port;
 	port* sink_FL;
 	port* sink_FR;
 	port* playback_FL;
 	port* playback_FR;
 	int  hangover = 5;
 	float min_power = 0.0f; // Decays over ~3s
 	std::mt19937_64 gen;
 	std::normal_distribution<float> nd{0, 0.1}; // -20dB power
 	int early_lock_cnt = 10;
 };
 static void on_process(void* pro, spa_io_position* position) {
@ -33,12 +55,62 @@ static void on_process(void* pro, spa_io_position* position) {
 	float* in = (float*)pw_filter_get_dsp_buffer(p->mic_in_port, n_samples);
 	float* out = (float*)pw_filter_get_dsp_buffer(p->mic_out_port, n_samples);
-	if(in == NULL || out == NULL) return;
+	float* s_FL = (float*)pw_filter_get_dsp_buffer(p->sink_FL, n_samples);
 	float* s_FR = (float*)pw_filter_get_dsp_buffer(p->sink_FR, n_samples);
 	float* p_FL = (float*)pw_filter_get_dsp_buffer(p->playback_FL, n_samples);
 	float* p_FR = (float*)pw_filter_get_dsp_buffer(p->playback_FR, n_samples);
 	if(in == NULL || out == NULL
 		|| s_FL == NULL || s_FR == NULL
 		|| p_FL == NULL || p_FR == NULL) return;
 	if(__builtin_expect(p->early_lock_cnt, false)) {
 		std::memcpy(p_FL, s_FL, sizeof(float)*n_samples);
 		std::memcpy(p_FR, s_FR, sizeof(float)*n_samples);
 		std::memset(out, 0, sizeof(float)*n_samples); 
 		float power = 0.0f;
 		for(int i = 0u; i < n_samples; ++i) {
 			power += *in * *in;
 			in++;
 		}
 		power /= n_samples;
 		if(power > 1e-8) {p->early_lock_cnt--;}
 		else {p->early_lock_cnt = 10;}
 		if(!p->early_lock_cnt) {
 			std::puts("Activated microphone");
 		}
 		return;
 	}
 	/* We do not do any injection... yet*/
 	if(p->aec.inject_noise()) {
 		for(auto i = 0u; i < n_samples; ++i) {
 			p_FL[i] = p->nd(p->gen);
 			p_FR[i] = p->nd(p->gen);
 		}
 	} else {
 		std::memcpy(p_FL, s_FL, sizeof(float)*n_samples);
 		std::memcpy(p_FR, s_FR, sizeof(float)*n_samples);
 	}
 	float curr_power = 0.0f;
 	bool maybevoice = p->hangover>0;
 	for(auto i = n_samples; i >= 1; --i) {
 		curr_power += *in * *in;
-		*out++ = *in - p->pc.do_sample(*in, p->hangover > 0);
+
 		float phase = p->pll.do_sample(*in, maybevoice);
 		float eqzd  = p->eq1.do_sample(*in); 
 		eqzd  = p->eq2.do_sample(eqzd); 
 		float rm_picked = eqzd - p->pc.do_sample(eqzd, phase, maybevoice);
 		*out++ = eqzd - p->aec.do_sample(*in/*rm_picked*/, *p_FR, maybevoice);
 		p_FR++;
 		in++;
 	}
@ -77,13 +149,13 @@ static void do_quit(void* pro, int) {
 static void do_report(void* pro, int) {
 	auto* p = (processing*)pro;
-	std::printf("PLL status : %s\n", p->pc.is_in_sync() ? "locked" : "unlocked");
+	std::printf("PLL status : %s\n", p->pll.is_in_sync() ? "locked" : "unlocked");
-	float s = p->pc.sin_power;
+	float s = p->pll.sin_power;
-	float c = p->pc.cos_power;
+	float c = p->pll.cos_power;
 	float ns = s*s/(s*s + c*c);
 	float nc = c*c/(s*s + c*c);
 	std::printf("Powers : %f %f\n", ns, nc);
-	std::printf("Instant. frequency : %f\n", p->pc.act_f * 48000 / (2*3.1415));
+	std::printf("Instant. frequency : %f\n", p->pll.act_f * 48000 / (2*3.1415));
 	float energy = 0.0f;
 	//std::puts("TD=[");
@ -96,14 +168,26 @@ static void do_report(void* pro, int) {
 	std::printf("Correction power : %f dB\n", 10*std::log10(energy/2048));
 	std::printf("Estimated quiet power : %f dB\n", 10*std::log10(p->min_power/1024));
 	int delay = p->aec.current_delay;
 	std::printf("Estimated delay : %d\n", delay);
 	std::printf("Rough delay est. : L = [\n");
 	for(int i = 0; i < 512; ++i) {
 		std::printf("%e,", p->aec.weighted_xcorr[i]);
 	}
 	std::printf("]\n");
 }
 int main(int argc, char *argv[]) {
 	pw_init(&argc, &argv);
 	std::printf("PID %ld\n", (long)getpid());
 	processing proc = {
-		pickup_cancel(
+		mains_PLL(
 			1./48000 * 2 * 3.1415 * 50, //   50 Hz
 			1./48000 * 2 * 3.1415 * 2  // +- 2 Hz
 			)};
@ -129,7 +213,8 @@ int main(int argc, char *argv[]) {
 	uint8_t buffer[1024];
 	auto b = SPA_POD_BUILDER_INIT(buffer, sizeof(buffer));
 	spa_audio_info_raw fmt = {
-		.format = SPA_AUDIO_FORMAT_DSP_F32,
+		
 .format = SPA_AUDIO_FORMAT_DSP_F32,
 		.rate = 48000,
 		.channels = 1,
 		.position = {SPA_AUDIO_CHANNEL_MONO}
@ -159,6 +244,48 @@ int main(int argc, char *argv[]) {
 			NULL), // props
 		NULL, 0);
 	proc.sink_FL = (port*)pw_filter_add_port(proc.filter,
 		PW_DIRECTION_INPUT,
 		PW_FILTER_PORT_FLAG_MAP_BUFFERS,
 		sizeof(struct port),// prt_data_size
 		pw_properties_new(
 			PW_KEY_FORMAT_DSP, "32 bit float mono audio",
 			PW_KEY_PORT_NAME, "sink_FL",
 			NULL), // props
 		NULL, 0);
 	proc.sink_FR = (port*)pw_filter_add_port(proc.filter,
 		PW_DIRECTION_INPUT,
 		PW_FILTER_PORT_FLAG_MAP_BUFFERS,
 		sizeof(struct port),// prt_data_size
 		pw_properties_new(
 			PW_KEY_FORMAT_DSP, "32 bit float mono audio",
 			PW_KEY_PORT_NAME, "sink_FR",
 			NULL), // props
 		NULL, 0);
 	proc.playback_FL = (port*)pw_filter_add_port(proc.filter,
 		PW_DIRECTION_OUTPUT,
 		PW_FILTER_PORT_FLAG_MAP_BUFFERS,
 		sizeof(struct port),// prt_data_size
 		pw_properties_new(
 			PW_KEY_FORMAT_DSP, "32 bit float mono audio",
 			PW_KEY_PORT_NAME, "playback_FL",
 			NULL), // props
 		NULL, 0);
 	proc.playback_FR = (port*)pw_filter_add_port(proc.filter,
 		PW_DIRECTION_OUTPUT,
 		PW_FILTER_PORT_FLAG_MAP_BUFFERS,
 		sizeof(struct port),// prt_data_size
 		pw_properties_new(
 			PW_KEY_FORMAT_DSP, "32 bit float mono audio",
 			PW_KEY_PORT_NAME, "playback_FR",
 			NULL), // props
 		NULL, 0);
 	if(pw_filter_connect(proc.filter,
 			PW_FILTER_FLAG_RT_PROCESS,
 			NULL, 0) < 0) {
--- a/pw_plugin_II/pickup_cancel.cc
+++ b/pw_plugin_II/pickup_cancel.cc
@ -41,21 +41,20 @@ static constexpr float rc_filter(float spdist) {
 }
 static constexpr float twopi = 2.0f*std::numbers::pi_v<float>;
-static constexpr float pi = 2.0f*std::numbers::pi_v<float>;
+static constexpr float pi = std::numbers::pi_v<float>;
-pickup_cancel::pickup_cancel(float fst_f, float eps_u) :
+mains_PLL::mains_PLL(float fst_f, float eps_u) :
 	act_f(fst_f), act_phi(0.0f),
 	max_f(fst_f+eps_u),min_f(fst_f-eps_u),
 	decay_cst(fst_f/200.0f), error_filter(0.0f),
-	lockin_filter(1024), filter_index(0), nom_f(fst_f), lock_phi(0.0f)
+	filter_index(0), nom_f(fst_f), lock_phi(0.0f)
-{
+{}
 }
-bool pickup_cancel::is_in_sync() {
+bool mains_PLL::is_in_sync() {
 	return sin_power * sin_power >= 2.0f * cos_power * cos_power;
 }
-float pickup_cancel::do_sample(float s, bool maybevoice) {
+float mains_PLL::do_sample(float s, bool maybevoice) {
 	lock_phi += nom_f;
 	if(lock_phi >= twopi) {
 		lock_phi -= twopi;
@ -64,10 +63,10 @@ float pickup_cancel::do_sample(float s, bool maybevoice) {
 	std::complex<float> ex = std::exp(std::complex<float>(1.0i) * lock_phi);
 	std::complex<float> c = s * ex;
-	filter_sum += c - lockin_filter[filter_index & 1023u];
+	filter_sum += c - lockin_filter[filter_index & 4095u];
-	lockin_filter[filter_index & 1023u] = c;
+	lockin_filter[filter_index & 4095u] = c;
 	filter_index++;
-	float pll_sample = std::real(filter_sum * ex)/1024;
+	float pll_sample = std::real(filter_sum * ex);
 	if(!maybevoice) {
 		mean_power += (pll_sample * pll_sample - mean_power) * 1.0f/48000;
@ -118,6 +117,10 @@ float pickup_cancel::do_sample(float s, bool maybevoice) {
 	cos_power +=
 		.1f/48000 * (pll_sample * std::cos(act_phi + pi * (act_f - nom_f)/(max_f - min_f)) - cos_power);
 	return act_phi;
 }
 float pickup_cancel::do_sample(float s, float act_phi, bool maybevoice) {
 	float sampphase = act_phi/twopi * 2048; // in [0, 2048]
 	float corr = 0.0f;
@ -143,5 +146,5 @@ float pickup_cancel::do_sample(float s, bool maybevoice) {
 			iter++;
 		}
 	}
-	if(!this->is_in_sync()) {return 0.0f;} else {return corr;}
+	return corr;
 }
--- a/pw_plugin_II/pickup_cancel.h
+++ b/pw_plugin_II/pickup_cancel.h
@ -4,11 +4,15 @@
 #include <array>
 #include <vector>
-class pickup_cancel {
+/*
-public:
+Only 50Hz for now, but in theory should be good for 60Hz too barring
-	pickup_cancel(float fst_f, float eps);
+some tuning.
-	float do_sample(float x_input, bool maybevoice=false);
+*/
 class mains_PLL {
 public:
 	mains_PLL(float fst_f, float eps);
 	float do_sample(float x_input, bool maybevoice=false);
 	bool is_in_sync();
 	float act_f;
@ -19,6 +23,7 @@ public:
 	/* PLL filter time constant */
 	const float decay_cst;
 	float pd_delayed = 0.0f;
 	float error_filter = 0.0f;
 	float cos_power = 0.0f;
@ -26,15 +31,18 @@ public:
 	float mean_power = 1.0f; // Averaged over say 1s without voice
 	// One revolution is 2048 samples
 	float mu = 1e-2f;
 	std::array<float, 2048> pickup_timedomain;
 	// Length 1024 boxcar filter (fst 0 ~at +48Hz !)
 	float nom_f;
 	float lock_phi;
-	float pd_delayed;
+	std::array<std::complex<float>, 4096> lockin_filter;
 	std::vector<std::complex<float>> lockin_filter;
 	unsigned int filter_index = 0;
 	std::complex<float> filter_sum;
 };
 class pickup_cancel {
 public:
 	float do_sample(float x_input, float phase, bool maybevoice=false);
 	// One revolution is 2048 samples
 	float mu = 1e-2f;
 	std::array<float, 2048> pickup_timedomain;
 };
--- a/pw_plugin_II/script.sh
+++ b/pw_plugin_II/script.sh
@ -1,7 +1,26 @@
 #!/bin/sh
 pactl unload-module module-null-sink
 pactl load-module module-null-sink sink_name=AEC_out channels=1 sink_properties=device.description=AEC_out
 pactl load-module module-null-sink sink_name=AEC_sink channels=2 sink_properties=device.description=AEC_sink
 echo "Created sinks"
 pw-link audio-filter:playback_FL alsa_output.usb-Logitech_Logitech_USB_Headset-00.analog-stereo:playback_FL
 pw-link audio-filter:playback_FR alsa_output.usb-Logitech_Logitech_USB_Headset-00.analog-stereo:playback_FR
 echo "Linked playback"
 pw-link alsa_input.pci-0000_00_1b.0.analog-stereo:capture_FL audio-filter:input
 pw-link audio-filter:output AEC_out:playback_MONO
 echo "Linked AEC"
 pw-link AEC_sink:monitor_FL audio-filter:sink_FL
 pw-link AEC_sink:monitor_FR audio-filter:sink_FR
 echo "Linked sink to input"
 pactl set-default-source AEC_out.monitor
 pactl set-default-sink AEC_sink