On-line delay finding works

Now to choose between NMLS and FDAF for pw_plugin_II
This commit is contained in:
Sélène Corbineau 2025-02-05 10:51:49 +01:00
parent b9f678c52a
commit eaad54dcd8
10 changed files with 361 additions and 35 deletions

View file

@ -4,7 +4,10 @@ CXXFLAGS += -O3 -ffast-math -march=native -std=c++20 -Wall -Wextra \
.PHONY: build .PHONY: build
build: plug2 build: plug2
plug2: pickup_cancel.o main.o plug2: pickup_cancel.o equalizer.o main.o aec.o
$(CXX) $(LDFLAGS) $^ -o $@ $(CXX) $(LDFLAGS) $^ -o $@
#micplug.out: pickup_cancel.o
main.o: main.cc pickup_cancel.h equalizer.h aec.h
aec.o: aec.cc aec.h
pickup_cancel.o: pickup_cancel.cc pickup_cancel.h pickup_cancel.o: pickup_cancel.cc pickup_cancel.h
equalizer.o: equalizer.cc equalizer.h

77
pw_plugin_II/aec.cc Normal file
View file

@ -0,0 +1,77 @@
#include "aec.h"
#include <cmath>
#include <cstdio>
float AEC::do_sample(float mic, float hp, bool maybevoice) {
lookback[(lb_index++)&4095u] = hp;
if(status == LOST) {
find_delay(mic, hp); return 0.0f;
} else if(status == LEARNING) {
if(!maybevoice) {
do_learn(mic, hp);
}
return 0.0f;
}
}
bool AEC::inject_noise() {
return status == LOST; //|| status == LEARNING;
}
void AEC::find_delay(float mic, float hp) {
hp_acc += hp;
mic_acc += mic;
if(++lookback_phase % 8 == 0) {
lookback_power[lookback_phase/8 % lookback_power.size()] = hp_acc;
for(auto i = 0u; i < weighted_xcorr.size(); ++i) {
float y = lookback_power[(lookback_phase / 8 - i) % lookback_power.size()]
* mic_acc;
if(__builtin_expect(lookback_phase <= 8 * 512, 0)) {
weighted_xcorr[i] = y;
} else {
weighted_xcorr[i] += xcorr_decay * (y - weighted_xcorr[i]);
}
}
hp_acc = 0.0f;
mic_acc = 0.0f;
}
if(lookback_phase % (512*8) == 0) { // 4096 samples
float max_xcor = 0.0f;
int argmax_xcor = 0;
float low_max_xcor = 0.0f;
for(auto i = 0u; i < weighted_xcorr.size(); ++i) {
if(max_xcor < weighted_xcorr[i]*weighted_xcorr[i]) {
max_xcor = weighted_xcorr[i]*weighted_xcorr[i];
argmax_xcor = i;
}
if(i < 128 && weighted_xcorr[i]*weighted_xcorr[i] > low_max_xcor) {
low_max_xcor = weighted_xcorr[i]*weighted_xcorr[i];
}
}
if(max_xcor > 4.0f * low_max_xcor) {
std::printf("Found delay at %d chunks\n", argmax_xcor);
status = LEARNING;
current_delay = argmax_xcor * 8 + 32; // Safety margin
hp_acc = 0.0f;
mic_acc = 0.0f;
lookback_phase = 0;
for(auto i = lookback_power.size(); i >= 1; --i) {
lookback_power[lookback_power.size() - i] = 0.0f;
}
}
}
}
void AEC::do_learn(float mic, float hp) {
}

37
pw_plugin_II/aec.h Normal file
View file

@ -0,0 +1,37 @@
#pragma once
#include <array>
class AEC {
public:
float do_sample(float mic, float hp, bool maybevoice);
bool inject_noise();
std::array<float, 4096> lookback;
int lb_index;
/*
By chunks of 8 samples
We expect to have >= 1024 samples delay, or 128 chunks delay.
*/
std::array<float, 512> lookback_power;
float mic_acc = 0.0f;
float hp_acc = 0.0f;
int lookback_phase = 0;
std::array<float, 512> weighted_xcorr;
static constexpr float xcorr_decay = (1.0f)/(48000/8); // 100ms
int current_delay = 0; // We expect sudden drifts
enum {LOST, LEARNING, LOCKED} status = LOST;
// 20ms active echo length
// Should be more than enough
std::array<float, 1024> impulse_response;
float mu = .5/1024;
private:
void find_delay(float mic, float hp);
void do_learn(float mic, float hp);
};

33
pw_plugin_II/equalizer.cc Normal file
View file

@ -0,0 +1,33 @@
#include "equalizer.h"
#include <numbers>
#include <algorithm>
#include <cmath>
static constexpr float pi = std::numbers::pi_v<float>;
float equalizer::do_sample(float x) {
/*
constexpr float alpha = std::tan(shelf_trans_start * pi / 48000.0f);
constexpr float beta = std::tan(shelf_trans_end * pi / 48000.0f);
*/
/* Following the bilinear transform method, we implement
H(z) = (1 + z^-1 + alpha * (1 - z^-1))/(1 + z^-1 + beta*(1 - z^-1))
which gives the difference relation
y = 1/(1+beta) * [ x * (1 + alpha) + x^-1 * (1 - alpha) - y^-1 (1 - beta) ]
*/
/*
float y = 1.0f/(1.0f + beta) * (
x * (1.0f + alpha) + last_x * (1.0f - alpha) - last_y * (1.0f - beta) );
*/
constexpr float cut = 1.0f/std::tan(cut_start * pi / 48000.0f);
float y = ( cut * (x - last_x) + last_y * (cut - 1.0f))/(cut + 1.0f);
last_y = y;
last_x = x;
return y;
}

19
pw_plugin_II/equalizer.h Normal file
View file

@ -0,0 +1,19 @@
#pragma once
/*
Implements a simple high pass filter.
*/
class equalizer {
public:
float do_sample(float x);
float last_x = 0.0f;
float last_y = 0.0f;
/*
static constexpr float shelf_trans_start = 400.0f;
static constexpr float shelf_trans_end = 1000.0f;
*/
static constexpr float cut_start = 200.0f;
};

Binary file not shown.

View file

@ -1,8 +1,12 @@
#include "pickup_cancel.h" #include "pickup_cancel.h"
#include "equalizer.h"
#include "aec.h"
#include <unistd.h>
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <algorithm>
#include <random>
#include <spa/pod/builder.h> #include <spa/pod/builder.h>
//#include "spa/param/audio/format-utils.h" //#include "spa/param/audio/format-utils.h"
#include "spa/param/latency-utils.h" #include "spa/param/latency-utils.h"
@ -15,15 +19,33 @@ struct port {
}; };
struct processing { struct processing {
mains_PLL pll;
pickup_cancel pc; pickup_cancel pc;
equalizer eq1;
equalizer eq2;
AEC aec;
pw_main_loop* lp; pw_main_loop* lp;
pw_filter* filter; pw_filter* filter;
port* mic_in_port; port* mic_in_port;
port* mic_out_port; port* mic_out_port;
port* sink_FL;
port* sink_FR;
port* playback_FL;
port* playback_FR;
int hangover = 5; int hangover = 5;
float min_power = 0.0f; // Decays over ~3s float min_power = 0.0f; // Decays over ~3s
std::mt19937_64 gen;
std::normal_distribution<float> nd{0, 0.1}; // -20dB power
int early_lock_cnt = 10;
}; };
static void on_process(void* pro, spa_io_position* position) { static void on_process(void* pro, spa_io_position* position) {
@ -33,12 +55,62 @@ static void on_process(void* pro, spa_io_position* position) {
float* in = (float*)pw_filter_get_dsp_buffer(p->mic_in_port, n_samples); float* in = (float*)pw_filter_get_dsp_buffer(p->mic_in_port, n_samples);
float* out = (float*)pw_filter_get_dsp_buffer(p->mic_out_port, n_samples); float* out = (float*)pw_filter_get_dsp_buffer(p->mic_out_port, n_samples);
if(in == NULL || out == NULL) return; float* s_FL = (float*)pw_filter_get_dsp_buffer(p->sink_FL, n_samples);
float* s_FR = (float*)pw_filter_get_dsp_buffer(p->sink_FR, n_samples);
float* p_FL = (float*)pw_filter_get_dsp_buffer(p->playback_FL, n_samples);
float* p_FR = (float*)pw_filter_get_dsp_buffer(p->playback_FR, n_samples);
if(in == NULL || out == NULL
|| s_FL == NULL || s_FR == NULL
|| p_FL == NULL || p_FR == NULL) return;
if(__builtin_expect(p->early_lock_cnt, false)) {
std::memcpy(p_FL, s_FL, sizeof(float)*n_samples);
std::memcpy(p_FR, s_FR, sizeof(float)*n_samples);
std::memset(out, 0, sizeof(float)*n_samples);
float power = 0.0f;
for(int i = 0u; i < n_samples; ++i) {
power += *in * *in;
in++;
}
power /= n_samples;
if(power > 1e-8) {p->early_lock_cnt--;}
else {p->early_lock_cnt = 10;}
if(!p->early_lock_cnt) {
std::puts("Activated microphone");
}
return;
}
/* We do not do any injection... yet*/
if(p->aec.inject_noise()) {
for(auto i = 0u; i < n_samples; ++i) {
p_FL[i] = p->nd(p->gen);
p_FR[i] = p->nd(p->gen);
}
} else {
std::memcpy(p_FL, s_FL, sizeof(float)*n_samples);
std::memcpy(p_FR, s_FR, sizeof(float)*n_samples);
}
float curr_power = 0.0f; float curr_power = 0.0f;
bool maybevoice = p->hangover>0;
for(auto i = n_samples; i >= 1; --i) { for(auto i = n_samples; i >= 1; --i) {
curr_power += *in * *in; curr_power += *in * *in;
*out++ = *in - p->pc.do_sample(*in, p->hangover > 0);
float phase = p->pll.do_sample(*in, maybevoice);
float eqzd = p->eq1.do_sample(*in);
eqzd = p->eq2.do_sample(eqzd);
float rm_picked = eqzd - p->pc.do_sample(eqzd, phase, maybevoice);
*out++ = eqzd - p->aec.do_sample(*in/*rm_picked*/, *p_FR, maybevoice);
p_FR++;
in++; in++;
} }
@ -77,13 +149,13 @@ static void do_quit(void* pro, int) {
static void do_report(void* pro, int) { static void do_report(void* pro, int) {
auto* p = (processing*)pro; auto* p = (processing*)pro;
std::printf("PLL status : %s\n", p->pc.is_in_sync() ? "locked" : "unlocked"); std::printf("PLL status : %s\n", p->pll.is_in_sync() ? "locked" : "unlocked");
float s = p->pc.sin_power; float s = p->pll.sin_power;
float c = p->pc.cos_power; float c = p->pll.cos_power;
float ns = s*s/(s*s + c*c); float ns = s*s/(s*s + c*c);
float nc = c*c/(s*s + c*c); float nc = c*c/(s*s + c*c);
std::printf("Powers : %f %f\n", ns, nc); std::printf("Powers : %f %f\n", ns, nc);
std::printf("Instant. frequency : %f\n", p->pc.act_f * 48000 / (2*3.1415)); std::printf("Instant. frequency : %f\n", p->pll.act_f * 48000 / (2*3.1415));
float energy = 0.0f; float energy = 0.0f;
//std::puts("TD=["); //std::puts("TD=[");
@ -96,14 +168,26 @@ static void do_report(void* pro, int) {
std::printf("Correction power : %f dB\n", 10*std::log10(energy/2048)); std::printf("Correction power : %f dB\n", 10*std::log10(energy/2048));
std::printf("Estimated quiet power : %f dB\n", 10*std::log10(p->min_power/1024)); std::printf("Estimated quiet power : %f dB\n", 10*std::log10(p->min_power/1024));
int delay = p->aec.current_delay;
std::printf("Estimated delay : %d\n", delay);
std::printf("Rough delay est. : L = [\n");
for(int i = 0; i < 512; ++i) {
std::printf("%e,", p->aec.weighted_xcorr[i]);
}
std::printf("]\n");
} }
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
pw_init(&argc, &argv); pw_init(&argc, &argv);
std::printf("PID %ld\n", (long)getpid());
processing proc = { processing proc = {
pickup_cancel( mains_PLL(
1./48000 * 2 * 3.1415 * 50, // 50 Hz 1./48000 * 2 * 3.1415 * 50, // 50 Hz
1./48000 * 2 * 3.1415 * 2 // +- 2 Hz 1./48000 * 2 * 3.1415 * 2 // +- 2 Hz
)}; )};
@ -129,7 +213,8 @@ int main(int argc, char *argv[]) {
uint8_t buffer[1024]; uint8_t buffer[1024];
auto b = SPA_POD_BUILDER_INIT(buffer, sizeof(buffer)); auto b = SPA_POD_BUILDER_INIT(buffer, sizeof(buffer));
spa_audio_info_raw fmt = { spa_audio_info_raw fmt = {
.format = SPA_AUDIO_FORMAT_DSP_F32,
.format = SPA_AUDIO_FORMAT_DSP_F32,
.rate = 48000, .rate = 48000,
.channels = 1, .channels = 1,
.position = {SPA_AUDIO_CHANNEL_MONO} .position = {SPA_AUDIO_CHANNEL_MONO}
@ -159,6 +244,48 @@ int main(int argc, char *argv[]) {
NULL), // props NULL), // props
NULL, 0); NULL, 0);
proc.sink_FL = (port*)pw_filter_add_port(proc.filter,
PW_DIRECTION_INPUT,
PW_FILTER_PORT_FLAG_MAP_BUFFERS,
sizeof(struct port),// prt_data_size
pw_properties_new(
PW_KEY_FORMAT_DSP, "32 bit float mono audio",
PW_KEY_PORT_NAME, "sink_FL",
NULL), // props
NULL, 0);
proc.sink_FR = (port*)pw_filter_add_port(proc.filter,
PW_DIRECTION_INPUT,
PW_FILTER_PORT_FLAG_MAP_BUFFERS,
sizeof(struct port),// prt_data_size
pw_properties_new(
PW_KEY_FORMAT_DSP, "32 bit float mono audio",
PW_KEY_PORT_NAME, "sink_FR",
NULL), // props
NULL, 0);
proc.playback_FL = (port*)pw_filter_add_port(proc.filter,
PW_DIRECTION_OUTPUT,
PW_FILTER_PORT_FLAG_MAP_BUFFERS,
sizeof(struct port),// prt_data_size
pw_properties_new(
PW_KEY_FORMAT_DSP, "32 bit float mono audio",
PW_KEY_PORT_NAME, "playback_FL",
NULL), // props
NULL, 0);
proc.playback_FR = (port*)pw_filter_add_port(proc.filter,
PW_DIRECTION_OUTPUT,
PW_FILTER_PORT_FLAG_MAP_BUFFERS,
sizeof(struct port),// prt_data_size
pw_properties_new(
PW_KEY_FORMAT_DSP, "32 bit float mono audio",
PW_KEY_PORT_NAME, "playback_FR",
NULL), // props
NULL, 0);
if(pw_filter_connect(proc.filter, if(pw_filter_connect(proc.filter,
PW_FILTER_FLAG_RT_PROCESS, PW_FILTER_FLAG_RT_PROCESS,
NULL, 0) < 0) { NULL, 0) < 0) {

View file

@ -41,21 +41,20 @@ static constexpr float rc_filter(float spdist) {
} }
static constexpr float twopi = 2.0f*std::numbers::pi_v<float>; static constexpr float twopi = 2.0f*std::numbers::pi_v<float>;
static constexpr float pi = 2.0f*std::numbers::pi_v<float>; static constexpr float pi = std::numbers::pi_v<float>;
pickup_cancel::pickup_cancel(float fst_f, float eps_u) : mains_PLL::mains_PLL(float fst_f, float eps_u) :
act_f(fst_f), act_phi(0.0f), act_f(fst_f), act_phi(0.0f),
max_f(fst_f+eps_u),min_f(fst_f-eps_u), max_f(fst_f+eps_u),min_f(fst_f-eps_u),
decay_cst(fst_f/200.0f), error_filter(0.0f), decay_cst(fst_f/200.0f), error_filter(0.0f),
lockin_filter(1024), filter_index(0), nom_f(fst_f), lock_phi(0.0f) filter_index(0), nom_f(fst_f), lock_phi(0.0f)
{ {}
}
bool pickup_cancel::is_in_sync() { bool mains_PLL::is_in_sync() {
return sin_power * sin_power >= 2.0f * cos_power * cos_power; return sin_power * sin_power >= 2.0f * cos_power * cos_power;
} }
float pickup_cancel::do_sample(float s, bool maybevoice) { float mains_PLL::do_sample(float s, bool maybevoice) {
lock_phi += nom_f; lock_phi += nom_f;
if(lock_phi >= twopi) { if(lock_phi >= twopi) {
lock_phi -= twopi; lock_phi -= twopi;
@ -64,10 +63,10 @@ float pickup_cancel::do_sample(float s, bool maybevoice) {
std::complex<float> ex = std::exp(std::complex<float>(1.0i) * lock_phi); std::complex<float> ex = std::exp(std::complex<float>(1.0i) * lock_phi);
std::complex<float> c = s * ex; std::complex<float> c = s * ex;
filter_sum += c - lockin_filter[filter_index & 1023u]; filter_sum += c - lockin_filter[filter_index & 4095u];
lockin_filter[filter_index & 1023u] = c; lockin_filter[filter_index & 4095u] = c;
filter_index++; filter_index++;
float pll_sample = std::real(filter_sum * ex)/1024; float pll_sample = std::real(filter_sum * ex);
if(!maybevoice) { if(!maybevoice) {
mean_power += (pll_sample * pll_sample - mean_power) * 1.0f/48000; mean_power += (pll_sample * pll_sample - mean_power) * 1.0f/48000;
@ -118,6 +117,10 @@ float pickup_cancel::do_sample(float s, bool maybevoice) {
cos_power += cos_power +=
.1f/48000 * (pll_sample * std::cos(act_phi + pi * (act_f - nom_f)/(max_f - min_f)) - cos_power); .1f/48000 * (pll_sample * std::cos(act_phi + pi * (act_f - nom_f)/(max_f - min_f)) - cos_power);
return act_phi;
}
float pickup_cancel::do_sample(float s, float act_phi, bool maybevoice) {
float sampphase = act_phi/twopi * 2048; // in [0, 2048] float sampphase = act_phi/twopi * 2048; // in [0, 2048]
float corr = 0.0f; float corr = 0.0f;
@ -143,5 +146,5 @@ float pickup_cancel::do_sample(float s, bool maybevoice) {
iter++; iter++;
} }
} }
if(!this->is_in_sync()) {return 0.0f;} else {return corr;} return corr;
} }

View file

@ -4,11 +4,15 @@
#include <array> #include <array>
#include <vector> #include <vector>
class pickup_cancel { /*
public: Only 50Hz for now, but in theory should be good for 60Hz too barring
pickup_cancel(float fst_f, float eps); some tuning.
float do_sample(float x_input, bool maybevoice=false); */
class mains_PLL {
public:
mains_PLL(float fst_f, float eps);
float do_sample(float x_input, bool maybevoice=false);
bool is_in_sync(); bool is_in_sync();
float act_f; float act_f;
@ -19,6 +23,7 @@ public:
/* PLL filter time constant */ /* PLL filter time constant */
const float decay_cst; const float decay_cst;
float pd_delayed = 0.0f;
float error_filter = 0.0f; float error_filter = 0.0f;
float cos_power = 0.0f; float cos_power = 0.0f;
@ -26,15 +31,18 @@ public:
float mean_power = 1.0f; // Averaged over say 1s without voice float mean_power = 1.0f; // Averaged over say 1s without voice
// One revolution is 2048 samples
float mu = 1e-2f;
std::array<float, 2048> pickup_timedomain;
// Length 1024 boxcar filter (fst 0 ~at +48Hz !)
float nom_f; float nom_f;
float lock_phi; float lock_phi;
float pd_delayed; std::array<std::complex<float>, 4096> lockin_filter;
std::vector<std::complex<float>> lockin_filter;
unsigned int filter_index = 0; unsigned int filter_index = 0;
std::complex<float> filter_sum; std::complex<float> filter_sum;
}; };
class pickup_cancel {
public:
float do_sample(float x_input, float phase, bool maybevoice=false);
// One revolution is 2048 samples
float mu = 1e-2f;
std::array<float, 2048> pickup_timedomain;
};

View file

@ -1,7 +1,26 @@
#!/bin/sh #!/bin/sh
pactl unload-module module-null-sink pactl unload-module module-null-sink
pactl load-module module-null-sink sink_name=AEC_out channels=1 sink_properties=device.description=AEC_out pactl load-module module-null-sink sink_name=AEC_out channels=1 sink_properties=device.description=AEC_out
pactl load-module module-null-sink sink_name=AEC_sink channels=2 sink_properties=device.description=AEC_sink
echo "Created sinks"
pw-link audio-filter:playback_FL alsa_output.usb-Logitech_Logitech_USB_Headset-00.analog-stereo:playback_FL
pw-link audio-filter:playback_FR alsa_output.usb-Logitech_Logitech_USB_Headset-00.analog-stereo:playback_FR
echo "Linked playback"
pw-link alsa_input.pci-0000_00_1b.0.analog-stereo:capture_FL audio-filter:input pw-link alsa_input.pci-0000_00_1b.0.analog-stereo:capture_FL audio-filter:input
pw-link audio-filter:output AEC_out:playback_MONO pw-link audio-filter:output AEC_out:playback_MONO
echo "Linked AEC"
pw-link AEC_sink:monitor_FL audio-filter:sink_FL
pw-link AEC_sink:monitor_FR audio-filter:sink_FR
echo "Linked sink to input"
pactl set-default-source AEC_out.monitor pactl set-default-source AEC_out.monitor
pactl set-default-sink AEC_sink