feat(users/edef/weave): use FxHashSet and dedupe early
Deduping early saves a fair bit of memory, but the extra hashing is costly. We switch to FxHash, since we don't need a DoS-proof hash, but we do need it to be *fast*.

Change-Id: Ic6b7010874c417862baa9b882593208c8dd1d5e6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12648
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
parent
06d2536eec
commit
201d8f0cf2
4 changed files with 33 additions and 4 deletions
7
users/edef/weave/Cargo.lock
generated
7
users/edef/weave/Cargo.lock
generated
|
@ -1556,6 +1556,12 @@ version = "0.1.24"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
|
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc-hash"
|
||||||
|
version = "2.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc_version"
|
name = "rustc_version"
|
||||||
version = "0.4.1"
|
version = "0.4.1"
|
||||||
|
@ -2027,6 +2033,7 @@ dependencies = [
|
||||||
"nix-compat",
|
"nix-compat",
|
||||||
"polars",
|
"polars",
|
||||||
"rayon",
|
"rayon",
|
||||||
|
"rustc-hash",
|
||||||
"safer_owning_ref",
|
"safer_owning_ref",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -5446,6 +5446,21 @@ rec {
|
||||||
"rustc-dep-of-std" = [ "core" "compiler_builtins" ];
|
"rustc-dep-of-std" = [ "core" "compiler_builtins" ];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
"rustc-hash" = rec {
|
||||||
|
crateName = "rustc-hash";
|
||||||
|
version = "2.0.0";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "0lni0lf846bzrf3jvci6jaf4142n1mdqxvcpczk5ch9pfgyk8c2q";
|
||||||
|
libName = "rustc_hash";
|
||||||
|
authors = [
|
||||||
|
"The Rust Project Developers"
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"default" = [ "std" ];
|
||||||
|
"rand" = [ "dep:rand" "std" ];
|
||||||
|
};
|
||||||
|
resolvedDefaultFeatures = [ "default" "std" ];
|
||||||
|
};
|
||||||
"rustc_version" = rec {
|
"rustc_version" = rec {
|
||||||
crateName = "rustc_version";
|
crateName = "rustc_version";
|
||||||
version = "0.4.1";
|
version = "0.4.1";
|
||||||
|
@ -6766,6 +6781,10 @@ rec {
|
||||||
name = "rayon";
|
name = "rayon";
|
||||||
packageId = "rayon";
|
packageId = "rayon";
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
name = "rustc-hash";
|
||||||
|
packageId = "rustc-hash";
|
||||||
|
}
|
||||||
{
|
{
|
||||||
name = "safer_owning_ref";
|
name = "safer_owning_ref";
|
||||||
packageId = "safer_owning_ref";
|
packageId = "safer_owning_ref";
|
||||||
|
|
|
@ -13,6 +13,7 @@ hashbrown = "0.14.3"
|
||||||
nix-compat = { version = "0.1.0", path = "../../../tvix/nix-compat" }
|
nix-compat = { version = "0.1.0", path = "../../../tvix/nix-compat" }
|
||||||
safer_owning_ref = "0.5.0"
|
safer_owning_ref = "0.5.0"
|
||||||
rayon = "1.8.1"
|
rayon = "1.8.1"
|
||||||
|
rustc-hash = "2.0.0"
|
||||||
|
|
||||||
[dependencies.polars]
|
[dependencies.polars]
|
||||||
version = "0.36.2"
|
version = "0.36.2"
|
||||||
|
|
|
@ -8,8 +8,9 @@
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use hashbrown::{hash_table, HashTable};
|
use hashbrown::{hash_table, HashTable};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
use rustc_hash::FxHashSet;
|
||||||
use std::{
|
use std::{
|
||||||
collections::{BTreeMap, HashSet},
|
collections::BTreeMap,
|
||||||
fs::File,
|
fs::File,
|
||||||
ops::Index,
|
ops::Index,
|
||||||
sync::atomic::{AtomicU32, Ordering},
|
sync::atomic::{AtomicU32, Ordering},
|
||||||
|
@ -54,7 +55,8 @@ fn main() -> Result<()> {
|
||||||
eprintln!("{DONE}");
|
eprintln!("{DONE}");
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut todo = HashSet::with_capacity(roots.len());
|
let mut todo = FxHashSet::default();
|
||||||
|
todo.reserve(roots.len());
|
||||||
{
|
{
|
||||||
let mut unknown_roots = 0usize;
|
let mut unknown_roots = 0usize;
|
||||||
for (_, idx) in roots.table {
|
for (_, idx) in roots.table {
|
||||||
|
@ -99,14 +101,14 @@ fn main() -> Result<()> {
|
||||||
.par_iter()
|
.par_iter()
|
||||||
.flat_map(|&parent| {
|
.flat_map(|&parent| {
|
||||||
if parent == INDEX_NULL {
|
if parent == INDEX_NULL {
|
||||||
return vec![];
|
return FxHashSet::default();
|
||||||
}
|
}
|
||||||
|
|
||||||
ri_array[parent as usize]
|
ri_array[parent as usize]
|
||||||
.iter()
|
.iter()
|
||||||
.cloned()
|
.cloned()
|
||||||
.filter(|child| !seen.contains(child))
|
.filter(|child| !seen.contains(child))
|
||||||
.collect::<Vec<u32>>()
|
.collect::<FxHashSet<u32>>()
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue