diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 9bea027e4..e92803e99 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -2400,6 +2400,12 @@ dependencies = [ "zstd", ] +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "nom" version = "7.1.3" @@ -4907,6 +4913,7 @@ dependencies = [ "lexical-core", "md-5", "mimalloc", + "nohash-hasher", "os_str_bytes", "path-clean", "pretty_assertions", diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index 9df3c8af8..33805ced0 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -7525,6 +7525,19 @@ rec { }; resolvedDefaultFeatures = [ "async" "default" "pin-project-lite" "tokio" "wire" ]; }; + "nohash-hasher" = rec { + crateName = "nohash-hasher"; + version = "0.2.0"; + edition = "2018"; + sha256 = "0lf4p6k01w4wm7zn4grnihzj8s7zd5qczjmzng7wviwxawih5x9b"; + authors = [ + "Parity Technologies " + ]; + features = { + "default" = [ "std" ]; + }; + resolvedDefaultFeatures = [ "default" "std" ]; + }; "nom" = rec { crateName = "nom"; version = "7.1.3"; @@ -16042,6 +16055,10 @@ rec { name = "md-5"; packageId = "md-5"; } + { + name = "nohash-hasher"; + packageId = "nohash-hasher"; + } { name = "os_str_bytes"; packageId = "os_str_bytes"; diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml index 9637165e8..70a356376 100644 --- a/tvix/eval/Cargo.toml +++ b/tvix/eval/Cargo.toml @@ -35,6 +35,7 @@ sha1 = "0.10.6" md-5 = "0.10.6" data-encoding = "2.6.0" rustc-hash = "2.0.0" +nohash-hasher = "0.2.0" [dev-dependencies] criterion = "0.5" diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs index 0365f5a9b..247d23a7c 100644 --- a/tvix/eval/src/value/string.rs +++ b/tvix/eval/src/value/string.rs @@ -4,18 +4,20 @@ //! level, allowing us to shave off some memory overhead and only //! paying the cost when creating new strings. use bstr::{BStr, BString, ByteSlice, Chars}; +use nohash_hasher::BuildNoHashHasher; use rnix::ast; -use rustc_hash::{FxHashMap, FxHashSet}; +use rustc_hash::FxHashSet; +use rustc_hash::FxHasher; use std::alloc::dealloc; use std::alloc::{alloc, handle_alloc_error, Layout}; use std::borrow::{Borrow, Cow}; use std::cell::RefCell; use std::ffi::c_void; use std::fmt::{self, Debug, Display}; -use std::hash::Hash; +use std::hash::{Hash, Hasher}; use std::ops::Deref; use std::ptr::{self, NonNull}; -use std::{mem, slice}; +use std::slice; use serde::de::{Deserializer, Visitor}; use serde::{Deserialize, Serialize}; @@ -399,22 +401,33 @@ impl NixStringInner { #[derive(Default)] struct InternerInner { - map: FxHashMap<&'static [u8], NonNull>, + #[allow(clippy::disallowed_types)] // Not using the default hasher + map: std::collections::HashMap, BuildNoHashHasher>, #[cfg(feature = "no_leak")] + #[allow(clippy::disallowed_types)] // Not using the default hasher interned_strings: FxHashSet>, } unsafe impl Send for InternerInner {} +fn hash(s: T) -> u64 +where + T: Hash, +{ + let mut hasher = FxHasher::default(); + s.hash(&mut hasher); + hasher.finish() +} + impl InternerInner { pub fn intern(&mut self, s: &[u8]) -> NixString { - if let Some(s) = self.map.get(s) { + let hash = hash(s); + if let Some(s) = self.map.get(&hash) { return NixString(*s); } let string = NixString::new_inner(s, None); - self.map - .insert(unsafe { mem::transmute(string.as_bytes()) }, string.0); + self.map.insert(hash, string.0); #[cfg(feature = "no_leak")] self.interned_strings.insert(string.0); string diff --git a/web/tvixbolt/Cargo.lock b/web/tvixbolt/Cargo.lock index 82162469d..c12647b97 100644 --- a/web/tvixbolt/Cargo.lock +++ b/web/tvixbolt/Cargo.lock @@ -958,6 +958,12 @@ dependencies = [ "adler", ] +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "nom8" version = "0.2.0" @@ -1562,6 +1568,7 @@ dependencies = [ "lazy_static", "lexical-core", "md-5", + "nohash-hasher", "os_str_bytes", "path-clean", "regex", diff --git a/web/tvixbolt/Cargo.nix b/web/tvixbolt/Cargo.nix index a2ad6cc33..68c34901a 100644 --- a/web/tvixbolt/Cargo.nix +++ b/web/tvixbolt/Cargo.nix @@ -2939,6 +2939,19 @@ rec { "simd-adler32" = [ "dep:simd-adler32" ]; }; }; + "nohash-hasher" = rec { + crateName = "nohash-hasher"; + version = "0.2.0"; + edition = "2018"; + sha256 = "0lf4p6k01w4wm7zn4grnihzj8s7zd5qczjmzng7wviwxawih5x9b"; + authors = [ + "Parity Technologies " + ]; + features = { + "default" = [ "std" ]; + }; + resolvedDefaultFeatures = [ "default" "std" ]; + }; "nom8" = rec { crateName = "nom8"; version = "0.2.0"; @@ -4635,6 +4648,10 @@ rec { name = "md-5"; packageId = "md-5"; } + { + name = "nohash-hasher"; + packageId = "nohash-hasher"; + } { name = "os_str_bytes"; packageId = "os_str_bytes";