feat(tvix/eval): Store hash in key of interner

Rather than storing the leaked allocation for the string as the key in
the interner, store the hash (using NoHashHashBuilder). I thought this
would improve performance, but it doesn't:

hello outpath           time:   [736.85 ms 748.42 ms 760.42 ms]
                        change: [-2.0754% +0.4798% +2.7096%] (p = 0.72 > 0.05)
                        No change in performance detected.

but it at least doesn't *hurt* performance, and it *does* avoid an
`unsafe`, so it's probably net good.

Change-Id: Ie413955bdb6f04b1f468f511e5ebce56e329fa37
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12049
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
Autosubmit: aspen <root@gws.fyi>
This commit is contained in:
Aspen Smith 2024-07-28 12:50:09 -04:00 committed by clbot
parent a6d6fc418d
commit d378111d77
6 changed files with 69 additions and 7 deletions

7
tvix/Cargo.lock generated
View file

@ -2400,6 +2400,12 @@ dependencies = [
"zstd",
]
[[package]]
name = "nohash-hasher"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451"
[[package]]
name = "nom"
version = "7.1.3"
@ -4907,6 +4913,7 @@ dependencies = [
"lexical-core",
"md-5",
"mimalloc",
"nohash-hasher",
"os_str_bytes",
"path-clean",
"pretty_assertions",

View file

@ -7525,6 +7525,19 @@ rec {
};
resolvedDefaultFeatures = [ "async" "default" "pin-project-lite" "tokio" "wire" ];
};
"nohash-hasher" = rec {
crateName = "nohash-hasher";
version = "0.2.0";
edition = "2018";
sha256 = "0lf4p6k01w4wm7zn4grnihzj8s7zd5qczjmzng7wviwxawih5x9b";
authors = [
"Parity Technologies <admin@parity.io>"
];
features = {
"default" = [ "std" ];
};
resolvedDefaultFeatures = [ "default" "std" ];
};
"nom" = rec {
crateName = "nom";
version = "7.1.3";
@ -16042,6 +16055,10 @@ rec {
name = "md-5";
packageId = "md-5";
}
{
name = "nohash-hasher";
packageId = "nohash-hasher";
}
{
name = "os_str_bytes";
packageId = "os_str_bytes";

View file

@ -35,6 +35,7 @@ sha1 = "0.10.6"
md-5 = "0.10.6"
data-encoding = "2.6.0"
rustc-hash = "2.0.0"
nohash-hasher = "0.2.0"
[dev-dependencies]
criterion = "0.5"

View file

@ -4,18 +4,20 @@
//! level, allowing us to shave off some memory overhead and only
//! paying the cost when creating new strings.
use bstr::{BStr, BString, ByteSlice, Chars};
use nohash_hasher::BuildNoHashHasher;
use rnix::ast;
use rustc_hash::{FxHashMap, FxHashSet};
use rustc_hash::FxHashSet;
use rustc_hash::FxHasher;
use std::alloc::dealloc;
use std::alloc::{alloc, handle_alloc_error, Layout};
use std::borrow::{Borrow, Cow};
use std::cell::RefCell;
use std::ffi::c_void;
use std::fmt::{self, Debug, Display};
use std::hash::Hash;
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::ptr::{self, NonNull};
use std::{mem, slice};
use std::slice;
use serde::de::{Deserializer, Visitor};
use serde::{Deserialize, Serialize};
@ -399,22 +401,33 @@ impl NixStringInner {
#[derive(Default)]
struct InternerInner {
map: FxHashMap<&'static [u8], NonNull<c_void>>,
#[allow(clippy::disallowed_types)] // Not using the default hasher
map: std::collections::HashMap<u64, NonNull<c_void>, BuildNoHashHasher<u64>>,
#[cfg(feature = "no_leak")]
#[allow(clippy::disallowed_types)] // Not using the default hasher
interned_strings: FxHashSet<NonNull<c_void>>,
}
unsafe impl Send for InternerInner {}
fn hash<T>(s: T) -> u64
where
T: Hash,
{
let mut hasher = FxHasher::default();
s.hash(&mut hasher);
hasher.finish()
}
impl InternerInner {
pub fn intern(&mut self, s: &[u8]) -> NixString {
if let Some(s) = self.map.get(s) {
let hash = hash(s);
if let Some(s) = self.map.get(&hash) {
return NixString(*s);
}
let string = NixString::new_inner(s, None);
self.map
.insert(unsafe { mem::transmute(string.as_bytes()) }, string.0);
self.map.insert(hash, string.0);
#[cfg(feature = "no_leak")]
self.interned_strings.insert(string.0);
string

View file

@ -958,6 +958,12 @@ dependencies = [
"adler",
]
[[package]]
name = "nohash-hasher"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451"
[[package]]
name = "nom8"
version = "0.2.0"
@ -1562,6 +1568,7 @@ dependencies = [
"lazy_static",
"lexical-core",
"md-5",
"nohash-hasher",
"os_str_bytes",
"path-clean",
"regex",

View file

@ -2939,6 +2939,19 @@ rec {
"simd-adler32" = [ "dep:simd-adler32" ];
};
};
"nohash-hasher" = rec {
crateName = "nohash-hasher";
version = "0.2.0";
edition = "2018";
sha256 = "0lf4p6k01w4wm7zn4grnihzj8s7zd5qczjmzng7wviwxawih5x9b";
authors = [
"Parity Technologies <admin@parity.io>"
];
features = {
"default" = [ "std" ];
};
resolvedDefaultFeatures = [ "default" "std" ];
};
"nom8" = rec {
crateName = "nom8";
version = "0.2.0";
@ -4635,6 +4648,10 @@ rec {
name = "md-5";
packageId = "md-5";
}
{
name = "nohash-hasher";
packageId = "nohash-hasher";
}
{
name = "os_str_bytes";
packageId = "os_str_bytes";