feat(tvix/eval): Intern (and leak) small strings, behind a mutex
This is the most naive version of string interning possible - we store a map from the string itself to the pointer behind a global mutex, and memoize the allocation of all strings below a threshold length (16 bytes, for now) into that map. This requires leaking /all/ strings, since it's not easy to know just from the pointer that a string has been interned - so interning is disabled if string leaking is also disabled. In the case where we're leaking strings (the default), even the naive version of this gets us a pretty nice perfomance boost: hello outpath time: [742.54 ms 745.89 ms 749.14 ms] change: [-2.8722% -2.0135% -1.0654%] (p = 0.00 < 0.05) Performance has improved. However, in the case where we're not leaking strings, we have to keep track of which strings have and haven't been interned, which makes this a little worse: hello outpath time: [779.30 ms 792.82 ms 808.74 ms] change: [+2.5258% +4.0884% +5.8931%] (p = 0.00 < 0.05) Performance has regressed. Hopefully we can close the gap here a bit with some clever tricks (coming next). Change-Id: If08cb48ede703c7fe3bdd8d617443f8a561ad09b Reviewed-on: https://cl.tvl.fyi/c/depot/+/12047 Tested-by: BuildkiteCI Reviewed-by: flokli <flokli@flokli.de> Autosubmit: aspen <root@gws.fyi>
This commit is contained in:
parent
b8f92a6d53
commit
6366cee717
1 changed files with 82 additions and 5 deletions
|
@ -4,16 +4,19 @@
|
||||||
//! level, allowing us to shave off some memory overhead and only
|
//! level, allowing us to shave off some memory overhead and only
|
||||||
//! paying the cost when creating new strings.
|
//! paying the cost when creating new strings.
|
||||||
use bstr::{BStr, BString, ByteSlice, Chars};
|
use bstr::{BStr, BString, ByteSlice, Chars};
|
||||||
|
use lazy_static::lazy_static;
|
||||||
use rnix::ast;
|
use rnix::ast;
|
||||||
use rustc_hash::FxHashSet;
|
use rustc_hash::{FxHashMap, FxHashSet};
|
||||||
use std::alloc::{alloc, dealloc, handle_alloc_error, Layout};
|
use std::alloc::dealloc;
|
||||||
|
use std::alloc::{alloc, handle_alloc_error, Layout};
|
||||||
use std::borrow::{Borrow, Cow};
|
use std::borrow::{Borrow, Cow};
|
||||||
use std::ffi::c_void;
|
use std::ffi::c_void;
|
||||||
use std::fmt::{self, Debug, Display};
|
use std::fmt::{self, Debug, Display};
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::ptr::{self, NonNull};
|
use std::ptr::{self, NonNull};
|
||||||
use std::slice;
|
use std::sync::Mutex;
|
||||||
|
use std::{mem, slice};
|
||||||
|
|
||||||
use serde::de::{Deserializer, Visitor};
|
use serde::de::{Deserializer, Visitor};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
@ -395,6 +398,48 @@ impl NixStringInner {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
struct InternerInner {
|
||||||
|
map: FxHashMap<&'static [u8], NonNull<c_void>>,
|
||||||
|
#[cfg(feature = "no_leak")]
|
||||||
|
interned_strings: FxHashSet<NonNull<c_void>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe impl Send for InternerInner {}
|
||||||
|
|
||||||
|
impl InternerInner {
|
||||||
|
pub fn intern(&mut self, s: &[u8]) -> NixString {
|
||||||
|
if let Some(s) = self.map.get(s) {
|
||||||
|
return NixString(*s);
|
||||||
|
}
|
||||||
|
|
||||||
|
let string = NixString::new_inner(s, None);
|
||||||
|
self.map
|
||||||
|
.insert(unsafe { mem::transmute(string.as_bytes()) }, string.0);
|
||||||
|
#[cfg(feature = "no_leak")]
|
||||||
|
self.interned_strings.insert(string.0);
|
||||||
|
string
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
struct Interner(Mutex<InternerInner>);
|
||||||
|
|
||||||
|
impl Interner {
|
||||||
|
pub fn intern(&self, s: &[u8]) -> NixString {
|
||||||
|
self.0.lock().unwrap().intern(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "no_leak")]
|
||||||
|
pub fn is_interned_string(&self, string: &NixString) -> bool {
|
||||||
|
self.0.lock().unwrap().interned_strings.contains(&string.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref INTERNER: Interner = Interner::default();
|
||||||
|
}
|
||||||
|
|
||||||
/// Nix string values
|
/// Nix string values
|
||||||
///
|
///
|
||||||
/// # Internals
|
/// # Internals
|
||||||
|
@ -414,8 +459,20 @@ unsafe impl Send for NixString {}
|
||||||
unsafe impl Sync for NixString {}
|
unsafe impl Sync for NixString {}
|
||||||
|
|
||||||
impl Drop for NixString {
|
impl Drop for NixString {
|
||||||
|
#[cfg(not(feature = "no_leak"))]
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
if !cfg!(feature = "no_leak") {
|
if self.context().is_some() {
|
||||||
|
// SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct
|
||||||
|
// according to the rules of dealloc
|
||||||
|
unsafe {
|
||||||
|
NixStringInner::dealloc(self.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "no_leak")]
|
||||||
|
fn drop(&mut self) {
|
||||||
|
if INTERNER.is_interned_string(self) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -458,7 +515,7 @@ impl Debug for NixString {
|
||||||
|
|
||||||
impl PartialEq for NixString {
|
impl PartialEq for NixString {
|
||||||
fn eq(&self, other: &Self) -> bool {
|
fn eq(&self, other: &Self) -> bool {
|
||||||
self.as_bstr() == other.as_bstr()
|
self.0 == other.0 || self.as_bstr() == other.as_bstr()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -484,6 +541,9 @@ impl PartialOrd for NixString {
|
||||||
|
|
||||||
impl Ord for NixString {
|
impl Ord for NixString {
|
||||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||||
|
if self.0 == other.0 {
|
||||||
|
return std::cmp::Ordering::Equal;
|
||||||
|
}
|
||||||
self.as_bstr().cmp(other.as_bstr())
|
self.as_bstr().cmp(other.as_bstr())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -647,6 +707,9 @@ mod arbitrary {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set non-scientifically. TODO(aspen): think more about what this should be
|
||||||
|
const INTERN_THRESHOLD: usize = 32;
|
||||||
|
|
||||||
impl NixString {
|
impl NixString {
|
||||||
fn new(contents: &[u8], context: Option<Box<NixContext>>) -> Self {
|
fn new(contents: &[u8], context: Option<Box<NixContext>>) -> Self {
|
||||||
debug_assert!(
|
debug_assert!(
|
||||||
|
@ -654,6 +717,20 @@ impl NixString {
|
||||||
"BUG: initialized with empty context"
|
"BUG: initialized with empty context"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if !cfg!(feature = "no_leak") /* It's only safe to intern if we leak strings, since there's
|
||||||
|
* nothing yet preventing interned strings from getting freed
|
||||||
|
* (and then used by other copies) otherwise
|
||||||
|
*/
|
||||||
|
&& contents.len() <= INTERN_THRESHOLD
|
||||||
|
&& context.is_none()
|
||||||
|
{
|
||||||
|
return INTERNER.intern(contents);
|
||||||
|
}
|
||||||
|
|
||||||
|
Self::new_inner(contents, context)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_inner(contents: &[u8], context: Option<Box<NixContext>>) -> Self {
|
||||||
// SAFETY: We're always fully initializing a NixString here:
|
// SAFETY: We're always fully initializing a NixString here:
|
||||||
//
|
//
|
||||||
// 1. NixStringInner::alloc sets up the len for us
|
// 1. NixStringInner::alloc sets up the len for us
|
||||||
|
|
Loading…
Add table
Reference in a new issue