fix(tvix/eval): Replace inner NixString repr with Box<Bstr>

Storing a full BString here incurs the extra overhead of the capacity
for the inner byte-vector, which we basically never use as Nix strings
are immutable (and we don't do any mutation / sharing analysis).
Switching to a Box<BStr> cuts us from 72 bytes to 64 bytes per
string (and there are a lot of strings!)

Change-Id: I11f34c14a08fa02759f260b1c78b2a2b981714e4
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10794
Autosubmit: aspen <root@gws.fyi>
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
Aspen Smith 2024-02-10 11:29:58 -05:00 committed by clbot
parent 24a089f87d
commit e3c92ac3b4
5 changed files with 92 additions and 65 deletions

View file

@ -3,7 +3,7 @@
//! See //tvix/eval/docs/builtins.md for a some context on the
//! available builtins in Nix.
use bstr::ByteVec;
use bstr::{ByteSlice, ByteVec};
use builtin_macros::builtins;
use genawaiter::rc::Gen;
use imbl::OrdMap;
@ -67,7 +67,7 @@ pub async fn coerce_value_to_path(
.await
{
Ok(vs) => {
let path = (**vs).clone().into_path_buf()?;
let path = vs.to_path()?.to_owned();
if path.is_absolute() {
Ok(Ok(path))
} else {
@ -82,7 +82,7 @@ pub async fn coerce_value_to_path(
mod pure_builtins {
use std::ffi::OsString;
use bstr::{BString, ByteSlice};
use bstr::{BString, ByteSlice, B};
use imbl::Vector;
use itertools::Itertools;
use os_str_bytes::OsStringBytes;
@ -171,26 +171,23 @@ mod pure_builtins {
#[builtin("baseNameOf")]
async fn builtin_base_name_of(co: GenCo, s: Value) -> Result<Value, ErrorKind> {
let span = generators::request_span(&co).await;
let mut s = match s {
val @ Value::Catchable(_) => return Ok(val),
_ => s
.coerce_to_string(
co,
CoercionKind {
strong: false,
import_paths: false,
},
span,
)
.await?
.to_contextful_str()?,
};
let s = s
.coerce_to_string(
co,
CoercionKind {
strong: false,
import_paths: false,
},
span,
)
.await?
.to_contextful_str()?;
let bs = s.as_mut_bstring();
let mut bs = (**s).to_owned();
if let Some(last_slash) = bs.rfind_char('/') {
*bs = bs[(last_slash + 1)..].into();
bs = bs[(last_slash + 1)..].into();
}
Ok(s.into())
Ok(NixString::new_inherit_context_from(&s, bs).into())
}
#[builtin("bitAnd")]
@ -344,7 +341,7 @@ mod pure_builtins {
.map(|last_slash| {
let x = &str[..last_slash];
if x.is_empty() {
b"/"
B("/")
} else {
x
}
@ -356,8 +353,7 @@ mod pure_builtins {
))))
} else {
Ok(Value::from(NixString::new_inherit_context_from(
&str,
result.into(),
&str, result,
)))
}
}
@ -1190,7 +1186,7 @@ mod pure_builtins {
// push the unmatched characters preceding the match
ret.push_back(Value::from(NixString::new_inherit_context_from(
&s,
(&text[pos..thematch.start()]).into(),
&text[pos..thematch.start()],
)));
// Push a list with one element for each capture
@ -1363,7 +1359,7 @@ mod pure_builtins {
Ok(Value::from(NixString::new_inherit_context_from(
&x,
(&x[beg..end]).into(),
&x[beg..end],
)))
}

View file

@ -146,7 +146,7 @@ impl NixContext {
// FIXME: when serializing, ignore the context?
#[derive(Clone, Debug, Serialize)]
pub struct NixString(BString, Option<NixContext>);
pub struct NixString(Box<BStr>, Option<NixContext>);
impl PartialEq for NixString {
fn eq(&self, other: &Self) -> bool {
@ -180,45 +180,66 @@ impl Ord for NixString {
}
}
impl From<Box<BStr>> for NixString {
fn from(value: Box<BStr>) -> Self {
Self(value, None)
}
}
impl From<BString> for NixString {
fn from(value: BString) -> Self {
Self(Vec::<u8>::from(value).into_boxed_slice().into(), None)
}
}
impl From<&BStr> for NixString {
fn from(value: &BStr) -> Self {
Self(value.to_owned(), None)
value.to_owned().into()
}
}
impl From<&[u8]> for NixString {
fn from(value: &[u8]) -> Self {
Self(value.into(), None)
Self::from(value.to_owned())
}
}
impl From<Vec<u8>> for NixString {
fn from(value: Vec<u8>) -> Self {
value.into_boxed_slice().into()
}
}
impl From<Box<[u8]>> for NixString {
fn from(value: Box<[u8]>) -> Self {
Self(value.into(), None)
}
}
impl From<&str> for NixString {
fn from(s: &str) -> Self {
Self(s.as_bytes().into(), None)
s.as_bytes().into()
}
}
impl From<String> for NixString {
fn from(s: String) -> Self {
Self(s.into(), None)
s.into_bytes().into()
}
}
impl From<(String, Option<NixContext>)> for NixString {
fn from((s, ctx): (String, Option<NixContext>)) -> Self {
NixString(s.into(), ctx)
impl<T> From<(T, Option<NixContext>)> for NixString
where
NixString: From<T>,
{
fn from((s, ctx): (T, Option<NixContext>)) -> Self {
NixString(NixString::from(s).0, ctx)
}
}
impl From<Box<str>> for NixString {
fn from(s: Box<str>) -> Self {
Self(s.into_boxed_bytes().into_vec().into(), None)
s.into_boxed_bytes().into()
}
}
@ -234,12 +255,18 @@ impl<'a> From<&'a NixString> for &'a BStr {
}
}
impl From<NixString> for BString {
impl From<NixString> for Box<BStr> {
fn from(s: NixString) -> Self {
s.0
}
}
impl From<NixString> for BString {
fn from(s: NixString) -> Self {
s.0.to_vec().into()
}
}
impl AsRef<[u8]> for NixString {
fn as_ref(&self) -> &[u8] {
&self.0
@ -292,7 +319,7 @@ impl<'de> Deserialize<'de> for NixString {
}
impl Deref for NixString {
type Target = BString;
type Target = BStr;
fn deref(&self) -> &Self::Target {
&self.0
@ -317,13 +344,19 @@ mod arbitrary {
}
impl NixString {
pub fn new_inherit_context_from(other: &NixString, new_contents: BString) -> Self {
Self(new_contents, other.1.clone())
pub fn new_inherit_context_from<T>(other: &NixString, new_contents: T) -> Self
where
NixString: From<T>,
{
Self(Self::from(new_contents).0, other.1.clone())
}
pub fn new_context_from(context: NixContext, contents: BString) -> Self {
pub fn new_context_from<T>(context: NixContext, contents: T) -> Self
where
NixString: From<T>,
{
Self(
contents,
Self::from(contents).0,
if context.is_empty() {
None
} else {
@ -332,12 +365,8 @@ impl NixString {
)
}
pub fn as_mut_bstring(&mut self) -> &mut BString {
&mut self.0
}
pub fn as_bstr(&self) -> &BStr {
BStr::new(self.as_bytes())
self
}
pub fn as_bytes(&self) -> &[u8] {
@ -345,7 +374,7 @@ impl NixString {
}
pub fn into_bstring(self) -> BString {
self.0
(*self.0).to_owned()
}
/// Return a displayable representation of the string as an
@ -378,7 +407,7 @@ impl NixString {
pub fn concat(&self, other: &Self) -> Self {
let mut s = self.to_vec();
s.extend(other.0.as_slice());
s.extend(&(***other));
let context = [&self.1, &other.1]
.into_iter()
@ -386,7 +415,7 @@ impl NixString {
.fold(NixContext::new(), |acc_ctx, new_ctx| {
acc_ctx.join(&mut new_ctx.clone())
});
Self::new_context_from(context, s.into())
Self::new_context_from(context, s)
}
pub(crate) fn context_mut(&mut self) -> Option<&mut NixContext> {
@ -514,6 +543,11 @@ mod tests {
use crate::properties::{eq_laws, hash_laws, ord_laws};
#[test]
fn size() {
assert_eq!(std::mem::size_of::<NixString>(), 64);
}
eq_laws!(NixString);
hash_laws!(NixString);
ord_laws!(NixString);

View file

@ -558,7 +558,7 @@ where
return frame.error(
self,
ErrorKind::AttributeNotFound {
name: (**key).clone().into_string_lossy()
name: key.to_str_lossy().into_owned()
},
);
}

View file

@ -125,7 +125,7 @@ pub(crate) mod derivation_builtins {
use std::collections::BTreeMap;
use super::*;
use bstr::{ByteSlice, ByteVec};
use bstr::ByteSlice;
use nix_compat::store_path::hash_placeholder;
use tvix_eval::generators::Gen;
use tvix_eval::{NixContext, NixContextElement, NixString};
@ -251,7 +251,7 @@ pub(crate) mod derivation_builtins {
Err(cek) => return Ok(Value::Catchable(cek)),
Ok(s) => {
input_context.mimic(&s);
drv.arguments.push((**s).clone().into_string()?)
drv.arguments.push(s.to_str()?.to_owned())
}
}
}
@ -280,14 +280,14 @@ pub(crate) mod derivation_builtins {
// Populate drv.outputs
if drv
.outputs
.insert((**output_name).clone().into_string()?, Default::default())
.insert(output_name.to_str()?.to_owned(), Default::default())
.is_some()
{
Err(DerivationError::DuplicateOutput(
(**output_name).clone().into_string_lossy(),
output_name.to_str_lossy().into_owned(),
))?
}
output_names.push((**output_name).clone().into_string()?);
output_names.push(output_name.to_str()?.to_owned());
}
// Add drv.environment[outputs] unconditionally.
@ -304,9 +304,9 @@ pub(crate) mod derivation_builtins {
input_context.mimic(&val_str);
if arg_name == "builder" {
drv.builder = (**val_str).clone().into_string()?;
drv.builder = val_str.to_str()?.to_owned();
} else {
drv.system = (**val_str).clone().into_string()?;
drv.system = val_str.to_str()?.to_owned();
}
// Either populate drv.environment or structured_attrs.
@ -314,7 +314,7 @@ pub(crate) mod derivation_builtins {
// No need to check for dups, we only iterate over every attribute name once
structured_attrs.insert(
arg_name.to_owned(),
(**val_str).clone().into_string()?.into(),
val_str.to_str()?.to_owned().into(),
);
} else {
insert_env(&mut drv, arg_name, val_str.as_bytes().into())?;
@ -373,7 +373,7 @@ pub(crate) mod derivation_builtins {
if let Some(attr) = attrs.select(key) {
match strong_importing_coerce_to_string(co, attr.clone()).await {
Err(cek) => return Ok(Err(cek)),
Ok(str) => return Ok(Ok(Some((**str).clone().into_string()?))),
Ok(str) => return Ok(Ok(Some(str.to_str()?.to_owned()))),
}
}
@ -520,7 +520,7 @@ pub(crate) mod derivation_builtins {
nix_compat::store_path::build_text_path(name.to_str()?, &content, content.iter_plain())
.map_err(|_e| {
nix_compat::derivation::DerivationError::InvalidOutputName(
(**name).clone().into_string_lossy(),
name.to_str_lossy().into_owned(),
)
})
.map_err(DerivationError::InvalidDerivation)?
@ -530,9 +530,6 @@ pub(crate) mod derivation_builtins {
// TODO: actually persist the file in the store at that path ...
Ok(Value::from(NixString::new_context_from(
context,
path.into(),
)))
Ok(Value::from(NixString::new_context_from(context, path)))
}
}

View file

@ -297,7 +297,7 @@ impl EvalIO for TvixStoreIO {
mod tests {
use std::{path::Path, rc::Rc, sync::Arc};
use bstr::ByteVec;
use bstr::ByteSlice;
use tempfile::TempDir;
use tvix_build::buildservice::DummyBuildService;
use tvix_castore::{
@ -356,7 +356,7 @@ mod tests {
let value = result.value.expect("must be some");
match value {
tvix_eval::Value::String(s) => Some((***s).clone().into_string_lossy()),
tvix_eval::Value::String(s) => Some(s.to_str_lossy().into_owned()),
_ => panic!("unexpected value type: {:?}", value),
}
}