feat(tvix/eval): Forbid Hash{Map,Set}, use Fx instead

Per https://nnethercote.github.io/perf-book/hashing.html, we have
basically no reason to use the default hasher over a faster,
non-DoS-resistant hasher. This gives a nice perf boost basically for
free:

hello outpath           time:   [704.76 ms 714.91 ms 725.63 ms]
                        change: [-7.2391% -6.1018% -4.9189%] (p = 0.00 < 0.05)
                        Performance has improved.

Change-Id: If5587f444ed3af69f8af4eead6af3ea303b4ae68
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12046
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
Reviewed-by: Ilan Joselevich <personal@ilanjoselevich.com>
Autosubmit: aspen <root@gws.fyi>
This commit is contained in:
Aspen Smith 2024-07-28 12:11:41 -04:00 committed by clbot
parent 1d7ba89c19
commit b8f92a6d53
17 changed files with 116 additions and 46 deletions

14
tvix/Cargo.lock generated
View file

@ -3085,7 +3085,7 @@ dependencies = [
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustc-hash 1.1.0",
"rustls 0.23.7",
"thiserror",
"tokio",
@ -3101,7 +3101,7 @@ dependencies = [
"bytes",
"rand",
"ring",
"rustc-hash",
"rustc-hash 1.1.0",
"rustls 0.23.7",
"slab",
"thiserror",
@ -3416,7 +3416,7 @@ dependencies = [
"countme",
"hashbrown 0.14.3",
"memoffset 0.9.0",
"rustc-hash",
"rustc-hash 1.1.0",
"text-size",
]
@ -3473,6 +3473,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc-hash"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
[[package]]
name = "rustc_version"
version = "0.4.0"
@ -4864,6 +4870,7 @@ dependencies = [
"nix-compat",
"rnix",
"rowan",
"rustc-hash 2.0.0",
"rustyline",
"smol_str",
"thiserror",
@ -4905,6 +4912,7 @@ dependencies = [
"rnix",
"rowan",
"rstest",
"rustc-hash 2.0.0",
"serde",
"serde_json",
"sha1",

View file

@ -9701,7 +9701,7 @@ rec {
}
{
name = "rustc-hash";
packageId = "rustc-hash";
packageId = "rustc-hash 1.1.0";
}
{
name = "rustls";
@ -9768,7 +9768,7 @@ rec {
}
{
name = "rustc-hash";
packageId = "rustc-hash";
packageId = "rustc-hash 1.1.0";
}
{
name = "rustls";
@ -10901,7 +10901,7 @@ rec {
}
{
name = "rustc-hash";
packageId = "rustc-hash";
packageId = "rustc-hash 1.1.0";
}
{
name = "text-size";
@ -11051,7 +11051,7 @@ rec {
"rustc-dep-of-std" = [ "core" "compiler_builtins" ];
};
};
"rustc-hash" = rec {
"rustc-hash 1.1.0" = rec {
crateName = "rustc-hash";
version = "1.1.0";
edition = "2015";
@ -11064,6 +11064,20 @@ rec {
};
resolvedDefaultFeatures = [ "default" "std" ];
};
"rustc-hash 2.0.0" = rec {
crateName = "rustc-hash";
version = "2.0.0";
edition = "2021";
sha256 = "0lni0lf846bzrf3jvci6jaf4142n1mdqxvcpczk5ch9pfgyk8c2q";
authors = [
"The Rust Project Developers"
];
features = {
"default" = [ "std" ];
"rand" = [ "dep:rand" "std" ];
};
resolvedDefaultFeatures = [ "default" "std" ];
};
"rustc_version" = rec {
crateName = "rustc_version";
version = "0.4.0";
@ -15892,6 +15906,10 @@ rec {
name = "rowan";
packageId = "rowan";
}
{
name = "rustc-hash";
packageId = "rustc-hash 2.0.0";
}
{
name = "rustyline";
packageId = "rustyline";
@ -16049,6 +16067,10 @@ rec {
name = "rowan";
packageId = "rowan";
}
{
name = "rustc-hash";
packageId = "rustc-hash 2.0.0";
}
{
name = "serde";
packageId = "serde";

View file

@ -26,6 +26,7 @@ thiserror = "1.0.38"
tokio = "1.28.0"
tracing = "0.1.40"
tracing-indicatif = "0.3.6"
rustc-hash = "2.0.0"
[dependencies.wu-manber]
git = "https://github.com/tvlfyi/wu-manber.git"

View file

@ -1,5 +1,7 @@
use std::{collections::HashMap, path::PathBuf, rc::Rc};
use std::path::PathBuf;
use std::rc::Rc;
use rustc_hash::FxHashMap;
use smol_str::SmolStr;
use std::fmt::Write;
use tracing::{instrument, Span};
@ -86,7 +88,7 @@ pub fn evaluate(
path: Option<PathBuf>,
args: &Args,
allow_incomplete: AllowIncomplete,
env: Option<&HashMap<SmolStr, Value>>,
env: Option<&FxHashMap<SmolStr, Value>>,
globals: Option<Rc<GlobalsMap>>,
source_map: Option<SourceCode>,
) -> Result<EvalResult, IncompleteInput> {
@ -218,7 +220,7 @@ pub fn interpret(
args: &Args,
explain: bool,
allow_incomplete: AllowIncomplete,
env: Option<&HashMap<SmolStr, Value>>,
env: Option<&FxHashMap<SmolStr, Value>>,
globals: Option<Rc<GlobalsMap>>,
source_map: Option<SourceCode>,
) -> Result<InterpretResult, IncompleteInput> {

View file

@ -1,6 +1,7 @@
use std::path::PathBuf;
use std::rc::Rc;
use std::{collections::HashMap, path::PathBuf};
use rustc_hash::FxHashMap;
use rustyline::{error::ReadlineError, Editor};
use smol_str::SmolStr;
use tvix_eval::{GlobalsMap, SourceCode, Value};
@ -88,7 +89,7 @@ pub struct Repl<'a> {
multiline_input: Option<String>,
rl: Editor<()>,
/// Local variables defined at the top-level in the repl
env: HashMap<SmolStr, Value>,
env: FxHashMap<SmolStr, Value>,
io_handle: Rc<TvixStoreIO>,
args: &'a Args,
@ -102,7 +103,7 @@ impl<'a> Repl<'a> {
Self {
multiline_input: None,
rl,
env: HashMap::new(),
env: FxHashMap::default(),
io_handle,
args,
source_map: Default::default(),

View file

@ -34,6 +34,7 @@ sha2 = "0.10.8"
sha1 = "0.10.6"
md-5 = "0.10.6"
data-encoding = "2.6.0"
rustc-hash = "2.0.0"
[dev-dependencies]
criterion = "0.5"

3
tvix/eval/clippy.toml Normal file
View file

@ -0,0 +1,3 @@
# See https://nnethercote.github.io/perf-book/hashing.html. Use FxHashMap and
# FxHashSet, not HashMap and HashSet
disallowed-types = ["std::collections::HashMap", "std::collections::HashSet"]

View file

@ -9,8 +9,8 @@ use genawaiter::rc::Gen;
use imbl::OrdMap;
use regex::Regex;
use std::cmp::{self, Ordering};
use std::collections::BTreeMap;
use std::collections::VecDeque;
use std::collections::{BTreeMap, HashSet};
use std::path::PathBuf;
use crate::arithmetic_op;
@ -88,6 +88,7 @@ mod pure_builtins {
use imbl::Vector;
use itertools::Itertools;
use os_str_bytes::OsStringBytes;
use rustc_hash::FxHashSet;
use crate::{value::PointerEquality, AddContext, NixContext, NixContextElement};
@ -700,7 +701,7 @@ mod pure_builtins {
//
// In this implementation, we do none of that, no syntax checks, no realization.
// The next `TODO` are the checks that Nix implements.
let mut ctx_elements: HashSet<NixContextElement> = HashSet::new();
let mut ctx_elements: FxHashSet<NixContextElement> = FxHashSet::default();
let span = generators::request_span(&co).await;
let origin = origin
.coerce_to_string(
@ -1115,7 +1116,7 @@ mod pure_builtins {
.to_list()?
.into_iter()
.map(|v| v.to_str())
.collect::<Result<HashSet<_>, _>>()?;
.collect::<Result<FxHashSet<_>, _>>()?;
let res = attrs.iter().filter_map(|(k, v)| {
if !keys.contains(k) {
Some((k.clone(), v.clone()))

View file

@ -560,7 +560,7 @@ impl Compiler<'_, '_> {
/// Emit definitions for all variables in the top-level global env passed to the evaluation (eg
/// local variables in the REPL)
pub(super) fn compile_env(&mut self, env: &HashMap<SmolStr, Value>) {
pub(super) fn compile_env(&mut self, env: &FxHashMap<SmolStr, Value>) {
for (name, value) in env {
self.scope_mut().declare_constant(name.to_string());
self.emit_constant(value.clone(), &EntireFile);

View file

@ -20,8 +20,9 @@ mod scope;
use codemap::Span;
use rnix::ast::{self, AstToken};
use rustc_hash::FxHashMap;
use smol_str::SmolStr;
use std::collections::{BTreeMap, HashMap};
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::rc::{Rc, Weak};
@ -117,7 +118,7 @@ impl TrackedFormal {
/// The map of globally available functions and other values that
/// should implicitly be resolvable in the global scope.
pub type GlobalsMap = HashMap<&'static str, Value>;
pub type GlobalsMap = FxHashMap<&'static str, Value>;
/// Set of builtins that (if they exist) should be made available in
/// the global scope, meaning that they can be accessed not just
@ -187,7 +188,7 @@ impl<'source, 'observer> Compiler<'source, 'observer> {
pub(crate) fn new(
location: Option<PathBuf>,
globals: Rc<GlobalsMap>,
env: Option<&HashMap<SmolStr, Value>>,
env: Option<&FxHashMap<SmolStr, Value>>,
source: &'source SourceCode,
file: &'source codemap::File,
observer: &'observer mut dyn CompilerObserver,
@ -1588,7 +1589,7 @@ pub fn prepare_globals(
Rc::new_cyclic(Box::new(move |weak: &Weak<GlobalsMap>| {
// First step is to construct the builtins themselves as
// `NixAttrs`.
let mut builtins: GlobalsMap = HashMap::from_iter(builtins);
let mut builtins: GlobalsMap = FxHashMap::from_iter(builtins);
// At this point, optionally insert `import` if enabled. To
// "tie the knot" of `import` needing the full set of globals
@ -1601,7 +1602,7 @@ pub fn prepare_globals(
// Next, the actual map of globals which the compiler will use
// to resolve identifiers is constructed.
let mut globals: GlobalsMap = HashMap::new();
let mut globals: GlobalsMap = FxHashMap::default();
// builtins contain themselves (`builtins.builtins`), which we
// can resolve by manually constructing a suspended thunk that
@ -1654,7 +1655,7 @@ pub fn compile(
expr: &ast::Expr,
location: Option<PathBuf>,
globals: Rc<GlobalsMap>,
env: Option<&HashMap<SmolStr, Value>>,
env: Option<&FxHashMap<SmolStr, Value>>,
source: &SourceCode,
file: &codemap::File,
observer: &mut dyn CompilerObserver,

View file

@ -10,10 +10,8 @@
//! stack indices. To do this, the compiler simulates where locals
//! will be at runtime using the data structures implemented here.
use std::{
collections::{hash_map, HashMap},
ops::Index,
};
use rustc_hash::FxHashMap;
use std::{collections::hash_map, ops::Index};
use smol_str::SmolStr;
@ -168,7 +166,7 @@ pub struct Scope {
pub upvalues: Vec<Upvalue>,
/// Secondary by-name index over locals.
by_name: HashMap<String, ByName>,
by_name: FxHashMap<String, ByName>,
/// How many scopes "deep" are these locals?
scope_depth: usize,

View file

@ -36,7 +36,7 @@ mod test_utils;
#[cfg(test)]
mod tests;
use std::collections::HashMap;
use rustc_hash::FxHashMap;
use std::path::PathBuf;
use std::rc::Rc;
use std::str::FromStr;
@ -86,7 +86,7 @@ enum BuilderGlobals {
pub struct EvaluationBuilder<'co, 'ro, 'env, IO> {
source_map: Option<SourceCode>,
globals: BuilderGlobals,
env: Option<&'env HashMap<SmolStr, Value>>,
env: Option<&'env FxHashMap<SmolStr, Value>>,
io_handle: IO,
enable_import: bool,
strict: bool,
@ -264,7 +264,7 @@ impl<'co, 'ro, 'env, IO> EvaluationBuilder<'co, 'ro, 'env, IO> {
Self { nix_path, ..self }
}
pub fn env(self, env: Option<&'env HashMap<SmolStr, Value>>) -> Self {
pub fn env(self, env: Option<&'env FxHashMap<SmolStr, Value>>) -> Self {
Self { env, ..self }
}
@ -352,7 +352,7 @@ pub struct Evaluation<'co, 'ro, 'env, IO> {
globals: Rc<GlobalsMap>,
/// Top-level variables to define in the evaluation
env: Option<&'env HashMap<SmolStr, Value>>,
env: Option<&'env FxHashMap<SmolStr, Value>>,
/// Implementation of file-IO to use during evaluation, e.g. for
/// impure builtins.
@ -570,7 +570,7 @@ fn parse_compile_internal(
location: Option<PathBuf>,
source: SourceCode,
globals: Rc<GlobalsMap>,
env: Option<&HashMap<SmolStr, Value>>,
env: Option<&FxHashMap<SmolStr, Value>>,
compiler_observer: &mut dyn CompilerObserver,
) -> Option<Rc<Lambda>> {
let parsed = rnix::ast::Root::parse(code);

View file

@ -5,9 +5,9 @@
//! paying the cost when creating new strings.
use bstr::{BStr, BString, ByteSlice, Chars};
use rnix::ast;
use rustc_hash::FxHashSet;
use std::alloc::{alloc, dealloc, handle_alloc_error, Layout};
use std::borrow::{Borrow, Cow};
use std::collections::HashSet;
use std::ffi::c_void;
use std::fmt::{self, Debug, Display};
use std::hash::Hash;
@ -40,23 +40,29 @@ pub enum NixContextElement {
/// operations, e.g. concatenation, interpolation and other string operations.
#[repr(transparent)]
#[derive(Clone, Debug, Serialize, Default)]
pub struct NixContext(HashSet<NixContextElement>);
pub struct NixContext(FxHashSet<NixContextElement>);
impl From<NixContextElement> for NixContext {
fn from(value: NixContextElement) -> Self {
Self([value].into())
let mut set = FxHashSet::default();
set.insert(value);
Self(set)
}
}
impl From<HashSet<NixContextElement>> for NixContext {
fn from(value: HashSet<NixContextElement>) -> Self {
impl From<FxHashSet<NixContextElement>> for NixContext {
fn from(value: FxHashSet<NixContextElement>) -> Self {
Self(value)
}
}
impl<const N: usize> From<[NixContextElement; N]> for NixContext {
fn from(value: [NixContextElement; N]) -> Self {
Self(HashSet::from(value))
let mut set = FxHashSet::default();
for elt in value {
set.insert(elt);
}
Self(set)
}
}

View file

@ -18,9 +18,9 @@
//! object, but when forcing a thunk, the runtime *must* mutate the
//! memoisable slot.
use rustc_hash::FxHashSet;
use std::{
cell::{Ref, RefCell, RefMut},
collections::HashSet,
fmt::Debug,
rc::Rc,
};
@ -413,7 +413,7 @@ impl TotalDisplay for Thunk {
/// The inner `HashSet` is not available on the outside, as it would be
/// potentially unsafe to interact with the pointers in the set.
#[derive(Default)]
pub struct ThunkSet(HashSet<*const ThunkRepr>);
pub struct ThunkSet(FxHashSet<*const ThunkRepr>);
impl ThunkSet {
/// Check whether the given thunk has already been seen. Will mark the thunk

View file

@ -14,8 +14,9 @@ mod macros;
use bstr::{BString, ByteSlice, ByteVec};
use codemap::Span;
use rustc_hash::FxHashMap;
use serde_json::json;
use std::{cmp::Ordering, collections::HashMap, ops::DerefMut, path::PathBuf, rc::Rc};
use std::{cmp::Ordering, ops::DerefMut, path::PathBuf, rc::Rc};
use crate::{
arithmetic_op,
@ -211,7 +212,7 @@ impl Frame {
}
#[derive(Default)]
struct ImportCache(HashMap<PathBuf, Value>);
struct ImportCache(FxHashMap<PathBuf, Value>);
/// The `ImportCache` holds the `Value` resulting from `import`ing a certain
/// file, so that the same file doesn't need to be re-evaluated multiple times.

View file

@ -1223,7 +1223,7 @@ dependencies = [
"countme",
"hashbrown 0.14.5",
"memoffset",
"rustc-hash",
"rustc-hash 1.1.0",
"text-size",
]
@ -1239,6 +1239,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc-hash"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
[[package]]
name = "rustversion"
version = "1.0.17"
@ -1561,6 +1567,7 @@ dependencies = [
"regex",
"rnix",
"rowan",
"rustc-hash 2.0.0",
"serde",
"serde_json",
"sha1",

View file

@ -3668,7 +3668,7 @@ rec {
}
{
name = "rustc-hash";
packageId = "rustc-hash";
packageId = "rustc-hash 1.1.0";
}
{
name = "text-size";
@ -3695,7 +3695,7 @@ rec {
"rustc-dep-of-std" = [ "core" "compiler_builtins" ];
};
};
"rustc-hash" = rec {
"rustc-hash 1.1.0" = rec {
crateName = "rustc-hash";
version = "1.1.0";
edition = "2015";
@ -3708,6 +3708,20 @@ rec {
};
resolvedDefaultFeatures = [ "default" "std" ];
};
"rustc-hash 2.0.0" = rec {
crateName = "rustc-hash";
version = "2.0.0";
edition = "2021";
sha256 = "0lni0lf846bzrf3jvci6jaf4142n1mdqxvcpczk5ch9pfgyk8c2q";
authors = [
"The Rust Project Developers"
];
features = {
"default" = [ "std" ];
"rand" = [ "dep:rand" "std" ];
};
resolvedDefaultFeatures = [ "default" "std" ];
};
"rustversion" = rec {
crateName = "rustversion";
version = "1.0.17";
@ -4642,6 +4656,10 @@ rec {
name = "rowan";
packageId = "rowan";
}
{
name = "rustc-hash";
packageId = "rustc-hash 2.0.0";
}
{
name = "serde";
packageId = "serde";