From 8018313b6880d9fae71ba189a476502b68a26d25 Mon Sep 17 00:00:00 2001
From: Florian Klink <flokli@flokli.de>
Date: Sat, 9 Dec 2023 18:45:39 +0200
Subject: [PATCH] feat(tvix/glue): add nixpkgs eval benchmark

This adds a criterion.rs-based benchmark to tvix-glue.

It can be invoked by running `cargo bench` from inside the `tvix-glue`
crate. The benchmark requires `TVIX_BENCH_NIX_PATH` to be set; the tvix
dev shell (`tvix/shell.nix`) now exports it.
`target/criterion/report/index.html` contains nice graphs.
It's able to diff against the previous run, so you can invoke `cargo
bench` before and after a certain change to reason about its impact on
evaluation performance.

Currently, we need to set up the `Evaluation` and its resources
(builtins, NIX_PATH, IO handle) inside the benchmark loop itself, which
is a bit annoying, as the setup cost is included in what we measure.

This should become better with b/262.

Fixes b/322.

Change-Id: I91656a308887baa1d459ed54d58baae919a4aaf2
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10245
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
---
 tvix/Cargo.lock           |  1 +
 tvix/Cargo.nix            |  5 +++
 tvix/glue/Cargo.toml      |  5 +++
 tvix/glue/benches/eval.rs | 68 +++++++++++++++++++++++++++++++++++++++
 tvix/shell.nix            |  9 ++++++
 5 files changed, 88 insertions(+)
 create mode 100644 tvix/glue/benches/eval.rs
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock
index ce6f10bb7..1df6d199a 100644
--- a/tvix/Cargo.lock
+++ b/tvix/Cargo.lock
@@ -3161,6 +3161,7 @@ name = "tvix-glue"
 version = "0.1.0"
 dependencies = [
  "bytes",
+ "criterion",
  "lazy_static",
  "nix-compat",
  "test-case",
diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix
index 5e4b54d9d..91709258e 100644
--- a/tvix/Cargo.nix
+++ b/tvix/Cargo.nix
@@ -9840,6 +9840,11 @@ rec {
           }
         ];
         devDependencies = [
+          {
+            name = "criterion";
+            packageId = "criterion";
+            features = [ "html_reports" ];
+          }
           {
             name = "lazy_static";
             packageId = "lazy_static";
diff --git a/tvix/glue/Cargo.toml b/tvix/glue/Cargo.toml
index 4469c3bab..90ad27526 100644
--- a/tvix/glue/Cargo.toml
+++ b/tvix/glue/Cargo.toml
@@ -18,5 +18,10 @@ thiserror = "1.0.38"
 git = "https://github.com/tvlfyi/wu-manber.git"
 
 [dev-dependencies]
+criterion = { version = "0.5", features = ["html_reports"] }
 lazy_static = "1.4.0"
 test-case = "2.2.2"
+
+[[bench]]
+name = "eval"
+harness = false
diff --git a/tvix/glue/benches/eval.rs b/tvix/glue/benches/eval.rs
new file mode 100644
index 000000000..4aa9b3e5c
--- /dev/null
+++ b/tvix/glue/benches/eval.rs
@@ -0,0 +1,68 @@
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use lazy_static::lazy_static;
+use std::{cell::RefCell, env, rc::Rc, sync::Arc, time::Duration};
+use tvix_castore::{
+    blobservice::{BlobService, MemoryBlobService},
+    directoryservice::{DirectoryService, MemoryDirectoryService},
+};
+use tvix_glue::{
+    builtins::add_derivation_builtins, configure_nix_path, known_paths::KnownPaths,
+    tvix_store_io::TvixStoreIO,
+};
+use tvix_store::pathinfoservice::{MemoryPathInfoService, PathInfoService};
+// In-memory blob/directory/path-info services and a tokio runtime, shared across evaluations.
+lazy_static! {
+    static ref BLOB_SERVICE: Arc<dyn BlobService> = Arc::new(MemoryBlobService::default());
+    static ref DIRECTORY_SERVICE: Arc<dyn DirectoryService> =
+        Arc::new(MemoryDirectoryService::default());
+    static ref PATH_INFO_SERVICE: Arc<dyn PathInfoService> = Arc::new(MemoryPathInfoService::new(
+        BLOB_SERVICE.clone(),
+        DIRECTORY_SERVICE.clone(),
+    ));
+    static ref TOKIO_RUNTIME: tokio::runtime::Runtime = tokio::runtime::Runtime::new().unwrap();
+}
+/// Evaluates `code` in a freshly configured impure evaluation; panics if errors are reported.
+fn interpret(code: &str) {
+    // TODO(b/262): this setup is repeated on every call, i.e. inside the benchmark
+    // loop. It'd be nice if we could set this up once and then run evaluate() with
+    // a piece of code.
+    let mut eval = tvix_eval::Evaluation::new_impure(code, None);
+
+    let known_paths: Rc<RefCell<KnownPaths>> = Default::default();
+    add_derivation_builtins(&mut eval, known_paths.clone());
+    configure_nix_path(
+        &mut eval,
+        // TVIX_BENCH_NIX_PATH is mandatory: panic right away if it is unset,
+        // rather than benchmarking tvix returning an error.
+        &Some(env::var("TVIX_BENCH_NIX_PATH").expect("TVIX_BENCH_NIX_PATH must be set")),
+    );
+
+    eval.io_handle = Box::new(tvix_glue::tvix_io::TvixIO::new(
+        known_paths.clone(),
+        TvixStoreIO::new(
+            BLOB_SERVICE.clone(),
+            DIRECTORY_SERVICE.clone(),
+            PATH_INFO_SERVICE.clone(),
+            TOKIO_RUNTIME.handle().clone(),
+        ),
+    ));
+
+    let result = eval.evaluate();
+
+    assert!(result.errors.is_empty());
+}
+/// Registers the "hello outpath" benchmark: evaluates nixpkgs' `hello.outPath` per iteration.
+fn eval_nixpkgs(c: &mut Criterion) {
+    c.bench_function("hello outpath", |b| {
+        b.iter(|| {
+            interpret(black_box("(import <nixpkgs> {}).hello.outPath"));
+        })
+    });
+}
+// Runs `eval_nixpkgs` with a 30s measurement time and a sample size of 10.
+criterion_group!(
+    name = benches;
+    config = Criterion::default().measurement_time(Duration::from_secs(30)).sample_size(10);
+    targets = eval_nixpkgs
+);
+criterion_main!(benches);
diff --git a/tvix/shell.nix b/tvix/shell.nix
index 4859b2b1a..9a14554ac 100644
--- a/tvix/shell.nix
+++ b/tvix/shell.nix
@@ -35,4 +35,13 @@ pkgs.mkShell {
     pkgs.libiconv
     pkgs.buildPackages.darwin.apple_sdk.frameworks.Security
   ];
+
+  # Set TVIX_BENCH_NIX_PATH to a somewhat pinned nixpkgs path.
+  # This is for invoking `cargo bench` imperatively on the developer machine.
+  # For tvix benchmarking across longer periods of time (by CI), we probably
+  # should also benchmark with a more static nixpkgs checkout, so nixpkgs
+  # refactorings are not observed as eval perf changes.
+  shellHook = ''
+    export TVIX_BENCH_NIX_PATH=nixpkgs=${pkgs.path}
+  '';
 }