tvl-depot/tvix/cli/default.nix

98 lines
3.5 KiB
Nix
Raw Normal View History

{ depot, pkgs, lib, ... }:
let
# Builds a CI step (an attrset with `label`, `needsOutput` and `command`)
# that evaluates `(import <nixpkgs> {}).<attrset>` with the tvix binary at
# `result/bin/tvix` and fails unless the printed value is exactly the string
# `expectedPath`.
#
#   attrset:      attribute path inside nixpkgs, e.g. "hello.outPath"
#   expectedPath: the string tvix is expected to print for that attribute
mkNixpkgsEvalCheck = attrset: expectedPath:
  let
    # Every ".drv" in the attribute path is replaced by "-drv" before it is
    # used as part of the script name.
    scriptName = "tvix-eval-${builtins.replaceStrings [ ".drv" ] [ "-drv" ] attrset}";

    # The Nix expression handed to `tvix -E`.
    nixExpr = "(import ${pkgs.path} {}).${attrset}";

    # The verbatim expected Tvix output.
    expectedOutput = "=> \"${expectedPath}\" :: string";
  in
  {
    label = ":nix: evaluate nixpkgs.${attrset} in tvix";
    needsOutput = true;
    command = pkgs.writeShellScript scriptName ''
      TVIX_OUTPUT=$(result/bin/tvix -E '${nixExpr}')
      EXPECTED='${expectedOutput}'
      echo "Tvix output: ''${TVIX_OUTPUT}"
      if [ "$TVIX_OUTPUT" != "$EXPECTED" ]; then
      echo "Correct would have been ''${EXPECTED}"
      exit 1
      fi
      echo "Output was correct."
    '';
  };
feat(tvix/cli): add macrobenchmark This commit adds a simple MVP benchmark, built on our nix infrastructure instead of cargo. It simply runs `tvix-eval` inside of GNU time, and prints the three essential statistics in a short JSON blob. You can run the benchmark with a simple `nix run`, like: nix run -f . tvix.cli.benchmark-hello nix run -f . tvix.cli.benchmark-firefox nix run -f . tvix.cli.benchmark-cross-firefox Currently these blobs are stored only in the CI logs, which I'm sure get garbage-collected at some point. We should be putting them in the git trailers, but that can wait for a future CL. I tried using `cargo bench` for this but found it incredibly frustrating. Maybe I'm doing it wrong. It seems to be designed for microbenchmarks only, and very hard to control. It kept building all sorts of unnecessary stuff (like the tests), and unlike crate2nix it was doing all the builds on only a single machine instead of using more than one machine. Worse, for that single machine it kept picking my laptop instead of my fast servers! It seems excessively cargo-flavored for such a straightforward task. Benchmark: {"hello.outPath":{"kbytes":"244736","system":"0.36","user":"2.76"}} Benchmark: {"firefox.outPath":{"kbytes":"1506736","system":"2.38","user":"32.01"}} Benchmark: {"pkgsCross.aarch64-multiplatform.firefox.outPath":{"kbytes":"11334548","system":"10.70","user":"107.07"}} Change-Id: I85bc046ec551360284d7ecfc81a03914f0085909 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10216 Autosubmit: Adam Joseph <adam@westernsemico.com> Reviewed-by: grfn <grfn@gws.fyi> Reviewed-by: tazjin <tazjin@tvl.su> Tested-by: BuildkiteCI
2023-12-09 05:15:31 +01:00
# Produces the `--format` argument for GNU time: the literal prefix
# "Benchmark: " followed by a JSON object keyed by `description` whose
# values are the time(1) placeholders %M, %S and %U.
benchmark-gnutime-format-string = description:
  let
    # time(1) substitutes these placeholders when the command finishes.
    stats = {
      kbytes = "%M";
      system = "%S";
      user = "%U";
    };
  in
  "Benchmark: ${builtins.toJSON { "${description}" = stats; }}";
in
(depot.tvix.crates.workspaceMembers.tvix-cli.build.override {
runTests = true;
feat(tvix/cli): add macrobenchmark This commit adds a simple MVP benchmark, built on our nix infrastructure instead of cargo. It simply runs `tvix-eval` inside of GNU time, and prints the three essential statistics in a short JSON blob. You can run the benchmark with a simple `nix run`, like: nix run -f . tvix.cli.benchmark-hello nix run -f . tvix.cli.benchmark-firefox nix run -f . tvix.cli.benchmark-cross-firefox Currently these blobs are stored only in the CI logs, which I'm sure get garbage-collected at some point. We should be putting them in the git trailers, but that can wait for a future CL. I tried using `cargo bench` for this but found it incredibly frustrating. Maybe I'm doing it wrong. It seems to be designed for microbenchmarks only, and very hard to control. It kept building all sorts of unnecessary stuff (like the tests), and unlike crate2nix it was doing all the builds on only a single machine instead of using more than one machine. Worse, for that single machine it kept picking my laptop instead of my fast servers! It seems excessively cargo-flavored for such a straightforward task. Benchmark: {"hello.outPath":{"kbytes":"244736","system":"0.36","user":"2.76"}} Benchmark: {"firefox.outPath":{"kbytes":"1506736","system":"2.38","user":"32.01"}} Benchmark: {"pkgsCross.aarch64-multiplatform.firefox.outPath":{"kbytes":"11334548","system":"10.70","user":"107.07"}} Change-Id: I85bc046ec551360284d7ecfc81a03914f0085909 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10216 Autosubmit: Adam Joseph <adam@westernsemico.com> Reviewed-by: grfn <grfn@gws.fyi> Reviewed-by: tazjin <tazjin@tvl.su> Tested-by: BuildkiteCI
2023-12-09 05:15:31 +01:00
}).overrideAttrs (finalAttrs: previousAttrs:
let
# The package as it looks after this `overrideAttrs` has been applied; the
# benchmark scripts below invoke `bin/tvix` from it.
tvix-cli = finalAttrs.finalPackage;
# You can run the benchmark with a simple `nix run`, like:
#
# nix run -f . tvix.cli.benchmark-cross-hello
#
# TODO(amjoseph): store these results someplace more durable, like git trailers
#
# Builds a script package named `tvix-cli-benchmark-<description>` that runs
# `tvix --no-warnings -E <expr>` under GNU time, using the JSON format string
# from `benchmark-gnutime-format-string`. The package carries a `ci`
# passthru attribute (label, needsOutput, command) that invokes the script.
#
#   expr:        Nix expression (as a string) to evaluate
#   description: used in the script name, the JSON key and the CI label
mkExprBenchmark = { expr, description }:
  let
    name = "tvix-cli-benchmark-${description}";

    # Complete, shell-escaped command line of the benchmark.
    timedInvocation = lib.escapeShellArgs [
      "${pkgs.time}/bin/time"
      "--format"
      (benchmark-gnutime-format-string description)
      "${tvix-cli}/bin/tvix"
      "--no-warnings"
      "-E"
      expr
    ];

    benchmarkScript = pkgs.writeShellScriptBin name ''
      ${timedInvocation}
    '';
  in
  benchmarkScript.overrideAttrs (scriptFinal: scriptPrev: {
    passthru = (scriptPrev.passthru or { }) // {
      ci = {
        label = ":nix: benchmark ${description} in tvix";
        needsOutput = true;
        command = "${scriptFinal.finalPackage}/bin/${scriptFinal.meta.mainProgram}";
      };
    };
  });
# Benchmark for evaluating a single nixpkgs attribute path, e.g.
# "hello.outPath". The description is the attribute path with every ".drv"
# replaced by "-drv".
mkNixpkgsBenchmark = attrpath:
  let
    description = builtins.replaceStrings [ ".drv" ] [ "-drv" ] attrpath;
  in
  mkExprBenchmark {
    inherit description;
    expr = "(import ${pkgs.path} {}).${attrpath}";
  };
benchmarks = {
# Evaluation of nixpkgs attribute paths, both from the top-level package set
# and from `pkgsCross.aarch64-multiplatform`.
benchmark-hello = mkNixpkgsBenchmark "hello.outPath";
benchmark-cross-hello = mkNixpkgsBenchmark "pkgsCross.aarch64-multiplatform.hello.outPath";
benchmark-firefox = mkNixpkgsBenchmark "firefox.outPath";
benchmark-cross-firefox = mkNixpkgsBenchmark "pkgsCross.aarch64-multiplatform.firefox.outPath";
# Example used for benchmarking LightSpan::Delayed in commit bf286a54bc2ac5eeb78c3d5c5ae66e9af24d74d4
benchmark-nixpkgs-attrnames = mkExprBenchmark {
  description = "nixpkgs-attrnames";
  expr = "builtins.length (builtins.attrNames (import ${pkgs.path} {}))";
};
feat(tvix/cli): add macrobenchmark This commit adds a simple MVP benchmark, built on our nix infrastructure instead of cargo. It simply runs `tvix-eval` inside of GNU time, and prints the three essential statistics in a short JSON blob. You can run the benchmark with a simple `nix run`, like: nix run -f . tvix.cli.benchmark-hello nix run -f . tvix.cli.benchmark-firefox nix run -f . tvix.cli.benchmark-cross-firefox Currently these blobs are stored only in the CI logs, which I'm sure get garbage-collected at some point. We should be putting them in the git trailers, but that can wait for a future CL. I tried using `cargo bench` for this but found it incredibly frustrating. Maybe I'm doing it wrong. It seems to be designed for microbenchmarks only, and very hard to control. It kept building all sorts of unnecessary stuff (like the tests), and unlike crate2nix it was doing all the builds on only a single machine instead of using more than one machine. Worse, for that single machine it kept picking my laptop instead of my fast servers! It seems excessively cargo-flavored for such a straightforward task. Benchmark: {"hello.outPath":{"kbytes":"244736","system":"0.36","user":"2.76"}} Benchmark: {"firefox.outPath":{"kbytes":"1506736","system":"2.38","user":"32.01"}} Benchmark: {"pkgsCross.aarch64-multiplatform.firefox.outPath":{"kbytes":"11334548","system":"10.70","user":"107.07"}} Change-Id: I85bc046ec551360284d7ecfc81a03914f0085909 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10216 Autosubmit: Adam Joseph <adam@westernsemico.com> Reviewed-by: grfn <grfn@gws.fyi> Reviewed-by: tazjin <tazjin@tvl.su> Tested-by: BuildkiteCI
2023-12-09 05:15:31 +01:00
};
in
{
meta = {
ci.extraSteps = {
# Each step compares what tvix prints for the attribute path against the
# value the evaluator building this file computes for the same attribute.
eval-nixpkgs-stdenv-drvpath = mkNixpkgsEvalCheck "stdenv.drvPath" pkgs.stdenv.drvPath;
eval-nixpkgs-stdenv-outpath = mkNixpkgsEvalCheck "stdenv.outPath" pkgs.stdenv.outPath;
eval-nixpkgs-hello-outpath = mkNixpkgsEvalCheck "hello.outPath" pkgs.hello.outPath;
fix(tvix/eval): only finalise formal arguments if defaulting When dealing with a formal argument in a function argument pattern that has a default expression, there are two different things that can happen at runtime: Either we select its value from the passed attribute successfully or we need to use the default expression. Both of these may be thunks and both of these may need finalisers. However, in the former case this is taken care of elsewhere, the value will always be finalised already if necessary. In the latter case we may need to finalise the thunk resulting from the default expression. However, the thunk corresponding to the expression may never end up in the local's stack slot. Since finalisation goes by stack slot (and not constants), we need to prevent a case where we don't fall back to the default expression, but finalise anyways. Previously, we worked around this by making `OpFinalise` ignore non-thunks. Since finalisation of already evaluated thunks still crashed, the faulty compilation of function pattern arguments could still cause a crash. As a new approach, we reinstate the old behavior of `OpFinalise` to crash whenever encountering something that is either not a thunk or doesn't need finalisation. This can also help catching (similar) miscompilations in the future. To then prevent the crash, we need to track whether we have fallen back or not at runtime. This is done using an additional phantom on the stack that holds a new `FinaliseRequest` value. When it comes to finalisation we check this value and conditionally execute `OpFinalise` based on its value. Resolves b/261 and b/265 (partially). Change-Id: Ic04fb80ec671a2ba11fa645090769c335fb7f58b Reviewed-on: https://cl.tvl.fyi/c/depot/+/8705 Reviewed-by: tazjin <tazjin@tvl.su> Tested-by: BuildkiteCI Autosubmit: sterni <sternenseemann@systemli.org>
2023-06-03 02:10:31 +02:00
# Same checks against the `pkgsCross.aarch64-multiplatform` package set.
eval-nixpkgs-cross-stdenv-outpath =
  mkNixpkgsEvalCheck
    "pkgsCross.aarch64-multiplatform.stdenv.outPath"
    pkgs.pkgsCross.aarch64-multiplatform.stdenv.outPath;
eval-nixpkgs-cross-hello-outpath =
  mkNixpkgsEvalCheck
    "pkgsCross.aarch64-multiplatform.hello.outPath"
    pkgs.pkgsCross.aarch64-multiplatform.hello.outPath;
};
feat(tvix/cli): add macrobenchmark This commit adds a simple MVP benchmark, built on our nix infrastructure instead of cargo. It simply runs `tvix-eval` inside of GNU time, and prints the three essential statistics in a short JSON blob. You can run the benchmark with a simple `nix run`, like: nix run -f . tvix.cli.benchmark-hello nix run -f . tvix.cli.benchmark-firefox nix run -f . tvix.cli.benchmark-cross-firefox Currently these blobs are stored only in the CI logs, which I'm sure get garbage-collected at some point. We should be putting them in the git trailers, but that can wait for a future CL. I tried using `cargo bench` for this but found it incredibly frustrating. Maybe I'm doing it wrong. It seems to be designed for microbenchmarks only, and very hard to control. It kept building all sorts of unnecessary stuff (like the tests), and unlike crate2nix it was doing all the builds on only a single machine instead of using more than one machine. Worse, for that single machine it kept picking my laptop instead of my fast servers! It seems excessively cargo-flavored for such a straightforward task. Benchmark: {"hello.outPath":{"kbytes":"244736","system":"0.36","user":"2.76"}} Benchmark: {"firefox.outPath":{"kbytes":"1506736","system":"2.38","user":"32.01"}} Benchmark: {"pkgsCross.aarch64-multiplatform.firefox.outPath":{"kbytes":"11334548","system":"10.70","user":"107.07"}} Change-Id: I85bc046ec551360284d7ecfc81a03914f0085909 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10216 Autosubmit: Adam Joseph <adam@westernsemico.com> Reviewed-by: grfn <grfn@gws.fyi> Reviewed-by: tazjin <tazjin@tvl.su> Tested-by: BuildkiteCI
2023-12-09 05:15:31 +01:00
# Names of all attributes in `benchmarks`; the same attrset is attached to
# the package through `passthru`.
# NOTE(review): presumably the CI tooling looks these names up on the
# package to find additional build targets — confirm.
ci.targets = builtins.attrNames benchmarks;
};
feat(tvix/cli): add macrobenchmark This commit adds a simple MVP benchmark, built on our nix infrastructure instead of cargo. It simply runs `tvix-eval` inside of GNU time, and prints the three essential statistics in a short JSON blob. You can run the benchmark with a simple `nix run`, like: nix run -f . tvix.cli.benchmark-hello nix run -f . tvix.cli.benchmark-firefox nix run -f . tvix.cli.benchmark-cross-firefox Currently these blobs are stored only in the CI logs, which I'm sure get garbage-collected at some point. We should be putting them in the git trailers, but that can wait for a future CL. I tried using `cargo bench` for this but found it incredibly frustrating. Maybe I'm doing it wrong. It seems to be designed for microbenchmarks only, and very hard to control. It kept building all sorts of unnecessary stuff (like the tests), and unlike crate2nix it was doing all the builds on only a single machine instead of using more than one machine. Worse, for that single machine it kept picking my laptop instead of my fast servers! It seems excessively cargo-flavored for such a straightforward task. Benchmark: {"hello.outPath":{"kbytes":"244736","system":"0.36","user":"2.76"}} Benchmark: {"firefox.outPath":{"kbytes":"1506736","system":"2.38","user":"32.01"}} Benchmark: {"pkgsCross.aarch64-multiplatform.firefox.outPath":{"kbytes":"11334548","system":"10.70","user":"107.07"}} Change-Id: I85bc046ec551360284d7ecfc81a03914f0085909 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10216 Autosubmit: Adam Joseph <adam@westernsemico.com> Reviewed-by: grfn <grfn@gws.fyi> Reviewed-by: tazjin <tazjin@tvl.su> Tested-by: BuildkiteCI
2023-12-09 05:15:31 +01:00
# Expose benchmarks as standard CI targets: each entry of `benchmarks`
# becomes a passthru attribute of the package (e.g. `benchmark-hello`), and
# `meta.ci.targets` lists exactly these attribute names.
passthru = benchmarks;
})