From 4516cd09c51b7a19707de0a5ba171c9592241a18 Mon Sep 17 00:00:00 2001 From: sterni Date: Sat, 3 Jun 2023 02:10:31 +0200 Subject: [PATCH] fix(tvix/eval): only finalise formal arguments if defaulting When dealing with a formal argument in a function argument pattern that has a default expression, there are two different things that can happen at runtime: Either we select its value from the passed attribute successfully or we need to use the default expression. Both of these may be thunks and both of these may need finalisers. However, in the former case this is taken care of elsewhere, the value will always be finalised already if necessary. In the latter case we may need to finalise the thunk resulting from the default expression. However, the thunk corresponding to the expression may never end up in the local's stack slot. Since finalisation goes by stack slot (and not constants), we need to prevent a case where we don't fall back to the default expression, but finalise anyways. Previously, we worked around this by making `OpFinalise` ignore non-thunks. Since finalisation of already evaluated thunks still crashed, the faulty compilation of function pattern arguments could still cause a crash. As a new approach, we reinstate the old behavior of `OpFinalise` to crash whenever encountering something that is either not a thunk or doesn't need finalisation. This can also help catching (similar) miscompilations in the future. To then prevent the crash, we need to track whether we have fallen back or not at runtime. This is done using an additional phantom on the stack that holds a new `FinaliseRequest` value. When it comes to finalisation we check this value and conditionally execute `OpFinalise` based on its value. Resolves b/261 and b/265 (partially). Change-Id: Ic04fb80ec671a2ba11fa645090769c335fb7f58b Reviewed-on: https://cl.tvl.fyi/c/depot/+/8705 Reviewed-by: tazjin Tested-by: BuildkiteCI Autosubmit: sterni --- tvix/cli/default.nix | 5 +- .../eval/docs/known-optimisation-potential.md | 10 + tvix/eval/src/builtins/to_xml.rs | 3 +- tvix/eval/src/compiler/mod.rs | 189 +++++++++++++++--- tvix/eval/src/opcode.rs | 1 + ...ormals-miscompilation-b-261-regression.exp | 1 + ...ormals-miscompilation-b-261-regression.nix | 20 ++ tvix/eval/src/value/json.rs | 5 +- tvix/eval/src/value/mod.rs | 14 +- tvix/eval/src/vm/mod.rs | 30 +-- tvix/serde/src/de.rs | 3 +- 11 files changed, 230 insertions(+), 51 deletions(-) create mode 100644 tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.exp create mode 100644 tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.nix diff --git a/tvix/cli/default.nix b/tvix/cli/default.nix index a4bb19701..0b73fa1aa 100644 --- a/tvix/cli/default.nix +++ b/tvix/cli/default.nix @@ -28,9 +28,8 @@ in eval-nixpkgs-stdenv-drvpath = (mkNixpkgsEvalCheck "stdenv.drvPath" pkgs.stdenv.drvPath); eval-nixpkgs-stdenv-outpath = (mkNixpkgsEvalCheck "stdenv.outPath" pkgs.stdenv.outPath); eval-nixpkgs-hello-outpath = (mkNixpkgsEvalCheck "hello.outPath" pkgs.hello.outPath); - - # This is the furthest we get starting with stdenv we hit something similar to b/261 - eval-nixpkgs-cross-gcc-outpath = (mkNixpkgsEvalCheck "pkgsCross.aarch64-multiplatform.buildPackages.gcc.outPath" pkgs.pkgsCross.aarch64-multiplatform.buildPackages.gcc.outPath); + eval-nixpkgs-cross-stdenv-outpath = (mkNixpkgsEvalCheck "pkgsCross.aarch64-multiplatform.stdenv.outPath" pkgs.pkgsCross.aarch64-multiplatform.stdenv.outPath); + eval-nixpkgs-cross-hello-outpath = (mkNixpkgsEvalCheck "pkgsCross.aarch64-multiplatform.hello.outPath" pkgs.pkgsCross.aarch64-multiplatform.hello.outPath); }; }; }) diff --git a/tvix/eval/docs/known-optimisation-potential.md b/tvix/eval/docs/known-optimisation-potential.md index f45f1ee6c..e2e0497ae 100644 --- a/tvix/eval/docs/known-optimisation-potential.md +++ b/tvix/eval/docs/known-optimisation-potential.md @@ -128,3 +128,13 @@ optimisations, but note the most important ones here. very least [immutable data structures](https://docs.rs/im/latest/im/) that can be copied more efficiently than the stock structures we are using at the moment. + +* Skip finalising unfinalised thunks or non-thunks instead of crashing [easy] + + Currently `OpFinalise` crashes the VM if it is called on values that don't + need to be finalised. This helps catching miscompilations where `OpFinalise` + operates on the wrong `StackIdx`. In the case of function argument patterns, + however, this means extra VM stack and instruction overhead for dynamically + determining if finalisation is necessary or not. This wouldn't be necessary + if `OpFinalise` would just noop on any values that don't need to be finalised + (anymore). diff --git a/tvix/eval/src/builtins/to_xml.rs b/tvix/eval/src/builtins/to_xml.rs index 375785b7a..9fb6bb5f5 100644 --- a/tvix/eval/src/builtins/to_xml.rs +++ b/tvix/eval/src/builtins/to_xml.rs @@ -127,7 +127,8 @@ fn value_variant_to_xml(w: &mut EventWriter, value: &Value) -> Resu | Value::Blueprint(_) | Value::DeferredUpvalue(_) | Value::UnresolvedPath(_) - | Value::Json(_) => { + | Value::Json(_) + | Value::FinaliseRequest(_) => { return Err(ErrorKind::TvixBug { msg: "internal value variant encountered in builtins.toXML", metadata: Some(Rc::new(value.clone())), diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index 436e504af..b7b919257 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -80,6 +80,47 @@ impl LambdaCtx { } } +/// When compiling functions with an argument attribute set destructuring pattern, +/// we need to do multiple passes over the declared formal arguments when setting +/// up their local bindings (similarly to `let … in` expressions and recursive +/// attribute sets. For this purpose, this struct is used to represent the two +/// kinds of formal arguments: +/// +/// - `TrackedFormal::NoDefault` is always required and causes an evaluation error +/// if the corresponding attribute is missing in a function call. +/// - `TrackedFormal::WithDefault` may be missing in the passed attribute set— +/// in which case a `default_expr` will be evaluated and placed in the formal +/// argument's local variable slot. +enum TrackedFormal { + NoDefault { + local_idx: LocalIdx, + pattern_entry: ast::PatEntry, + }, + WithDefault { + local_idx: LocalIdx, + /// Extra phantom local used for coordinating runtime dispatching not observable to + /// the language user. Detailed description in `compile_param_pattern()`. + finalise_request_idx: LocalIdx, + default_expr: ast::Expr, + pattern_entry: ast::PatEntry, + }, +} + +impl TrackedFormal { + fn pattern_entry(&self) -> &ast::PatEntry { + match self { + TrackedFormal::NoDefault { pattern_entry, .. } => pattern_entry, + TrackedFormal::WithDefault { pattern_entry, .. } => pattern_entry, + } + } + fn local_idx(&self) -> LocalIdx { + match self { + TrackedFormal::NoDefault { local_idx, .. } => *local_idx, + TrackedFormal::WithDefault { local_idx, .. } => *local_idx, + } + } +} + /// The map of globally available functions and other values that /// should implicitly be resolvable in the global scope. pub(crate) type GlobalsMap = HashMap<&'static str, Value>; @@ -894,10 +935,19 @@ impl Compiler<'_> { /// in /// ``` /// - /// The only tricky bit being that bindings have to fail if too - /// many arguments are provided. This is done by emitting a - /// special instruction that checks the set of keys from a - /// constant containing the expected keys. + /// However, there are two properties of pattern function arguments that can + /// not be compiled by desugaring in this way: + /// + /// 1. Bindings have to fail if too many arguments are provided. This is + /// done by emitting a special instruction that checks the set of keys + /// from a constant containing the expected keys. + /// 2. Formal arguments with a default expression are (as an optimization and + /// because it is simpler) not wrapped in another thunk, instead compiled + /// and accessed separately. This means that the default expression may + /// never make it into the local's stack slot if the argument is provided + /// by the caller. We need to take this into account and skip any + /// operations specific to the expression like thunk finalisation in such + /// cases. fn compile_param_pattern(&mut self, pattern: &ast::Pattern) -> Formals { let span = self.span_for(pattern); let set_idx = match pattern.pat_bind() { @@ -905,10 +955,10 @@ impl Compiler<'_> { None => self.scope_mut().declare_phantom(span, true), }; - // At call time, the attribute set is already at the top of - // the stack. + // At call time, the attribute set is already at the top of the stack. self.scope_mut().mark_initialised(set_idx); self.emit_force(pattern); + // Evaluation fails on a type error, even if the argument(s) are unused. self.push_op(OpCode::OpAssertAttrs, pattern); let ellipsis = pattern.ellipsis_token().is_some(); @@ -919,51 +969,131 @@ impl Compiler<'_> { // Similar to `let ... in ...`, we now do multiple passes over // the bindings to first declare them, then populate them, and // then finalise any necessary recursion into the scope. - let mut entries: Vec<(LocalIdx, ast::PatEntry)> = vec![]; + let mut entries: Vec = vec![]; let mut arguments = HashMap::default(); for entry in pattern.pat_entries() { let ident = entry.ident().unwrap(); let idx = self.declare_local(&ident, ident.to_string()); - let has_default = entry.default().is_some(); - entries.push((idx, entry)); - arguments.insert(ident.into(), has_default); + + arguments.insert(ident.into(), entry.default().is_some()); + + if let Some(default_expr) = entry.default() { + entries.push(TrackedFormal::WithDefault { + local_idx: idx, + // This phantom is used to track at runtime (!) whether we need to + // finalise the local's stack slot or not. The relevant instructions are + // emitted in the second pass where the mechanism is explained as well. + finalise_request_idx: { + let span = self.span_for(&default_expr); + self.scope_mut().declare_phantom(span, false) + }, + default_expr, + pattern_entry: entry, + }); + } else { + entries.push(TrackedFormal::NoDefault { + local_idx: idx, + pattern_entry: entry, + }); + } } // For each of the bindings, push the set on the stack and // attempt to select from it. let stack_idx = self.scope().stack_index(set_idx); - for (idx, entry) in (&entries).into_iter() { + for tracked_formal in (&entries).into_iter() { self.push_op(OpCode::OpGetLocal(stack_idx), pattern); - self.emit_literal_ident(&entry.ident().unwrap()); + self.emit_literal_ident(&tracked_formal.pattern_entry().ident().unwrap()); + + let idx = tracked_formal.local_idx(); // Use the same mechanism as `compile_select_or` if a // default value was provided, or simply select otherwise. - if let Some(default_expr) = entry.default() { - self.push_op(OpCode::OpAttrsTrySelect, &entry.ident().unwrap()); + match tracked_formal { + TrackedFormal::WithDefault { + default_expr, + pattern_entry, + .. + } => { + // The tricky bit about compiling a formal argument with a default value + // is that the default may be a thunk that may depend on the value of + // other formal arguments, i.e. may need to be finalised. This + // finalisation can only happen if we are actually using the default + // value—otherwise OpFinalise will crash on an already finalised (or + // non-thunk) value. + // + // Thus we use an additional local to track whether we wound up + // defaulting or not. `FinaliseRequest(false)` indicates that we should + // not finalise, as we did not default. + // + // We are being wasteful with VM stack space in case of default + // expressions that don't end up needing to be finalised. Unfortunately + // we only know better after compiling the default expression, so + // avoiding unnecessary locals would mean we'd need to modify the chunk + // after the fact. + self.push_op(OpCode::OpAttrsTrySelect, &pattern_entry.ident().unwrap()); + let jump_to_default = + self.push_op(OpCode::OpJumpIfNotFound(JumpOffset(0)), default_expr); - let jump_to_default = - self.push_op(OpCode::OpJumpIfNotFound(JumpOffset(0)), &default_expr); + self.emit_constant(Value::FinaliseRequest(false), default_expr); - let jump_over_default = self.push_op(OpCode::OpJump(JumpOffset(0)), &default_expr); + let jump_over_default = + self.push_op(OpCode::OpJump(JumpOffset(0)), default_expr); - self.patch_jump(jump_to_default); + self.patch_jump(jump_to_default); - // Does not need to thunked since compile() already does so when necessary - self.compile(*idx, default_expr); + // Does not need to thunked since compile() already does so when necessary + self.compile(idx, default_expr.clone()); - self.patch_jump(jump_over_default); - } else { - self.push_op(OpCode::OpAttrsSelect, &entry.ident().unwrap()); + self.emit_constant(Value::FinaliseRequest(true), default_expr); + + self.patch_jump(jump_over_default); + } + TrackedFormal::NoDefault { pattern_entry, .. } => { + self.push_op(OpCode::OpAttrsSelect, &pattern_entry.ident().unwrap()); + } } - self.scope_mut().mark_initialised(*idx); + self.scope_mut().mark_initialised(idx); + if let TrackedFormal::WithDefault { + finalise_request_idx, + .. + } = tracked_formal + { + self.scope_mut().mark_initialised(*finalise_request_idx); + } } - for (idx, _) in (&entries).into_iter() { - if self.scope()[*idx].needs_finaliser { - let stack_idx = self.scope().stack_index(*idx); - self.push_op(OpCode::OpFinalise(stack_idx), pattern); + for tracked_formal in (&entries).into_iter() { + if self.scope()[tracked_formal.local_idx()].needs_finaliser { + let stack_idx = self.scope().stack_index(tracked_formal.local_idx()); + match tracked_formal { + TrackedFormal::NoDefault { .. } => + panic!("Tvix bug: local for pattern formal needs finaliser, but has no default expr"), + TrackedFormal::WithDefault { finalise_request_idx, .. } => { + let finalise_request_stack_idx = self.scope().stack_index(*finalise_request_idx); + + // TODO(sterni): better spans + self.push_op( + OpCode::OpGetLocal(finalise_request_stack_idx), + pattern + ); + let jump_over_finalise = + self.push_op( + OpCode::OpJumpIfNoFinaliseRequest( + JumpOffset(0)), + pattern + ); + self.push_op( + OpCode::OpFinalise(stack_idx), + pattern, + ); + self.patch_jump(jump_over_finalise); + // Get rid of finaliser request value on the stack + self.push_op(OpCode::OpPop, pattern); + } + } } } @@ -1196,7 +1326,8 @@ impl Compiler<'_> { OpCode::OpJump(n) | OpCode::OpJumpIfFalse(n) | OpCode::OpJumpIfTrue(n) - | OpCode::OpJumpIfNotFound(n) => { + | OpCode::OpJumpIfNotFound(n) + | OpCode::OpJumpIfNoFinaliseRequest(n) => { *n = offset; } diff --git a/tvix/eval/src/opcode.rs b/tvix/eval/src/opcode.rs index e3eddc3c9..04f9ca255 100644 --- a/tvix/eval/src/opcode.rs +++ b/tvix/eval/src/opcode.rs @@ -85,6 +85,7 @@ pub enum OpCode { OpJumpIfTrue(JumpOffset), OpJumpIfFalse(JumpOffset), OpJumpIfNotFound(JumpOffset), + OpJumpIfNoFinaliseRequest(JumpOffset), // Attribute sets /// Construct an attribute set from the given number of key-value pairs on the top of the stack diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.exp new file mode 100644 index 000000000..721a052bc --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.exp @@ -0,0 +1 @@ +[ true null ] diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.nix new file mode 100644 index 000000000..43bef2938 --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.nix @@ -0,0 +1,20 @@ +# This is a regression test for https://b.tvl.fyi/261. +# +# The bug occurred when Tvix would unconditionally finalise the stack slot of +# `finalise` (as its default expression needs a finaliser): Finalising an +# manually provided, already forced thunk would cause the VM to crash. +let + thunk = x: x; + bomb = thunk true; + f = + { finalise ? later == null + , later ? null + }: + [ finalise later ]; +in + +# Note that the crash did not occur if the offending expression was the rhs +# argument to `builtins.seq`, hence we need to put the assert in between. +assert builtins.seq bomb true; + +f { finalise = bomb; } diff --git a/tvix/eval/src/value/json.rs b/tvix/eval/src/value/json.rs index 33e16ebff..1e73278fa 100644 --- a/tvix/eval/src/value/json.rs +++ b/tvix/eval/src/value/json.rs @@ -70,7 +70,10 @@ impl Value { | val @ Value::Blueprint(_) | val @ Value::DeferredUpvalue(_) | val @ Value::UnresolvedPath(_) - | val @ Value::Json(_) => return Err(ErrorKind::NotSerialisableToJson(val.type_of())), + | val @ Value::Json(_) + | val @ Value::FinaliseRequest(_) => { + return Err(ErrorKind::NotSerialisableToJson(val.type_of())) + } }; Ok(value) diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs index 7e701d52b..29b73f26f 100644 --- a/tvix/eval/src/value/mod.rs +++ b/tvix/eval/src/value/mod.rs @@ -78,6 +78,9 @@ pub enum Value { UnresolvedPath(Box), #[serde(skip)] Json(serde_json::Value), + + #[serde(skip)] + FinaliseRequest(bool), } lazy_static! { @@ -235,7 +238,8 @@ impl Value { | Value::Blueprint(_) | Value::DeferredUpvalue(_) | Value::UnresolvedPath(_) - | Value::Json(_) => panic!( + | Value::Json(_) + | Value::FinaliseRequest(_) => panic!( "Tvix bug: internal value left on stack: {}", value.type_of() ), @@ -328,7 +332,8 @@ impl Value { | (Value::Blueprint(_), _) | (Value::DeferredUpvalue(_), _) | (Value::UnresolvedPath(_), _) - | (Value::Json(_), _) => { + | (Value::Json(_), _) + | (Value::FinaliseRequest(_), _) => { panic!("tvix bug: .coerce_to_string() called on internal value") } } @@ -520,6 +525,7 @@ impl Value { Value::DeferredUpvalue(_) => "internal[deferred_upvalue]", Value::UnresolvedPath(_) => "internal[unresolved_path]", Value::Json(_) => "internal[json]", + Value::FinaliseRequest(_) => "internal[finaliser_sentinel]", } } @@ -658,7 +664,8 @@ impl Value { | Value::Blueprint(_) | Value::DeferredUpvalue(_) | Value::UnresolvedPath(_) - | Value::Json(_) => "an internal Tvix evaluator value".into(), + | Value::Json(_) + | Value::FinaliseRequest(_) => "an internal Tvix evaluator value".into(), } } } @@ -773,6 +780,7 @@ impl TotalDisplay for Value { Value::DeferredUpvalue(_) => f.write_str("internal[deferred_upvalue]"), Value::UnresolvedPath(_) => f.write_str("internal[unresolved_path]"), Value::Json(_) => f.write_str("internal[json]"), + Value::FinaliseRequest(_) => f.write_str("internal[finaliser_sentinel]"), // Delegate thunk display to the type, as it must handle // the case of already evaluated or cyclic thunks. diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs index 9f973eb12..d004af3c6 100644 --- a/tvix/eval/src/vm/mod.rs +++ b/tvix/eval/src/vm/mod.rs @@ -574,6 +574,18 @@ impl<'o> VM<'o> { } } + OpCode::OpJumpIfNoFinaliseRequest(JumpOffset(offset)) => { + debug_assert!(offset != 0); + match self.stack_peek(0) { + Value::FinaliseRequest(finalise) => { + if !finalise { + frame.ip += offset; + } + }, + val => panic!("Tvix bug: OpJumIfNoFinaliseRequest: expected FinaliseRequest, but got {}", val.type_of()), + } + } + OpCode::OpPop => { self.stack.pop(); } @@ -733,19 +745,11 @@ impl<'o> VM<'o> { return Ok(false); } - OpCode::OpFinalise(StackIdx(idx)) => { - match &self.stack[frame.stack_offset + idx] { - Value::Closure(_) => panic!("attempted to finalise a closure"), - Value::Thunk(thunk) => thunk.finalise(&self.stack[frame.stack_offset..]), - - // In functions with "formals" attributes, it is - // possible for `OpFinalise` to be called on a - // non-capturing value, in which case it is a no-op. - // - // TODO: detect this in some phase and skip the finalise; fail here - _ => { /* TODO: panic here again to catch bugs */ } - } - } + OpCode::OpFinalise(StackIdx(idx)) => match &self.stack[frame.stack_offset + idx] { + Value::Closure(_) => panic!("attempted to finalise a closure"), + Value::Thunk(thunk) => thunk.finalise(&self.stack[frame.stack_offset..]), + _ => panic!("attempted to finalise a non-thunk"), + }, OpCode::OpCoerceToString => { let value = self.stack_pop(); diff --git a/tvix/serde/src/de.rs b/tvix/serde/src/de.rs index 49a069a51..fddda6590 100644 --- a/tvix/serde/src/de.rs +++ b/tvix/serde/src/de.rs @@ -108,7 +108,8 @@ impl<'de> de::Deserializer<'de> for NixDeserializer { | Value::Blueprint(_) | Value::DeferredUpvalue(_) | Value::UnresolvedPath(_) - | Value::Json(_) => Err(Error::Unserializable { + | Value::Json(_) + | Value::FinaliseRequest(_) => Err(Error::Unserializable { value_type: self.value.type_of(), }), }