diff --git a/tvix/cli/default.nix b/tvix/cli/default.nix index a4bb19701..0b73fa1aa 100644 --- a/tvix/cli/default.nix +++ b/tvix/cli/default.nix @@ -28,9 +28,8 @@ in eval-nixpkgs-stdenv-drvpath = (mkNixpkgsEvalCheck "stdenv.drvPath" pkgs.stdenv.drvPath); eval-nixpkgs-stdenv-outpath = (mkNixpkgsEvalCheck "stdenv.outPath" pkgs.stdenv.outPath); eval-nixpkgs-hello-outpath = (mkNixpkgsEvalCheck "hello.outPath" pkgs.hello.outPath); - - # This is the furthest we get starting with stdenv we hit something similar to b/261 - eval-nixpkgs-cross-gcc-outpath = (mkNixpkgsEvalCheck "pkgsCross.aarch64-multiplatform.buildPackages.gcc.outPath" pkgs.pkgsCross.aarch64-multiplatform.buildPackages.gcc.outPath); + eval-nixpkgs-cross-stdenv-outpath = (mkNixpkgsEvalCheck "pkgsCross.aarch64-multiplatform.stdenv.outPath" pkgs.pkgsCross.aarch64-multiplatform.stdenv.outPath); + eval-nixpkgs-cross-hello-outpath = (mkNixpkgsEvalCheck "pkgsCross.aarch64-multiplatform.hello.outPath" pkgs.pkgsCross.aarch64-multiplatform.hello.outPath); }; }; }) diff --git a/tvix/eval/docs/known-optimisation-potential.md b/tvix/eval/docs/known-optimisation-potential.md index f45f1ee6c..e2e0497ae 100644 --- a/tvix/eval/docs/known-optimisation-potential.md +++ b/tvix/eval/docs/known-optimisation-potential.md @@ -128,3 +128,13 @@ optimisations, but note the most important ones here. very least [immutable data structures](https://docs.rs/im/latest/im/) that can be copied more efficiently than the stock structures we are using at the moment. + +* Skip finalising unfinalised thunks or non-thunks instead of crashing [easy] + + Currently `OpFinalise` crashes the VM if it is called on values that don't + need to be finalised. This helps catching miscompilations where `OpFinalise` + operates on the wrong `StackIdx`. In the case of function argument patterns, + however, this means extra VM stack and instruction overhead for dynamically + determining if finalisation is necessary or not. This wouldn't be necessary + if `OpFinalise` would just noop on any values that don't need to be finalised + (anymore). diff --git a/tvix/eval/src/builtins/to_xml.rs b/tvix/eval/src/builtins/to_xml.rs index 375785b7a..9fb6bb5f5 100644 --- a/tvix/eval/src/builtins/to_xml.rs +++ b/tvix/eval/src/builtins/to_xml.rs @@ -127,7 +127,8 @@ fn value_variant_to_xml(w: &mut EventWriter, value: &Value) -> Resu | Value::Blueprint(_) | Value::DeferredUpvalue(_) | Value::UnresolvedPath(_) - | Value::Json(_) => { + | Value::Json(_) + | Value::FinaliseRequest(_) => { return Err(ErrorKind::TvixBug { msg: "internal value variant encountered in builtins.toXML", metadata: Some(Rc::new(value.clone())), diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index 436e504af..b7b919257 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -80,6 +80,47 @@ impl LambdaCtx { } } +/// When compiling functions with an argument attribute set destructuring pattern, +/// we need to do multiple passes over the declared formal arguments when setting +/// up their local bindings (similarly to `let … in` expressions and recursive +/// attribute sets. For this purpose, this struct is used to represent the two +/// kinds of formal arguments: +/// +/// - `TrackedFormal::NoDefault` is always required and causes an evaluation error +/// if the corresponding attribute is missing in a function call. +/// - `TrackedFormal::WithDefault` may be missing in the passed attribute set— +/// in which case a `default_expr` will be evaluated and placed in the formal +/// argument's local variable slot. +enum TrackedFormal { + NoDefault { + local_idx: LocalIdx, + pattern_entry: ast::PatEntry, + }, + WithDefault { + local_idx: LocalIdx, + /// Extra phantom local used for coordinating runtime dispatching not observable to + /// the language user. Detailed description in `compile_param_pattern()`. + finalise_request_idx: LocalIdx, + default_expr: ast::Expr, + pattern_entry: ast::PatEntry, + }, +} + +impl TrackedFormal { + fn pattern_entry(&self) -> &ast::PatEntry { + match self { + TrackedFormal::NoDefault { pattern_entry, .. } => pattern_entry, + TrackedFormal::WithDefault { pattern_entry, .. } => pattern_entry, + } + } + fn local_idx(&self) -> LocalIdx { + match self { + TrackedFormal::NoDefault { local_idx, .. } => *local_idx, + TrackedFormal::WithDefault { local_idx, .. } => *local_idx, + } + } +} + /// The map of globally available functions and other values that /// should implicitly be resolvable in the global scope. pub(crate) type GlobalsMap = HashMap<&'static str, Value>; @@ -894,10 +935,19 @@ impl Compiler<'_> { /// in /// ``` /// - /// The only tricky bit being that bindings have to fail if too - /// many arguments are provided. This is done by emitting a - /// special instruction that checks the set of keys from a - /// constant containing the expected keys. + /// However, there are two properties of pattern function arguments that can + /// not be compiled by desugaring in this way: + /// + /// 1. Bindings have to fail if too many arguments are provided. This is + /// done by emitting a special instruction that checks the set of keys + /// from a constant containing the expected keys. + /// 2. Formal arguments with a default expression are (as an optimization and + /// because it is simpler) not wrapped in another thunk, instead compiled + /// and accessed separately. This means that the default expression may + /// never make it into the local's stack slot if the argument is provided + /// by the caller. We need to take this into account and skip any + /// operations specific to the expression like thunk finalisation in such + /// cases. fn compile_param_pattern(&mut self, pattern: &ast::Pattern) -> Formals { let span = self.span_for(pattern); let set_idx = match pattern.pat_bind() { @@ -905,10 +955,10 @@ impl Compiler<'_> { None => self.scope_mut().declare_phantom(span, true), }; - // At call time, the attribute set is already at the top of - // the stack. + // At call time, the attribute set is already at the top of the stack. self.scope_mut().mark_initialised(set_idx); self.emit_force(pattern); + // Evaluation fails on a type error, even if the argument(s) are unused. self.push_op(OpCode::OpAssertAttrs, pattern); let ellipsis = pattern.ellipsis_token().is_some(); @@ -919,51 +969,131 @@ impl Compiler<'_> { // Similar to `let ... in ...`, we now do multiple passes over // the bindings to first declare them, then populate them, and // then finalise any necessary recursion into the scope. - let mut entries: Vec<(LocalIdx, ast::PatEntry)> = vec![]; + let mut entries: Vec = vec![]; let mut arguments = HashMap::default(); for entry in pattern.pat_entries() { let ident = entry.ident().unwrap(); let idx = self.declare_local(&ident, ident.to_string()); - let has_default = entry.default().is_some(); - entries.push((idx, entry)); - arguments.insert(ident.into(), has_default); + + arguments.insert(ident.into(), entry.default().is_some()); + + if let Some(default_expr) = entry.default() { + entries.push(TrackedFormal::WithDefault { + local_idx: idx, + // This phantom is used to track at runtime (!) whether we need to + // finalise the local's stack slot or not. The relevant instructions are + // emitted in the second pass where the mechanism is explained as well. + finalise_request_idx: { + let span = self.span_for(&default_expr); + self.scope_mut().declare_phantom(span, false) + }, + default_expr, + pattern_entry: entry, + }); + } else { + entries.push(TrackedFormal::NoDefault { + local_idx: idx, + pattern_entry: entry, + }); + } } // For each of the bindings, push the set on the stack and // attempt to select from it. let stack_idx = self.scope().stack_index(set_idx); - for (idx, entry) in (&entries).into_iter() { + for tracked_formal in (&entries).into_iter() { self.push_op(OpCode::OpGetLocal(stack_idx), pattern); - self.emit_literal_ident(&entry.ident().unwrap()); + self.emit_literal_ident(&tracked_formal.pattern_entry().ident().unwrap()); + + let idx = tracked_formal.local_idx(); // Use the same mechanism as `compile_select_or` if a // default value was provided, or simply select otherwise. - if let Some(default_expr) = entry.default() { - self.push_op(OpCode::OpAttrsTrySelect, &entry.ident().unwrap()); + match tracked_formal { + TrackedFormal::WithDefault { + default_expr, + pattern_entry, + .. + } => { + // The tricky bit about compiling a formal argument with a default value + // is that the default may be a thunk that may depend on the value of + // other formal arguments, i.e. may need to be finalised. This + // finalisation can only happen if we are actually using the default + // value—otherwise OpFinalise will crash on an already finalised (or + // non-thunk) value. + // + // Thus we use an additional local to track whether we wound up + // defaulting or not. `FinaliseRequest(false)` indicates that we should + // not finalise, as we did not default. + // + // We are being wasteful with VM stack space in case of default + // expressions that don't end up needing to be finalised. Unfortunately + // we only know better after compiling the default expression, so + // avoiding unnecessary locals would mean we'd need to modify the chunk + // after the fact. + self.push_op(OpCode::OpAttrsTrySelect, &pattern_entry.ident().unwrap()); + let jump_to_default = + self.push_op(OpCode::OpJumpIfNotFound(JumpOffset(0)), default_expr); - let jump_to_default = - self.push_op(OpCode::OpJumpIfNotFound(JumpOffset(0)), &default_expr); + self.emit_constant(Value::FinaliseRequest(false), default_expr); - let jump_over_default = self.push_op(OpCode::OpJump(JumpOffset(0)), &default_expr); + let jump_over_default = + self.push_op(OpCode::OpJump(JumpOffset(0)), default_expr); - self.patch_jump(jump_to_default); + self.patch_jump(jump_to_default); - // Does not need to thunked since compile() already does so when necessary - self.compile(*idx, default_expr); + // Does not need to thunked since compile() already does so when necessary + self.compile(idx, default_expr.clone()); - self.patch_jump(jump_over_default); - } else { - self.push_op(OpCode::OpAttrsSelect, &entry.ident().unwrap()); + self.emit_constant(Value::FinaliseRequest(true), default_expr); + + self.patch_jump(jump_over_default); + } + TrackedFormal::NoDefault { pattern_entry, .. } => { + self.push_op(OpCode::OpAttrsSelect, &pattern_entry.ident().unwrap()); + } } - self.scope_mut().mark_initialised(*idx); + self.scope_mut().mark_initialised(idx); + if let TrackedFormal::WithDefault { + finalise_request_idx, + .. + } = tracked_formal + { + self.scope_mut().mark_initialised(*finalise_request_idx); + } } - for (idx, _) in (&entries).into_iter() { - if self.scope()[*idx].needs_finaliser { - let stack_idx = self.scope().stack_index(*idx); - self.push_op(OpCode::OpFinalise(stack_idx), pattern); + for tracked_formal in (&entries).into_iter() { + if self.scope()[tracked_formal.local_idx()].needs_finaliser { + let stack_idx = self.scope().stack_index(tracked_formal.local_idx()); + match tracked_formal { + TrackedFormal::NoDefault { .. } => + panic!("Tvix bug: local for pattern formal needs finaliser, but has no default expr"), + TrackedFormal::WithDefault { finalise_request_idx, .. } => { + let finalise_request_stack_idx = self.scope().stack_index(*finalise_request_idx); + + // TODO(sterni): better spans + self.push_op( + OpCode::OpGetLocal(finalise_request_stack_idx), + pattern + ); + let jump_over_finalise = + self.push_op( + OpCode::OpJumpIfNoFinaliseRequest( + JumpOffset(0)), + pattern + ); + self.push_op( + OpCode::OpFinalise(stack_idx), + pattern, + ); + self.patch_jump(jump_over_finalise); + // Get rid of finaliser request value on the stack + self.push_op(OpCode::OpPop, pattern); + } + } } } @@ -1196,7 +1326,8 @@ impl Compiler<'_> { OpCode::OpJump(n) | OpCode::OpJumpIfFalse(n) | OpCode::OpJumpIfTrue(n) - | OpCode::OpJumpIfNotFound(n) => { + | OpCode::OpJumpIfNotFound(n) + | OpCode::OpJumpIfNoFinaliseRequest(n) => { *n = offset; } diff --git a/tvix/eval/src/opcode.rs b/tvix/eval/src/opcode.rs index e3eddc3c9..04f9ca255 100644 --- a/tvix/eval/src/opcode.rs +++ b/tvix/eval/src/opcode.rs @@ -85,6 +85,7 @@ pub enum OpCode { OpJumpIfTrue(JumpOffset), OpJumpIfFalse(JumpOffset), OpJumpIfNotFound(JumpOffset), + OpJumpIfNoFinaliseRequest(JumpOffset), // Attribute sets /// Construct an attribute set from the given number of key-value pairs on the top of the stack diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.exp new file mode 100644 index 000000000..721a052bc --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.exp @@ -0,0 +1 @@ +[ true null ] diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.nix new file mode 100644 index 000000000..43bef2938 --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-formals-miscompilation-b-261-regression.nix @@ -0,0 +1,20 @@ +# This is a regression test for https://b.tvl.fyi/261. +# +# The bug occurred when Tvix would unconditionally finalise the stack slot of +# `finalise` (as its default expression needs a finaliser): Finalising an +# manually provided, already forced thunk would cause the VM to crash. +let + thunk = x: x; + bomb = thunk true; + f = + { finalise ? later == null + , later ? null + }: + [ finalise later ]; +in + +# Note that the crash did not occur if the offending expression was the rhs +# argument to `builtins.seq`, hence we need to put the assert in between. +assert builtins.seq bomb true; + +f { finalise = bomb; } diff --git a/tvix/eval/src/value/json.rs b/tvix/eval/src/value/json.rs index 33e16ebff..1e73278fa 100644 --- a/tvix/eval/src/value/json.rs +++ b/tvix/eval/src/value/json.rs @@ -70,7 +70,10 @@ impl Value { | val @ Value::Blueprint(_) | val @ Value::DeferredUpvalue(_) | val @ Value::UnresolvedPath(_) - | val @ Value::Json(_) => return Err(ErrorKind::NotSerialisableToJson(val.type_of())), + | val @ Value::Json(_) + | val @ Value::FinaliseRequest(_) => { + return Err(ErrorKind::NotSerialisableToJson(val.type_of())) + } }; Ok(value) diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs index 7e701d52b..29b73f26f 100644 --- a/tvix/eval/src/value/mod.rs +++ b/tvix/eval/src/value/mod.rs @@ -78,6 +78,9 @@ pub enum Value { UnresolvedPath(Box), #[serde(skip)] Json(serde_json::Value), + + #[serde(skip)] + FinaliseRequest(bool), } lazy_static! { @@ -235,7 +238,8 @@ impl Value { | Value::Blueprint(_) | Value::DeferredUpvalue(_) | Value::UnresolvedPath(_) - | Value::Json(_) => panic!( + | Value::Json(_) + | Value::FinaliseRequest(_) => panic!( "Tvix bug: internal value left on stack: {}", value.type_of() ), @@ -328,7 +332,8 @@ impl Value { | (Value::Blueprint(_), _) | (Value::DeferredUpvalue(_), _) | (Value::UnresolvedPath(_), _) - | (Value::Json(_), _) => { + | (Value::Json(_), _) + | (Value::FinaliseRequest(_), _) => { panic!("tvix bug: .coerce_to_string() called on internal value") } } @@ -520,6 +525,7 @@ impl Value { Value::DeferredUpvalue(_) => "internal[deferred_upvalue]", Value::UnresolvedPath(_) => "internal[unresolved_path]", Value::Json(_) => "internal[json]", + Value::FinaliseRequest(_) => "internal[finaliser_sentinel]", } } @@ -658,7 +664,8 @@ impl Value { | Value::Blueprint(_) | Value::DeferredUpvalue(_) | Value::UnresolvedPath(_) - | Value::Json(_) => "an internal Tvix evaluator value".into(), + | Value::Json(_) + | Value::FinaliseRequest(_) => "an internal Tvix evaluator value".into(), } } } @@ -773,6 +780,7 @@ impl TotalDisplay for Value { Value::DeferredUpvalue(_) => f.write_str("internal[deferred_upvalue]"), Value::UnresolvedPath(_) => f.write_str("internal[unresolved_path]"), Value::Json(_) => f.write_str("internal[json]"), + Value::FinaliseRequest(_) => f.write_str("internal[finaliser_sentinel]"), // Delegate thunk display to the type, as it must handle // the case of already evaluated or cyclic thunks. diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs index 9f973eb12..d004af3c6 100644 --- a/tvix/eval/src/vm/mod.rs +++ b/tvix/eval/src/vm/mod.rs @@ -574,6 +574,18 @@ impl<'o> VM<'o> { } } + OpCode::OpJumpIfNoFinaliseRequest(JumpOffset(offset)) => { + debug_assert!(offset != 0); + match self.stack_peek(0) { + Value::FinaliseRequest(finalise) => { + if !finalise { + frame.ip += offset; + } + }, + val => panic!("Tvix bug: OpJumIfNoFinaliseRequest: expected FinaliseRequest, but got {}", val.type_of()), + } + } + OpCode::OpPop => { self.stack.pop(); } @@ -733,19 +745,11 @@ impl<'o> VM<'o> { return Ok(false); } - OpCode::OpFinalise(StackIdx(idx)) => { - match &self.stack[frame.stack_offset + idx] { - Value::Closure(_) => panic!("attempted to finalise a closure"), - Value::Thunk(thunk) => thunk.finalise(&self.stack[frame.stack_offset..]), - - // In functions with "formals" attributes, it is - // possible for `OpFinalise` to be called on a - // non-capturing value, in which case it is a no-op. - // - // TODO: detect this in some phase and skip the finalise; fail here - _ => { /* TODO: panic here again to catch bugs */ } - } - } + OpCode::OpFinalise(StackIdx(idx)) => match &self.stack[frame.stack_offset + idx] { + Value::Closure(_) => panic!("attempted to finalise a closure"), + Value::Thunk(thunk) => thunk.finalise(&self.stack[frame.stack_offset..]), + _ => panic!("attempted to finalise a non-thunk"), + }, OpCode::OpCoerceToString => { let value = self.stack_pop(); diff --git a/tvix/serde/src/de.rs b/tvix/serde/src/de.rs index 49a069a51..fddda6590 100644 --- a/tvix/serde/src/de.rs +++ b/tvix/serde/src/de.rs @@ -108,7 +108,8 @@ impl<'de> de::Deserializer<'de> for NixDeserializer { | Value::Blueprint(_) | Value::DeferredUpvalue(_) | Value::UnresolvedPath(_) - | Value::Json(_) => Err(Error::Unserializable { + | Value::Json(_) + | Value::FinaliseRequest(_) => Err(Error::Unserializable { value_type: self.value.type_of(), }), }