From f3d8c633f2b4c44a6911d751a2efd93ef926a8bf Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Thu, 4 Apr 2024 19:03:44 +0300 Subject: [PATCH] feat(tvix/nix-compat/wire): introduce BytesWriter This deals with writing byte packets of larger sizes to an underlying AsyncWrite. Its constructor receives the expected size. It also deals with writing padding if flush/shutdown is called after writing all the payload. Change-Id: I8acbf992467f3862ffb8c7d669e8c0c8eced14c1 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11355 Reviewed-by: picnoir picnoir Autosubmit: flokli Reviewed-by: Brian Olsen Tested-by: BuildkiteCI --- tvix/Cargo.lock | 1 + tvix/Cargo.nix | 10 +- tvix/nix-compat/Cargo.toml | 6 +- tvix/nix-compat/src/wire/bytes_writer.rs | 514 +++++++++++++++++++++++ tvix/nix-compat/src/wire/mod.rs | 5 + users/picnoir/tvix-daemon/Cargo.lock | 1 + users/picnoir/tvix-daemon/Cargo.nix | 25 +- 7 files changed, 555 insertions(+), 7 deletions(-) create mode 100644 tvix/nix-compat/src/wire/bytes_writer.rs diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 4e95d39b6..576387c65 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -1882,6 +1882,7 @@ dependencies = [ "lazy_static", "nom", "num-traits", + "pin-project-lite", "pretty_assertions", "serde", "serde_json", diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index e6920342d..0dc2f7c1d 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -5769,6 +5769,11 @@ rec { name = "num-traits"; packageId = "num-traits"; } + { + name = "pin-project-lite"; + packageId = "pin-project-lite"; + optional = true; + } { name = "serde"; packageId = "serde"; @@ -5839,11 +5844,12 @@ rec { } ]; features = { - "async" = [ "futures-util" "tokio" ]; + "async" = [ "futures-util" "tokio" "pin-project-lite" ]; "futures-util" = [ "dep:futures-util" ]; + "pin-project-lite" = [ "dep:pin-project-lite" ]; "tokio" = [ "dep:tokio" ]; }; - resolvedDefaultFeatures = [ "async" "futures-util" "tokio" ]; + resolvedDefaultFeatures = [ "async" "futures-util" "pin-project-lite" "tokio" ]; }; "nom" = rec { crateName = "nom"; diff --git a/tvix/nix-compat/Cargo.toml b/tvix/nix-compat/Cargo.toml index 6e3df0485..6fc55d5c0 100644 --- a/tvix/nix-compat/Cargo.toml +++ b/tvix/nix-compat/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -async = ["futures-util", "tokio"] +async = ["futures-util", "tokio", "pin-project-lite"] [dependencies] bitflags = "2.4.1" @@ -29,6 +29,10 @@ optional = true version = "1.32.0" features = ["io-util", "macros"] +[dependencies.pin-project-lite] +optional = true +version = "0.2.13" + [dev-dependencies] futures = { version = "0.3.30", default-features = false, features = ["executor"] } lazy_static = "1.4.0" diff --git a/tvix/nix-compat/src/wire/bytes_writer.rs b/tvix/nix-compat/src/wire/bytes_writer.rs new file mode 100644 index 000000000..cf0e227e1 --- /dev/null +++ b/tvix/nix-compat/src/wire/bytes_writer.rs @@ -0,0 +1,514 @@ +use pin_project_lite::pin_project; +use std::task::{ready, Poll}; + +use tokio::io::AsyncWrite; + +/// The length of the size field, in bytes is always 8. +const LEN_SIZE: usize = 8; + +/// 8 null bytes, used to write out padding. +const EMPTY_BYTES: &[u8; 8] = &[0u8; 8]; + +pin_project! { + /// Writes a "bytes wire packet" to the underlying writer. + /// The format is the same as in [crate::wire::bytes::write_bytes], + /// however this structure provides a [AsyncWrite] interface, + /// allowing to not having to pass around the entire payload in memory. + /// + /// It internally takes care of writing (non-payload) framing (size and + /// padding). + /// + /// During construction, the expected payload size needs to be provided. + /// + /// After writing the payload to it, the user MUST call flush (or shutdown), + /// which will validate the written payload size to match, and write the + /// necessary padding. + /// + /// In case flush is not called at the end, invalid data might be sent + /// silently. + /// + /// The underlying writer returning `Ok(0)` is considered an EOF situation, + /// which is stronger than the "typically means the underlying object is no + /// longer able to accept bytes" interpretation from the docs. If such a + /// situation occurs, an error is returned. + /// + /// The struct holds three fields, the underlying writer, the (expected) + /// payload length, and an enum, tracking the state. + pub struct BytesWriter + where + W: AsyncWrite, + { + #[pin] + inner: W, + payload_len: u64, + state: BytesWriterState, + } +} + +/// Models the state [BytesWriter] currently is in. +/// It can be in three stages, writing size, payload or padding fields. +/// The number tracks the number of bytes written in the current state. +/// There shall be no ambiguous states, at the end of a stage we immediately +/// move to the beginning of the next one: +/// - Size(LEN_SIZE) must be expressed as Payload(0) +/// - Payload(self.payload_len) must be expressed as Padding(0) +/// +/// Padding(padding_len) means everything that needed to be written was written. +#[derive(Clone, Debug, PartialEq, Eq)] +enum BytesWriterState { + Size(usize), + Payload(u64), + Padding(usize), +} + +impl BytesWriter +where + W: AsyncWrite, +{ + /// Constructs a new BytesWriter, using the underlying passed writer. + pub fn new(w: W, payload_len: u64) -> Self { + Self { + inner: w, + payload_len, + state: BytesWriterState::Size(0), + } + } +} + +/// Returns an error if the passed usize is 0. +fn ensure_nonzero_bytes_written(bytes_written: usize) -> Result { + if bytes_written == 0 { + Err(std::io::Error::new( + std::io::ErrorKind::WriteZero, + "underlying writer accepted 0 bytes", + )) + } else { + Ok(bytes_written) + } +} + +impl AsyncWrite for BytesWriter +where + W: AsyncWrite, +{ + fn poll_write( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + buf: &[u8], + ) -> Poll> { + // Use a loop, so we can deal with (multiple) state transitions. + let mut this = self.project(); + + loop { + match *this.state { + BytesWriterState::Size(LEN_SIZE) => unreachable!(), + BytesWriterState::Size(pos) => { + let size_field = &this.payload_len.to_le_bytes(); + + let bytes_written = ensure_nonzero_bytes_written(ready!(this + .inner + .as_mut() + .poll_write(cx, &size_field[pos..]))?)?; + + let new_pos = pos + bytes_written; + if new_pos == LEN_SIZE { + *this.state = BytesWriterState::Payload(0); + } else { + *this.state = BytesWriterState::Size(new_pos); + } + } + BytesWriterState::Payload(pos) => { + // Ensure we still have space for more payload + if pos + (buf.len() as u64) > *this.payload_len { + return Poll::Ready(Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "tried to write excess bytes", + ))); + } + let bytes_written = ready!(this.inner.as_mut().poll_write(cx, buf))?; + ensure_nonzero_bytes_written(bytes_written)?; + let new_pos = pos + (bytes_written as u64); + if new_pos == *this.payload_len { + *this.state = BytesWriterState::Padding(0) + } else { + *this.state = BytesWriterState::Payload(new_pos) + } + + return Poll::Ready(Ok(bytes_written)); + } + // If we're already in padding state, there should be no more payload left to write! + BytesWriterState::Padding(_pos) => { + return Poll::Ready(Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "tried to write excess bytes", + ))) + } + } + } + } + + fn poll_flush( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + let mut this = self.project(); + + loop { + match *this.state { + BytesWriterState::Size(LEN_SIZE) => unreachable!(), + BytesWriterState::Size(pos) => { + // More bytes to write in the size field + let size_field = &this.payload_len.to_le_bytes()[..]; + let bytes_written = ensure_nonzero_bytes_written(ready!(this + .inner + .as_mut() + .poll_write(cx, &size_field[pos..]))?)?; + let new_pos = pos + bytes_written; + if new_pos == LEN_SIZE { + // Size field written, now ready to receive payload + *this.state = BytesWriterState::Payload(0); + } else { + *this.state = BytesWriterState::Size(new_pos); + } + } + BytesWriterState::Payload(_pos) => { + // If we're at position 0 and want to write 0 bytes of payload + // in total, we can transition to padding. + // Otherwise, break, as we're expecting more payload to + // be written. + if *this.payload_len == 0 { + *this.state = BytesWriterState::Padding(0); + } else { + break; + } + } + BytesWriterState::Padding(pos) => { + // Write remaining padding, if there is padding to write. + let padding_len = super::bytes::padding_len(*this.payload_len) as usize; + + if pos != padding_len { + let bytes_written = ensure_nonzero_bytes_written(ready!(this + .inner + .as_mut() + .poll_write(cx, &EMPTY_BYTES[..padding_len]))?)?; + *this.state = BytesWriterState::Padding(pos + bytes_written); + } else { + // everything written, break + break; + } + } + } + } + // Flush the underlying writer. + this.inner.as_mut().poll_flush(cx) + } + + fn poll_shutdown( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + // Call flush. + ready!(self.as_mut().poll_flush(cx))?; + + let this = self.project(); + + // After a flush, being inside the padding state, and at the end of the padding + // is the only way to prevent a dirty shutdown. + if let BytesWriterState::Padding(pos) = *this.state { + let padding_len = super::bytes::padding_len(*this.payload_len) as usize; + if padding_len == pos { + // Shutdown the underlying writer + return this.inner.poll_shutdown(cx); + } + } + + // Shutdown the underlying writer, bubbling up any errors. + ready!(this.inner.poll_shutdown(cx))?; + + // return an error about unclean shutdown + Poll::Ready(Err(std::io::Error::new( + std::io::ErrorKind::BrokenPipe, + "unclean shutdown", + ))) + } +} + +#[cfg(test)] +mod tests { + use crate::wire::bytes::write_bytes; + use hex_literal::hex; + use lazy_static::lazy_static; + use tokio::io::AsyncWriteExt; + use tokio_test::{assert_err, assert_ok, io::Builder}; + + use super::*; + + lazy_static! { + pub static ref LARGE_PAYLOAD: Vec = (0..255).collect::>().repeat(4 * 1024); + } + + /// Helper function, calling the (simpler) write_bytes with the payload. + /// We use this to create data we want to see on the wire. + async fn produce_exp_bytes(payload: &[u8]) -> Vec { + let mut exp = vec![]; + write_bytes(&mut exp, payload).await.unwrap(); + exp + } + + /// Write an empty bytes packet. + #[tokio::test] + async fn write_empty() { + let payload = &[]; + let mut mock = Builder::new() + .write(&produce_exp_bytes(payload).await) + .build(); + + let mut w = BytesWriter::new(&mut mock, 0); + assert_ok!(w.write_all(&[]).await, "write all data"); + assert_ok!(w.flush().await, "flush"); + } + + /// Write an empty bytes packet, not calling write. + #[tokio::test] + async fn write_empty_only_flush() { + let payload = &[]; + let mut mock = Builder::new() + .write(&produce_exp_bytes(payload).await) + .build(); + + let mut w = BytesWriter::new(&mut mock, 0); + assert_ok!(w.flush().await, "flush"); + } + + /// Write an empty bytes packet, not calling write or flush, only shutdown. + #[tokio::test] + async fn write_empty_only_shutdown() { + let payload = &[]; + let mut mock = Builder::new() + .write(&produce_exp_bytes(payload).await) + .build(); + + let mut w = BytesWriter::new(&mut mock, 0); + assert_ok!(w.shutdown().await, "shutdown"); + } + + /// Write a 1 bytes packet + #[tokio::test] + async fn write_1b() { + let payload = &[0xff]; + + let mut mock = Builder::new() + .write(&produce_exp_bytes(payload).await) + .build(); + + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + assert_ok!(w.write_all(payload).await); + assert_ok!(w.flush().await, "flush"); + } + + /// Write a 8 bytes payload (no padding) + #[tokio::test] + async fn write_8b() { + let payload = &hex!("000102030405060708"); + + let mut mock = Builder::new() + .write(&produce_exp_bytes(payload).await) + .build(); + + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + assert_ok!(w.write_all(payload).await); + assert_ok!(w.flush().await, "flush"); + } + + /// Write a 9 bytes payload (7 bytes padding) + #[tokio::test] + async fn write_9b() { + let payload = &hex!("00010203040506070809"); + + let mut mock = Builder::new() + .write(&produce_exp_bytes(payload).await) + .build(); + + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + assert_ok!(w.write_all(payload).await); + assert_ok!(w.flush().await, "flush"); + } + + /// Write a 9 bytes packet very granularly, with a lot of flushing in between, + /// and a shutdown at the end. + #[tokio::test] + async fn write_9b_flush() { + let payload = &hex!("00010203040506070809"); + let exp_bytes = produce_exp_bytes(payload).await; + + let mut mock = Builder::new().write(&exp_bytes).build(); + + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + assert_ok!(w.flush().await); + + assert_ok!(w.write_all(&payload[..4]).await); + assert_ok!(w.flush().await); + + // empty write, cause why not + assert_ok!(w.write_all(&[]).await); + assert_ok!(w.flush().await); + + assert_ok!(w.write_all(&payload[4..]).await); + assert_ok!(w.flush().await); + assert_ok!(w.shutdown().await); + } + + /// Write a larger bytes packet + #[tokio::test] + async fn write_1m_debug() { + let payload = LARGE_PAYLOAD.as_slice(); + let exp_bytes = produce_exp_bytes(payload).await; + + let mut mock = Builder::new().write(&exp_bytes).build(); + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + + assert_ok!(w.write_all(payload).await); + assert_ok!(w.flush().await, "flush"); + } + + /// Not calling flush at the end, but shutdown is also ok if we wrote all + /// bytes we promised to write (as shutdown implies flush) + #[tokio::test] + async fn write_shutdown_without_flush_end() { + let payload = &[0xf0, 0xff]; + let exp_bytes = produce_exp_bytes(payload).await; + + let mut mock = Builder::new().write(&exp_bytes).build(); + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + + // call flush to write the size field + assert_ok!(w.flush().await); + + // write payload + assert_ok!(w.write_all(payload).await); + + // call shutdown + assert_ok!(w.shutdown().await); + } + + /// Writing more bytes than previously signalled should fail. + #[tokio::test] + async fn write_more_than_signalled_fail() { + let mut buf = Vec::new(); + let mut w = BytesWriter::new(&mut buf, 2); + + assert_err!(w.write_all(&hex!("000102")).await); + } + /// Writing more bytes than previously signalled, but in two parts + #[tokio::test] + async fn write_more_than_signalled_split_fail() { + let mut buf = Vec::new(); + let mut w = BytesWriter::new(&mut buf, 2); + + // write two bytes + assert_ok!(w.write_all(&hex!("0001")).await); + + // write the excess byte. + assert_err!(w.write_all(&hex!("02")).await); + } + + /// Writing more bytes than previously signalled, but flushing after the + /// signalled amount should fail. + #[tokio::test] + async fn write_more_than_signalled_flush_fail() { + let mut buf = Vec::new(); + let mut w = BytesWriter::new(&mut buf, 2); + + // write two bytes, then flush + assert_ok!(w.write_all(&hex!("0001")).await); + assert_ok!(w.flush().await); + + // write the excess byte. + assert_err!(w.write_all(&hex!("02")).await); + } + + /// Calling shutdown while not having written all bytes that were promised + /// returns an error. + /// Note there's still cases of silent corruption if the user doesn't call + /// shutdown explicitly (only drops). + #[tokio::test] + async fn premature_shutdown() { + let payload = &[0xf0, 0xff]; + let mut buf = Vec::new(); + let mut w = BytesWriter::new(&mut buf, payload.len() as u64); + + // call flush to write the size field + assert_ok!(w.flush().await); + + // write half of the payload (!) + assert_ok!(w.write_all(&payload[0..1]).await); + + // call shutdown, ensure it fails + assert_err!(w.shutdown().await); + } + + /// Write to a Writer that fails to write during the size packet (after 4 bytes). + /// Ensure this error gets propagated on the first call to write. + #[tokio::test] + async fn inner_writer_fail_during_size_firstwrite() { + let payload = &[0xf0]; + + let mut mock = Builder::new() + .write(&1u32.to_le_bytes()) + .write_error(std::io::Error::new(std::io::ErrorKind::Other, "🍿")) + .build(); + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + + assert_err!(w.write_all(payload).await); + } + + /// Write to a Writer that fails to write during the size packet (after 4 bytes). + /// Ensure this error gets propagated during an initial flush + #[tokio::test] + async fn inner_writer_fail_during_size_initial_flush() { + let payload = &[0xf0]; + + let mut mock = Builder::new() + .write(&1u32.to_le_bytes()) + .write_error(std::io::Error::new(std::io::ErrorKind::Other, "🍿")) + .build(); + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + + assert_err!(w.flush().await); + } + + /// Write to a writer that fails to write during the payload (after 9 bytes). + /// Ensure this error gets propagated when we're writing this byte. + #[tokio::test] + async fn inner_writer_fail_during_write() { + let payload = &hex!("f0ff"); + + let mut mock = Builder::new() + .write(&2u64.to_le_bytes()) + .write(&hex!("f0")) + .write_error(std::io::Error::new(std::io::ErrorKind::Other, "🍿")) + .build(); + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + + assert_ok!(w.write(&hex!("f0")).await); + assert_err!(w.write(&hex!("ff")).await); + } + + /// Write to a writer that fails to write during the padding (after 10 bytes). + /// Ensure this error gets propagated during a flush. + #[tokio::test] + async fn inner_writer_fail_during_padding_flush() { + let payload = &hex!("f0"); + + let mut mock = Builder::new() + .write(&1u64.to_le_bytes()) + .write(&hex!("f0")) + .write(&hex!("00")) + .write_error(std::io::Error::new(std::io::ErrorKind::Other, "🍿")) + .build(); + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + + assert_ok!(w.write(&hex!("f0")).await); + assert_err!(w.flush().await); + } +} diff --git a/tvix/nix-compat/src/wire/mod.rs b/tvix/nix-compat/src/wire/mod.rs index cd7b87aac..cb14e4d42 100644 --- a/tvix/nix-compat/src/wire/mod.rs +++ b/tvix/nix-compat/src/wire/mod.rs @@ -4,6 +4,11 @@ #[cfg(feature = "async")] pub mod bytes; +#[cfg(feature = "async")] +mod bytes_writer; +#[cfg(feature = "async")] +pub use bytes_writer::BytesWriter; + #[cfg(feature = "async")] pub mod primitive; diff --git a/users/picnoir/tvix-daemon/Cargo.lock b/users/picnoir/tvix-daemon/Cargo.lock index cdbb82e37..5522c6303 100644 --- a/users/picnoir/tvix-daemon/Cargo.lock +++ b/users/picnoir/tvix-daemon/Cargo.lock @@ -763,6 +763,7 @@ dependencies = [ "glob", "nom", "num-traits", + "pin-project-lite", "serde", "serde_json", "sha2", diff --git a/users/picnoir/tvix-daemon/Cargo.nix b/users/picnoir/tvix-daemon/Cargo.nix index 14fd16afb..aa3faf4a8 100644 --- a/users/picnoir/tvix-daemon/Cargo.nix +++ b/users/picnoir/tvix-daemon/Cargo.nix @@ -1,5 +1,5 @@ -# This file was @generated by crate2nix 0.12.0 with the command: -# "generate" +# This file was @generated by crate2nix 0.13.0 with the command: +# "generate" "--all-features" # See https://github.com/kolloch/crate2nix for more info. { nixpkgs ? @@ -2372,6 +2372,11 @@ rec { name = "num-traits"; packageId = "num-traits"; } + { + name = "pin-project-lite"; + packageId = "pin-project-lite"; + optional = true; + } { name = "serde"; packageId = "serde"; @@ -2403,11 +2408,12 @@ rec { } ]; features = { - "async" = [ "futures-util" "tokio" ]; + "async" = [ "futures-util" "tokio" "pin-project-lite" ]; "futures-util" = [ "dep:futures-util" ]; + "pin-project-lite" = [ "dep:pin-project-lite" ]; "tokio" = [ "dep:tokio" ]; }; - resolvedDefaultFeatures = [ "async" "futures-util" "tokio" ]; + resolvedDefaultFeatures = [ "async" "futures-util" "pin-project-lite" "tokio" ]; }; "nom" = rec { crateName = "nom"; @@ -5205,6 +5211,7 @@ rec { ( _: { buildTests = true; + release = false; } ); # If the user hasn't set any pre/post commands, we don't want to @@ -5229,6 +5236,16 @@ rec { # recreate a file hierarchy as when running tests with cargo # the source for test data + # It's necessary to locate the source in $NIX_BUILD_TOP/source/ + # instead of $NIX_BUILD_TOP/ + # because we compiled those test binaries in the former and not the latter. + # So all paths will expect source tree to be there and not in the build top directly. + # For example: $NIX_BUILD_TOP := /build in general, if you ask yourself. + # TODO(raitobezarius): I believe there could be more edge cases if `crate.sourceRoot` + # do exist but it's very hard to reason about them, so let's wait until the first bug report. + mkdir -p source/ + cd source/ + ${pkgs.buildPackages.xorg.lndir}/bin/lndir ${crate.src} # build outputs