feat(tvix/nix-compat/wire): introduce BytesWriter

This deals with writing byte packets of larger sizes to an underlying
AsyncWrite.

Its constructor receives the expected size. It also deals with writing
padding if flush/shutdown is called after writing all the payload.

Change-Id: I8acbf992467f3862ffb8c7d669e8c0c8eced14c1
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11355
Reviewed-by: picnoir picnoir <picnoir@alternativebit.fr>
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: Brian Olsen <me@griff.name>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2024-04-04 19:03:44 +03:00 committed by clbot
parent 39276dc5b4
commit f3d8c633f2
7 changed files with 555 additions and 7 deletions

1
tvix/Cargo.lock generated
View file

@ -1882,6 +1882,7 @@ dependencies = [
"lazy_static", "lazy_static",
"nom", "nom",
"num-traits", "num-traits",
"pin-project-lite",
"pretty_assertions", "pretty_assertions",
"serde", "serde",
"serde_json", "serde_json",

View file

@ -5769,6 +5769,11 @@ rec {
name = "num-traits"; name = "num-traits";
packageId = "num-traits"; packageId = "num-traits";
} }
{
name = "pin-project-lite";
packageId = "pin-project-lite";
optional = true;
}
{ {
name = "serde"; name = "serde";
packageId = "serde"; packageId = "serde";
@ -5839,11 +5844,12 @@ rec {
} }
]; ];
features = { features = {
"async" = [ "futures-util" "tokio" ]; "async" = [ "futures-util" "tokio" "pin-project-lite" ];
"futures-util" = [ "dep:futures-util" ]; "futures-util" = [ "dep:futures-util" ];
"pin-project-lite" = [ "dep:pin-project-lite" ];
"tokio" = [ "dep:tokio" ]; "tokio" = [ "dep:tokio" ];
}; };
resolvedDefaultFeatures = [ "async" "futures-util" "tokio" ]; resolvedDefaultFeatures = [ "async" "futures-util" "pin-project-lite" "tokio" ];
}; };
"nom" = rec { "nom" = rec {
crateName = "nom"; crateName = "nom";

View file

@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features] [features]
async = ["futures-util", "tokio"] async = ["futures-util", "tokio", "pin-project-lite"]
[dependencies] [dependencies]
bitflags = "2.4.1" bitflags = "2.4.1"
@ -29,6 +29,10 @@ optional = true
version = "1.32.0" version = "1.32.0"
features = ["io-util", "macros"] features = ["io-util", "macros"]
[dependencies.pin-project-lite]
optional = true
version = "0.2.13"
[dev-dependencies] [dev-dependencies]
futures = { version = "0.3.30", default-features = false, features = ["executor"] } futures = { version = "0.3.30", default-features = false, features = ["executor"] }
lazy_static = "1.4.0" lazy_static = "1.4.0"

View file

@ -0,0 +1,514 @@
use pin_project_lite::pin_project;
use std::task::{ready, Poll};
use tokio::io::AsyncWrite;
/// The length of the size field, in bytes is always 8.
const LEN_SIZE: usize = 8;
/// 8 null bytes, used to write out padding.
const EMPTY_BYTES: &[u8; 8] = &[0u8; 8];
pin_project! {
/// Writes a "bytes wire packet" to the underlying writer.
/// The format is the same as in [crate::wire::bytes::write_bytes],
/// however this structure provides a [AsyncWrite] interface,
/// allowing to not having to pass around the entire payload in memory.
///
/// It internally takes care of writing (non-payload) framing (size and
/// padding).
///
/// During construction, the expected payload size needs to be provided.
///
/// After writing the payload to it, the user MUST call flush (or shutdown),
/// which will validate the written payload size to match, and write the
/// necessary padding.
///
/// In case flush is not called at the end, invalid data might be sent
/// silently.
///
/// The underlying writer returning `Ok(0)` is considered an EOF situation,
/// which is stronger than the "typically means the underlying object is no
/// longer able to accept bytes" interpretation from the docs. If such a
/// situation occurs, an error is returned.
///
/// The struct holds three fields, the underlying writer, the (expected)
/// payload length, and an enum, tracking the state.
pub struct BytesWriter<W>
where
W: AsyncWrite,
{
#[pin]
inner: W,
payload_len: u64,
state: BytesWriterState,
}
}
/// Models the state [BytesWriter] currently is in.
/// It can be in three stages, writing size, payload or padding fields.
/// The number tracks the number of bytes written in the current state.
/// There shall be no ambiguous states, at the end of a stage we immediately
/// move to the beginning of the next one:
/// - Size(LEN_SIZE) must be expressed as Payload(0)
/// - Payload(self.payload_len) must be expressed as Padding(0)
///
/// Padding(padding_len) means everything that needed to be written was written.
#[derive(Clone, Debug, PartialEq, Eq)]
enum BytesWriterState {
Size(usize),
Payload(u64),
Padding(usize),
}
impl<W> BytesWriter<W>
where
W: AsyncWrite,
{
/// Constructs a new BytesWriter, using the underlying passed writer.
pub fn new(w: W, payload_len: u64) -> Self {
Self {
inner: w,
payload_len,
state: BytesWriterState::Size(0),
}
}
}
/// Returns an error if the passed usize is 0.
fn ensure_nonzero_bytes_written(bytes_written: usize) -> Result<usize, std::io::Error> {
if bytes_written == 0 {
Err(std::io::Error::new(
std::io::ErrorKind::WriteZero,
"underlying writer accepted 0 bytes",
))
} else {
Ok(bytes_written)
}
}
impl<W> AsyncWrite for BytesWriter<W>
where
W: AsyncWrite,
{
fn poll_write(
self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
buf: &[u8],
) -> Poll<Result<usize, std::io::Error>> {
// Use a loop, so we can deal with (multiple) state transitions.
let mut this = self.project();
loop {
match *this.state {
BytesWriterState::Size(LEN_SIZE) => unreachable!(),
BytesWriterState::Size(pos) => {
let size_field = &this.payload_len.to_le_bytes();
let bytes_written = ensure_nonzero_bytes_written(ready!(this
.inner
.as_mut()
.poll_write(cx, &size_field[pos..]))?)?;
let new_pos = pos + bytes_written;
if new_pos == LEN_SIZE {
*this.state = BytesWriterState::Payload(0);
} else {
*this.state = BytesWriterState::Size(new_pos);
}
}
BytesWriterState::Payload(pos) => {
// Ensure we still have space for more payload
if pos + (buf.len() as u64) > *this.payload_len {
return Poll::Ready(Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"tried to write excess bytes",
)));
}
let bytes_written = ready!(this.inner.as_mut().poll_write(cx, buf))?;
ensure_nonzero_bytes_written(bytes_written)?;
let new_pos = pos + (bytes_written as u64);
if new_pos == *this.payload_len {
*this.state = BytesWriterState::Padding(0)
} else {
*this.state = BytesWriterState::Payload(new_pos)
}
return Poll::Ready(Ok(bytes_written));
}
// If we're already in padding state, there should be no more payload left to write!
BytesWriterState::Padding(_pos) => {
return Poll::Ready(Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"tried to write excess bytes",
)))
}
}
}
}
fn poll_flush(
self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> Poll<Result<(), std::io::Error>> {
let mut this = self.project();
loop {
match *this.state {
BytesWriterState::Size(LEN_SIZE) => unreachable!(),
BytesWriterState::Size(pos) => {
// More bytes to write in the size field
let size_field = &this.payload_len.to_le_bytes()[..];
let bytes_written = ensure_nonzero_bytes_written(ready!(this
.inner
.as_mut()
.poll_write(cx, &size_field[pos..]))?)?;
let new_pos = pos + bytes_written;
if new_pos == LEN_SIZE {
// Size field written, now ready to receive payload
*this.state = BytesWriterState::Payload(0);
} else {
*this.state = BytesWriterState::Size(new_pos);
}
}
BytesWriterState::Payload(_pos) => {
// If we're at position 0 and want to write 0 bytes of payload
// in total, we can transition to padding.
// Otherwise, break, as we're expecting more payload to
// be written.
if *this.payload_len == 0 {
*this.state = BytesWriterState::Padding(0);
} else {
break;
}
}
BytesWriterState::Padding(pos) => {
// Write remaining padding, if there is padding to write.
let padding_len = super::bytes::padding_len(*this.payload_len) as usize;
if pos != padding_len {
let bytes_written = ensure_nonzero_bytes_written(ready!(this
.inner
.as_mut()
.poll_write(cx, &EMPTY_BYTES[..padding_len]))?)?;
*this.state = BytesWriterState::Padding(pos + bytes_written);
} else {
// everything written, break
break;
}
}
}
}
// Flush the underlying writer.
this.inner.as_mut().poll_flush(cx)
}
fn poll_shutdown(
mut self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> Poll<Result<(), std::io::Error>> {
// Call flush.
ready!(self.as_mut().poll_flush(cx))?;
let this = self.project();
// After a flush, being inside the padding state, and at the end of the padding
// is the only way to prevent a dirty shutdown.
if let BytesWriterState::Padding(pos) = *this.state {
let padding_len = super::bytes::padding_len(*this.payload_len) as usize;
if padding_len == pos {
// Shutdown the underlying writer
return this.inner.poll_shutdown(cx);
}
}
// Shutdown the underlying writer, bubbling up any errors.
ready!(this.inner.poll_shutdown(cx))?;
// return an error about unclean shutdown
Poll::Ready(Err(std::io::Error::new(
std::io::ErrorKind::BrokenPipe,
"unclean shutdown",
)))
}
}
#[cfg(test)]
mod tests {
use crate::wire::bytes::write_bytes;
use hex_literal::hex;
use lazy_static::lazy_static;
use tokio::io::AsyncWriteExt;
use tokio_test::{assert_err, assert_ok, io::Builder};
use super::*;
lazy_static! {
pub static ref LARGE_PAYLOAD: Vec<u8> = (0..255).collect::<Vec<u8>>().repeat(4 * 1024);
}
/// Helper function, calling the (simpler) write_bytes with the payload.
/// We use this to create data we want to see on the wire.
async fn produce_exp_bytes(payload: &[u8]) -> Vec<u8> {
let mut exp = vec![];
write_bytes(&mut exp, payload).await.unwrap();
exp
}
/// Write an empty bytes packet.
#[tokio::test]
async fn write_empty() {
let payload = &[];
let mut mock = Builder::new()
.write(&produce_exp_bytes(payload).await)
.build();
let mut w = BytesWriter::new(&mut mock, 0);
assert_ok!(w.write_all(&[]).await, "write all data");
assert_ok!(w.flush().await, "flush");
}
/// Write an empty bytes packet, not calling write.
#[tokio::test]
async fn write_empty_only_flush() {
let payload = &[];
let mut mock = Builder::new()
.write(&produce_exp_bytes(payload).await)
.build();
let mut w = BytesWriter::new(&mut mock, 0);
assert_ok!(w.flush().await, "flush");
}
/// Write an empty bytes packet, not calling write or flush, only shutdown.
#[tokio::test]
async fn write_empty_only_shutdown() {
let payload = &[];
let mut mock = Builder::new()
.write(&produce_exp_bytes(payload).await)
.build();
let mut w = BytesWriter::new(&mut mock, 0);
assert_ok!(w.shutdown().await, "shutdown");
}
/// Write a 1 bytes packet
#[tokio::test]
async fn write_1b() {
let payload = &[0xff];
let mut mock = Builder::new()
.write(&produce_exp_bytes(payload).await)
.build();
let mut w = BytesWriter::new(&mut mock, payload.len() as u64);
assert_ok!(w.write_all(payload).await);
assert_ok!(w.flush().await, "flush");
}
/// Write a 8 bytes payload (no padding)
#[tokio::test]
async fn write_8b() {
let payload = &hex!("000102030405060708");
let mut mock = Builder::new()
.write(&produce_exp_bytes(payload).await)
.build();
let mut w = BytesWriter::new(&mut mock, payload.len() as u64);
assert_ok!(w.write_all(payload).await);
assert_ok!(w.flush().await, "flush");
}
/// Write a 9 bytes payload (7 bytes padding)
#[tokio::test]
async fn write_9b() {
let payload = &hex!("00010203040506070809");
let mut mock = Builder::new()
.write(&produce_exp_bytes(payload).await)
.build();
let mut w = BytesWriter::new(&mut mock, payload.len() as u64);
assert_ok!(w.write_all(payload).await);
assert_ok!(w.flush().await, "flush");
}
/// Write a 9 bytes packet very granularly, with a lot of flushing in between,
/// and a shutdown at the end.
#[tokio::test]
async fn write_9b_flush() {
let payload = &hex!("00010203040506070809");
let exp_bytes = produce_exp_bytes(payload).await;
let mut mock = Builder::new().write(&exp_bytes).build();
let mut w = BytesWriter::new(&mut mock, payload.len() as u64);
assert_ok!(w.flush().await);
assert_ok!(w.write_all(&payload[..4]).await);
assert_ok!(w.flush().await);
// empty write, cause why not
assert_ok!(w.write_all(&[]).await);
assert_ok!(w.flush().await);
assert_ok!(w.write_all(&payload[4..]).await);
assert_ok!(w.flush().await);
assert_ok!(w.shutdown().await);
}
/// Write a larger bytes packet
#[tokio::test]
async fn write_1m_debug() {
let payload = LARGE_PAYLOAD.as_slice();
let exp_bytes = produce_exp_bytes(payload).await;
let mut mock = Builder::new().write(&exp_bytes).build();
let mut w = BytesWriter::new(&mut mock, payload.len() as u64);
assert_ok!(w.write_all(payload).await);
assert_ok!(w.flush().await, "flush");
}
/// Not calling flush at the end, but shutdown is also ok if we wrote all
/// bytes we promised to write (as shutdown implies flush)
#[tokio::test]
async fn write_shutdown_without_flush_end() {
let payload = &[0xf0, 0xff];
let exp_bytes = produce_exp_bytes(payload).await;
let mut mock = Builder::new().write(&exp_bytes).build();
let mut w = BytesWriter::new(&mut mock, payload.len() as u64);
// call flush to write the size field
assert_ok!(w.flush().await);
// write payload
assert_ok!(w.write_all(payload).await);
// call shutdown
assert_ok!(w.shutdown().await);
}
/// Writing more bytes than previously signalled should fail.
#[tokio::test]
async fn write_more_than_signalled_fail() {
let mut buf = Vec::new();
let mut w = BytesWriter::new(&mut buf, 2);
assert_err!(w.write_all(&hex!("000102")).await);
}
/// Writing more bytes than previously signalled, but in two parts
#[tokio::test]
async fn write_more_than_signalled_split_fail() {
let mut buf = Vec::new();
let mut w = BytesWriter::new(&mut buf, 2);
// write two bytes
assert_ok!(w.write_all(&hex!("0001")).await);
// write the excess byte.
assert_err!(w.write_all(&hex!("02")).await);
}
/// Writing more bytes than previously signalled, but flushing after the
/// signalled amount should fail.
#[tokio::test]
async fn write_more_than_signalled_flush_fail() {
let mut buf = Vec::new();
let mut w = BytesWriter::new(&mut buf, 2);
// write two bytes, then flush
assert_ok!(w.write_all(&hex!("0001")).await);
assert_ok!(w.flush().await);
// write the excess byte.
assert_err!(w.write_all(&hex!("02")).await);
}
/// Calling shutdown while not having written all bytes that were promised
/// returns an error.
/// Note there's still cases of silent corruption if the user doesn't call
/// shutdown explicitly (only drops).
#[tokio::test]
async fn premature_shutdown() {
let payload = &[0xf0, 0xff];
let mut buf = Vec::new();
let mut w = BytesWriter::new(&mut buf, payload.len() as u64);
// call flush to write the size field
assert_ok!(w.flush().await);
// write half of the payload (!)
assert_ok!(w.write_all(&payload[0..1]).await);
// call shutdown, ensure it fails
assert_err!(w.shutdown().await);
}
/// Write to a Writer that fails to write during the size packet (after 4 bytes).
/// Ensure this error gets propagated on the first call to write.
#[tokio::test]
async fn inner_writer_fail_during_size_firstwrite() {
let payload = &[0xf0];
let mut mock = Builder::new()
.write(&1u32.to_le_bytes())
.write_error(std::io::Error::new(std::io::ErrorKind::Other, "🍿"))
.build();
let mut w = BytesWriter::new(&mut mock, payload.len() as u64);
assert_err!(w.write_all(payload).await);
}
/// Write to a Writer that fails to write during the size packet (after 4 bytes).
/// Ensure this error gets propagated during an initial flush
#[tokio::test]
async fn inner_writer_fail_during_size_initial_flush() {
let payload = &[0xf0];
let mut mock = Builder::new()
.write(&1u32.to_le_bytes())
.write_error(std::io::Error::new(std::io::ErrorKind::Other, "🍿"))
.build();
let mut w = BytesWriter::new(&mut mock, payload.len() as u64);
assert_err!(w.flush().await);
}
/// Write to a writer that fails to write during the payload (after 9 bytes).
/// Ensure this error gets propagated when we're writing this byte.
#[tokio::test]
async fn inner_writer_fail_during_write() {
let payload = &hex!("f0ff");
let mut mock = Builder::new()
.write(&2u64.to_le_bytes())
.write(&hex!("f0"))
.write_error(std::io::Error::new(std::io::ErrorKind::Other, "🍿"))
.build();
let mut w = BytesWriter::new(&mut mock, payload.len() as u64);
assert_ok!(w.write(&hex!("f0")).await);
assert_err!(w.write(&hex!("ff")).await);
}
/// Write to a writer that fails to write during the padding (after 10 bytes).
/// Ensure this error gets propagated during a flush.
#[tokio::test]
async fn inner_writer_fail_during_padding_flush() {
let payload = &hex!("f0");
let mut mock = Builder::new()
.write(&1u64.to_le_bytes())
.write(&hex!("f0"))
.write(&hex!("00"))
.write_error(std::io::Error::new(std::io::ErrorKind::Other, "🍿"))
.build();
let mut w = BytesWriter::new(&mut mock, payload.len() as u64);
assert_ok!(w.write(&hex!("f0")).await);
assert_err!(w.flush().await);
}
}

View file

@ -4,6 +4,11 @@
#[cfg(feature = "async")] #[cfg(feature = "async")]
pub mod bytes; pub mod bytes;
#[cfg(feature = "async")]
mod bytes_writer;
#[cfg(feature = "async")]
pub use bytes_writer::BytesWriter;
#[cfg(feature = "async")] #[cfg(feature = "async")]
pub mod primitive; pub mod primitive;

View file

@ -763,6 +763,7 @@ dependencies = [
"glob", "glob",
"nom", "nom",
"num-traits", "num-traits",
"pin-project-lite",
"serde", "serde",
"serde_json", "serde_json",
"sha2", "sha2",

View file

@ -1,5 +1,5 @@
# This file was @generated by crate2nix 0.12.0 with the command: # This file was @generated by crate2nix 0.13.0 with the command:
# "generate" # "generate" "--all-features"
# See https://github.com/kolloch/crate2nix for more info. # See https://github.com/kolloch/crate2nix for more info.
{ nixpkgs ? <nixpkgs> { nixpkgs ? <nixpkgs>
@ -2372,6 +2372,11 @@ rec {
name = "num-traits"; name = "num-traits";
packageId = "num-traits"; packageId = "num-traits";
} }
{
name = "pin-project-lite";
packageId = "pin-project-lite";
optional = true;
}
{ {
name = "serde"; name = "serde";
packageId = "serde"; packageId = "serde";
@ -2403,11 +2408,12 @@ rec {
} }
]; ];
features = { features = {
"async" = [ "futures-util" "tokio" ]; "async" = [ "futures-util" "tokio" "pin-project-lite" ];
"futures-util" = [ "dep:futures-util" ]; "futures-util" = [ "dep:futures-util" ];
"pin-project-lite" = [ "dep:pin-project-lite" ];
"tokio" = [ "dep:tokio" ]; "tokio" = [ "dep:tokio" ];
}; };
resolvedDefaultFeatures = [ "async" "futures-util" "tokio" ]; resolvedDefaultFeatures = [ "async" "futures-util" "pin-project-lite" "tokio" ];
}; };
"nom" = rec { "nom" = rec {
crateName = "nom"; crateName = "nom";
@ -5205,6 +5211,7 @@ rec {
( (
_: { _: {
buildTests = true; buildTests = true;
release = false;
} }
); );
# If the user hasn't set any pre/post commands, we don't want to # If the user hasn't set any pre/post commands, we don't want to
@ -5229,6 +5236,16 @@ rec {
# recreate a file hierarchy as when running tests with cargo # recreate a file hierarchy as when running tests with cargo
# the source for test data # the source for test data
# It's necessary to locate the source in $NIX_BUILD_TOP/source/
# instead of $NIX_BUILD_TOP/
# because we compiled those test binaries in the former and not the latter.
# So all paths will expect source tree to be there and not in the build top directly.
# For example: $NIX_BUILD_TOP := /build in general, if you ask yourself.
# TODO(raitobezarius): I believe there could be more edge cases if `crate.sourceRoot`
# do exist but it's very hard to reason about them, so let's wait until the first bug report.
mkdir -p source/
cd source/
${pkgs.buildPackages.xorg.lndir}/bin/lndir ${crate.src} ${pkgs.buildPackages.xorg.lndir}/bin/lndir ${crate.src}
# build outputs # build outputs