From 31d73cd443311c5ef4bfbd8c2b66e3b9691340ec Mon Sep 17 00:00:00 2001 From: edef Date: Wed, 8 May 2024 07:13:39 +0000 Subject: [PATCH] refactor(nix-compat/nar/reader): reuse prev_name allocation We reuse the prev_name allocation for Entry, instead of allocating and returning a separate Vec. We encode the `prev_name: None` case as an empty vector, since we don't allow empty names anyway, and the sorting is equivalent. Change-Id: I975b37ff873805f5ff099bc82128706891052247 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11607 Reviewed-by: Brian Olsen Reviewed-by: flokli Tested-by: BuildkiteCI --- tvix/nix-compat/src/nar/reader/async/mod.rs | 31 +++++++++---------- tvix/nix-compat/src/nar/reader/async/test.rs | 12 ++++---- tvix/nix-compat/src/nar/reader/mod.rs | 31 ++++++++----------- tvix/nix-compat/src/nar/reader/read.rs | 32 ++++++++++++++++++++ tvix/nix-compat/src/nar/reader/test.rs | 12 ++++---- tvix/nix-compat/src/wire/bytes/mod.rs | 1 - tvix/store/src/nar/import.rs | 2 +- tvix/tools/crunch-v2/src/main.rs | 2 +- 8 files changed, 73 insertions(+), 50 deletions(-) diff --git a/tvix/nix-compat/src/nar/reader/async/mod.rs b/tvix/nix-compat/src/nar/reader/async/mod.rs index 048283a67..0808fba38 100644 --- a/tvix/nix-compat/src/nar/reader/async/mod.rs +++ b/tvix/nix-compat/src/nar/reader/async/mod.rs @@ -1,4 +1,5 @@ use std::{ + mem::MaybeUninit, pin::Pin, task::{self, Poll}, }; @@ -110,11 +111,11 @@ pub struct DirReader<'a, 'r> { reader: &'a mut Reader<'r>, /// Previous directory entry name. /// We have to hang onto this to enforce name monotonicity. - prev_name: Option>, + prev_name: Vec, } pub struct Entry<'a, 'r> { - pub name: Vec, + pub name: &'a [u8], pub node: Node<'a, 'r>, } @@ -122,7 +123,7 @@ impl<'a, 'r> DirReader<'a, 'r> { fn new(reader: &'a mut Reader<'r>) -> Self { Self { reader, - prev_name: None, + prev_name: vec![], } } @@ -138,7 +139,7 @@ impl<'a, 'r> DirReader<'a, 'r> { pub async fn next(&mut self) -> io::Result>> { // COME FROM the previous iteration: if we've already read an entry, // read its terminating TOK_PAR here. - if self.prev_name.is_some() { + if !self.prev_name.is_empty() { read::token(self.reader, &nar::wire::TOK_PAR).await?; } @@ -146,30 +147,26 @@ impl<'a, 'r> DirReader<'a, 'r> { return Ok(None); } - let name = wire::read_bytes(self.reader, 1..=nar::wire::MAX_NAME_LEN).await?; + let mut name = [MaybeUninit::uninit(); nar::wire::MAX_NAME_LEN + 1]; + let name = + wire::read_bytes_buf(self.reader, &mut name, 1..=nar::wire::MAX_NAME_LEN).await?; if name.contains(&0) || name.contains(&b'/') || name == b"." || name == b".." { return Err(InvalidData.into()); } // Enforce strict monotonicity of directory entry names. - match &mut self.prev_name { - None => { - self.prev_name = Some(name.clone()); - } - Some(prev_name) => { - if *prev_name >= name { - return Err(InvalidData.into()); - } - - name[..].clone_into(prev_name); - } + if &self.prev_name[..] >= name { + return Err(InvalidData.into()); } + self.prev_name.clear(); + self.prev_name.extend_from_slice(name); + read::token(self.reader, &nar::wire::TOK_NOD).await?; Ok(Some(Entry { - name, + name: &self.prev_name, node: Node::new(self.reader).await?, })) } diff --git a/tvix/nix-compat/src/nar/reader/async/test.rs b/tvix/nix-compat/src/nar/reader/async/test.rs index 58bb651fc..7bc1f8942 100644 --- a/tvix/nix-compat/src/nar/reader/async/test.rs +++ b/tvix/nix-compat/src/nar/reader/async/test.rs @@ -80,7 +80,7 @@ async fn complicated() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(mut subdir_reader) => { @@ -133,7 +133,7 @@ async fn file_read_abandoned() { .expect("next must succeed") .expect("must be some"); - assert_eq!(&b".keep"[..], &entry.name); + assert_eq!(b".keep", entry.name); // don't bother to finish reading it. }; @@ -183,7 +183,7 @@ async fn dir_read_abandoned() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(_) => { @@ -239,7 +239,7 @@ async fn dir_read_after_none() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(mut subdir_reader) => { @@ -280,7 +280,7 @@ async fn dir_read_after_none() { } async fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { - assert_eq!(name.as_bytes(), &entry.name); + assert_eq!(name.as_bytes(), entry.name); match entry.node { nar::reader::Node::File { @@ -299,7 +299,7 @@ fn must_be_symlink( exp_target: &'static str, entry: nar::reader::Entry<'_, '_>, ) { - assert_eq!(name.as_bytes(), &entry.name); + assert_eq!(name.as_bytes(), entry.name); match entry.node { nar::reader::Node::Symlink { target } => { diff --git a/tvix/nix-compat/src/nar/reader/mod.rs b/tvix/nix-compat/src/nar/reader/mod.rs index bddf17508..9e9237ead 100644 --- a/tvix/nix-compat/src/nar/reader/mod.rs +++ b/tvix/nix-compat/src/nar/reader/mod.rs @@ -264,11 +264,11 @@ pub struct DirReader<'a, 'r> { reader: ArchiveReader<'a, 'r>, /// Previous directory entry name. /// We have to hang onto this to enforce name monotonicity. - prev_name: Option>, + prev_name: Vec, } pub struct Entry<'a, 'r> { - pub name: Vec, + pub name: &'a [u8], pub node: Node<'a, 'r>, } @@ -276,7 +276,7 @@ impl<'a, 'r> DirReader<'a, 'r> { fn new(reader: ArchiveReader<'a, 'r>) -> Self { Self { reader, - prev_name: None, + prev_name: vec![], } } @@ -295,7 +295,7 @@ impl<'a, 'r> DirReader<'a, 'r> { // COME FROM the previous iteration: if we've already read an entry, // read its terminating TOK_PAR here. - if self.prev_name.is_some() { + if !self.prev_name.is_empty() { try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); } @@ -306,9 +306,10 @@ impl<'a, 'r> DirReader<'a, 'r> { return Ok(None); } + let mut name = [0; wire::MAX_NAME_LEN + 1]; let name = try_or_poison!( self.reader, - read::bytes(self.reader.inner, wire::MAX_NAME_LEN) + read::bytes_buf(self.reader.inner, &mut name, wire::MAX_NAME_LEN) ); if name.is_empty() @@ -322,24 +323,18 @@ impl<'a, 'r> DirReader<'a, 'r> { } // Enforce strict monotonicity of directory entry names. - match &mut self.prev_name { - None => { - self.prev_name = Some(name.clone()); - } - Some(prev_name) => { - if *prev_name >= name { - self.reader.status.poison(); - return Err(InvalidData.into()); - } - - name[..].clone_into(prev_name); - } + if &self.prev_name[..] >= name { + self.reader.status.poison(); + return Err(InvalidData.into()); } + self.prev_name.clear(); + self.prev_name.extend_from_slice(name); + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD)); Ok(Some(Entry { - name, + name: &self.prev_name, // Don't need to worry about poisoning here: Node::new will do it for us if needed node: Node::new(self.reader.child())?, })) diff --git a/tvix/nix-compat/src/nar/reader/read.rs b/tvix/nix-compat/src/nar/reader/read.rs index 1ce161376..9938581f2 100644 --- a/tvix/nix-compat/src/nar/reader/read.rs +++ b/tvix/nix-compat/src/nar/reader/read.rs @@ -15,6 +15,38 @@ pub fn u64(reader: &mut Reader) -> io::Result { Ok(u64::from_le_bytes(buf)) } +/// Consume a byte string from the reader into a provided buffer, +/// returning the data bytes. +pub fn bytes_buf<'a, const N: usize>( + reader: &mut Reader, + buf: &'a mut [u8; N], + max_len: usize, +) -> io::Result<&'a [u8]> { + assert_eq!(N % 8, 0); + assert!(max_len <= N); + + // read the length, and reject excessively large values + let len = self::u64(reader)?; + if len > max_len as u64 { + return Err(InvalidData.into()); + } + // we know the length fits in a usize now + let len = len as usize; + + // read the data and padding into a buffer + let buf_len = (len + 7) & !7; + reader.read_exact(&mut buf[..buf_len])?; + + // verify that the padding is all zeroes + for &b in &buf[len..buf_len] { + if b != 0 { + return Err(InvalidData.into()); + } + } + + Ok(&buf[..len]) +} + /// Consume a byte string of up to `max_len` bytes from the reader. pub fn bytes(reader: &mut Reader, max_len: usize) -> io::Result> { assert!(max_len <= isize::MAX as usize); diff --git a/tvix/nix-compat/src/nar/reader/test.rs b/tvix/nix-compat/src/nar/reader/test.rs index 02dc4767c..63e4fb289 100644 --- a/tvix/nix-compat/src/nar/reader/test.rs +++ b/tvix/nix-compat/src/nar/reader/test.rs @@ -71,7 +71,7 @@ fn complicated() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(mut subdir_reader) => { @@ -117,7 +117,7 @@ fn file_read_abandoned() { .expect("next must succeed") .expect("must be some"); - assert_eq!(&b".keep"[..], &entry.name); + assert_eq!(b".keep", entry.name); // don't bother to finish reading it. }; @@ -162,7 +162,7 @@ fn dir_read_abandoned() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(_) => { @@ -213,7 +213,7 @@ fn dir_read_after_none() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(mut subdir_reader) => { @@ -248,7 +248,7 @@ fn dir_read_after_none() { } fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { - assert_eq!(name.as_bytes(), &entry.name); + assert_eq!(name.as_bytes(), entry.name); match entry.node { nar::reader::Node::File { @@ -267,7 +267,7 @@ fn must_be_symlink( exp_target: &'static str, entry: nar::reader::Entry<'_, '_>, ) { - assert_eq!(name.as_bytes(), &entry.name); + assert_eq!(name.as_bytes(), entry.name); match entry.node { nar::reader::Node::Symlink { target } => { diff --git a/tvix/nix-compat/src/wire/bytes/mod.rs b/tvix/nix-compat/src/wire/bytes/mod.rs index f48b5000a..2ed071e37 100644 --- a/tvix/nix-compat/src/wire/bytes/mod.rs +++ b/tvix/nix-compat/src/wire/bytes/mod.rs @@ -82,7 +82,6 @@ where Ok(buf) } -#[allow(dead_code)] pub(crate) async fn read_bytes_buf<'a, const N: usize, R: ?Sized>( reader: &mut R, buf: &'a mut [MaybeUninit; N], diff --git a/tvix/store/src/nar/import.rs b/tvix/store/src/nar/import.rs index 7a9f1231e..3d7c50014 100644 --- a/tvix/store/src/nar/import.rs +++ b/tvix/store/src/nar/import.rs @@ -87,7 +87,7 @@ where let mut path = path.clone(); // valid NAR names are valid castore names - path.try_push(&entry.name) + path.try_push(entry.name) .expect("Tvix bug: failed to join name"); let entry = Box::pin(produce_nar_inner( diff --git a/tvix/tools/crunch-v2/src/main.rs b/tvix/tools/crunch-v2/src/main.rs index a5d538f6b..5be8c28e2 100644 --- a/tvix/tools/crunch-v2/src/main.rs +++ b/tvix/tools/crunch-v2/src/main.rs @@ -147,7 +147,7 @@ fn ingest(node: nar::Node, name: Vec, avg_chunk_size: u32) -> Result { directories.push(node); }