refactor(nix-compat/nar/reader): reuse prev_name allocation

We reuse the prev_name allocation for Entry, instead of allocating and
returning a separate Vec.

We encode the `prev_name: None` case as an empty vector, since we don't
allow empty names anyway, and the sorting is equivalent.

Change-Id: I975b37ff873805f5ff099bc82128706891052247
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11607
Reviewed-by: Brian Olsen <me@griff.name>
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
edef 2024-05-08 07:13:39 +00:00
parent 17a7dac94f
commit 31d73cd443
8 changed files with 73 additions and 50 deletions

View file

@ -1,4 +1,5 @@
use std::{ use std::{
mem::MaybeUninit,
pin::Pin, pin::Pin,
task::{self, Poll}, task::{self, Poll},
}; };
@ -110,11 +111,11 @@ pub struct DirReader<'a, 'r> {
reader: &'a mut Reader<'r>, reader: &'a mut Reader<'r>,
/// Previous directory entry name. /// Previous directory entry name.
/// We have to hang onto this to enforce name monotonicity. /// We have to hang onto this to enforce name monotonicity.
prev_name: Option<Vec<u8>>, prev_name: Vec<u8>,
} }
pub struct Entry<'a, 'r> { pub struct Entry<'a, 'r> {
pub name: Vec<u8>, pub name: &'a [u8],
pub node: Node<'a, 'r>, pub node: Node<'a, 'r>,
} }
@ -122,7 +123,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
fn new(reader: &'a mut Reader<'r>) -> Self { fn new(reader: &'a mut Reader<'r>) -> Self {
Self { Self {
reader, reader,
prev_name: None, prev_name: vec![],
} }
} }
@ -138,7 +139,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
pub async fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> { pub async fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> {
// COME FROM the previous iteration: if we've already read an entry, // COME FROM the previous iteration: if we've already read an entry,
// read its terminating TOK_PAR here. // read its terminating TOK_PAR here.
if self.prev_name.is_some() { if !self.prev_name.is_empty() {
read::token(self.reader, &nar::wire::TOK_PAR).await?; read::token(self.reader, &nar::wire::TOK_PAR).await?;
} }
@ -146,30 +147,26 @@ impl<'a, 'r> DirReader<'a, 'r> {
return Ok(None); return Ok(None);
} }
let name = wire::read_bytes(self.reader, 1..=nar::wire::MAX_NAME_LEN).await?; let mut name = [MaybeUninit::uninit(); nar::wire::MAX_NAME_LEN + 1];
let name =
wire::read_bytes_buf(self.reader, &mut name, 1..=nar::wire::MAX_NAME_LEN).await?;
if name.contains(&0) || name.contains(&b'/') || name == b"." || name == b".." { if name.contains(&0) || name.contains(&b'/') || name == b"." || name == b".." {
return Err(InvalidData.into()); return Err(InvalidData.into());
} }
// Enforce strict monotonicity of directory entry names. // Enforce strict monotonicity of directory entry names.
match &mut self.prev_name { if &self.prev_name[..] >= name {
None => { return Err(InvalidData.into());
self.prev_name = Some(name.clone());
}
Some(prev_name) => {
if *prev_name >= name {
return Err(InvalidData.into());
}
name[..].clone_into(prev_name);
}
} }
self.prev_name.clear();
self.prev_name.extend_from_slice(name);
read::token(self.reader, &nar::wire::TOK_NOD).await?; read::token(self.reader, &nar::wire::TOK_NOD).await?;
Ok(Some(Entry { Ok(Some(Entry {
name, name: &self.prev_name,
node: Node::new(self.reader).await?, node: Node::new(self.reader).await?,
})) }))
} }

View file

@ -80,7 +80,7 @@ async fn complicated() {
.expect("next must be some") .expect("next must be some")
.expect("must be some"); .expect("must be some");
assert_eq!(&b"keep"[..], &entry.name); assert_eq!(b"keep", entry.name);
match entry.node { match entry.node {
nar::reader::Node::Directory(mut subdir_reader) => { nar::reader::Node::Directory(mut subdir_reader) => {
@ -133,7 +133,7 @@ async fn file_read_abandoned() {
.expect("next must succeed") .expect("next must succeed")
.expect("must be some"); .expect("must be some");
assert_eq!(&b".keep"[..], &entry.name); assert_eq!(b".keep", entry.name);
// don't bother to finish reading it. // don't bother to finish reading it.
}; };
@ -183,7 +183,7 @@ async fn dir_read_abandoned() {
.expect("next must be some") .expect("next must be some")
.expect("must be some"); .expect("must be some");
assert_eq!(&b"keep"[..], &entry.name); assert_eq!(b"keep", entry.name);
match entry.node { match entry.node {
nar::reader::Node::Directory(_) => { nar::reader::Node::Directory(_) => {
@ -239,7 +239,7 @@ async fn dir_read_after_none() {
.expect("next must be some") .expect("next must be some")
.expect("must be some"); .expect("must be some");
assert_eq!(&b"keep"[..], &entry.name); assert_eq!(b"keep", entry.name);
match entry.node { match entry.node {
nar::reader::Node::Directory(mut subdir_reader) => { nar::reader::Node::Directory(mut subdir_reader) => {
@ -280,7 +280,7 @@ async fn dir_read_after_none() {
} }
async fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { async fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) {
assert_eq!(name.as_bytes(), &entry.name); assert_eq!(name.as_bytes(), entry.name);
match entry.node { match entry.node {
nar::reader::Node::File { nar::reader::Node::File {
@ -299,7 +299,7 @@ fn must_be_symlink(
exp_target: &'static str, exp_target: &'static str,
entry: nar::reader::Entry<'_, '_>, entry: nar::reader::Entry<'_, '_>,
) { ) {
assert_eq!(name.as_bytes(), &entry.name); assert_eq!(name.as_bytes(), entry.name);
match entry.node { match entry.node {
nar::reader::Node::Symlink { target } => { nar::reader::Node::Symlink { target } => {

View file

@ -264,11 +264,11 @@ pub struct DirReader<'a, 'r> {
reader: ArchiveReader<'a, 'r>, reader: ArchiveReader<'a, 'r>,
/// Previous directory entry name. /// Previous directory entry name.
/// We have to hang onto this to enforce name monotonicity. /// We have to hang onto this to enforce name monotonicity.
prev_name: Option<Vec<u8>>, prev_name: Vec<u8>,
} }
pub struct Entry<'a, 'r> { pub struct Entry<'a, 'r> {
pub name: Vec<u8>, pub name: &'a [u8],
pub node: Node<'a, 'r>, pub node: Node<'a, 'r>,
} }
@ -276,7 +276,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
fn new(reader: ArchiveReader<'a, 'r>) -> Self { fn new(reader: ArchiveReader<'a, 'r>) -> Self {
Self { Self {
reader, reader,
prev_name: None, prev_name: vec![],
} }
} }
@ -295,7 +295,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
// COME FROM the previous iteration: if we've already read an entry, // COME FROM the previous iteration: if we've already read an entry,
// read its terminating TOK_PAR here. // read its terminating TOK_PAR here.
if self.prev_name.is_some() { if !self.prev_name.is_empty() {
try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR));
} }
@ -306,9 +306,10 @@ impl<'a, 'r> DirReader<'a, 'r> {
return Ok(None); return Ok(None);
} }
let mut name = [0; wire::MAX_NAME_LEN + 1];
let name = try_or_poison!( let name = try_or_poison!(
self.reader, self.reader,
read::bytes(self.reader.inner, wire::MAX_NAME_LEN) read::bytes_buf(self.reader.inner, &mut name, wire::MAX_NAME_LEN)
); );
if name.is_empty() if name.is_empty()
@ -322,24 +323,18 @@ impl<'a, 'r> DirReader<'a, 'r> {
} }
// Enforce strict monotonicity of directory entry names. // Enforce strict monotonicity of directory entry names.
match &mut self.prev_name { if &self.prev_name[..] >= name {
None => { self.reader.status.poison();
self.prev_name = Some(name.clone()); return Err(InvalidData.into());
}
Some(prev_name) => {
if *prev_name >= name {
self.reader.status.poison();
return Err(InvalidData.into());
}
name[..].clone_into(prev_name);
}
} }
self.prev_name.clear();
self.prev_name.extend_from_slice(name);
try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD)); try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD));
Ok(Some(Entry { Ok(Some(Entry {
name, name: &self.prev_name,
// Don't need to worry about poisoning here: Node::new will do it for us if needed // Don't need to worry about poisoning here: Node::new will do it for us if needed
node: Node::new(self.reader.child())?, node: Node::new(self.reader.child())?,
})) }))

View file

@ -15,6 +15,38 @@ pub fn u64(reader: &mut Reader) -> io::Result<u64> {
Ok(u64::from_le_bytes(buf)) Ok(u64::from_le_bytes(buf))
} }
/// Consume a byte string from the reader into a provided buffer,
/// returning the data bytes.
pub fn bytes_buf<'a, const N: usize>(
reader: &mut Reader,
buf: &'a mut [u8; N],
max_len: usize,
) -> io::Result<&'a [u8]> {
assert_eq!(N % 8, 0);
assert!(max_len <= N);
// read the length, and reject excessively large values
let len = self::u64(reader)?;
if len > max_len as u64 {
return Err(InvalidData.into());
}
// we know the length fits in a usize now
let len = len as usize;
// read the data and padding into a buffer
let buf_len = (len + 7) & !7;
reader.read_exact(&mut buf[..buf_len])?;
// verify that the padding is all zeroes
for &b in &buf[len..buf_len] {
if b != 0 {
return Err(InvalidData.into());
}
}
Ok(&buf[..len])
}
/// Consume a byte string of up to `max_len` bytes from the reader. /// Consume a byte string of up to `max_len` bytes from the reader.
pub fn bytes(reader: &mut Reader, max_len: usize) -> io::Result<Vec<u8>> { pub fn bytes(reader: &mut Reader, max_len: usize) -> io::Result<Vec<u8>> {
assert!(max_len <= isize::MAX as usize); assert!(max_len <= isize::MAX as usize);

View file

@ -71,7 +71,7 @@ fn complicated() {
.expect("next must be some") .expect("next must be some")
.expect("must be some"); .expect("must be some");
assert_eq!(&b"keep"[..], &entry.name); assert_eq!(b"keep", entry.name);
match entry.node { match entry.node {
nar::reader::Node::Directory(mut subdir_reader) => { nar::reader::Node::Directory(mut subdir_reader) => {
@ -117,7 +117,7 @@ fn file_read_abandoned() {
.expect("next must succeed") .expect("next must succeed")
.expect("must be some"); .expect("must be some");
assert_eq!(&b".keep"[..], &entry.name); assert_eq!(b".keep", entry.name);
// don't bother to finish reading it. // don't bother to finish reading it.
}; };
@ -162,7 +162,7 @@ fn dir_read_abandoned() {
.expect("next must be some") .expect("next must be some")
.expect("must be some"); .expect("must be some");
assert_eq!(&b"keep"[..], &entry.name); assert_eq!(b"keep", entry.name);
match entry.node { match entry.node {
nar::reader::Node::Directory(_) => { nar::reader::Node::Directory(_) => {
@ -213,7 +213,7 @@ fn dir_read_after_none() {
.expect("next must be some") .expect("next must be some")
.expect("must be some"); .expect("must be some");
assert_eq!(&b"keep"[..], &entry.name); assert_eq!(b"keep", entry.name);
match entry.node { match entry.node {
nar::reader::Node::Directory(mut subdir_reader) => { nar::reader::Node::Directory(mut subdir_reader) => {
@ -248,7 +248,7 @@ fn dir_read_after_none() {
} }
fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) {
assert_eq!(name.as_bytes(), &entry.name); assert_eq!(name.as_bytes(), entry.name);
match entry.node { match entry.node {
nar::reader::Node::File { nar::reader::Node::File {
@ -267,7 +267,7 @@ fn must_be_symlink(
exp_target: &'static str, exp_target: &'static str,
entry: nar::reader::Entry<'_, '_>, entry: nar::reader::Entry<'_, '_>,
) { ) {
assert_eq!(name.as_bytes(), &entry.name); assert_eq!(name.as_bytes(), entry.name);
match entry.node { match entry.node {
nar::reader::Node::Symlink { target } => { nar::reader::Node::Symlink { target } => {

View file

@ -82,7 +82,6 @@ where
Ok(buf) Ok(buf)
} }
#[allow(dead_code)]
pub(crate) async fn read_bytes_buf<'a, const N: usize, R: ?Sized>( pub(crate) async fn read_bytes_buf<'a, const N: usize, R: ?Sized>(
reader: &mut R, reader: &mut R,
buf: &'a mut [MaybeUninit<u8>; N], buf: &'a mut [MaybeUninit<u8>; N],

View file

@ -87,7 +87,7 @@ where
let mut path = path.clone(); let mut path = path.clone();
// valid NAR names are valid castore names // valid NAR names are valid castore names
path.try_push(&entry.name) path.try_push(entry.name)
.expect("Tvix bug: failed to join name"); .expect("Tvix bug: failed to join name");
let entry = Box::pin(produce_nar_inner( let entry = Box::pin(produce_nar_inner(

View file

@ -147,7 +147,7 @@ fn ingest(node: nar::Node, name: Vec<u8>, avg_chunk_size: u32) -> Result<proto::
let mut symlinks = vec![]; let mut symlinks = vec![];
while let Some(node) = reader.next()? { while let Some(node) = reader.next()? {
match ingest(node.node, node.name, avg_chunk_size)? { match ingest(node.node, node.name.to_owned(), avg_chunk_size)? {
proto::path::Node::Directory(node) => { proto::path::Node::Directory(node) => {
directories.push(node); directories.push(node);
} }