refactor(nix-compat/nar/reader): reuse prev_name allocation
We reuse the prev_name allocation for Entry, instead of allocating and
returning a separate Vec. We encode the `prev_name: None` case as an
empty vector, since we don't allow empty names anyway, and the sorting
is equivalent.

Change-Id: I975b37ff873805f5ff099bc82128706891052247
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11607
Reviewed-by: Brian Olsen <me@griff.name>
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
parent 17a7dac94f
commit 31d73cd443
8 changed files with 73 additions and 50 deletions
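The core of the change is a small pattern: because empty entry names are rejected, an empty `prev_name` buffer can stand in for "no previous entry", and that single buffer is reused for every subsequent name instead of allocating a fresh `Vec<u8>` per entry. A minimal, self-contained sketch of that pattern follows (the `NameChecker` type and `accept` method are hypothetical illustrations, not part of nix-compat):

// Hypothetical illustration of the sentinel + buffer-reuse pattern used in
// this change; not part of the nix-compat API.
struct NameChecker {
    // Empty means "no entry seen yet"; empty names are rejected up front,
    // so the sentinel can never collide with a real previous name.
    prev_name: Vec<u8>,
}

impl NameChecker {
    fn new() -> Self {
        Self { prev_name: vec![] }
    }

    // Accepts `name` only if it is non-empty and strictly greater than the
    // previously accepted name, then reuses the buffer and returns a borrow
    // of the retained copy (mirroring the borrowed `Entry::name`).
    fn accept(&mut self, name: &[u8]) -> Result<&[u8], &'static str> {
        if name.is_empty() {
            return Err("empty names are not allowed");
        }
        // An empty prev_name compares less than any non-empty name, so the
        // first entry always passes; later entries must be strictly greater.
        if &self.prev_name[..] >= name {
            return Err("names must be strictly increasing");
        }
        self.prev_name.clear();
        self.prev_name.extend_from_slice(name);
        Ok(&self.prev_name)
    }
}

fn main() {
    let mut checker = NameChecker::new();
    assert!(checker.accept(b"bar").is_ok());
    assert!(checker.accept(b"foo").is_ok());
    assert!(checker.accept(b"foo").is_err()); // not strictly greater
    assert!(checker.accept(b"").is_err()); // empty names rejected
}
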
@@ -1,4 +1,5 @@
 use std::{
+    mem::MaybeUninit,
     pin::Pin,
     task::{self, Poll},
 };
@@ -110,11 +111,11 @@ pub struct DirReader<'a, 'r> {
     reader: &'a mut Reader<'r>,
     /// Previous directory entry name.
     /// We have to hang onto this to enforce name monotonicity.
-    prev_name: Option<Vec<u8>>,
+    prev_name: Vec<u8>,
 }

 pub struct Entry<'a, 'r> {
-    pub name: Vec<u8>,
+    pub name: &'a [u8],
     pub node: Node<'a, 'r>,
 }

@@ -122,7 +123,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
     fn new(reader: &'a mut Reader<'r>) -> Self {
         Self {
             reader,
-            prev_name: None,
+            prev_name: vec![],
         }
     }

@@ -138,7 +139,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
     pub async fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> {
         // COME FROM the previous iteration: if we've already read an entry,
         // read its terminating TOK_PAR here.
-        if self.prev_name.is_some() {
+        if !self.prev_name.is_empty() {
             read::token(self.reader, &nar::wire::TOK_PAR).await?;
         }

@@ -146,30 +147,26 @@ impl<'a, 'r> DirReader<'a, 'r> {
             return Ok(None);
         }

-        let name = wire::read_bytes(self.reader, 1..=nar::wire::MAX_NAME_LEN).await?;
+        let mut name = [MaybeUninit::uninit(); nar::wire::MAX_NAME_LEN + 1];
+        let name =
+            wire::read_bytes_buf(self.reader, &mut name, 1..=nar::wire::MAX_NAME_LEN).await?;

         if name.contains(&0) || name.contains(&b'/') || name == b"." || name == b".." {
             return Err(InvalidData.into());
         }

         // Enforce strict monotonicity of directory entry names.
-        match &mut self.prev_name {
-            None => {
-                self.prev_name = Some(name.clone());
-            }
-            Some(prev_name) => {
-                if *prev_name >= name {
-                    return Err(InvalidData.into());
-                }
-
-                name[..].clone_into(prev_name);
-            }
+        if &self.prev_name[..] >= name {
+            return Err(InvalidData.into());
         }

+        self.prev_name.clear();
+        self.prev_name.extend_from_slice(name);
+
         read::token(self.reader, &nar::wire::TOK_NOD).await?;

         Ok(Some(Entry {
-            name,
+            name: &self.prev_name,
             node: Node::new(self.reader).await?,
         }))
     }
@@ -80,7 +80,7 @@ async fn complicated() {
                 .expect("next must be some")
                 .expect("must be some");

-            assert_eq!(&b"keep"[..], &entry.name);
+            assert_eq!(b"keep", entry.name);

             match entry.node {
                 nar::reader::Node::Directory(mut subdir_reader) => {
@@ -133,7 +133,7 @@ async fn file_read_abandoned() {
                 .expect("next must succeed")
                 .expect("must be some");

-            assert_eq!(&b".keep"[..], &entry.name);
+            assert_eq!(b".keep", entry.name);
             // don't bother to finish reading it.
         };

@@ -183,7 +183,7 @@ async fn dir_read_abandoned() {
                 .expect("next must be some")
                 .expect("must be some");

-            assert_eq!(&b"keep"[..], &entry.name);
+            assert_eq!(b"keep", entry.name);

             match entry.node {
                 nar::reader::Node::Directory(_) => {
@@ -239,7 +239,7 @@ async fn dir_read_after_none() {
                 .expect("next must be some")
                 .expect("must be some");

-            assert_eq!(&b"keep"[..], &entry.name);
+            assert_eq!(b"keep", entry.name);

             match entry.node {
                 nar::reader::Node::Directory(mut subdir_reader) => {
@@ -280,7 +280,7 @@ async fn dir_read_after_none() {
 }

 async fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) {
-    assert_eq!(name.as_bytes(), &entry.name);
+    assert_eq!(name.as_bytes(), entry.name);

     match entry.node {
         nar::reader::Node::File {
@@ -299,7 +299,7 @@ fn must_be_symlink(
     exp_target: &'static str,
     entry: nar::reader::Entry<'_, '_>,
 ) {
-    assert_eq!(name.as_bytes(), &entry.name);
+    assert_eq!(name.as_bytes(), entry.name);

     match entry.node {
         nar::reader::Node::Symlink { target } => {
@@ -264,11 +264,11 @@ pub struct DirReader<'a, 'r> {
     reader: ArchiveReader<'a, 'r>,
     /// Previous directory entry name.
     /// We have to hang onto this to enforce name monotonicity.
-    prev_name: Option<Vec<u8>>,
+    prev_name: Vec<u8>,
 }

 pub struct Entry<'a, 'r> {
-    pub name: Vec<u8>,
+    pub name: &'a [u8],
     pub node: Node<'a, 'r>,
 }

@@ -276,7 +276,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
     fn new(reader: ArchiveReader<'a, 'r>) -> Self {
         Self {
             reader,
-            prev_name: None,
+            prev_name: vec![],
         }
     }

@@ -295,7 +295,7 @@ impl<'a, 'r> DirReader<'a, 'r> {

         // COME FROM the previous iteration: if we've already read an entry,
         // read its terminating TOK_PAR here.
-        if self.prev_name.is_some() {
+        if !self.prev_name.is_empty() {
             try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR));
         }

@@ -306,9 +306,10 @@ impl<'a, 'r> DirReader<'a, 'r> {
             return Ok(None);
         }

+        let mut name = [0; wire::MAX_NAME_LEN + 1];
         let name = try_or_poison!(
             self.reader,
-            read::bytes(self.reader.inner, wire::MAX_NAME_LEN)
+            read::bytes_buf(self.reader.inner, &mut name, wire::MAX_NAME_LEN)
         );

         if name.is_empty()
@@ -322,24 +323,18 @@ impl<'a, 'r> DirReader<'a, 'r> {
         }

         // Enforce strict monotonicity of directory entry names.
-        match &mut self.prev_name {
-            None => {
-                self.prev_name = Some(name.clone());
-            }
-            Some(prev_name) => {
-                if *prev_name >= name {
-                    self.reader.status.poison();
-                    return Err(InvalidData.into());
-                }
-
-                name[..].clone_into(prev_name);
-            }
+        if &self.prev_name[..] >= name {
+            self.reader.status.poison();
+            return Err(InvalidData.into());
         }

+        self.prev_name.clear();
+        self.prev_name.extend_from_slice(name);
+
         try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD));

         Ok(Some(Entry {
-            name,
+            name: &self.prev_name,
             // Don't need to worry about poisoning here: Node::new will do it for us if needed
             node: Node::new(self.reader.child())?,
         }))
@@ -15,6 +15,38 @@ pub fn u64(reader: &mut Reader) -> io::Result<u64> {
     Ok(u64::from_le_bytes(buf))
 }

+/// Consume a byte string from the reader into a provided buffer,
+/// returning the data bytes.
+pub fn bytes_buf<'a, const N: usize>(
+    reader: &mut Reader,
+    buf: &'a mut [u8; N],
+    max_len: usize,
+) -> io::Result<&'a [u8]> {
+    assert_eq!(N % 8, 0);
+    assert!(max_len <= N);
+
+    // read the length, and reject excessively large values
+    let len = self::u64(reader)?;
+    if len > max_len as u64 {
+        return Err(InvalidData.into());
+    }
+    // we know the length fits in a usize now
+    let len = len as usize;
+
+    // read the data and padding into a buffer
+    let buf_len = (len + 7) & !7;
+    reader.read_exact(&mut buf[..buf_len])?;
+
+    // verify that the padding is all zeroes
+    for &b in &buf[len..buf_len] {
+        if b != 0 {
+            return Err(InvalidData.into());
+        }
+    }
+
+    Ok(&buf[..len])
+}
+
 /// Consume a byte string of up to `max_len` bytes from the reader.
 pub fn bytes(reader: &mut Reader, max_len: usize) -> io::Result<Vec<u8>> {
     assert!(max_len <= isize::MAX as usize);
@@ -71,7 +71,7 @@ fn complicated() {
                 .expect("next must be some")
                 .expect("must be some");

-            assert_eq!(&b"keep"[..], &entry.name);
+            assert_eq!(b"keep", entry.name);

             match entry.node {
                 nar::reader::Node::Directory(mut subdir_reader) => {
@@ -117,7 +117,7 @@ fn file_read_abandoned() {
                 .expect("next must succeed")
                 .expect("must be some");

-            assert_eq!(&b".keep"[..], &entry.name);
+            assert_eq!(b".keep", entry.name);
             // don't bother to finish reading it.
         };

@@ -162,7 +162,7 @@ fn dir_read_abandoned() {
                 .expect("next must be some")
                 .expect("must be some");

-            assert_eq!(&b"keep"[..], &entry.name);
+            assert_eq!(b"keep", entry.name);

             match entry.node {
                 nar::reader::Node::Directory(_) => {
@@ -213,7 +213,7 @@ fn dir_read_after_none() {
                 .expect("next must be some")
                 .expect("must be some");

-            assert_eq!(&b"keep"[..], &entry.name);
+            assert_eq!(b"keep", entry.name);

             match entry.node {
                 nar::reader::Node::Directory(mut subdir_reader) => {
@@ -248,7 +248,7 @@ fn dir_read_after_none() {
 }

 fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) {
-    assert_eq!(name.as_bytes(), &entry.name);
+    assert_eq!(name.as_bytes(), entry.name);

     match entry.node {
         nar::reader::Node::File {
@@ -267,7 +267,7 @@ fn must_be_symlink(
     exp_target: &'static str,
     entry: nar::reader::Entry<'_, '_>,
 ) {
-    assert_eq!(name.as_bytes(), &entry.name);
+    assert_eq!(name.as_bytes(), entry.name);

     match entry.node {
         nar::reader::Node::Symlink { target } => {
@@ -82,7 +82,6 @@ where
     Ok(buf)
 }

-#[allow(dead_code)]
 pub(crate) async fn read_bytes_buf<'a, const N: usize, R: ?Sized>(
     reader: &mut R,
     buf: &'a mut [MaybeUninit<u8>; N],
@@ -87,7 +87,7 @@ where
                 let mut path = path.clone();

                 // valid NAR names are valid castore names
-                path.try_push(&entry.name)
+                path.try_push(entry.name)
                     .expect("Tvix bug: failed to join name");

                 let entry = Box::pin(produce_nar_inner(
@@ -147,7 +147,7 @@ fn ingest(node: nar::Node, name: Vec<u8>, avg_chunk_size: u32) -> Result<proto::
             let mut symlinks = vec![];

             while let Some(node) = reader.next()? {
-                match ingest(node.node, node.name, avg_chunk_size)? {
+                match ingest(node.node, node.name.to_owned(), avg_chunk_size)? {
                     proto::path::Node::Directory(node) => {
                         directories.push(node);
                     }