test(tvix/nix-compat): add debug assertions for nar reader

Adds debug assertions to ensure that the reader's variants are upheld.
If any of the following happens, then the currently in use reader must
be abandoned:
  * A directory or file reader encounters an error
  * A directory or file reader is dropped before being fully read from
Additionally, a directory reader must not be read from again after it
has returned None.
These checks are only used when debug_assertions are on, so vanish in
release mode.

Resolves two TODO items added by edef

Change-Id: I27bd9643a632798db5351957506c166b9bd5ca4e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11508
Reviewed-by: flokli <flokli@flokli.de>
Reviewed-by: edef <edef@edef.eu>
Autosubmit: Aria Shrimpton <me@aria.rip>
Tested-by: BuildkiteCI
This commit is contained in:
tcmal 2024-04-24 13:59:11 +01:00 committed by clbot
parent f0e428db75
commit 671bdff5dc
2 changed files with 425 additions and 83 deletions

View file

@ -19,10 +19,49 @@ mod test;
pub type Reader<'a> = dyn BufRead + Send + 'a;
struct ArchiveReader<'a, 'r> {
inner: &'a mut Reader<'r>,
/// In debug mode, also track when we need to abandon this archive reader.
/// The archive reader must be abandoned when:
/// * An error is encountered at any point
/// * A file or directory reader is dropped before being read entirely.
/// All of these checks vanish in release mode.
#[cfg(debug_assertions)]
status: ArchiveReaderStatus<'a>,
}
macro_rules! poison {
($it:expr) => {
#[cfg(debug_assertions)]
{
$it.status.poison();
}
};
}
macro_rules! try_or_poison {
($it:expr, $ex:expr) => {
match $ex {
Ok(x) => x,
Err(e) => {
poison!($it);
return Err(e.into());
}
}
};
}
/// Start reading a NAR file from `reader`.
pub fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> {
read::token(reader, &wire::TOK_NAR)?;
Node::new(reader)
Node::new(ArchiveReader {
inner: reader,
#[cfg(debug_assertions)]
status: ArchiveReaderStatus::StackTop {
poisoned: false,
ready: true,
},
})
}
pub enum Node<'a, 'r> {
@ -41,21 +80,28 @@ impl<'a, 'r> Node<'a, 'r> {
///
/// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink],
/// but is otherwise left to [DirReader] or [FileReader].
fn new(reader: &'a mut Reader<'r>) -> io::Result<Self> {
Ok(match read::tag(reader)? {
#[allow(unused_mut)] // due to debug_assertions code
fn new(mut reader: ArchiveReader<'a, 'r>) -> io::Result<Self> {
Ok(match read::tag(reader.inner)? {
wire::Node::Sym => {
let target = read::bytes(reader, wire::MAX_TARGET_LEN)?;
let target =
try_or_poison!(reader, read::bytes(reader.inner, wire::MAX_TARGET_LEN));
if target.is_empty() || target.contains(&0) {
poison!(reader);
return Err(InvalidData.into());
}
read::token(reader, &wire::TOK_PAR)?;
try_or_poison!(reader, read::token(reader.inner, &wire::TOK_PAR));
#[cfg(debug_assertions)]
{
reader.status.ready_parent(); // Immediately allow reading from parent again
}
Node::Symlink { target }
}
tag @ (wire::Node::Reg | wire::Node::Exe) => {
let len = read::u64(reader)?;
let len = try_or_poison!(&mut reader, read::u64(reader.inner));
Node::File {
executable: tag == wire::Node::Exe,
@ -74,10 +120,8 @@ impl<'a, 'r> Node<'a, 'r> {
/// * You must abandon the entire archive reader upon the first error.
///
/// It's fine to read exactly `reader.len()` bytes without ever seeing an explicit EOF.
///
/// TODO(edef): enforce these in `#[cfg(debug_assertions)]`
pub struct FileReader<'a, 'r> {
reader: &'a mut Reader<'r>,
reader: ArchiveReader<'a, 'r>,
len: u64,
/// Truncated original file length for padding computation.
/// We only care about the 3 least significant bits; semantically, this is a u3.
@ -87,12 +131,17 @@ pub struct FileReader<'a, 'r> {
impl<'a, 'r> FileReader<'a, 'r> {
/// Instantiate a new reader, starting after [wire::TOK_REG] or [wire::TOK_EXE].
/// We handle the terminating [wire::TOK_PAR] on semantic EOF.
fn new(reader: &'a mut Reader<'r>, len: u64) -> io::Result<Self> {
#[allow(unused_mut)] // due to debug_assertions code
fn new(mut reader: ArchiveReader<'a, 'r>, len: u64) -> io::Result<Self> {
// For zero-length files, we have to read the terminating TOK_PAR
// immediately, since FileReader::read may never be called; we've
// already reached semantic EOF by definition.
if len == 0 {
read::token(reader, &wire::TOK_PAR)?;
read::token(reader.inner, &wire::TOK_PAR)?;
#[cfg(debug_assertions)]
{
reader.status.ready_parent();
}
}
Ok(Self {
@ -121,9 +170,12 @@ impl FileReader<'_, '_> {
return Ok(&[]);
}
let mut buf = self.reader.fill_buf()?;
self.reader.check_correct();
let mut buf = try_or_poison!(self.reader, self.reader.inner.fill_buf());
if buf.is_empty() {
poison!(self.reader);
return Err(UnexpectedEof.into());
}
@ -141,12 +193,14 @@ impl FileReader<'_, '_> {
return Ok(());
}
self.reader.check_correct();
self.len = self
.len
.checked_sub(n as u64)
.expect("consumed bytes past EOF");
self.reader.consume(n);
self.reader.inner.consume(n);
if self.is_empty() {
self.finish()?;
@ -159,7 +213,7 @@ impl FileReader<'_, '_> {
pub fn copy(&mut self, mut dst: impl Write) -> io::Result<()> {
while !self.is_empty() {
let buf = self.fill_buf()?;
let n = dst.write(buf)?;
let n = try_or_poison!(self.reader, dst.write(buf));
self.consume(n)?;
}
@ -173,14 +227,17 @@ impl Read for FileReader<'_, '_> {
return Ok(0);
}
self.reader.check_correct();
if buf.len() as u64 > self.len {
buf = &mut buf[..self.len as usize];
}
let n = self.reader.read(buf)?;
let n = try_or_poison!(self.reader, self.reader.inner.read(buf));
self.len -= n as u64;
if n == 0 {
poison!(self.reader);
return Err(UnexpectedEof.into());
}
@ -200,21 +257,30 @@ impl FileReader<'_, '_> {
if pad != 0 {
let mut buf = [0; 8];
self.reader.read_exact(&mut buf[pad..])?;
try_or_poison!(self.reader, self.reader.inner.read_exact(&mut buf[pad..]));
if buf != [0; 8] {
poison!(self.reader);
return Err(InvalidData.into());
}
}
read::token(self.reader, &wire::TOK_PAR)
try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR));
#[cfg(debug_assertions)]
{
// Done with reading this file, allow going back up the chain of readers
self.reader.status.ready_parent();
}
Ok(())
}
}
/// A directory iterator, yielding a sequence of [Node]s.
/// It must be fully consumed before reading further from the [DirReader] that produced it, if any.
pub struct DirReader<'a, 'r> {
reader: &'a mut Reader<'r>,
reader: ArchiveReader<'a, 'r>,
/// Previous directory entry name.
/// We have to hang onto this to enforce name monotonicity.
prev_name: Option<Vec<u8>>,
@ -226,7 +292,7 @@ pub struct Entry<'a, 'r> {
}
impl<'a, 'r> DirReader<'a, 'r> {
fn new(reader: &'a mut Reader<'r>) -> Self {
fn new(reader: ArchiveReader<'a, 'r>) -> Self {
Self {
reader,
prev_name: None,
@ -242,23 +308,30 @@ impl<'a, 'r> DirReader<'a, 'r> {
/// * You must abandon the entire archive reader on the first error.
/// * You must abandon the directory reader upon the first [None].
/// * Even if you know the amount of elements up front, you must keep reading until you encounter [None].
///
/// TODO(edef): enforce these in `#[cfg(debug_assertions)]`
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> io::Result<Option<Entry>> {
pub fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> {
self.reader.check_correct();
// COME FROM the previous iteration: if we've already read an entry,
// read its terminating TOK_PAR here.
if self.prev_name.is_some() {
read::token(self.reader, &wire::TOK_PAR)?;
try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR));
}
// Determine if there are more entries to follow
if let wire::Entry::None = read::tag(self.reader)? {
if let wire::Entry::None = try_or_poison!(self.reader, read::tag(self.reader.inner)) {
// We've reached the end of this directory.
#[cfg(debug_assertions)]
{
self.reader.status.ready_parent();
}
return Ok(None);
}
let name = read::bytes(self.reader, wire::MAX_NAME_LEN)?;
let name = try_or_poison!(
self.reader,
read::bytes(self.reader.inner, wire::MAX_NAME_LEN)
);
if name.is_empty()
|| name.contains(&0)
@ -266,6 +339,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
|| name == b"."
|| name == b".."
{
poison!(self.reader);
return Err(InvalidData.into());
}
@ -276,6 +350,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
}
Some(prev_name) => {
if *prev_name >= name {
poison!(self.reader);
return Err(InvalidData.into());
}
@ -283,11 +358,120 @@ impl<'a, 'r> DirReader<'a, 'r> {
}
}
read::token(self.reader, &wire::TOK_NOD)?;
try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD));
Ok(Some(Entry {
name,
node: Node::new(&mut self.reader)?,
// Don't need to worry about poisoning here: Node::new will do it for us if needed
node: Node::new(self.reader.child())?,
}))
}
}
/// We use a stack of statuses to:
/// * Share poisoned state across all objects from the same underlying reader,
/// so we can check they are abandoned when an error occurs
/// * Make sure only the most recently created object is read from, and is fully exhausted
/// before anything it was created from is used again.
#[cfg(debug_assertions)]
enum ArchiveReaderStatus<'a> {
StackTop {
poisoned: bool,
ready: bool,
},
StackChild {
poisoned: &'a mut bool,
parent_ready: &'a mut bool,
ready: bool,
},
}
#[cfg(debug_assertions)]
impl ArchiveReaderStatus<'_> {
/// Poison all the objects sharing the same reader, to be used when an error occurs
fn poison(&mut self) {
match self {
ArchiveReaderStatus::StackTop { poisoned: x, .. } => *x = true,
ArchiveReaderStatus::StackChild { poisoned: x, .. } => **x = true,
}
}
/// Mark the parent as ready, allowing it to be used again and preventing this reference to the reader being used again.
fn ready_parent(&mut self) {
match self {
Self::StackTop { ready, .. } => {
*ready = false;
}
Self::StackChild {
ready,
parent_ready,
..
} => {
*ready = false;
**parent_ready = true;
}
};
}
fn poisoned(&self) -> bool {
match self {
Self::StackTop { poisoned, .. } => *poisoned,
Self::StackChild { poisoned, .. } => **poisoned,
}
}
fn ready(&self) -> bool {
match self {
Self::StackTop { ready, .. } => *ready,
Self::StackChild { ready, .. } => *ready,
}
}
}
impl<'a, 'r> ArchiveReader<'a, 'r> {
/// Create a new child reader from this one.
/// In debug mode, this reader will panic if called before the new child is exhausted / calls `ready_parent`
fn child(&mut self) -> ArchiveReader<'_, 'r> {
ArchiveReader {
inner: self.inner,
#[cfg(debug_assertions)]
status: match &mut self.status {
ArchiveReaderStatus::StackTop { poisoned, ready } => {
*ready = false;
ArchiveReaderStatus::StackChild {
poisoned,
parent_ready: ready,
ready: true,
}
}
ArchiveReaderStatus::StackChild {
poisoned, ready, ..
} => {
*ready = false;
ArchiveReaderStatus::StackChild {
poisoned,
parent_ready: ready,
ready: true,
}
}
},
}
}
/// Check the reader is in the correct status.
/// Only does anything when debug assertions are on.
#[inline(always)]
fn check_correct(&self) {
#[cfg(debug_assertions)]
{
debug_assert!(
!self.status.poisoned(),
"Archive reader used after it was meant to be abandoned!"
);
debug_assert!(
self.status.ready(),
"Non-ready archive reader used! (Should've been reading from something else)"
)
}
}
}

View file

@ -46,71 +46,54 @@ fn complicated() {
match node {
nar::reader::Node::Directory(mut dir_reader) => {
// first entry is .keep, an empty regular file.
let entry = dir_reader
.next()
.expect("next must succeed")
.expect("must be some");
assert_eq!(&b".keep"[..], &entry.name);
match entry.node {
nar::reader::Node::File {
executable,
mut reader,
} => {
assert!(!executable);
assert_eq!(reader.read(&mut [0]).unwrap(), 0);
}
_ => panic!("unexpected type for .keep"),
}
must_read_file(
".keep",
dir_reader
.next()
.expect("next must succeed")
.expect("must be some"),
);
// second entry is aa, a symlink to /nix/store/somewhereelse
let entry = dir_reader
.next()
.expect("next must be some")
.expect("must be some");
must_be_symlink(
"aa",
"/nix/store/somewhereelse",
dir_reader
.next()
.expect("next must be some")
.expect("must be some"),
);
assert_eq!(&b"aa"[..], &entry.name);
{
// third entry is a directory called "keep"
let entry = dir_reader
.next()
.expect("next must be some")
.expect("must be some");
match entry.node {
nar::reader::Node::Symlink { target } => {
assert_eq!(&b"/nix/store/somewhereelse"[..], &target);
}
_ => panic!("unexpected type for aa"),
}
assert_eq!(&b"keep"[..], &entry.name);
// third entry is a directory called "keep"
let entry = dir_reader
.next()
.expect("next must be some")
.expect("must be some");
match entry.node {
nar::reader::Node::Directory(mut subdir_reader) => {
{
// first entry is .keep, an empty regular file.
let entry = subdir_reader
.next()
.expect("next must succeed")
.expect("must be some");
assert_eq!(&b"keep"[..], &entry.name);
match entry.node {
nar::reader::Node::Directory(mut subdir_reader) => {
// first entry is .keep, an empty regular file.
let entry = subdir_reader
.next()
.expect("next must succeed")
.expect("must be some");
// … it contains a single .keep, an empty regular file.
assert_eq!(&b".keep"[..], &entry.name);
match entry.node {
nar::reader::Node::File {
executable,
mut reader,
} => {
assert!(!executable);
assert_eq!(reader.read(&mut [0]).unwrap(), 0);
must_read_file(".keep", entry);
}
_ => panic!("unexpected type for keep/.keep"),
// we must read the None
assert!(
subdir_reader.next().expect("next must succeed").is_none(),
"keep directory contains only .keep"
);
}
_ => panic!("unexpected type for keep/.keep"),
}
_ => panic!("unexpected type for keep/.keep"),
}
};
// reading more entries yields None (and we actually must read until this)
assert!(dir_reader.next().expect("must succeed").is_none());
@ -118,3 +101,178 @@ fn complicated() {
_ => panic!("unexpected type"),
}
}
#[test]
#[should_panic]
fn file_read_abandoned() {
let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar"));
let node = nar::reader::open(&mut f).unwrap();
match node {
nar::reader::Node::Directory(mut dir_reader) => {
// first entry is .keep, an empty regular file.
{
let entry = dir_reader
.next()
.expect("next must succeed")
.expect("must be some");
assert_eq!(&b".keep"[..], &entry.name);
// don't bother to finish reading it.
};
// this should panic (not return an error), because we are meant to abandon the archive reader now.
assert!(dir_reader.next().expect("must succeed").is_none());
}
_ => panic!("unexpected type"),
}
}
#[test]
#[should_panic]
fn dir_read_abandoned() {
let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar"));
let node = nar::reader::open(&mut f).unwrap();
match node {
nar::reader::Node::Directory(mut dir_reader) => {
// first entry is .keep, an empty regular file.
must_read_file(
".keep",
dir_reader
.next()
.expect("next must succeed")
.expect("must be some"),
);
// second entry is aa, a symlink to /nix/store/somewhereelse
must_be_symlink(
"aa",
"/nix/store/somewhereelse",
dir_reader
.next()
.expect("next must be some")
.expect("must be some"),
);
{
// third entry is a directory called "keep"
let entry = dir_reader
.next()
.expect("next must be some")
.expect("must be some");
assert_eq!(&b"keep"[..], &entry.name);
match entry.node {
nar::reader::Node::Directory(_) => {
// don't finish using it, which poisons the archive reader
}
_ => panic!("unexpected type for keep/.keep"),
}
};
// this should panic, because we didn't finish reading the child subdirectory
assert!(dir_reader.next().expect("must succeed").is_none());
}
_ => panic!("unexpected type"),
}
}
#[test]
#[should_panic]
fn dir_read_after_none() {
let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar"));
let node = nar::reader::open(&mut f).unwrap();
match node {
nar::reader::Node::Directory(mut dir_reader) => {
// first entry is .keep, an empty regular file.
must_read_file(
".keep",
dir_reader
.next()
.expect("next must succeed")
.expect("must be some"),
);
// second entry is aa, a symlink to /nix/store/somewhereelse
must_be_symlink(
"aa",
"/nix/store/somewhereelse",
dir_reader
.next()
.expect("next must be some")
.expect("must be some"),
);
{
// third entry is a directory called "keep"
let entry = dir_reader
.next()
.expect("next must be some")
.expect("must be some");
assert_eq!(&b"keep"[..], &entry.name);
match entry.node {
nar::reader::Node::Directory(mut subdir_reader) => {
// first entry is .keep, an empty regular file.
must_read_file(
".keep",
subdir_reader
.next()
.expect("next must succeed")
.expect("must be some"),
);
// we must read the None
assert!(
subdir_reader.next().expect("next must succeed").is_none(),
"keep directory contains only .keep"
);
}
_ => panic!("unexpected type for keep/.keep"),
}
};
// reading more entries yields None (and we actually must read until this)
assert!(dir_reader.next().expect("must succeed").is_none());
// this should panic, because we already got a none so we're meant to stop.
dir_reader.next().unwrap();
unreachable!()
}
_ => panic!("unexpected type"),
}
}
fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) {
assert_eq!(name.as_bytes(), &entry.name);
match entry.node {
nar::reader::Node::File {
executable,
mut reader,
} => {
assert!(!executable);
assert_eq!(reader.read(&mut [0]).unwrap(), 0);
}
_ => panic!("unexpected type for {}", name),
}
}
fn must_be_symlink(
name: &'static str,
exp_target: &'static str,
entry: nar::reader::Entry<'_, '_>,
) {
assert_eq!(name.as_bytes(), &entry.name);
match entry.node {
nar::reader::Node::Symlink { target } => {
assert_eq!(exp_target.as_bytes(), &target);
}
_ => panic!("unexpected type for {}", name),
}
}