From 9b7629826f45af70ed2668353ff4d28446cd1417 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Thu, 12 Oct 2023 22:26:53 +0200 Subject: [PATCH] refactor(tvix/castore): factor out node checks Implement `validate()` on `node::Node`, and call it from PathInfo's validate() too. Node-related errors are moved to a ValidateNodeError error type. This additionally adds some more validations for symlink targets (they must not be empty, and not contain null bytes). Change-Id: Ib9b89f1c9c795e868a1533281239bc8a36d97c5d Reviewed-on: https://cl.tvl.fyi/c/depot/+/9715 Autosubmit: flokli Tested-by: BuildkiteCI Reviewed-by: edef --- tvix/castore/src/proto/mod.rs | 92 ++++++++++++++++------- tvix/castore/src/proto/tests/directory.rs | 16 ++-- tvix/store/src/proto/mod.rs | 38 ++-------- tvix/store/src/proto/tests/pathinfo.rs | 31 +++++++- 4 files changed, 109 insertions(+), 68 deletions(-) diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs index 66ed5b0f1..ae00d4f15 100644 --- a/tvix/castore/src/proto/mod.rs +++ b/tvix/castore/src/proto/mod.rs @@ -2,7 +2,6 @@ // https://github.com/hyperium/tonic/issues/1056 use bstr::ByteSlice; use std::{collections::HashSet, iter::Peekable}; -use thiserror::Error; use prost::Message; @@ -26,7 +25,7 @@ pub const FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("tvix mod tests; /// Errors that can occur during the validation of Directory messages. -#[derive(Debug, PartialEq, Eq, Error)] +#[derive(Debug, PartialEq, Eq, thiserror::Error)] pub enum ValidateDirectoryError { /// Elements are not in sorted order #[error("{:?} is not sorted", .0.as_bstr())] @@ -34,28 +33,37 @@ pub enum ValidateDirectoryError { /// Multiple elements with the same name encountered #[error("{:?} is a duplicate name", .0.as_bstr())] DuplicateName(Vec), - /// Invalid name encountered - #[error("Invalid name in {:?}", .0.as_bstr())] - InvalidName(Vec), - /// Invalid digest length encountered - #[error("Invalid Digest length: {0}")] - InvalidDigestLen(usize), + /// Invalid node + #[error("invalid node with name {:?}: {:?}", .0.as_bstr(), .1.to_string())] + InvalidNode(Vec, ValidateNodeError), #[error("Total size exceeds u32::MAX")] SizeOverflow, } -/// Checks a Node name for validity as an intermediate node, and returns an -/// error that's generated from the supplied constructor. -/// +/// Errors that occur during Node validation +#[derive(Debug, PartialEq, Eq, thiserror::Error)] +pub enum ValidateNodeError { + /// Invalid digest length encountered + #[error("Invalid Digest length: {0}")] + InvalidDigestLen(usize), + /// Invalid name encountered + #[error("Invalid name")] + InvalidName(), + /// Invalid symlink target + #[error("Invalid symlink target: {}", .0.as_bstr())] + InvalidSymlinkTarget(Vec), +} + +/// Checks a Node name for validity as an intermediate node. /// We disallow slashes, null bytes, '.', '..' and the empty string. -fn validate_node_name(name: &[u8], err: fn(Vec) -> E) -> Result<(), E> { +fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> { if name.is_empty() || name == b".." || name == b"." || name.contains(&0x00) || name.contains(&b'/') { - return Err(err(name.to_vec())); + Err(ValidateNodeError::InvalidName())?; } Ok(()) } @@ -103,6 +111,39 @@ impl node::Node { node::Node::Symlink(n) => node::Node::Symlink(SymlinkNode { name, ..n }), } } + pub fn validate(&self) -> Result<(), ValidateNodeError> { + match self { + // for a directory root node, ensure the digest has the appropriate size. + node::Node::Directory(directory_node) => { + if TryInto::::try_into(directory_node.digest.clone()).is_err() { + Err(ValidateNodeError::InvalidDigestLen( + directory_node.digest.len(), + ))?; + }; + validate_node_name(&directory_node.name)?; + } + // for a file root node, ensure the digest has the appropriate size. + node::Node::File(file_node) => { + if TryInto::::try_into(file_node.digest.clone()).is_err() { + Err(ValidateNodeError::InvalidDigestLen(file_node.digest.len()))?; + }; + validate_node_name(&file_node.name)?; + } + // ensure the symlink target is not empty and doesn't contain null bytes. + node::Node::Symlink(symlink_node) => { + if symlink_node.target.len() == 0 { + Err(ValidateNodeError::InvalidSymlinkTarget(vec![]))?; + } + if symlink_node.target.contains(&b'\0') { + Err(ValidateNodeError::InvalidSymlinkTarget( + symlink_node.target.to_vec(), + ))?; + } + validate_node_name(&symlink_node.name)?; + } + } + Ok(()) + } } /// Accepts a name, and a mutable reference to the previous name. @@ -183,13 +224,11 @@ impl Directory { // check directories for directory_node in &self.directories { - validate_node_name(&directory_node.name, ValidateDirectoryError::InvalidName)?; - // ensure the digest has the appropriate size. - if TryInto::::try_into(directory_node.digest.clone()).is_err() { - return Err(ValidateDirectoryError::InvalidDigestLen( - directory_node.digest.len(), - )); - } + node::Node::Directory(directory_node.clone()) + .validate() + .map_err(|e| { + ValidateDirectoryError::InvalidNode(directory_node.name.to_vec(), e) + })?; update_if_lt_prev(&mut last_directory_name, &directory_node.name)?; insert_once(&mut seen_names, &directory_node.name)?; @@ -197,12 +236,9 @@ impl Directory { // check files for file_node in &self.files { - validate_node_name(&file_node.name, ValidateDirectoryError::InvalidName)?; - if TryInto::::try_into(file_node.digest.clone()).is_err() { - return Err(ValidateDirectoryError::InvalidDigestLen( - file_node.digest.len(), - )); - } + node::Node::File(file_node.clone()) + .validate() + .map_err(|e| ValidateDirectoryError::InvalidNode(file_node.name.to_vec(), e))?; update_if_lt_prev(&mut last_file_name, &file_node.name)?; insert_once(&mut seen_names, &file_node.name)?; @@ -210,7 +246,9 @@ impl Directory { // check symlinks for symlink_node in &self.symlinks { - validate_node_name(&symlink_node.name, ValidateDirectoryError::InvalidName)?; + node::Node::Symlink(symlink_node.clone()) + .validate() + .map_err(|e| ValidateDirectoryError::InvalidNode(symlink_node.name.to_vec(), e))?; update_if_lt_prev(&mut last_symlink_name, &symlink_node.name)?; insert_once(&mut seen_names, &symlink_node.name)?; diff --git a/tvix/castore/src/proto/tests/directory.rs b/tvix/castore/src/proto/tests/directory.rs index d46e8cb6c..69d9b5b4e 100644 --- a/tvix/castore/src/proto/tests/directory.rs +++ b/tvix/castore/src/proto/tests/directory.rs @@ -1,4 +1,6 @@ -use crate::proto::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError}; +use crate::proto::{ + Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError, ValidateNodeError, +}; use lazy_static::lazy_static; lazy_static! { @@ -171,7 +173,7 @@ fn validate_invalid_names() { ..Default::default() }; match d.validate().expect_err("must fail") { - ValidateDirectoryError::InvalidName(n) => { + ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => { assert_eq!(n, b"") } _ => panic!("unexpected error"), @@ -188,7 +190,7 @@ fn validate_invalid_names() { ..Default::default() }; match d.validate().expect_err("must fail") { - ValidateDirectoryError::InvalidName(n) => { + ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => { assert_eq!(n, b".") } _ => panic!("unexpected error"), @@ -206,7 +208,7 @@ fn validate_invalid_names() { ..Default::default() }; match d.validate().expect_err("must fail") { - ValidateDirectoryError::InvalidName(n) => { + ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => { assert_eq!(n, b"..") } _ => panic!("unexpected error"), @@ -222,7 +224,7 @@ fn validate_invalid_names() { ..Default::default() }; match d.validate().expect_err("must fail") { - ValidateDirectoryError::InvalidName(n) => { + ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => { assert_eq!(n, b"\x00") } _ => panic!("unexpected error"), @@ -238,7 +240,7 @@ fn validate_invalid_names() { ..Default::default() }; match d.validate().expect_err("must fail") { - ValidateDirectoryError::InvalidName(n) => { + ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => { assert_eq!(n, b"foo/bar") } _ => panic!("unexpected error"), @@ -257,7 +259,7 @@ fn validate_invalid_digest() { ..Default::default() }; match d.validate().expect_err("must fail") { - ValidateDirectoryError::InvalidDigestLen(n) => { + ValidateDirectoryError::InvalidNode(_, ValidateNodeError::InvalidDigestLen(n)) => { assert_eq!(n, 2) } _ => panic!("unexpected error"), diff --git a/tvix/store/src/proto/mod.rs b/tvix/store/src/proto/mod.rs index 033fb79db..7921adc40 100644 --- a/tvix/store/src/proto/mod.rs +++ b/tvix/store/src/proto/mod.rs @@ -3,10 +3,7 @@ use data_encoding::BASE64; // https://github.com/hyperium/tonic/issues/1056 use nix_compat::store_path::{self, StorePath}; use thiserror::Error; -use tvix_castore::{ - proto::{self as castorepb, NamedNode}, - B3Digest, B3_LEN, -}; +use tvix_castore::proto::{self as castorepb, NamedNode, ValidateNodeError}; mod grpc_pathinfoservice_wrapper; @@ -34,14 +31,14 @@ pub enum ValidatePathInfoError { #[error("No node present")] NoNodePresent(), - /// Invalid node name encountered. + /// Node fails validation + #[error("Invalid root node: {:?}", .0.to_string())] + InvalidRootNode(ValidateNodeError), + + /// Invalid node name encountered. Root nodes in PathInfos have more strict name requirements #[error("Failed to parse {0:?} as StorePath: {1}")] InvalidNodeName(Vec, store_path::Error), - /// The digest the (root) node refers to has invalid length. - #[error("Invalid Digest length: expected {}, got {}", B3_LEN, .0)] - InvalidNodeDigestLen(usize), - /// The digest in narinfo.nar_sha256 has an invalid len. #[error("Invalid narinfo.nar_sha256 length: expected {}, got {}", 32, .0)] InvalidNarSha256DigestLen(usize), @@ -146,27 +143,8 @@ impl PathInfo { Err(ValidatePathInfoError::NoNodePresent())? } Some(castorepb::Node { node: Some(node) }) => { - match node { - // for a directory root node, ensure the digest has the appropriate size. - castorepb::node::Node::Directory(directory_node) => { - if TryInto::::try_into(directory_node.digest.clone()).is_err() { - return Err(ValidatePathInfoError::InvalidNodeDigestLen( - directory_node.digest.len(), - )); - } - } - // for a file root node, ensure the digest has the appropriate size. - castorepb::node::Node::File(file_node) => { - // ensure the digest has the appropriate size. - if TryInto::::try_into(file_node.digest.clone()).is_err() { - return Err(ValidatePathInfoError::InvalidNodeDigestLen( - file_node.digest.len(), - )); - } - } - // nothing to do specifically for symlinks - castorepb::node::Node::Symlink(_) => {} - } + node.validate() + .map_err(|e| ValidatePathInfoError::InvalidRootNode(e))?; // parse the name of the node itself and return parse_node_name_root(node.get_name(), ValidatePathInfoError::InvalidNodeName)? } diff --git a/tvix/store/src/proto/tests/pathinfo.rs b/tvix/store/src/proto/tests/pathinfo.rs index cfecbac3b..5e1ae9c45 100644 --- a/tvix/store/src/proto/tests/pathinfo.rs +++ b/tvix/store/src/proto/tests/pathinfo.rs @@ -43,7 +43,7 @@ fn validate_no_node( digest: Bytes::new(), size: 0, }, - Err(ValidatePathInfoError::InvalidNodeDigestLen(0)); + Err(ValidatePathInfoError::InvalidRootNode(castorepb::ValidateNodeError::InvalidDigestLen(0))); "invalid digest length" )] #[test_case( @@ -88,7 +88,7 @@ fn validate_directory( digest: Bytes::new(), ..Default::default() }, - Err(ValidatePathInfoError::InvalidNodeDigestLen(0)); + Err(ValidatePathInfoError::InvalidRootNode(castorepb::ValidateNodeError::InvalidDigestLen(0))); "invalid digest length" )] #[test_case( @@ -120,7 +120,7 @@ fn validate_file( #[test_case( castorepb::SymlinkNode { name: DUMMY_NAME.into(), - ..Default::default() + target: "foo".into(), }, Ok(StorePath::from_str(DUMMY_NAME).expect("must succeed")); "ok" @@ -128,7 +128,7 @@ fn validate_file( #[test_case( castorepb::SymlinkNode { name: "invalid".into(), - ..Default::default() + target: "foo".into(), }, Err(ValidatePathInfoError::InvalidNodeName( "invalid".into(), @@ -239,3 +239,26 @@ fn validate_inconsistent_narinfo_reference_name_digest() { e => panic!("unexpected error: {:?}", e), } } + +/// Create a node with an empty symlink target, and ensure it fails validation. +#[test] +fn validate_symlink_empty_target_invalid() { + let node = castorepb::node::Node::Symlink(castorepb::SymlinkNode { + name: "foo".into(), + target: "".into(), + }); + + node.validate().expect_err("must fail validation"); +} + +/// Create a node with a symlink target including null bytes, and ensure it +/// fails validation. +#[test] +fn validate_symlink_target_null_byte_invalid() { + let node = castorepb::node::Node::Symlink(castorepb::SymlinkNode { + name: "foo".into(), + target: "foo\0".into(), + }); + + node.validate().expect_err("must fail validation"); +}