feat(tvix/castore): bump [Directory,File]Node size to u64

Files larger than 4GiB are quite possible (think of an uncompressed ISO
of the NixOS graphical installer).

No wire format changes: uint32 and uint64 are both encoded as varints, so existing messages still decode.

Change-Id: Ia78a07e4c554e91b93c5b9f8533266e4bd7f22b6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9950
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2023-11-05 10:53:42 +02:00 committed by flokli
parent 47e34b2c36
commit 2546446d51
16 changed files with 55 additions and 57 deletions

View file

@ -12,9 +12,9 @@ import (
// The size of a directory is calculated by summing up the numbers of
// `directories`, `files` and `symlinks`, and for each directory, its size
// field.
func (d *Directory) Size() uint32 {
var size uint32
size = uint32(len(d.Files) + len(d.Symlinks))
func (d *Directory) Size() uint64 {
var size uint64
size = uint64(len(d.Files) + len(d.Symlinks))
for _, d := range d.Directories {
size += 1 + d.Size
}

View file

@ -116,7 +116,7 @@ type DirectoryNode struct {
// A credulous implementation won't reject an excessive size, but this is
// harmless: you'll have some ordinals without nodes. Undersizing is
// obvious and easy to reject: you won't have an ordinal for some nodes.
Size uint32 `protobuf:"varint,3,opt,name=size,proto3" json:"size,omitempty"`
Size uint64 `protobuf:"varint,3,opt,name=size,proto3" json:"size,omitempty"`
}
func (x *DirectoryNode) Reset() {
@ -165,7 +165,7 @@ func (x *DirectoryNode) GetDigest() []byte {
return nil
}
func (x *DirectoryNode) GetSize() uint32 {
func (x *DirectoryNode) GetSize() uint64 {
if x != nil {
return x.Size
}
@ -183,7 +183,7 @@ type FileNode struct {
// The blake3 digest of the file contents
Digest []byte `protobuf:"bytes,2,opt,name=digest,proto3" json:"digest,omitempty"`
// The file content size
Size uint32 `protobuf:"varint,3,opt,name=size,proto3" json:"size,omitempty"`
Size uint64 `protobuf:"varint,3,opt,name=size,proto3" json:"size,omitempty"`
// Whether the file is executable
Executable bool `protobuf:"varint,4,opt,name=executable,proto3" json:"executable,omitempty"`
}
@ -234,7 +234,7 @@ func (x *FileNode) GetDigest() []byte {
return nil
}
func (x *FileNode) GetSize() uint32 {
func (x *FileNode) GetSize() uint64 {
if x != nil {
return x.Size
}
@ -424,12 +424,12 @@ var file_tvix_castore_protos_castore_proto_rawDesc = []byte{
0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04,
0x6e, 0x61, 0x6d, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x02,
0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04,
0x73, 0x69, 0x7a, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65,
0x73, 0x69, 0x7a, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65,
0x22, 0x6a, 0x0a, 0x08, 0x46, 0x69, 0x6c, 0x65, 0x4e, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04,
0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65,
0x12, 0x16, 0x0a, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c,
0x52, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65,
0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x12, 0x1e, 0x0a, 0x0a,
0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x12, 0x1e, 0x0a, 0x0a,
0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08,
0x52, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x22, 0x39, 0x0a, 0x0b,
0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x4e, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6e,
@ -447,10 +447,10 @@ var file_tvix_castore_protos_castore_proto_rawDesc = []byte{
0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72,
0x65, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x4e, 0x6f, 0x64, 0x65,
0x48, 0x00, 0x52, 0x07, 0x73, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x42, 0x06, 0x0a, 0x04, 0x6e,
0x6f, 0x64, 0x65, 0x42, 0x2c, 0x5a, 0x2a, 0x63, 0x6f, 0x64, 0x65, 0x2e, 0x74, 0x76, 0x6c, 0x2e,
0x6f, 0x64, 0x65, 0x42, 0x28, 0x5a, 0x26, 0x63, 0x6f, 0x64, 0x65, 0x2e, 0x74, 0x76, 0x6c, 0x2e,
0x66, 0x79, 0x69, 0x2f, 0x74, 0x76, 0x69, 0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65,
0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x3b, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x76,
0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
0x2d, 0x67, 0x6f, 0x3b, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x76, 0x31, 0x62, 0x06, 0x70,
0x72, 0x6f, 0x74, 0x6f, 0x33,
}
var (

View file

@ -23,7 +23,7 @@ func TestDirectorySize(t *testing.T) {
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.Equal(t, uint32(0), d.Size())
assert.Equal(t, uint64(0), d.Size())
})
t.Run("containing single empty directory", func(t *testing.T) {
@ -37,7 +37,7 @@ func TestDirectorySize(t *testing.T) {
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.Equal(t, uint32(1), d.Size())
assert.Equal(t, uint64(1), d.Size())
})
t.Run("containing single non-empty directory", func(t *testing.T) {
@ -51,7 +51,7 @@ func TestDirectorySize(t *testing.T) {
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.Equal(t, uint32(5), d.Size())
assert.Equal(t, uint64(5), d.Size())
})
t.Run("containing single file", func(t *testing.T) {
@ -66,7 +66,7 @@ func TestDirectorySize(t *testing.T) {
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.Equal(t, uint32(1), d.Size())
assert.Equal(t, uint64(1), d.Size())
})
t.Run("containing single symlink", func(t *testing.T) {
@ -79,7 +79,7 @@ func TestDirectorySize(t *testing.T) {
}},
}
assert.Equal(t, uint32(1), d.Size())
assert.Equal(t, uint64(1), d.Size())
})
}

View file

@ -285,11 +285,10 @@ var file_tvix_castore_protos_rpc_blobstore_proto_rawDesc = []byte{
0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x42,
0x6c, 0x6f, 0x62, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x1a, 0x20, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e,
0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x50, 0x75, 0x74, 0x42, 0x6c,
0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x28, 0x01, 0x42, 0x2c, 0x5a, 0x2a,
0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x28, 0x01, 0x42, 0x28, 0x5a, 0x26,
0x63, 0x6f, 0x64, 0x65, 0x2e, 0x74, 0x76, 0x6c, 0x2e, 0x66, 0x79, 0x69, 0x2f, 0x74, 0x76, 0x69,
0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73,
0x3b, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74,
0x6f, 0x33,
0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2d, 0x67, 0x6f, 0x3b, 0x63, 0x61, 0x73,
0x74, 0x6f, 0x72, 0x65, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
var (

View file

@ -180,11 +180,10 @@ var file_tvix_castore_protos_rpc_directory_proto_rawDesc = []byte{
0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74,
0x6f, 0x72, 0x79, 0x1a, 0x25, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f,
0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x50, 0x75, 0x74, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f,
0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x28, 0x01, 0x42, 0x2c, 0x5a, 0x2a,
0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x28, 0x01, 0x42, 0x28, 0x5a, 0x26,
0x63, 0x6f, 0x64, 0x65, 0x2e, 0x74, 0x76, 0x6c, 0x2e, 0x66, 0x79, 0x69, 0x2f, 0x74, 0x76, 0x69,
0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73,
0x3b, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74,
0x6f, 0x33,
0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2d, 0x67, 0x6f, 0x3b, 0x63, 0x61, 0x73,
0x74, 0x6f, 0x72, 0x65, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
var (

View file

@ -38,7 +38,7 @@ message DirectoryNode {
// A credulous implementation won't reject an excessive size, but this is
// harmless: you'll have some ordinals without nodes. Undersizing is
// obvious and easy to reject: you won't have an ordinal for some nodes.
uint32 size = 3;
uint64 size = 3;
}
// A FileNode represents a regular or executable file in a Directory.
@ -48,7 +48,7 @@ message FileNode {
// The blake3 digest of the file contents
bytes digest = 2;
// The file content size
uint32 size = 3;
uint64 size = 3;
// Whether the file is executable
bool executable = 4;
}

View file

@ -122,7 +122,7 @@ async fn process_entry<'a>(
return Ok(Node::File(FileNode {
name: entry.file_name().as_bytes().to_vec().into(),
digest: digest.into(),
size: metadata.len() as u32,
size: metadata.len(),
// If it's executable by the user, it'll become executable.
// This matches nix's dump() function behaviour.
executable: metadata.permissions().mode() & 64 != 0,

View file

@ -96,7 +96,7 @@ impl proto::directory_service_server::DirectoryService for GRPCDirectoryServiceW
// We don't need to keep the contents around, they're stored in the DB.
// https://github.com/rust-lang/rust-clippy/issues/5812
#[allow(clippy::mutable_key_type)]
let mut seen_directories_sizes: HashMap<B3Digest, u32> = HashMap::new();
let mut seen_directories_sizes: HashMap<B3Digest, u64> = HashMap::new();
let mut last_directory_dgst: Option<B3Digest> = None;
// Consume directories, and insert them into the store.

View file

@ -172,23 +172,23 @@ fn insert_once<'n>(
Ok(())
}
fn checked_sum(iter: impl IntoIterator<Item = u32>) -> Option<u32> {
iter.into_iter().try_fold(0u32, |acc, i| acc.checked_add(i))
fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> {
iter.into_iter().try_fold(0u64, |acc, i| acc.checked_add(i))
}
impl Directory {
/// The size of a directory is the number of all regular and symlink elements,
/// the number of directory elements, and their size fields.
pub fn size(&self) -> u32 {
pub fn size(&self) -> u64 {
if cfg!(debug_assertions) {
self.size_checked()
.expect("Directory::size exceeds u32::MAX")
.expect("Directory::size exceeds u64::MAX")
} else {
self.size_checked().unwrap_or(u32::MAX)
self.size_checked().unwrap_or(u64::MAX)
}
}
fn size_checked(&self) -> Option<u32> {
fn size_checked(&self) -> Option<u64> {
checked_sum([
self.files.len().try_into().ok()?,
self.symlinks.len().try_into().ok()?,

View file

@ -60,13 +60,13 @@ fn size() {
#[test]
#[cfg_attr(not(debug_assertions), ignore)]
#[should_panic = "Directory::size exceeds u32::MAX"]
#[should_panic = "Directory::size exceeds u64::MAX"]
fn size_unchecked_panic() {
let d = Directory {
directories: vec![DirectoryNode {
name: "foo".into(),
digest: DUMMY_DIGEST.to_vec().into(),
size: u32::MAX,
size: u64::MAX,
}],
..Default::default()
};
@ -81,12 +81,12 @@ fn size_unchecked_saturate() {
directories: vec![DirectoryNode {
name: "foo".into(),
digest: DUMMY_DIGEST.to_vec().into(),
size: u32::MAX,
size: u64::MAX,
}],
..Default::default()
};
assert_eq!(d.size(), u32::MAX);
assert_eq!(d.size(), u64::MAX);
}
#[test]
@ -98,18 +98,18 @@ fn size_checked() {
directories: vec![DirectoryNode {
name: "foo".into(),
digest: DUMMY_DIGEST.to_vec().into(),
size: u32::MAX - 1,
size: u64::MAX - 1,
}],
..Default::default()
};
assert_eq!(d.size_checked(), Some(u32::MAX));
assert_eq!(d.size_checked(), Some(u64::MAX));
}
{
let d = Directory {
directories: vec![DirectoryNode {
name: "foo".into(),
digest: DUMMY_DIGEST.to_vec().into(),
size: u32::MAX,
size: u64::MAX,
}],
..Default::default()
};
@ -121,12 +121,12 @@ fn size_checked() {
DirectoryNode {
name: "foo".into(),
digest: DUMMY_DIGEST.to_vec().into(),
size: u32::MAX / 2,
size: u64::MAX / 2,
},
DirectoryNode {
name: "foo".into(),
digest: DUMMY_DIGEST.to_vec().into(),
size: u32::MAX / 2,
size: u64::MAX / 2,
},
],
..Default::default()
@ -363,7 +363,7 @@ fn validate_overflow() {
directories: vec![DirectoryNode {
name: "foo".into(),
digest: DUMMY_DIGEST.to_vec().into(),
size: u32::MAX,
size: u64::MAX,
}],
..Default::default()
};

View file

@ -56,7 +56,7 @@ async fn single_file() {
proto::node::Node::File(proto::FileNode {
name: "root".into(),
digest: HELLOWORLD_BLOB_DIGEST.clone().into(),
size: HELLOWORLD_BLOB_CONTENTS.len() as u32,
size: HELLOWORLD_BLOB_CONTENTS.len() as u64,
executable: false,
}),
root_node,

View file

@ -217,7 +217,7 @@ mod tests {
let mut inode_tracker = InodeTracker::default();
let f = InodeData::Regular(
fixtures::BLOB_A_DIGEST.clone(),
fixtures::BLOB_A.len() as u32,
fixtures::BLOB_A.len() as u64,
false,
);
@ -241,7 +241,7 @@ mod tests {
ino,
inode_tracker.put(InodeData::Regular(
fixtures::BLOB_B_DIGEST.clone(),
fixtures::BLOB_B.len() as u32,
fixtures::BLOB_B.len() as u64,
false,
))
);

View file

@ -5,7 +5,7 @@ use tvix_castore::B3Digest;
#[derive(Clone, Debug)]
pub enum InodeData {
Regular(B3Digest, u32, bool), // digest, size, executable
Regular(B3Digest, u64, bool), // digest, size, executable
Symlink(bytes::Bytes), // target
Directory(DirectoryInodeData), // either [DirectoryInodeData:Sparse] or [DirectoryInodeData:Populated]
}
@ -16,7 +16,7 @@ pub enum InodeData {
/// lookup and did fetch the data.
#[derive(Clone, Debug)]
pub enum DirectoryInodeData {
Sparse(B3Digest, u32), // digest, size
Sparse(B3Digest, u64), // digest, size
Populated(B3Digest, Vec<(u64, castorepb::node::Node)>), // [(child_inode, node)]
}

View file

@ -71,7 +71,7 @@ async fn populate_blob_a(
node: Some(castorepb::node::Node::File(castorepb::FileNode {
name: BLOB_A_NAME.into(),
digest: fixtures::BLOB_A_DIGEST.clone().into(),
size: fixtures::BLOB_A.len() as u32,
size: fixtures::BLOB_A.len() as u64,
executable: false,
})),
}),
@ -101,7 +101,7 @@ async fn populate_blob_b(
node: Some(castorepb::node::Node::File(castorepb::FileNode {
name: BLOB_B_NAME.into(),
digest: fixtures::BLOB_B_DIGEST.clone().into(),
size: fixtures::BLOB_B.len() as u32,
size: fixtures::BLOB_B.len() as u64,
executable: false,
})),
}),
@ -135,7 +135,7 @@ async fn populate_helloworld_blob(
node: Some(castorepb::node::Node::File(castorepb::FileNode {
name: HELLOWORLD_BLOB_NAME.into(),
digest: fixtures::HELLOWORLD_BLOB_DIGEST.clone().into(),
size: fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u32,
size: fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64,
executable: true,
})),
}),
@ -262,7 +262,7 @@ async fn populate_blob_a_without_blob(
node: Some(castorepb::node::Node::File(castorepb::FileNode {
name: BLOB_A_NAME.into(),
digest: fixtures::BLOB_A_DIGEST.clone().into(),
size: fixtures::BLOB_A.len() as u32,
size: fixtures::BLOB_A.len() as u64,
executable: false,
})),
}),

View file

@ -136,7 +136,7 @@ async fn walk_node(
nar_node
.file(
proto_file_node.executable,
proto_file_node.size.into(),
proto_file_node.size,
&mut blob_reader.compat(),
)
.await

View file

@ -37,7 +37,7 @@ async fn single_file_missing_blob() {
&castorepb::node::Node::File(FileNode {
name: "doesntmatter".into(),
digest: HELLOWORLD_BLOB_DIGEST.clone().into(),
size: HELLOWORLD_BLOB_CONTENTS.len() as u32,
size: HELLOWORLD_BLOB_CONTENTS.len() as u64,
executable: false,
}),
// the blobservice is empty intentionally, to provoke the error.
@ -150,7 +150,7 @@ async fn single_file() {
&castorepb::node::Node::File(FileNode {
name: "doesntmatter".into(),
digest: HELLOWORLD_BLOB_DIGEST.clone().into(),
size: HELLOWORLD_BLOB_CONTENTS.len() as u32,
size: HELLOWORLD_BLOB_CONTENTS.len() as u64,
executable: false,
}),
blob_service,