diff --git a/tvix/tools/weave/src/lib.rs b/tvix/tools/weave/src/lib.rs index 12a86f9fb..bc2221bf5 100644 --- a/tvix/tools/weave/src/lib.rs +++ b/tvix/tools/weave/src/lib.rs @@ -1,6 +1,6 @@ use anyhow::Result; use rayon::prelude::*; -use std::{fs::File, slice}; +use std::{fs::File, ops::Range, slice}; use polars::{ datatypes::BinaryChunked, @@ -49,8 +49,8 @@ pub fn as_fixed_binary( chunked: &BinaryChunked, ) -> impl Iterator + DoubleEndedIterator { chunked.downcast_iter().map(|array| { - assert_fixed_dense::(array); - exact_chunks(array.values()).unwrap() + let range = assert_fixed_dense::(array); + exact_chunks(&array.values()[range]).unwrap() }) } @@ -61,20 +61,22 @@ fn into_fixed_binary_rechunk(chunked: &BinaryChunked) -> FixedBy let mut iter = chunked.downcast_iter(); let array = iter.next().unwrap(); - assert_fixed_dense::(array); - Bytes(array.values().clone()).map(|buf| exact_chunks(buf).unwrap()) + let range = assert_fixed_dense::(array); + Bytes(array.values().clone().sliced(range.start, range.len())) + .map(|buf| exact_chunks(buf).unwrap()) } /// Ensures that the supplied Arrow array consists of densely packed bytestrings of length `N`. /// In other words, ensure that it is free of nulls, and that the offsets have a fixed stride of `N`. -fn assert_fixed_dense(array: &BinaryArray) { +#[must_use = "only the range returned is guaranteed to be conformant"] +fn assert_fixed_dense(array: &BinaryArray) -> Range { let null_count = array.validity().map_or(0, |bits| bits.unset_bits()); if null_count > 0 { panic!("null values present"); } - let length_check = array - .offsets() + let offsets = array.offsets(); + let length_check = offsets .as_slice() .par_windows(2) .all(|w| (w[1] - w[0]) == N as i64); @@ -82,6 +84,8 @@ fn assert_fixed_dense(array: &BinaryArray) { if !length_check { panic!("lengths are inconsistent"); } + + (*offsets.first() as usize)..(*offsets.last() as usize) } fn exact_chunks(buf: &[u8]) -> Option<&[[u8; K]]> {