feat(users/edef/weave): ingest roots in Parquet format
Parsing of store-paths.xz is now handled by //users/edef/fetchroots.

Change-Id: I78be5aada0c0a321ed79d80c9b615e5f997ac3e0
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12670
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
This commit is contained in:
parent
313899c291
commit
06d2536eec
1 changed file with 15 additions and 13 deletions
|
@ -1,4 +1,4 @@
|
|||
//! Weave resolves a list of roots from `nixpkgs.roots` against `narinfo.parquet`,
|
||||
//! Weave resolves a list of roots from `releases.parquet` against `narinfo.parquet`,
|
||||
//! and then uses the reference graph from the accompanying `narinfo-references.parquet`
|
||||
//! produced by `swizzle` to collect the closure of the roots.
|
||||
//!
|
||||
|
@ -7,11 +7,10 @@
|
|||
|
||||
use anyhow::Result;
|
||||
use hashbrown::{hash_table, HashTable};
|
||||
use nix_compat::nixbase32;
|
||||
use rayon::prelude::*;
|
||||
use std::{
|
||||
collections::{BTreeMap, HashSet},
|
||||
fs::{self, File},
|
||||
fs::File,
|
||||
ops::Index,
|
||||
sync::atomic::{AtomicU32, Ordering},
|
||||
};
|
||||
|
@ -19,22 +18,24 @@ use std::{
|
|||
use polars::{
|
||||
datatypes::StaticArray,
|
||||
export::arrow::{array::UInt32Array, offset::OffsetsBuffer},
|
||||
lazy::dsl::col,
|
||||
prelude::*,
|
||||
};
|
||||
|
||||
use weave::{hash64, DONE, INDEX_NULL};
|
||||
use weave::{as_fixed_binary, hash64, DONE, INDEX_NULL};
|
||||
|
||||
fn main() -> Result<()> {
|
||||
eprint!("… parse roots\r");
|
||||
let roots: PathSet32 = {
|
||||
let mut roots = Vec::new();
|
||||
fs::read("nixpkgs.roots")?
|
||||
.par_chunks_exact(32 + 1)
|
||||
.map(|e| nixbase32::decode_fixed::<20>(&e[0..32]).unwrap())
|
||||
.collect_into_vec(&mut roots);
|
||||
|
||||
roots.iter().collect()
|
||||
};
|
||||
let roots: PathSet32 = as_fixed_binary::<20>(
|
||||
LazyFrame::scan_parquet("releases.parquet", ScanArgsParquet::default())?
|
||||
.explode([col("store_path_hash")])
|
||||
.select([col("store_path_hash")])
|
||||
.collect()?
|
||||
.column("store_path_hash")?
|
||||
.binary()?,
|
||||
)
|
||||
.flatten()
|
||||
.collect();
|
||||
eprintln!("{DONE}");
|
||||
|
||||
{
|
||||
|
@ -182,6 +183,7 @@ impl<'a> FromIterator<&'a [u8; 20]> for PathSet32 {
|
|||
this.insert(item);
|
||||
}
|
||||
|
||||
this.table.shrink_to_fit(|(x, _)| hash64(x));
|
||||
this
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue