feat(users/edef/weave): ingest roots in Parquet format
Parsing of store-paths.xz is now handled by //users/edef/fetchroots.

Change-Id: I78be5aada0c0a321ed79d80c9b615e5f997ac3e0
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12670
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
This commit is contained in:
parent
313899c291
commit
06d2536eec
1 changed file with 15 additions and 13 deletions
|
@@ -1,4 +1,4 @@
|
||||||
//! Weave resolves a list of roots from `nixpkgs.roots` against `narinfo.parquet`,
|
//! Weave resolves a list of roots from `releases.parquet` against `narinfo.parquet`,
|
||||||
//! and then uses the reference graph from the accompanying `narinfo-references.parquet`
|
//! and then uses the reference graph from the accompanying `narinfo-references.parquet`
|
||||||
//! produced by `swizzle` to collect the closure of the roots.
|
//! produced by `swizzle` to collect the closure of the roots.
|
||||||
//!
|
//!
|
||||||
|
@@ -7,11 +7,10 @@
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use hashbrown::{hash_table, HashTable};
|
use hashbrown::{hash_table, HashTable};
|
||||||
use nix_compat::nixbase32;
|
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use std::{
|
use std::{
|
||||||
collections::{BTreeMap, HashSet},
|
collections::{BTreeMap, HashSet},
|
||||||
fs::{self, File},
|
fs::File,
|
||||||
ops::Index,
|
ops::Index,
|
||||||
sync::atomic::{AtomicU32, Ordering},
|
sync::atomic::{AtomicU32, Ordering},
|
||||||
};
|
};
|
||||||
|
@@ -19,22 +18,24 @@ use std::{
|
||||||
use polars::{
|
use polars::{
|
||||||
datatypes::StaticArray,
|
datatypes::StaticArray,
|
||||||
export::arrow::{array::UInt32Array, offset::OffsetsBuffer},
|
export::arrow::{array::UInt32Array, offset::OffsetsBuffer},
|
||||||
|
lazy::dsl::col,
|
||||||
prelude::*,
|
prelude::*,
|
||||||
};
|
};
|
||||||
|
|
||||||
use weave::{hash64, DONE, INDEX_NULL};
|
use weave::{as_fixed_binary, hash64, DONE, INDEX_NULL};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
eprint!("… parse roots\r");
|
eprint!("… parse roots\r");
|
||||||
let roots: PathSet32 = {
|
let roots: PathSet32 = as_fixed_binary::<20>(
|
||||||
let mut roots = Vec::new();
|
LazyFrame::scan_parquet("releases.parquet", ScanArgsParquet::default())?
|
||||||
fs::read("nixpkgs.roots")?
|
.explode([col("store_path_hash")])
|
||||||
.par_chunks_exact(32 + 1)
|
.select([col("store_path_hash")])
|
||||||
.map(|e| nixbase32::decode_fixed::<20>(&e[0..32]).unwrap())
|
.collect()?
|
||||||
.collect_into_vec(&mut roots);
|
.column("store_path_hash")?
|
||||||
|
.binary()?,
|
||||||
roots.iter().collect()
|
)
|
||||||
};
|
.flatten()
|
||||||
|
.collect();
|
||||||
eprintln!("{DONE}");
|
eprintln!("{DONE}");
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@@ -182,6 +183,7 @@ impl<'a> FromIterator<&'a [u8; 20]> for PathSet32 {
|
||||||
this.insert(item);
|
this.insert(item);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.table.shrink_to_fit(|(x, _)| hash64(x));
|
||||||
this
|
this
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue