feat(zseri): Add store-ref-scanner crate
This crate implements the scanner for finding references to store paths in uncompressed binary blobs and text files. It is currently a minimally working prototype and it is probably a good idea to polish the interface further. Change-Id: I8406f9d52d254fc3d660ea2b9bc9b7841cc815ec Reviewed-on: https://cl.tvl.fyi/c/depot/+/4596 Tested-by: BuildkiteCI Reviewed-by: zseri <zseri.devel@ytrizja.de>
This commit is contained in:
parent
979502335b
commit
8cdda57580
8 changed files with 2018 additions and 0 deletions
2
users/zseri/.gitignore
vendored
Normal file
2
users/zseri/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
.#*
|
||||||
|
target/
|
238
users/zseri/store-ref-scanner/Cargo.lock
generated
Normal file
238
users/zseri/store-ref-scanner/Cargo.lock
generated
Normal file
|
@ -0,0 +1,238 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "autocfg"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bitflags"
|
||||||
|
version = "1.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "byteorder"
|
||||||
|
version = "1.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "camino"
|
||||||
|
version = "1.0.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "52d74260d9bf6944e2208aa46841b4b8f0d7ffc0849a06837b2f510337f86b2b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "getrandom"
|
||||||
|
version = "0.2.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"libc",
|
||||||
|
"wasi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lazy_static"
|
||||||
|
version = "1.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.112"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-traits"
|
||||||
|
version = "0.2.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ppv-lite86"
|
||||||
|
version = "0.2.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro-error"
|
||||||
|
version = "1.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro-error-attr",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
"version_check",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro-error-attr"
|
||||||
|
version = "1.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"version_check",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.34"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2f84e92c0f7c9d58328b85a78557813e4bd845130db68d7184635344399423b1"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-xid",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc_unroll"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8ab97d993c96374333148bad5043d3c85a572c1ca81d13b9cf92f23f5ef72f54"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro-error",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proptest"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e0d9cc07f18492d879586c92b485def06bc850da3118075cd45d50e9c95b0e5"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"byteorder",
|
||||||
|
"lazy_static",
|
||||||
|
"num-traits",
|
||||||
|
"quick-error",
|
||||||
|
"rand",
|
||||||
|
"rand_chacha",
|
||||||
|
"rand_xorshift",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quick-error"
|
||||||
|
version = "2.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rand"
|
||||||
|
version = "0.8.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"rand_chacha",
|
||||||
|
"rand_core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rand_chacha"
|
||||||
|
version = "0.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
||||||
|
dependencies = [
|
||||||
|
"ppv-lite86",
|
||||||
|
"rand_core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rand_core"
|
||||||
|
version = "0.6.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
|
||||||
|
dependencies = [
|
||||||
|
"getrandom",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rand_xorshift"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
|
||||||
|
dependencies = [
|
||||||
|
"rand_core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.6.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "store-ref-scanner"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"camino",
|
||||||
|
"once_cell",
|
||||||
|
"proc_unroll",
|
||||||
|
"proptest",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "1.0.83"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "23a1dfb999630e338648c83e91c59a4e9fb7620f520c3194b6b89e276f2f1959"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-xid",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-xid"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "version_check"
|
||||||
|
version = "0.9.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasi"
|
||||||
|
version = "0.10.2+wasi-snapshot-preview1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
|
1336
users/zseri/store-ref-scanner/Cargo.nix
Normal file
1336
users/zseri/store-ref-scanner/Cargo.nix
Normal file
File diff suppressed because it is too large
Load diff
16
users/zseri/store-ref-scanner/Cargo.toml
Normal file
16
users/zseri/store-ref-scanner/Cargo.toml
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
[package]
|
||||||
|
name = "store-ref-scanner"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
camino = "1.0"
|
||||||
|
once_cell = "1.9.0"
|
||||||
|
proc_unroll = "0.1"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
|
||||||
|
[dev-dependencies.proptest]
|
||||||
|
version = "1.0"
|
||||||
|
default-features = false
|
||||||
|
features = ["std"]
|
5
users/zseri/store-ref-scanner/default.nix
Normal file
5
users/zseri/store-ref-scanner/default.nix
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
# Builds the crate from the generated Cargo.nix and runs its test suite
# as part of the build.
{ depot, pkgs, ... }:

let
  cargoNix = import ./Cargo.nix { inherit pkgs; };
in
cargoNix.rootCrate.build.override { runTests = true; }
|
150
users/zseri/store-ref-scanner/src/hbm.rs
Normal file
150
users/zseri/store-ref-scanner/src/hbm.rs
Normal file
|
@ -0,0 +1,150 @@
|
||||||
|
/// Set of allowed ASCII bytes, packed as a 128-bit mask into 16 bytes.
///
/// Byte `b` (with `b < 0x80`) is a member when bit `b % 8` of element
/// `b / 8` is set. Bytes `>= 0x80` can never be members.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct HalfBytesMask(pub [u8; 16]);

impl HalfBytesMask {
    /// The 32 bytes `0-9` and `a-z` without `e`, `o`, `t` and `u`.
    pub const B32_REVSHA256: HalfBytesMask =
        HalfBytesMask([0, 0, 0, 0, 0, 0, 255, 3, 0, 0, 0, 0, 222, 127, 207, 7]);

    /// The 64 bytes `+`, `0-9`, `A-Z`, `_` and `a-z`.
    pub const B64_BLAKE2B256: HalfBytesMask = HalfBytesMask([
        0, 0, 0, 0, 0, 8, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7,
    ]);

    /// Packs a table with one flag per ASCII byte into a mask.
    ///
    /// `x[b]` tells whether byte `b` is allowed.
    #[inline]
    pub const fn from_expanded(x: [bool; 128]) -> Self {
        let mut ret = [0u8; 16];
        // `for` is not available in `const fn`, plain `while` loops are.
        let mut idx = 0;
        while idx < 16 {
            let mut packed = 0u8;
            let mut bit = 0;
            while bit < 8 {
                if x[idx * 8 + bit] {
                    packed |= 1u8 << bit;
                }
                bit += 1;
            }
            ret[idx] = packed;
            idx += 1;
        }
        Self(ret)
    }

    /// create a mask by allowing all characters via the mask which are included in the given string
    ///
    /// Every byte goes through [`HalfBytesMask::set`], so non-ASCII bytes
    /// panic in builds with debug assertions and are ignored otherwise.
    pub fn from_bytes(s: &[u8]) -> Self {
        s.iter().fold(Self([0u8; 16]), |mut ret, &i| {
            ret.set(i, true);
            ret
        })
    }

    /// Unpacks the mask into a table with one flag per ASCII byte
    /// (the inverse of [`HalfBytesMask::from_expanded`]).
    pub const fn into_expanded(self) -> [bool; 128] {
        let Self(packed) = self;
        let mut ret = [false; 128];
        let mut idx = 0;
        while idx < 16 {
            let cur = packed[idx];
            let mut bit = 0;
            while bit < 8 {
                ret[idx * 8 + bit] = (cur >> bit) & 0b1 != 0;
                bit += 1;
            }
            idx += 1;
        }
        ret
    }

    /// Returns whether `byte` is allowed by this mask.
    ///
    /// Bytes `>= 0x80` lie outside of the 128 tracked positions and are
    /// reported as not contained (indexing the array directly would panic
    /// for them, which matters when scanning arbitrary binary data).
    pub fn contains(&self, byte: u8) -> bool {
        match self.0.get(usize::from(byte / 8)) {
            Some(&block) => (block >> (byte % 8)) & 0b1 != 0,
            None => false,
        }
    }

    /// Allows (`allow == true`) or forbids (`allow == false`) `byte`.
    ///
    /// # Panics
    ///
    /// With debug assertions enabled this panics for `byte >= 0x80`;
    /// without them such a call is a no-op.
    pub fn set(&mut self, byte: u8, allow: bool) {
        if byte >= 0x80 {
            if cfg!(debug_assertions) {
                panic!(
                    "tried to manipulate invalid byte {:?} in HalfBytesMask",
                    byte
                );
            } else {
                return;
            }
        }
        let block = &mut self.0[usize::from(byte / 8)];
        let bitpat = 1u8 << (byte % 8);
        if allow {
            *block |= bitpat;
        } else {
            *block &= !bitpat;
        }
    }

    /// Number of bytes allowed by this mask (at most 128, so it fits `u8`).
    #[cfg(test)]
    fn count_ones(&self) -> u8 {
        self.0
            .iter()
            .map(|i| i.count_ones())
            .sum::<u32>()
            .try_into()
            .unwrap()
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts a table of `0`/`1` flags into the `bool` table that
    /// `HalfBytesMask::from_expanded` takes.
    fn flags(bits: [u8; 128]) -> [bool; 128] {
        bits.map(|bit| bit != 0)
    }

    /// The predefined masks allow exactly as many bytes as their alphabets have.
    #[test]
    fn maskbase() {
        let expectations = [
            (HalfBytesMask::B32_REVSHA256, 32),
            (HalfBytesMask::B64_BLAKE2B256, 64),
        ];
        for (mask, allowed) in expectations {
            assert_eq!(mask.count_ones(), allowed);
        }
    }

    /// The predefined masks agree with their spelled-out per-byte tables.
    #[test]
    fn dflmask() {
        let nothing = flags([
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ]);
        assert_eq!(HalfBytesMask::from_expanded(nothing), Default::default());

        let base32 = flags([
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
        ]);
        assert_eq!(
            HalfBytesMask::from_expanded(base32),
            HalfBytesMask::B32_REVSHA256,
        );

        let base64 = flags([
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
        ]);
        assert_eq!(
            HalfBytesMask::from_expanded(base64),
            HalfBytesMask::B64_BLAKE2B256,
        );
    }

    proptest::proptest! {
        /// Expanding a mask and packing it again yields the same mask.
        #[test]
        fn hbm_roundtrip(s: [u8; 16]) {
            let original = HalfBytesMask(s);
            let repacked = HalfBytesMask::from_expanded(original.into_expanded());
            assert_eq!(original, repacked);
        }
    }
}
|
225
users/zseri/store-ref-scanner/src/lib.rs
Normal file
225
users/zseri/store-ref-scanner/src/lib.rs
Normal file
|
@ -0,0 +1,225 @@
|
||||||
|
// TODO: make this no_std if possible
|
||||||
|
|
||||||
|
use camino::Utf8PathBuf;
|
||||||
|
|
||||||
|
mod hbm;
|
||||||
|
pub use hbm::HalfBytesMask;
|
||||||
|
|
||||||
|
mod spec;
|
||||||
|
pub use spec::*;
|
||||||
|
|
||||||
|
/// Upper bound (in bytes) on the length of a single store basename
/// returned by the scanner.
const BASENAME_MAXLEN: usize = 255;
|
||||||
|
|
||||||
|
/// Interface of the inputs a scanner can work on (usually byte slices).
///
/// An input exposes its bytes via `AsRef<[u8]>` and can be consumed from
/// the front.
pub trait ScannerInput: Sized + AsRef<[u8]> {
    /// Splits the input into two at the given index.
    ///
    /// Afterwards `self` contains the elements `[at, len)`, and the returned
    /// input part contains the elements `[0, at)`.
    fn split_to(&mut self, at: usize) -> Self;

    /// Marks the input as fully consumed.
    ///
    /// The slice implementations in this crate replace `self` with an
    /// empty slice.
    fn finish(&mut self);
}
|
||||||
|
|
||||||
|
impl ScannerInput for &[u8] {
|
||||||
|
fn split_to(&mut self, at: usize) -> Self {
|
||||||
|
let (a, b) = self.split_at(at);
|
||||||
|
*self = b;
|
||||||
|
a
|
||||||
|
}
|
||||||
|
|
||||||
|
fn finish(&mut self) {
|
||||||
|
*self = &[];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ScannerInput for &mut [u8] {
|
||||||
|
fn split_to(&mut self, at: usize) -> Self {
|
||||||
|
// Lifetime dance taken from `impl Write for &mut [u8]`.
|
||||||
|
// Taken from crate `std`.
|
||||||
|
let (a, b) = core::mem::replace(self, &mut []).split_at_mut(at);
|
||||||
|
*self = b;
|
||||||
|
a
|
||||||
|
}
|
||||||
|
|
||||||
|
fn finish(&mut self) {
|
||||||
|
*self = &mut [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// this is the primary structure of this crate
|
||||||
|
///
|
||||||
|
/// it represents a scanner which scans binary slices for store references,
|
||||||
|
/// and implements an iterator interfaces which returns these as byte slices.
|
||||||
|
pub struct StoreRefScanner<'x, Input: 'x> {
|
||||||
|
input: Input,
|
||||||
|
spec: &'x StoreSpec,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the distance in bytes from the start of `whole_buffer` to the
/// start of `part`.
///
/// Only the two start addresses are compared, so the result is only
/// meaningful when `part` lies within `whole_buffer`.
///
/// Taken from crate `yz-string-utils`.
fn get_offset_of<T>(whole_buffer: &T, part: &T) -> usize
where
    T: AsRef<[u8]> + ?Sized,
{
    // NOTE: originally I wanted to use offset_from() here once it's stable,
    // but according to https://github.com/rust-lang/rust/issues/41079#issuecomment-657163887
    // this would be UB in cases where the code below isn't.
    let part_start = part.as_ref().as_ptr() as usize;
    let buffer_start = whole_buffer.as_ref().as_ptr() as usize;
    part_start - buffer_start
}
|
||||||
|
|
||||||
|
impl<'x, Input> StoreRefScanner<'x, Input>
|
||||||
|
where
|
||||||
|
Input: ScannerInput + 'x,
|
||||||
|
{
|
||||||
|
pub fn new(input: Input, spec: &'x StoreSpec) -> Self {
|
||||||
|
for i in [&spec.valid_hashbytes, &spec.valid_restbytes] {
|
||||||
|
for j in [b'\0', b' ', b'\t', b'\n', b'/', b'\\'] {
|
||||||
|
assert!(!i.contains(j));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Self { input, spec }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'x, Input: 'x> Iterator for StoreRefScanner<'x, Input>
|
||||||
|
where
|
||||||
|
Input: ScannerInput + 'x,
|
||||||
|
{
|
||||||
|
type Item = Input;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Input> {
|
||||||
|
let empty_path = camino::Utf8Path::new("");
|
||||||
|
let hbl: usize = self.spec.hashbytes_len.into();
|
||||||
|
while !self.input.as_ref().is_empty() {
|
||||||
|
if self.spec.path_to_store != empty_path {
|
||||||
|
let p2sas = self.spec.path_to_store.as_str();
|
||||||
|
if self.input.as_ref().starts_with(p2sas.as_bytes()) {
|
||||||
|
self.input.split_to(p2sas.len());
|
||||||
|
} else {
|
||||||
|
self.input.split_to(1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let hsep = matches!(self.input.as_ref().iter().next(), Some(b'/') | Some(b'\\'));
|
||||||
|
self.input.split_to(1);
|
||||||
|
if hsep && self.spec.check_rest(self.input.as_ref()) {
|
||||||
|
// we have found a valid hash
|
||||||
|
// rest contains the store basename and all following components
|
||||||
|
// now let's search for the end
|
||||||
|
// and then cut off possible following components after the basename
|
||||||
|
let rlen = self
|
||||||
|
.input
|
||||||
|
.as_ref()
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.take(BASENAME_MAXLEN)
|
||||||
|
.skip(hbl)
|
||||||
|
.find(|&(_, &i)| !self.spec.valid_restbytes.contains(i))
|
||||||
|
.map(|(eosp, _)| eosp)
|
||||||
|
.unwrap_or(core::cmp::min(BASENAME_MAXLEN, self.input.as_ref().len()));
|
||||||
|
return Some(self.input.split_to(rlen));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.input.finish();
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a scanner over `input` and collects the found references.
    ///
    /// we convert everything into strings because it is way easier to compare elements in error messages
    fn scan<'a>(input: &'a [u8], spec: &StoreSpec) -> Vec<&'a str> {
        StoreRefScanner::new(input, spec)
            .map(|i| core::str::from_utf8(i).unwrap())
            .collect()
    }

    #[test]
    fn simple_nix2() {
        let drv: &[u8] = br#"
Derive([("out","","r:sha256","")],[("/nix/store/2ax7bvjdfkzim69q957i0jlg0nvmapg0-util-linux-2.37.2.drv",["dev"]),("/nix/store/6b55ssmh8pzqsc4q4kw1yl3kqvr4fvqj-bash-5.1-p12.drv",["out"]),("/nix/store/fp2vx24kczlzv84avds28wyzsmrn8kyv-source.drv",["out"]),("/nix/store/s6c2lm5hpsvdwnxq9y1g3ngncghjzc3k-stdenv-linux.drv",["out"]),("/nix/store/xlnzpf4mzghi8vl0krabrgcbnqk5qjf3-pkg-config-wrapper-0.29.2.drv",["out"])],["/nix/store/03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch","/nix/store/2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/0g15yibzzi3rmw29gqlbms05x9dbghbvh61v1qggydvmzh3bginw/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs","/0sdk1r4l43yw4g6lmqdhd92vhdfhlwz3m76jxzvzsqsv63czw2km"),("builder","/0g15yibzzi3rmw29gqlbms05x9dbghbvh61v1qggydvmzh3bginw/bin/bash"),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck",""),("doInstallCheck",""),("makeFlags","PREFIX=/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9 UDEVLIBDIR=/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9/lib/udev/"),("name","bcache-tools-1.0.7"),("nativeBuildInputs","/1kw0rwgdyq9q69wmmsa5d2kap6p52b0yldbzi4w17bhcq5g5cp2f"),("out","/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9"),("outputHashAlgo","sha256"),("outputHashMode","recursive"),("outputs","out"),("patches","/nix/store/2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch /nix/store/03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch"),("pname","bcache-tools"),("preBuild","sed -e \"s|/bin/sh|/0g15yibzzi3rmw29gqlbms05x9dbghbvh61v1qggydvmzh3bginw/bin/sh|\" -i *.rules\n"),("preInstall","mkdir -p \"$out/sbin\" \"$out/lib/udev/rules.d\" \"$out/share/man/man8\"\n"),("prePatch","sed -e 
\"/INSTALL.*initramfs\\/hook/d\" \\\n -e \"/INSTALL.*initcpio\\/install/d\" \\\n -e \"/INSTALL.*dracut\\/module-setup.sh/d\" \\\n -e \"s/pkg-config/$PKG_CONFIG/\" \\\n -i Makefile\n"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/6izcafvfcbz19chi7hl20834g0fa043n-source"),("stdenv","/01ncyv8bxibj0imgfvmxgqy648n697bachil6aw6i46g1jk0bbds"),("strictDeps",""),("system","x86_64-linux"),("version","1.0.7")])
"#;
        let refs = scan(drv, &*SPEC_DFL_NIX2);
        // references are expected in order of appearance, duplicates included
        let refs_expect: Vec<&str> = vec![
            "2ax7bvjdfkzim69q957i0jlg0nvmapg0-util-linux-2.37.2.drv",
            "6b55ssmh8pzqsc4q4kw1yl3kqvr4fvqj-bash-5.1-p12.drv",
            "fp2vx24kczlzv84avds28wyzsmrn8kyv-source.drv",
            "s6c2lm5hpsvdwnxq9y1g3ngncghjzc3k-stdenv-linux.drv",
            "xlnzpf4mzghi8vl0krabrgcbnqk5qjf3-pkg-config-wrapper-0.29.2.drv",
            "03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch",
            "2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch",
            "9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh",
            "9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh",
            "2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch",
            "03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch",
            "6izcafvfcbz19chi7hl20834g0fa043n-source",
        ];
        assert_eq!(refs, refs_expect);
    }

    #[test]
    fn simple_yzix1() {
        // I haven't yet produced any yzix derivation which included /yzixs absolute paths...
        let fake: &[u8] = br#"
/yzixs/4Zx1PBoft1YyAuKdhjAY1seZFHloxQ+8voHQRkRMuys: ASCII text
/yzixs/dNE3yogD4JHKHzNa2t3jQMZddT8wjqlMDB0naDIFo0A: ASCII text
/yzixs/FMluSVOHLc4bxX7F4lBCXafNljBnDn+rAM5HzG7k8LI: unified diff output, ASCII text
/yzixs/g2G3GRL87hGEdw9cq2BZWqDQP_HeHSPRLbJ9P9KH+HI: unified diff output, ASCII text
/yzixs/H08Av1ZAONwFdzVLpFQm0Sc0dvyk0sbnk82waoBig7I: ASCII text
/yzixs/IndARQp+gaGDLS3K+PeyXdaRqAcCyS3EIbRXkkYjC94: unified diff output, ASCII text
/yzixs/IrLPnbkEolTAuWRxkXpuvVs6Imb1iB6wUJcI+fxWwkU: POSIX shell script, ASCII text executable
/yzixs/JsS_H3n3TSh2R6fiIzgOPZdjSmRkV71vGxstJJKPmr4: unified diff output, ASCII text
/yzixs/LZ6pQh1x8DRxZ2IYzetBRS4LuE__IXFjpOfQPxHVwpw: unified diff output, ASCII text
/yzixs/mEi2RPep9daRs0JUvwt1JsDfgYSph5sH_+_ihwn8IGQ: ASCII text
/yzixs/nd4DyljinP3auDMHL_LrpsRJkWQpSHQK2jqtyyzWcBA: POSIX shell script, ASCII text executable
/yzixs/nzpaknF0_ONSHtd0i_e1E3pkLF1QPeJQhAB7x9Ogo_M: unified diff output, ASCII text
/yzixs/UZ3uzVUUMC1gKGLw6tg_aLFwoFrJedXB3xbhEgQOaiY: unified diff output, ASCII text
/yzixs/VKyXxKTXsDGxYJ24YgbvCc1bZkA5twp3TC+Gbi4Kwd8: unified diff output, ASCII text
/yzixs/VPJMl8O1xkc1LsJznpoQrCrQO0Iy+ODCPsgoUBLiRZc: unified diff output, ASCII text
/yzixs/W6r1ow001ASHRj+gtRfyj9Fb_gCO_pBztX8WhYXVdIc: unified diff output, ASCII text
/yzixs/xvwEcXIob_rQynUEtQiQbwaDXEobTVKEGaBMir9oH9k: unified diff output, ASCII text
/yzixs/ZPvQbRJrtyeSITvW3FUZvw99hhNOO3CFqGgmWgScxcg: ASCII text
"#;
        let refs = scan(fake, &*SPEC_DFL_YZIX1);
        let refs_expect: Vec<&str> = vec![
            "4Zx1PBoft1YyAuKdhjAY1seZFHloxQ+8voHQRkRMuys",
            "dNE3yogD4JHKHzNa2t3jQMZddT8wjqlMDB0naDIFo0A",
            "FMluSVOHLc4bxX7F4lBCXafNljBnDn+rAM5HzG7k8LI",
            "g2G3GRL87hGEdw9cq2BZWqDQP_HeHSPRLbJ9P9KH+HI",
            "H08Av1ZAONwFdzVLpFQm0Sc0dvyk0sbnk82waoBig7I",
            "IndARQp+gaGDLS3K+PeyXdaRqAcCyS3EIbRXkkYjC94",
            "IrLPnbkEolTAuWRxkXpuvVs6Imb1iB6wUJcI+fxWwkU",
            "JsS_H3n3TSh2R6fiIzgOPZdjSmRkV71vGxstJJKPmr4",
            "LZ6pQh1x8DRxZ2IYzetBRS4LuE__IXFjpOfQPxHVwpw",
            "mEi2RPep9daRs0JUvwt1JsDfgYSph5sH_+_ihwn8IGQ",
            "nd4DyljinP3auDMHL_LrpsRJkWQpSHQK2jqtyyzWcBA",
            "nzpaknF0_ONSHtd0i_e1E3pkLF1QPeJQhAB7x9Ogo_M",
            "UZ3uzVUUMC1gKGLw6tg_aLFwoFrJedXB3xbhEgQOaiY",
            "VKyXxKTXsDGxYJ24YgbvCc1bZkA5twp3TC+Gbi4Kwd8",
            "VPJMl8O1xkc1LsJznpoQrCrQO0Iy+ODCPsgoUBLiRZc",
            "W6r1ow001ASHRj+gtRfyj9Fb_gCO_pBztX8WhYXVdIc",
            "xvwEcXIob_rQynUEtQiQbwaDXEobTVKEGaBMir9oH9k",
            "ZPvQbRJrtyeSITvW3FUZvw99hhNOO3CFqGgmWgScxcg",
        ];
        assert_eq!(refs, refs_expect);
    }

    proptest::proptest! {
        /// Scanning arbitrary bytes with the Nix spec must not panic.
        #[test]
        fn nocrash_nix2(s: Vec<u8>) {
            let scanner = StoreRefScanner::new(&s[..], &*SPEC_DFL_NIX2);
            let _ = scanner.count();
        }

        /// Scanning arbitrary bytes with the yzix spec must not panic.
        #[test]
        fn nocrash_yzix1(s: Vec<u8>) {
            let scanner = StoreRefScanner::new(&s[..], &*SPEC_DFL_YZIX1);
            let _ = scanner.count();
        }
    }
}
|
46
users/zseri/store-ref-scanner/src/spec.rs
Normal file
46
users/zseri/store-ref-scanner/src/spec.rs
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
use crate::hbm::HalfBytesMask;
|
||||||
|
use camino::Utf8PathBuf;
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
|
||||||
|
pub struct StoreSpec {
|
||||||
|
/// path to store without trailing slash
|
||||||
|
pub path_to_store: Utf8PathBuf,
|
||||||
|
|
||||||
|
/// compressed map of allowed ASCII characters in hash part
|
||||||
|
pub valid_hashbytes: HalfBytesMask,
|
||||||
|
|
||||||
|
/// compressed map of allowed ASCII characters in part after hash
|
||||||
|
pub valid_restbytes: HalfBytesMask,
|
||||||
|
|
||||||
|
/// exact length of hash part of store paths
|
||||||
|
pub hashbytes_len: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StoreSpec {
|
||||||
|
pub(crate) fn check_rest(&self, rest: &[u8]) -> bool {
|
||||||
|
let hbl = self.hashbytes_len.into();
|
||||||
|
rest.iter()
|
||||||
|
.take(hbl)
|
||||||
|
.take_while(|&&i| self.valid_hashbytes.contains(i))
|
||||||
|
.count()
|
||||||
|
== hbl
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub static SPEC_DFL_NIX2: Lazy<StoreSpec> = Lazy::new(|| StoreSpec {
|
||||||
|
path_to_store: "/nix/store".into(),
|
||||||
|
valid_hashbytes: HalfBytesMask::B32_REVSHA256,
|
||||||
|
valid_restbytes: HalfBytesMask::from_bytes(
|
||||||
|
b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-._?=",
|
||||||
|
),
|
||||||
|
hashbytes_len: 32,
|
||||||
|
});
|
||||||
|
|
||||||
|
pub static SPEC_DFL_YZIX1: Lazy<StoreSpec> = Lazy::new(|| StoreSpec {
|
||||||
|
path_to_store: "/yzixs".into(),
|
||||||
|
valid_hashbytes: HalfBytesMask::B64_BLAKE2B256,
|
||||||
|
valid_restbytes: HalfBytesMask::from_bytes(
|
||||||
|
b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-._?=",
|
||||||
|
),
|
||||||
|
hashbytes_len: 43,
|
||||||
|
});
|
Loading…
Reference in a new issue