feat(tvix/nix-compat): add narinfo parsing and serialisation

Change-Id: I72c63414794642ca8d85c3f635f49db888420c40
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9852
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
edef 2023-10-27 10:54:31 +00:00
parent b1ad94cc9a
commit 9253bf6632
8 changed files with 492 additions and 76 deletions

27
tvix/Cargo.lock generated
View file

@ -608,16 +608,6 @@ dependencies = [
"typenum",
]
[[package]]
name = "ctor"
version = "0.1.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096"
dependencies = [
"quote 1.0.26",
"syn 1.0.109",
]
[[package]]
name = "data-encoding"
version = "2.3.3"
@ -1444,12 +1434,14 @@ dependencies = [
"hex-literal",
"lazy_static",
"nom",
"pretty_assertions",
"serde",
"serde_json",
"sha2",
"test-case",
"test-generator",
"thiserror",
"zstd",
]
[[package]]
@ -1534,15 +1526,6 @@ version = "6.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267"
[[package]]
name = "output_vt100"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66"
dependencies = [
"winapi",
]
[[package]]
name = "overload"
version = "0.1.1"
@ -1687,13 +1670,11 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "pretty_assertions"
version = "1.3.0"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a25e9bcb20aa780fd0bb16b72403a9064d6b3f22f026946029acb941a50af755"
checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
dependencies = [
"ctor",
"diff",
"output_vt100",
"yansi",
]

View file

@ -1796,29 +1796,6 @@ rec {
};
resolvedDefaultFeatures = [ "std" ];
};
"ctor" = rec {
crateName = "ctor";
version = "0.1.26";
edition = "2018";
sha256 = "15m0wqhv12p25xkxz5dxvg23r7a6bkh7p8zi1cdhgswjhdl028vd";
procMacro = true;
authors = [
"Matt Mastracci <matthew@mastracci.com>"
];
dependencies = [
{
name = "quote";
packageId = "quote 1.0.26";
}
{
name = "syn";
packageId = "syn 1.0.109";
usesDefaultFeatures = false;
features = [ "full" "parsing" "printing" "proc-macro" ];
}
];
};
"data-encoding" = rec {
crateName = "data-encoding";
version = "2.3.3";
@ -4259,6 +4236,10 @@ rec {
name = "lazy_static";
packageId = "lazy_static";
}
{
name = "pretty_assertions";
packageId = "pretty_assertions";
}
{
name = "serde_json";
packageId = "serde_json";
@ -4271,6 +4252,10 @@ rec {
name = "test-generator";
packageId = "test-generator";
}
{
name = "zstd";
packageId = "zstd";
}
];
features = {
"async" = [ "futures-util" ];
@ -4489,23 +4474,6 @@ rec {
};
resolvedDefaultFeatures = [ "raw_os_str" ];
};
"output_vt100" = rec {
crateName = "output_vt100";
version = "0.1.3";
edition = "2018";
sha256 = "0rpvpiq7gkyvvwyp9sk0zxhbk99ldlrv5q3ycr03wkmbxgx270k2";
authors = [
"Phuntsok Drak-pa <phundrak@phundrak.fr>"
];
dependencies = [
{
name = "winapi";
packageId = "winapi";
features = [ "winuser" "winbase" "consoleapi" "processenv" ];
}
];
};
"overload" = rec {
crateName = "overload";
version = "0.1.1";
@ -4871,29 +4839,19 @@ rec {
};
"pretty_assertions" = rec {
crateName = "pretty_assertions";
version = "1.3.0";
version = "1.4.0";
edition = "2018";
sha256 = "0mgp1ajl3fdc55h989ph48znnk86m41j9dqnpg80yy5a435rnpm2";
sha256 = "0rmsnqlpmpfjp5gyi31xgc48kdhc1kqn246bnc494nwadhdfwz5g";
authors = [
"Colin Kiegel <kiegel@gmx.de>"
"Florent Fayolle <florent.fayolle69@gmail.com>"
"Tom Milligan <code@tommilligan.net>"
];
dependencies = [
{
name = "ctor";
packageId = "ctor";
target = { target, features }: (target."windows" or false);
}
{
name = "diff";
packageId = "diff";
}
{
name = "output_vt100";
packageId = "output_vt100";
target = { target, features }: (target."windows" or false);
}
{
name = "yansi";
packageId = "yansi";

View file

@ -26,6 +26,8 @@ serde_json = "1.0"
test-case = "2.2.2"
criterion = { version = "0.4", features = ["html_reports"] }
hex-literal = "0.4.1"
pretty_assertions = "1.4.0"
zstd = "^0.9.0"
[dev-dependencies.test-generator]
# This fork of test-generator adds support for cargo workspaces, see
@ -36,3 +38,7 @@ rev = "82e799979980962aec1aa324ec6e0e4cad781f41"
[[bench]]
name = "derivation_parse_aterm"
harness = false
[[bench]]
name = "narinfo_parse"
harness = false

View file

@ -0,0 +1,65 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use lazy_static::lazy_static;
use nix_compat::narinfo::NarInfo;
use std::{io, str};
// A single narinfo document, fed to the `single` benchmark case below.
// NOTE(review): the `References` value continues on a second physical line here, and
// `NarInfo::parse` gives up on any line that lacks a `:` — confirm whether that line
// break is intentional or an artifact of how this file was rendered.
const SAMPLE: &str = r#"StorePath: /nix/store/1pajsq519irjy86vli20bgq1wr1q3pny-banking-0.3.0
URL: nar/0rdn027rxqbl42bv9jxhsipgq2hwqdapvwmdzligmzdmz2p9vybs.nar.xz
Compression: xz
FileHash: sha256:0rdn027rxqbl42bv9jxhsipgq2hwqdapvwmdzligmzdmz2p9vybs
FileSize: 92828
NarHash: sha256:0cfnydzp132y69bh20dj76yfd6hc3qdyblbwr9hwn59vfmnb09m0
NarSize: 173352
References: 03d4ncyfh76mgs6sfayl8l6zzdhm219w-python3.9-mt-940-4.23.0 0rhbw783qcjxv3cqln1760i1lmz2yb67-gsettings-desktop-schemas-41.0 1dm9ndgg56ylawpcbdzkhl03fg6777rr-python3.9-six-1.16.0 1pajsq519irjy86vli20bgq1wr1q3pny-banking-0.3.0 2ccy5zc89zpc2aznqxgvzp4wm1bwj05n-bzip2-1.0.6.0.2-bin 32gy3pqk4n725lscdm622yzsg9np3xvs-python3.9-cryptography-36.0.0-dev 35chvqbr7vp9icdki0132fc6np09vrx5-python3.9-bleach-4.1.0 53abh5cz9zi4yh75lfzg99xqy0fdgj4i-python3.9-xmlschema-1.9.2 5p96sifyavb407mnharhyzlw6pn6km1b-glib-2.70.2-bin 6hil8z0zkqcgvaw1qwjyqa8qyaa1lm3k-python3.9-pycairo-1.20.1 803ffb21rv4af521pplb72zjm1ygm9kk-python3.9-pyparsing-2.4.7 al95l8psvmq5di3vdwa75n8w2m0sj2sy-gdk-pixbuf-2.42.6 b09371lq1jjrv43h8jpp82v23igndsn2-python3.9-fints-3.0.1 b53hk557pdk5mq4lv1zrh71a54qazbsm-python3.9-certifi-2021.10.08 bl0cwvwgch92cfsnli4dsah2gxgdickp-gtk+3-3.24.30 cfkq9wi7ypqk26c75dzic5v3nxlzyi58-python3.9-cryptography-36.0.0 cyhg57whqvrx7xf7fvn70dr5836y7zak-python3.9-sepaxml-2.4.1 d810g729g1c4lvp3nv1n3ah6cvpwg7by-cairo-1.16.0-dev dn4fwp0yx6nsa85cr20cwvdmg64xwmcy-python3-3.9.9 dzsj2n0nmq8nv6w0hvy5vb61kim3rzmd-pango-1.50.0 fs6rcnhbjvpxsyw5qiq0q7jx378fjrq7-python3.9-webencodings-0.5.1 g08sxarx191yh2dh0yk2j8icja54aksf-harfbuzz-3.1.2 glanz2lv7m6ak8pql0jcpr3izyp5cxm5-python3.9-pycparser-2.21 gpzx6h0dp5yhcvkfj68zs444ghll7dzm-python3.9-html5lib-1.1 gxyhqkpahahn4h8wbanzfhr1zkxbysid-expat-2.4.2-dev gy3pnc7bpff1h4ylhrivs4cjlvmxl0dk-python3.9-packaging-20.9 hhpqldw0552mf4mjdm2q7zqwy9hpfchd-libpng-apng-1.6.37-dev ig2bdwmplvs6dyg07fdyh006ha768jh1-python3.9-cffi-1.15.0 ij5rm5y6lmqzrwqd1zxckhbii3dg2nq5-glib-2.70.2-dev j5raylzz6fsafbgayyfaydadjl0x22s0-freetype-2.11.1-dev j6w2fbsl49jska4scyr860gz4df9biha-gobject-introspection-1.70.0 jfc99f1hrca6ih6h0n4ax431hjlx96j0-python3.9-brotli-1.0.9 kbazcxnki2qz514rl1plhsj3587hl8bb-python3.9-pysocks-1.7.1 kkljrrrj80fnz59qyfgnv6wvv0cbmpql-libhandy-1.5.0 l82il2lbp757c0smi81qmj4crlcmdz9s-python3.9-pygobject-3.42.0-dev m4zflhr10wz4frhgxqfi43rwvapki1pi-fontconfig-2.13.94-bin 
mbsc1c7mq15vgfzcdma9fglczih9ncfy-python3.9-chardet-4.0.0 mfvaaf4illpwrflg30cij5x4rncp9jin-python3.9-text-unidecode-1.3 msiv2nkdcaf4gvaf2cfnxcjm66j8mjxz-python3.9-elementpath-2.4.0 nmwapds8fcx22vd30d81va7a7a51ywwx-gettext-0.21 pbfraw351mksnkp2ni9c4rkc9cpp89iv-bash-5.1-p12 r8cbf18vrd54rb4psf3m4zlk5sd2jsv3-python3.9-pygobject-3.42.0 rig6npd9sd45ashf6fxcwgxzm7m4p0l3-python3.9-requests-2.26.0 ryj72ashr27gf4kh0ssgi3zpiv8fxw53-librsvg-2.52.4 s2jjq7rk5yrzlv9lyralzvpixg4p6jh3-atk-2.36.0 w1lsr2i37fr0mp1jya04nwa5nf5dxm2n-python3.9-setuptools-57.2.0 whfykra99ahs814l5hp3q5ps8rwzsf3s-python3.9-brotlicffi-1.0.9.2 wqdmghdvc4s95jgpp13fj5v3xar8mlks-python3.9-charset-normalizer-2.0.8 x1ha2nyji1px0iqknbyhdnvw4icw5h3i-python3.9-idna-3.3 z9560qb4ygbi0352m9pglwhi332cxb1f-python3.9-urllib3-1.26.7
Deriver: 2ch8jx910qk6721mp4yqsmvdfgj5c8ir-banking-0.3.0.drv
Sig: cache.nixos.org-1:xcL67rBZPcdVZudDLpLeddkBa0KaFTw5A0udnaa0axysjrQ6Nvd9p3BLZ4rhKgl52/cKiU3c6aq60L8+IcE5Dw==
"#;
lazy_static! {
    /// The narinfo documents stored in the compressed `testdata/narinfo.zst` corpus.
    ///
    /// The corpus is decompressed on first access and leaked, so every entry can borrow
    /// from it with a `'static` lifetime.
    static ref CASES: &'static [&'static str] = {
        let compressed = io::Cursor::new(include_bytes!("../testdata/narinfo.zst"));
        let decompressed: Vec<u8> = zstd::decode_all(compressed).unwrap();
        let corpus: &'static str = str::from_utf8(Vec::leak(decompressed)).unwrap();

        // Documents are separated by a blank line. Each chunk still carries that
        // separator's final newline, which is dropped here.
        let mut entries = Vec::new();
        for chunk in corpus.split_inclusive("\n\n") {
            entries.push(chunk.strip_suffix('\n').unwrap());
        }
        Vec::leak(entries)
    };
}
/// Criterion benchmark group `parse`, measuring [`NarInfo::parse`] throughput.
///
/// * `single` parses the inline [`SAMPLE`] document over and over.
/// * `many` parses every document in [`CASES`] per iteration, collecting the results
///   into a vector that is reused between iterations.
pub fn parse(c: &mut Criterion) {
    let mut group = c.benchmark_group("parse");

    // Case 1: one document.
    let sample_bytes = SAMPLE.len() as u64;
    group.throughput(Throughput::Bytes(sample_bytes));
    group.bench_with_input("single", SAMPLE, |bencher, narinfo| {
        bencher.iter(|| {
            let parsed = NarInfo::parse(black_box(narinfo));
            black_box(parsed);
        });
    });

    // Case 2: the whole corpus. Make sure every document is valid up front so the
    // measurement below never times the failure path.
    for document in CASES.iter().copied() {
        NarInfo::parse(document).expect("should parse");
    }

    let corpus_bytes: u64 = CASES.iter().map(|document| document.len() as u64).sum();
    group.throughput(Throughput::Bytes(corpus_bytes));
    group.bench_with_input("many", &*CASES, |bencher, documents| {
        let mut results = Vec::new();
        bencher.iter(|| {
            results.clear();
            results.extend(
                black_box(documents)
                    .iter()
                    .map(|document| NarInfo::parse(document)),
            );
            black_box(&results);
        });
    });

    group.finish();
}
// Criterion entry point: the `benches` group consists of the `parse` function above.
criterion_group!(benches, parse);
criterion_main!(benches);

View file

@ -1,6 +1,7 @@
pub(crate) mod aterm;
pub mod derivation;
pub mod nar;
pub mod narinfo;
pub mod nixbase32;
pub mod nixhash;
pub mod store_path;

View file

@ -0,0 +1,406 @@
//! NAR info files describe a store path in a traditional Nix binary cache.
//! Over the wire, they are formatted as "Key: value" pairs separated by newlines.
//!
//! Each file contains four kinds of information:
//! 1. the description of the store path itself
//! * store path prefix, digest, and name
//! * NAR hash and size
//! * references
//! 2. authenticity information
//! * zero or more signatures over that description
//! * an optional [CAHash] for content-addressed paths (fixed outputs, sources, and derivations)
//! 3. derivation metadata
//! * deriver (the derivation that produced this path)
//! * system (the system value of that derivation)
//! 4. cache-specific information
//! * URL of the compressed NAR, relative to the NAR info file
//! * compression algorithm used for the NAR
//! * hash and size of the compressed NAR
use data_encoding::BASE64;
use std::{
fmt::{self, Display},
mem,
};
use crate::{
nixbase32,
nixhash::{CAHash, NixHash},
store_path::StorePathRef,
};
/// A parsed NAR info file.
///
/// All string-like fields borrow from the input that was handed to [`NarInfo::parse`].
#[derive(Debug)]
pub struct NarInfo<'a> {
    // core (authenticated, but unverified here)
    /// Store path described by this [NarInfo]
    pub store_path: StorePathRef<'a>,
    /// SHA-256 digest of the NAR file
    pub nar_hash: [u8; 32],
    /// Size of the NAR file in bytes
    pub nar_size: u64,
    /// Store paths known to be referenced by the contents
    pub references: Vec<StorePathRef<'a>>,
    // authenticity
    /// Ed25519 signatures over the path fingerprint
    pub signatures: Vec<Signature<'a>>,
    /// Content address (for content-addressed paths)
    pub ca: Option<CAHash>,
    // derivation metadata
    /// Nix system triple of [`NarInfo::deriver`]
    pub system: Option<&'a str>,
    /// Store path of the derivation that produced this
    pub deriver: Option<StorePathRef<'a>>,
    // cache-specific untrusted metadata
    /// Relative URL of the compressed NAR file
    pub url: &'a str,
    /// Compression method of the NAR file
    /// TODO(edef): default this to bzip2, and have None mean "none" (uncompressed)
    pub compression: Option<&'a str>,
    /// SHA-256 digest of the file at `url`
    pub file_hash: Option<[u8; 32]>,
    /// Size of the file at `url` in bytes
    pub file_size: Option<u64>,
}
impl<'a> NarInfo<'a> {
    /// Parses the textual narinfo representation, borrowing all strings from `input`.
    ///
    /// Every line must have the form `Key: value`. Lines with an unrecognised key are
    /// skipped. `None` is returned when
    /// * a line has no `:` or the `:` is not followed by a space,
    /// * the value of a recognised field fails validation,
    /// * any recognised field other than `Sig` occurs more than once, or
    /// * one of `StorePath`, `URL`, `NarHash`, `NarSize` or `References` is missing.
    pub fn parse(input: &'a str) -> Option<Self> {
        // Stores `value` in `slot`, failing if the field had already been seen.
        fn set_once<T>(slot: &mut Option<T>, value: T) -> Option<()> {
            match slot.replace(value) {
                Some(_) => None,
                None => Some(()),
            }
        }

        // Passes `value` through unless it is empty.
        fn non_empty(value: &str) -> Option<&str> {
            if value.is_empty() {
                None
            } else {
                Some(value)
            }
        }

        // Decodes a `sha256:`-prefixed nixbase32 digest.
        fn sha256(value: &str) -> Option<[u8; 32]> {
            let encoded = value.strip_prefix("sha256:")?;
            nixbase32::decode_fixed::<32>(encoded).ok()
        }

        let mut store_path = None;
        let mut url = None;
        let mut compression = None;
        let mut file_hash = None;
        let mut file_size = None;
        let mut nar_hash = None;
        let mut nar_size = None;
        let mut references = None;
        let mut system = None;
        let mut deriver = None;
        let mut signatures = Vec::new();
        let mut ca = None;

        for line in input.lines() {
            let (key, rest) = line.split_once(':')?;
            let value = rest.strip_prefix(' ')?;

            match key {
                "StorePath" => {
                    let name = value.strip_prefix("/nix/store/")?;
                    let path = StorePathRef::from_bytes(name.as_bytes()).ok()?;
                    set_once(&mut store_path, path)?;
                }
                "URL" => set_once(&mut url, non_empty(value)?)?,
                "Compression" => set_once(&mut compression, non_empty(value)?)?,
                "FileHash" => set_once(&mut file_hash, sha256(value)?)?,
                "FileSize" => set_once(&mut file_size, value.parse::<u64>().ok()?)?,
                "NarHash" => set_once(&mut nar_hash, sha256(value)?)?,
                "NarSize" => set_once(&mut nar_size, value.parse::<u64>().ok()?)?,
                "References" => {
                    let mut paths: Vec<StorePathRef> = Vec::new();
                    if !value.is_empty() {
                        let mut last = "";
                        for name in value.split(' ') {
                            // Entries must be strictly ascending, which also rules out
                            // duplicates.
                            if mem::replace(&mut last, name) >= name {
                                return None;
                            }
                            paths.push(StorePathRef::from_bytes(name.as_bytes()).ok()?);
                        }
                    }
                    set_once(&mut references, paths)?;
                }
                "System" => set_once(&mut system, non_empty(value)?)?,
                "Deriver" => {
                    let path = StorePathRef::from_bytes(value.as_bytes()).ok()?;
                    if !path.name().ends_with(".drv") {
                        return None;
                    }
                    set_once(&mut deriver, path)?;
                }
                // The only field that may legitimately repeat.
                "Sig" => signatures.push(Signature::parse(value)?),
                "CA" => set_once(&mut ca, parse_ca(value)?)?,
                // Unknown field, ignore.
                _ => {}
            }
        }

        Some(NarInfo {
            store_path: store_path?,
            nar_hash: nar_hash?,
            nar_size: nar_size?,
            references: references?,
            signatures,
            ca,
            system,
            deriver,
            url: url?,
            compression,
            file_hash,
            file_size,
        })
    }
}
impl Display for NarInfo<'_> {
    /// Serialises the narinfo as `Key: value` lines, each terminated by a newline.
    ///
    /// Optional fields are left out when unset. `References` is always written; with no
    /// references the line is `References: ` (including the trailing space).
    fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
        let nar_hash = NixHash::Sha256(self.nar_hash);

        writeln!(w, "StorePath: /nix/store/{}", self.store_path)?;
        writeln!(w, "URL: {}", self.url)?;

        if let Some(method) = self.compression {
            writeln!(w, "Compression: {method}")?;
        }
        if let Some(digest) = self.file_hash {
            let file_hash = NixHash::Sha256(digest);
            writeln!(w, "FileHash: {}", fmt_hash(&file_hash))?;
        }
        if let Some(size) = self.file_size {
            writeln!(w, "FileSize: {size}")?;
        }

        writeln!(w, "NarHash: {}", fmt_hash(&nar_hash))?;
        writeln!(w, "NarSize: {}", self.nar_size)?;

        w.write_str("References:")?;
        match self.references.as_slice() {
            [] => w.write_str(" ")?,
            paths => {
                for path in paths {
                    write!(w, " {path}")?;
                }
            }
        }
        w.write_str("\n")?;

        if let Some(deriver) = &self.deriver {
            writeln!(w, "Deriver: {deriver}")?;
        }
        if let Some(system) = self.system {
            writeln!(w, "System: {system}")?;
        }
        for signature in self.signatures.iter() {
            writeln!(w, "Sig: {signature}")?;
        }
        if let Some(ca) = self.ca.as_ref() {
            writeln!(w, "CA: {}", fmt_ca(ca))?;
        }

        Ok(())
    }
}
/// A named signature, written as `<name>:<base64>` in a narinfo `Sig` field.
#[derive(Debug)]
pub struct Signature<'a> {
    /// Everything before the first `:` of the textual form.
    name: &'a str,
    /// The 64 decoded signature bytes.
    bytes: [u8; 64],
}
impl<'a> Signature<'a> {
    /// Parses a signature of the form `<name>:<base64>`.
    ///
    /// `name` is everything before the first `:` and is borrowed from `input`. The
    /// remainder must be the padded base64 encoding of exactly 64 bytes.
    ///
    /// Returns `None` if there is no `:`, if the payload is not 88 characters long, or
    /// if it is not valid base64 decoding to exactly 64 bytes.
    pub fn parse(input: &'a str) -> Option<Signature<'a>> {
        // Padded base64 length of a 64-byte signature.
        const ENCODED_LEN: usize = 88;
        // Decode buffer size for an `ENCODED_LEN`-character input; two bytes are slack
        // that the padding accounts for.
        const BUF_LEN: usize = 66;

        let (name, bytes64) = input.split_once(':')?;

        // `decode_mut` panics unless the output buffer is exactly as long as
        // `decode_len(input.len())`. Since this input is untrusted, every other length
        // has to be rejected before decoding.
        if bytes64.len() != ENCODED_LEN {
            return None;
        }

        let mut buf = [0; BUF_LEN];
        let mut bytes = [0; 64];
        match BASE64.decode_mut(bytes64.as_bytes(), &mut buf) {
            Ok(64) => bytes.copy_from_slice(&buf[..64]),
            _ => return None,
        }

        Some(Signature { name, bytes })
    }

    /// Name of the signature, as it appeared before the `:`.
    pub fn name(&self) -> &'a str {
        self.name
    }

    /// The raw 64 signature bytes.
    pub fn bytes(&self) -> &[u8; 64] {
        &self.bytes
    }
}
impl Display for Signature<'_> {
fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
write!(w, "{}:{}", self.name, BASE64.encode(&self.bytes))
}
}
/// Parses the value of a narinfo `CA` field.
///
/// Accepted forms:
/// * `text:sha256:<nixbase32>` → [`CAHash::Text`]
/// * `fixed:r:<algo>:<nixbase32>` → [`CAHash::Nar`]
/// * `fixed:<algo>:<nixbase32>` → [`CAHash::Flat`]
///
/// where `<algo>` is one of `md5`, `sha1`, `sha256` or `sha512`. Any other input
/// yields `None`.
pub fn parse_ca(s: &str) -> Option<CAHash> {
    let (tag, s) = s.split_once(':')?;
    match tag {
        "text" => {
            let digest = s.strip_prefix("sha256:")?;
            let digest = nixbase32::decode_fixed(digest).ok()?;
            Some(CAHash::Text(digest))
        }
        // The `r:` marker selects the recursive (NAR) variant. Every hash algorithm is
        // accepted after it, so that whatever the `CA` serialiser writes for a
        // `CAHash::Nar` can be read back.
        "fixed" => match s.strip_prefix("r:") {
            Some(hash) => parse_hash(hash).map(CAHash::Nar),
            None => parse_hash(s).map(CAHash::Flat),
        },
        _ => None,
    }
}
/// Display adapter that renders a [`CAHash`] the way it appears in a `CA` field.
// Lower-case name so that call sites read like a formatting function.
#[allow(non_camel_case_types)]
struct fmt_ca<'a>(&'a CAHash);

impl Display for fmt_ca<'_> {
    fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
        match self.0 {
            CAHash::Text(digest) => {
                let hash = NixHash::Sha256(*digest);
                write!(w, "text:{}", fmt_hash(&hash))
            }
            CAHash::Nar(hash) => write!(w, "fixed:r:{}", fmt_hash(hash)),
            CAHash::Flat(hash) => write!(w, "fixed:{}", fmt_hash(hash)),
        }
    }
}
/// Parses `<algo>:<nixbase32 digest>` into a [`NixHash`].
///
/// `<algo>` must be `md5`, `sha1`, `sha256` or `sha512`, and the digest must decode to
/// the length that variant stores. Anything else yields `None`.
fn parse_hash(s: &str) -> Option<NixHash> {
    let (algo, encoded) = s.split_once(':')?;

    let hash = match algo {
        "md5" => NixHash::Md5(nixbase32::decode_fixed(encoded).ok()?),
        "sha1" => NixHash::Sha1(nixbase32::decode_fixed(encoded).ok()?),
        "sha256" => NixHash::Sha256(nixbase32::decode_fixed(encoded).ok()?),
        "sha512" => NixHash::Sha512(Box::new(nixbase32::decode_fixed(encoded).ok()?)),
        _ => return None,
    };

    Some(hash)
}
/// Display adapter that renders a [`NixHash`] as `<algo>:<nixbase32 digest>`.
// Lower-case name so that call sites read like a formatting function.
#[allow(non_camel_case_types)]
struct fmt_hash<'a>(&'a NixHash);

impl Display for fmt_hash<'_> {
    fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
        let (algo, raw) = match self.0 {
            NixHash::Md5(bytes) => ("md5", &bytes[..]),
            NixHash::Sha1(bytes) => ("sha1", &bytes[..]),
            NixHash::Sha256(bytes) => ("sha256", &bytes[..]),
            NixHash::Sha512(bytes) => ("sha512", &bytes[..]),
        };
        let encoded = nixbase32::encode(raw);
        write!(w, "{algo}:{encoded}")
    }
}
#[cfg(test)]
mod test {
    use lazy_static::lazy_static;
    use pretty_assertions::assert_eq;
    use std::{io, str};

    use super::NarInfo;

    lazy_static! {
        /// The narinfo documents stored in the compressed `testdata/narinfo.zst` corpus,
        /// decompressed on first access and leaked so entries are `'static`.
        static ref CASES: &'static [&'static str] = {
            let compressed = io::Cursor::new(include_bytes!("../testdata/narinfo.zst"));
            let decompressed: Vec<u8> = zstd::decode_all(compressed).unwrap();
            let corpus: &'static str = str::from_utf8(Vec::leak(decompressed)).unwrap();

            // Documents are separated by a blank line. Each chunk still carries that
            // separator's final newline, which is dropped here.
            let mut entries = Vec::new();
            for chunk in corpus.split_inclusive("\n\n") {
                entries.push(chunk.strip_suffix('\n').unwrap());
            }
            Vec::leak(entries)
        };
    }

    /// Every corpus entry must parse, and serialising the result must reproduce the
    /// input text exactly.
    #[test]
    fn roundtrip() {
        for input in CASES.iter().copied() {
            let narinfo = NarInfo::parse(input).expect("should parse");
            let output = narinfo.to_string();
            assert_eq!(input, output, "should roundtrip");
        }
    }
}

View file

@ -124,7 +124,6 @@ fn decode_inner(input: &[u8], output: &mut [u8]) -> Result<(), Nixbase32DecodeEr
Ok(())
}
#[cold]
fn find_invalid(input: &[u8]) -> u8 {
for &c in input {
if !ALPHABET.contains(&c) {

BIN
tvix/nix-compat/testdata/narinfo.zst vendored Normal file

Binary file not shown.