refactor(tvix/nix-compat): -derivation::Hash, +NixHash

This stops using our own custom Hash structure, which was mostly only
used because we had to parse the JSON representation somehow.

Since cl/8217, there's a `NixHash` struct, which is better suited to
hold this data. Converting the format requires a bit of serde labor
though, but that only really matters when interacting with JSON
representations (which we mostly don't).

Change-Id: Idc5ee511e36e6726c71f66face8300a441b0bf4c
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8304
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
This commit is contained in:
Florian Klink 2023-03-14 17:16:05 +01:00 committed by clbot
parent e82385dbe5
commit b55d1f97ce
8 changed files with 201 additions and 87 deletions

View file

@ -3610,6 +3610,10 @@ rec {
packageId = "serde";
features = [ "derive" ];
}
{
name = "serde_json";
packageId = "serde_json";
}
{
name = "sha2";
packageId = "sha2 0.10.6";

View file

@ -1,5 +1,5 @@
//! Implements `builtins.derivation`, the core of what makes Nix build packages.
use nix_compat::derivation::{Derivation, Hash};
use nix_compat::derivation::Derivation;
use nix_compat::{hash_placeholder, nixhash};
use std::cell::RefCell;
use std::collections::{btree_map, BTreeSet};
@ -126,19 +126,18 @@ fn populate_output_configuration(
let output_hash = nixhash::from_str(&hash, a).map_err(Error::InvalidOutputHash)?;
// construct the algo string. Depending on hashMode, we prepend a `r:`.
let algo = match hash_mode.as_deref() {
None | Some("flat") => format!("{}", &output_hash.algo),
Some("recursive") => format!("r:{}", &output_hash.algo),
// construct the NixHashWithMode.
out.hash_with_mode = match hash_mode.as_deref() {
None | Some("flat") => Some(nixhash::NixHashWithMode::Flat(
nixhash::NixHash::new(output_hash.algo, output_hash.digest),
)),
Some("recursive") => Some(nixhash::NixHashWithMode::Recursive(
nixhash::NixHash::new(output_hash.algo, output_hash.digest),
)),
Some(other) => {
return Err(Error::InvalidOutputHashMode(other.to_string()).into())
}
};
out.hash = Some(Hash {
algo,
digest: data_encoding::HEXLOWER.encode(&output_hash.digest),
});
}
}
}
}

View file

@ -10,6 +10,7 @@ anyhow = "1.0.68"
data-encoding = "2.3.3"
glob = "0.3.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sha2 = "0.10.6"
thiserror = "1.0.38"

View file

@ -1,5 +1,5 @@
use crate::{
nixhash::NixHash,
nixhash::HashAlgo,
store_path::{self, StorePath},
};
use serde::{Deserialize, Serialize};
@ -17,8 +17,9 @@ mod write;
mod tests;
// Public API of the crate.
pub use crate::nixhash::{NixHash, NixHashWithMode};
pub use errors::{DerivationError, OutputError};
pub use output::{Hash, Output};
pub use output::Output;
pub use utils::path_with_references;
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
@ -75,23 +76,22 @@ impl Derivation {
buffer
}
/// Returns the fixed output path and its hash
// (if the Derivation is fixed output),
/// or None if there is no fixed output.
/// Returns the fixed output path and its [NixHashWithMode]
/// (if the Derivation is fixed output), or None if there is no fixed output.
/// This takes some shortcuts in case more than one output exists, as this
/// can't be a valid fixed-output Derivation.
pub fn get_fixed_output(&self) -> Option<(&String, &Hash)> {
pub fn get_fixed_output(&self) -> Option<(&String, &NixHashWithMode)> {
if self.outputs.len() != 1 {
return None;
}
if let Some(out_output) = self.outputs.get("out") {
if let Some(out_output_hash) = &out_output.hash {
if let Some(out_output_hash) = &out_output.hash_with_mode {
return Some((&out_output.path, out_output_hash));
}
// There has to be a hash, otherwise it would not be FOD
}
return None;
None
}
/// Returns the drv path of a Derivation struct.
@ -160,8 +160,6 @@ impl Derivation {
/// `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of
/// the A-Term.
///
/// TODO: what's the representation of ${digest}?
///
/// If the derivation is not a fixed derivation, it's up to the caller of
/// this function to provide a lookup function to lookup these calculation
/// results of parent derivations at `fn_get_hash_derivation_modulo` (by
@ -175,8 +173,9 @@ impl Derivation {
// Fixed-output derivations return a fixed hash
Some((fixed_output_path, fixed_output_hash)) => {
hasher.update(format!(
"fixed:out:{}:{}:{}",
&fixed_output_hash.algo, &fixed_output_hash.digest, fixed_output_path,
"fixed:out:{}:{}",
fixed_output_hash.to_nix_hash_string(),
fixed_output_path
));
hasher.finalize()
}
@ -196,7 +195,7 @@ impl Derivation {
for (drv_path, output_names) in &self.input_derivations {
replaced_input_derivations.insert(
data_encoding::HEXLOWER
.encode(&fn_get_derivation_or_fod_hash(&drv_path).digest),
.encode(&fn_get_derivation_or_fod_hash(drv_path).digest),
output_names.clone(),
);
}
@ -255,7 +254,7 @@ impl Derivation {
output_path_name.push_str(output_name);
}
let s = &format!(
let fp = &format!(
"output:{}:{}:{}:{}",
output_name,
derivation_or_fod_hash.to_nix_hash_string(),
@ -264,7 +263,7 @@ impl Derivation {
);
let abs_store_path =
utils::build_store_path(false, s, &output_path_name)?.to_absolute_path();
utils::build_store_path(false, fp, &output_path_name)?.to_absolute_path();
output.path = abs_store_path.clone();
self.environment
@ -277,33 +276,41 @@ impl Derivation {
// footgun prevention mechanism.
assert!(fixed_output_path.is_empty());
let s = {
let mut s = String::new();
let fp = {
let mut fp = String::new();
// Fixed-output derivation.
// There's two different hashing strategies in place, depending on the value of hash.algo.
// There's two different hashing strategies in place,
// depending on whether the hash is recursive AND sha256 or anything else.
// This code is _weird_ but it is what Nix is doing. See:
// https://github.com/NixOS/nix/blob/1385b2007804c8a0370f2a6555045a00e34b07c7/src/libstore/store-api.cc#L178-L196
if fixed_output_hash.algo == "r:sha256" {
s.push_str(&format!(
"source:sha256:{}",
fixed_output_hash.digest, // lowerhex
));
if let NixHashWithMode::Recursive(recursive_hash) = fixed_output_hash {
if recursive_hash.algo == HashAlgo::Sha256 {
fp.push_str(&format!("source:{}", recursive_hash.to_nix_hash_string()));
} else {
// This is similar to the FOD case, with an empty fixed_output_path.
fp.push_str(&format!(
"output:out:{}",
derivation_or_fod_hash.to_nix_hash_string(),
));
}
} else {
s.push_str("output:out:");
// This is drv_replacement for FOD, with an empty fixed_output_path.
s.push_str(&derivation_or_fod_hash.to_nix_hash_string());
// This is similar to the FOD case, with an empty fixed_output_path.
fp.push_str(&format!(
"output:out:{}",
derivation_or_fod_hash.to_nix_hash_string(),
));
}
s.push_str(&format!(":{}:{}", store_path::STORE_DIR, name));
s
fp.push_str(&format!(":{}:{}", store_path::STORE_DIR, name));
fp
};
let abs_store_path = utils::build_store_path(false, &s, name)?.to_absolute_path();
let abs_store_path = utils::build_store_path(false, &fp, name)?.to_absolute_path();
self.outputs.insert(
"out".to_string(),
Output {
path: abs_store_path.clone(),
hash: Some(fixed_output_hash.clone()),
hash_with_mode: Some(fixed_output_hash.clone()),
},
);
self.environment.insert("out".to_string(), abs_store_path);

View file

@ -1,5 +1,6 @@
use crate::derivation::OutputError;
use crate::{nixbase32, store_path::StorePath};
use crate::nixhash::{HashAlgo, NixHashWithMode};
use crate::store_path::StorePath;
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
@ -7,42 +8,23 @@ pub struct Output {
pub path: String,
#[serde(flatten)]
pub hash: Option<Hash>,
}
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct Hash {
#[serde(rename = "hash")]
pub digest: String,
#[serde(rename = "hashAlgo")]
pub algo: String,
pub hash_with_mode: Option<NixHashWithMode>,
}
impl Output {
pub fn is_fixed(&self) -> bool {
self.hash.is_some()
self.hash_with_mode.is_some()
}
pub fn validate(&self, validate_output_paths: bool) -> Result<(), OutputError> {
if let Some(hash) = &self.hash {
// try to decode digest
let result = nixbase32::decode(hash.digest.as_bytes());
match result {
Err(e) => return Err(OutputError::InvalidHashEncoding(hash.digest.clone(), e)),
Ok(digest) => {
if hash.algo != "sha1" && hash.algo != "sha256" {
return Err(OutputError::InvalidHashAlgo(hash.algo.to_string()));
}
if (hash.algo == "sha1" && digest.len() != 20)
|| (hash.algo == "sha256" && digest.len() != 32)
{
return Err(OutputError::InvalidDigestSizeForAlgo(
digest.len(),
hash.algo.to_string(),
));
if let Some(hash) = &self.hash_with_mode {
match hash {
NixHashWithMode::Flat(h) | NixHashWithMode::Recursive(h) => {
if h.algo != HashAlgo::Sha1 || h.algo != HashAlgo::Sha256 {
return Err(OutputError::InvalidHashAlgo(h.algo.to_string()));
}
}
};
}
}
if validate_output_paths {
if let Err(e) = StorePath::from_absolute_path(&self.path) {

View file

@ -1,5 +1,6 @@
use crate::derivation::output::{Hash, Output};
use crate::derivation::output::Output;
use crate::derivation::Derivation;
use crate::nixhash::NixHash;
use crate::store_path::StorePath;
use std::collections::BTreeSet;
use std::fs::File;
@ -218,11 +219,15 @@ fn output_path_construction() {
"out".to_string(),
Output {
path: "".to_string(), // will be calculated
hash: Some(Hash {
digest: "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"
.to_string(),
algo: "r:sha256".to_string(),
}),
hash_with_mode: Some(crate::nixhash::NixHashWithMode::Recursive(NixHash {
digest: data_encoding::HEXLOWER
.decode(
"08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"
.as_bytes(),
)
.unwrap(),
algo: crate::nixhash::HashAlgo::Sha256,
})),
},
);
@ -271,7 +276,7 @@ fn output_path_construction() {
"out".to_string(),
Output {
path: "".to_string(), // will be calculated
hash: None,
hash_with_mode: None,
},
);

View file

@ -58,16 +58,22 @@ pub fn write_outputs(
let mut elements: Vec<&str> = vec![output_name, &output.path];
match &output.hash {
Some(hash) => {
elements.push(&hash.algo);
elements.push(&hash.digest);
}
None => {
elements.push("");
elements.push("");
}
}
let (e2, e3) = match &output.hash_with_mode {
Some(hash) => match hash {
crate::nixhash::NixHashWithMode::Flat(h) => (
h.algo.to_string(),
data_encoding::HEXLOWER.encode(&h.digest),
),
crate::nixhash::NixHashWithMode::Recursive(h) => (
format!("r:{}", h.algo),
data_encoding::HEXLOWER.encode(&h.digest),
),
},
None => ("".to_string(), "".to_string()),
};
elements.push(&e2);
elements.push(&e3);
write_array_elements(
writer,

View file

@ -1,14 +1,124 @@
use data_encoding::{BASE64, BASE64_NOPAD, HEXLOWER};
use serde::ser::SerializeMap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::fmt::Display;
use thiserror::Error;
use crate::nixbase32;
/// A Nix Hash can either be flat or recursive.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum NixHashWithMode {
Flat(NixHash),
Recursive(NixHash),
}
impl NixHashWithMode {
/// Formats a [NixHashWithMode] in the Nix default hash format,
/// which is the algo, followed by a colon, then the lower hex encoded digest.
/// In case the hash itself is recursive, a `r:` is added as prefix
pub fn to_nix_hash_string(&self) -> String {
match self {
NixHashWithMode::Flat(h) => h.to_nix_hash_string(),
NixHashWithMode::Recursive(h) => format!("r:{}", h.to_nix_hash_string()),
}
}
}
impl Serialize for NixHashWithMode {
/// map a NixHashWithMode into the serde data model.
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut map = serializer.serialize_map(Some(2))?;
match self {
NixHashWithMode::Flat(h) => {
map.serialize_entry("hash", &nixbase32::encode(&h.digest))?;
map.serialize_entry("hashAlgo", &h.algo.to_string())?;
}
NixHashWithMode::Recursive(h) => {
map.serialize_entry("hash", &nixbase32::encode(&h.digest))?;
map.serialize_entry("hashAlgo", &format!("r:{}", &h.algo.to_string()))?;
}
};
map.end()
}
}
impl<'de> Deserialize<'de> for NixHashWithMode {
/// map the serde data model into a NixHashWithMode.
///
/// The serde data model has a `hash` field (containing a digest in nixbase32),
/// and a `hashAlgo` field, containing the stringified hash algo.
/// In case the hash is recursive, hashAlgo also has a `r:` prefix.
///
/// This is to match how `nix show-derivation` command shows them in JSON
/// representation.
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
// TODO: don't use serde_json here?
// TODO: serde seems to simply set `hash_with_mode` to None if hash
// and hashAlgo fail, but that should be a proper deserialization error
// that should be propagated to the user!
let json = serde_json::Value::deserialize(deserializer)?;
match json.as_object() {
None => Err(serde::de::Error::custom("couldn't parse as map"))?,
Some(map) => {
let digest: Vec<u8> = {
if let Some(v) = map.get("hash") {
if let Some(s) = v.as_str() {
data_encoding::HEXLOWER
.decode(s.as_bytes())
.map_err(|e| serde::de::Error::custom(e.to_string()))?
} else {
return Err(serde::de::Error::custom(
"couldn't parse 'hash' as string",
));
}
} else {
return Err(serde::de::Error::custom("couldn't extract 'hash' key"));
}
};
if let Some(v) = map.get("hashAlgo") {
if let Some(s) = v.as_str() {
match s.strip_prefix("r:") {
Some(rest) => Ok(NixHashWithMode::Recursive(NixHash::new(
HashAlgo::try_from(rest).map_err(|e| {
serde::de::Error::custom(format!("unable to parse algo: {}", e))
})?,
digest,
))),
None => Ok(NixHashWithMode::Flat(NixHash::new(
HashAlgo::try_from(s).map_err(|e| {
serde::de::Error::custom(format!("unable to parse algo: {}", e))
})?,
digest,
))),
}
} else {
Err(serde::de::Error::custom(
"couldn't parse 'hashAlgo' as string",
))
}
} else {
Err(serde::de::Error::custom("couldn't extract 'hashAlgo' key"))
}
}
}
}
}
/// Nix allows specifying hashes in various encodings, and magically just
/// derives the encoding.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct NixHash {
pub digest: Vec<u8>,
pub algo: HashAlgo,
}
@ -26,7 +136,7 @@ impl NixHash {
}
/// This are the hash algorithms supported by cppnix.
#[derive(Clone, Debug, Eq, PartialEq)]
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum HashAlgo {
Md5,
Sha1,