feat(tvix/eval): implement builtins.hashFile

The primary addition is the `builtin_hashFile` function, which attempts
value-to-path coercion and, on success, reads the file to a nix_string.
As in the `builtin_hashString` implementation, the hash is then derived
using the `hash_nix_string` function from the newly introduced hash
module, which extracts the hashing functionality initially specified in
cl/11005.

Change-Id: I5e3ef0317223af99488ebb79efc2fb49b4cbc1b6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11007
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
This commit is contained in:
Padraic-O-Mhuiris 2024-02-22 15:33:29 +00:00 committed by Pádraic Ó Mhuiris
parent 5acaa672ac
commit e0ffb4d47a
6 changed files with 42 additions and 25 deletions

View file

@ -65,7 +65,7 @@ The `impl` column indicates implementation status in tvix:
| getEnv | false | | false | | | getEnv | false | | false | |
| hasAttr | false | | | | | hasAttr | false | | | |
| hasContext | false | | | | | hasContext | false | | | |
| hashFile | false | | false | todo | | hashFile | false | | false | |
| hashString | false | | | | | hashString | false | | | |
| head | false | | | | | head | false | | | |
| import | true | | | | | import | true | | | |

View file

@ -0,0 +1,25 @@
use bstr::ByteSlice;
use data_encoding::HEXLOWER;
use md5::Md5;
use sha1::Sha1;
use sha2::{digest::Output, Digest, Sha256, Sha512};
use crate::ErrorKind;
/// Runs the digest algorithm `D` over the bytes in `b` and returns the raw
/// (unencoded) digest output.
fn hash<D: Digest>(b: &[u8]) -> Output<D> {
    // One-shot form of "create hasher, feed all data, finalize".
    D::digest(b)
}
/// Hashes the bytes of `s` with the algorithm named by `algo` and returns the
/// digest encoded as lowercase hexadecimal.
///
/// The accepted algorithm names are `md5`, `sha1`, `sha256` and `sha512`.
///
/// # Errors
///
/// Returns [`ErrorKind::UnknownHashType`], carrying the requested algorithm
/// name, when `algo` is not one of the names listed above.
pub fn hash_nix_string(algo: impl AsRef<[u8]>, s: impl AsRef<[u8]>) -> Result<String, ErrorKind> {
    let algo_name = algo.as_ref();

    let hex_digest = match algo_name {
        b"md5" => HEXLOWER.encode(hash::<Md5>(s.as_ref()).as_bstr()),
        b"sha1" => HEXLOWER.encode(hash::<Sha1>(s.as_ref()).as_bstr()),
        b"sha256" => HEXLOWER.encode(hash::<Sha256>(s.as_ref()).as_bstr()),
        b"sha512" => HEXLOWER.encode(hash::<Sha512>(s.as_ref()).as_bstr()),
        unknown => {
            // Report the algorithm name the caller asked for.
            return Err(ErrorKind::UnknownHashType(unknown.as_bstr().to_string()));
        }
    };

    Ok(hex_digest)
}

View file

@ -21,7 +21,7 @@ mod impure_builtins {
use std::os::unix::ffi::OsStrExt; use std::os::unix::ffi::OsStrExt;
use super::*; use super::*;
use crate::builtins::coerce_value_to_path; use crate::builtins::{coerce_value_to_path, hash::hash_nix_string};
#[builtin("getEnv")] #[builtin("getEnv")]
async fn builtin_get_env(co: GenCo, var: Value) -> Result<Value, ErrorKind> { async fn builtin_get_env(co: GenCo, var: Value) -> Result<Value, ErrorKind> {
@ -30,6 +30,17 @@ mod impure_builtins {
.into()) .into())
} }
#[builtin("hashFile")]
#[allow(non_snake_case)]
// Coerces `path` to a path, requests the contents of that file, and hashes
// them with the algorithm named by `algo` via `hash_nix_string`.
async fn builtin_hashFile(co: GenCo, algo: Value, path: Value) -> Result<Value, ErrorKind> {
    let file_path = match coerce_value_to_path(&co, path).await? {
        Ok(resolved) => resolved,
        // A catchable error from the coercion is handed back as a value
        // rather than being raised as an `ErrorKind`.
        Err(catchable) => return Ok(Value::from(catchable)),
    };

    let contents = generators::request_read_to_string(&co, file_path).await;
    let digest = hash_nix_string(algo.to_str()?, contents.to_str()?)?;
    Ok(Value::from(digest))
}
#[builtin("pathExists")] #[builtin("pathExists")]
async fn builtin_path_exists(co: GenCo, path: Value) -> Result<Value, ErrorKind> { async fn builtin_path_exists(co: GenCo, path: Value) -> Result<Value, ErrorKind> {
match coerce_value_to_path(&co, path).await? { match coerce_value_to_path(&co, path).await? {

View file

@ -5,14 +5,9 @@
use bstr::{ByteSlice, ByteVec}; use bstr::{ByteSlice, ByteVec};
use builtin_macros::builtins; use builtin_macros::builtins;
use data_encoding::HEXLOWER;
use genawaiter::rc::Gen; use genawaiter::rc::Gen;
use imbl::OrdMap; use imbl::OrdMap;
use md5::Md5;
use regex::Regex; use regex::Regex;
use sha1::Sha1;
use sha2::digest::Output;
use sha2::{Digest, Sha256, Sha512};
use std::cmp::{self, Ordering}; use std::cmp::{self, Ordering};
use std::collections::VecDeque; use std::collections::VecDeque;
use std::collections::{BTreeMap, HashSet}; use std::collections::{BTreeMap, HashSet};
@ -24,12 +19,14 @@ use crate::vm::generators::{self, GenCo};
use crate::warnings::WarningKind; use crate::warnings::WarningKind;
use crate::{ use crate::{
self as tvix_eval, self as tvix_eval,
builtins::hash::hash_nix_string,
errors::{CatchableErrorKind, ErrorKind}, errors::{CatchableErrorKind, ErrorKind},
value::{CoercionKind, NixAttrs, NixList, NixString, Thunk, Value}, value::{CoercionKind, NixAttrs, NixList, NixString, Thunk, Value},
}; };
use self::versions::{VersionPart, VersionPartsIter}; use self::versions::{VersionPart, VersionPartsIter};
mod hash;
mod to_xml; mod to_xml;
mod versions; mod versions;
@ -692,23 +689,7 @@ mod pure_builtins {
#[builtin("hashString")] #[builtin("hashString")]
#[allow(non_snake_case)] #[allow(non_snake_case)]
async fn builtin_hashString(co: GenCo, algo: Value, s: Value) -> Result<Value, ErrorKind> { async fn builtin_hashString(co: GenCo, algo: Value, s: Value) -> Result<Value, ErrorKind> {
fn hash<D: Digest>(b: &[u8]) -> Output<D> { hash_nix_string(algo.to_str()?, s.to_str()?).map(Value::from)
let mut hasher = D::new();
hasher.update(b);
hasher.finalize()
}
let s = s.to_str()?;
let encoded_hash = match algo.to_str()?.as_bytes() {
b"md5" => HEXLOWER.encode(hash::<Md5>(&s).as_bstr()),
b"sha1" => HEXLOWER.encode(hash::<Sha1>(&s).as_bstr()),
b"sha256" => HEXLOWER.encode(hash::<Sha256>(&s).as_bstr()),
b"sha512" => HEXLOWER.encode(hash::<Sha512>(&s).as_bstr()),
_ => return Err(ErrorKind::UnknownHashType(s.into())),
};
Ok(Value::from(encoded_hash))
} }
#[builtin("head")] #[builtin("head")]

View file

@ -1,4 +1,4 @@
let let
paths = [ ./../data ./../binary-data ]; paths = [ ./data ./binary-data ];
in in
builtins.concatLists (map (hash: map (builtins.hashFile hash) paths) ["md5" "sha1" "sha256" "sha512"]) builtins.concatLists (map (hash: map (builtins.hashFile hash) paths) ["md5" "sha1" "sha256" "sha512"])