feat(tvix/eval): Initial impl of builtins.match

Implement an *initial* version of builtins.match, using the Rust `regex`
crate for regular expressions. The Rust regex crate definitely has
different semantics than Nix's regular expressions - but we'd like to
see how far we can get before the incompatibility starts to matter.

This consciously leaves out any sort of memoization of compiled regular
expressions (which upstream Nix also has) for the sake of expediency -
in the future we should implement that so we don't have to compile the
same regular expression multiple times.

Change-Id: I5b718635831ec83397940e417a9047c4342b6fa1
Reviewed-on: https://cl.tvl.fyi/c/depot/+/6989
Tested-by: BuildkiteCI
Reviewed-by: Adam Joseph <adam@westernsemico.com>
Reviewed-by: tazjin <tazjin@tvl.su>
This commit is contained in:
Griffin Smith 2022-10-12 18:45:52 -04:00 committed by grfn
parent 5eb89be682
commit 03a3189a3d
6 changed files with 96 additions and 0 deletions

View file

@ -2,6 +2,15 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 3
[[package]]
name = "aho-corasick"
version = "0.7.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "atty" name = "atty"
version = "0.2.14" version = "0.2.14"
@ -310,6 +319,12 @@ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]] [[package]]
name = "memoffset" name = "memoffset"
version = "0.6.5" version = "0.6.5"
@ -399,6 +414,23 @@ dependencies = [
"thiserror", "thiserror",
] ]
[[package]]
name = "regex"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244"
[[package]] [[package]]
name = "rnix" name = "rnix"
version = "0.11.0-dev" version = "0.11.0-dev"
@ -580,6 +612,7 @@ dependencies = [
"codemap-diagnostic", "codemap-diagnostic",
"dirs", "dirs",
"path-clean", "path-clean",
"regex",
"rnix", "rnix",
"rowan", "rowan",
"serde_json", "serde_json",

12
tvix/eval/Cargo.lock generated
View file

@ -2,6 +2,15 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 3
[[package]]
name = "aho-corasick"
version = "0.7.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "atty" name = "atty"
version = "0.2.14" version = "0.2.14"
@ -859,6 +868,8 @@ version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
dependencies = [ dependencies = [
"aho-corasick",
"memchr",
"regex-syntax", "regex-syntax",
] ]
@ -1208,6 +1219,7 @@ dependencies = [
"path-clean", "path-clean",
"pretty_assertions", "pretty_assertions",
"proptest", "proptest",
"regex",
"rnix", "rnix",
"rowan", "rowan",
"rustyline", "rustyline",

View file

@ -25,6 +25,7 @@ proptest = { version = "1.0.0", default_features = false, features = ["std", "al
test-strategy = { version = "0.2.1", optional = true } test-strategy = { version = "0.2.1", optional = true }
clap = { version = "3.2.22", optional = true, features = ["derive", "env"] } clap = { version = "3.2.22", optional = true, features = ["derive", "env"] }
serde_json = "1.0.86" serde_json = "1.0.86"
regex = "1.6.0"
# rnix has not been released in a while (as of 2022-09-23), we will # rnix has not been released in a while (as of 2022-09-23), we will
# use it from git. # use it from git.

View file

@ -7,6 +7,8 @@ use std::cmp;
use std::collections::{BTreeMap, HashMap, HashSet}; use std::collections::{BTreeMap, HashMap, HashSet};
use std::path::PathBuf; use std::path::PathBuf;
use regex::Regex;
use crate::{ use crate::{
errors::ErrorKind, errors::ErrorKind,
value::{Builtin, CoercionKind, NixAttrs, NixList, NixString, Value}, value::{Builtin, CoercionKind, NixAttrs, NixList, NixString, Value},
@ -381,6 +383,24 @@ fn pure_builtins() -> Vec<Builtin> {
.map(|list| Value::List(NixList::from(list))) .map(|list| Value::List(NixList::from(list)))
.map_err(Into::into) .map_err(Into::into)
}), }),
// builtins.match: `match regex str` evaluates to `null` unless the
// *entire* string matches `regex`. On a match it evaluates to the list
// of capture groups, with `null` standing in for any group that did not
// participate in the match.
Builtin::new(
    "match",
    // NOTE(review): presumably marks both arguments as strict/forced —
    // confirm against `Builtin::new`.
    &[true, true],
    |mut args: Vec<Value>, _: &mut VM| {
        // Arguments are popped in reverse: the subject string comes last.
        let s = args.pop().unwrap().to_str()?;
        let re = args.pop().unwrap().to_str()?;
        // Anchor the pattern so that only whole-string matches succeed.
        // The pattern is wrapped in a non-capturing group so that a
        // top-level alternation such as `a|b` is anchored as a whole
        // (`^(?:a|b)$`) instead of per-branch (`^a|b$`); being
        // non-capturing, the wrapper does not shift capture indices.
        //
        // TODO: an invalid pattern still panics here; it should be
        // surfaced as an evaluation error instead. The regex is also
        // recompiled on every call (no cache yet).
        let re: Regex = Regex::new(&format!("^(?:{})$", re.as_str())).unwrap();
        match re.captures(&s) {
            Some(caps) => Ok(caps
                .iter()
                // Group 0 is the whole match, which is not part of the result.
                .skip(1)
                .map(|grp| grp.map(|g| Value::from(g.as_str())).unwrap_or(Value::Null))
                .collect::<Vec<Value>>()
                .into()),
            None => Ok(Value::Null),
        }
    },
),
Builtin::new( Builtin::new(
"mul", "mul",
&[false, false], &[false, false],

View file

@ -0,0 +1 @@
[ true true false true true true true false false true false [ "foobar" ] [ "FOO" ] [ "/path/to/" "/path/to" "foobar" "nix" ] [ null null "foobar" "cc" ] ]

View file

@ -0,0 +1,29 @@
# Exercises builtins.match: whole-string matching, POSIX character
# classes, and the shape of the returned capture-group list.
let
  regexMatch = builtins.match;

  # True when `input` matches `pattern` in its entirety.
  isMatch = pattern: input: (regexMatch pattern input) != null;

  # Splits a file name into [ dir-with-slash dir basename extension ].
  splitFileName = regexMatch "((.*)/)?([^/]*)\\.(nix|cc)";
in
[
  # Literals and quantifiers; partial matches must be rejected.
  (isMatch "foobar" "foobar")
  (isMatch "fo*" "f")
  (isMatch "fo+" "f")
  (isMatch "fo*" "fo")
  (isMatch "fo*" "foo")
  (isMatch "fo+" "foo")
  (isMatch "fo{1,2}" "foo")
  (isMatch "fo{1,2}" "fooo")
  (isMatch "fo*" "foobar")

  # POSIX bracket character classes.
  (isMatch "[[:space:]]+([^[:space:]]+)[[:space:]]+" " foo ")
  (isMatch "[[:space:]]+([[:upper:]]+)[[:space:]]+" " foo ")

  # Capture groups are returned as a list; unmatched groups are null.
  (regexMatch "(.*)\\.nix" "foobar.nix")
  (regexMatch "[[:space:]]+([[:upper:]]+)[[:space:]]+" " FOO ")
  (splitFileName "/path/to/foobar.nix")
  (splitFileName "foobar.cc")
]