feat(tvix/eval): Initial impl of builtins.match
Implement an *initial* version of builtins.match, using the Rust `regex` crate for regular expressions. The Rust regex crate definitely has different semantics from Nix's regular expressions - but we'd like to see how far we can get before the incompatibility starts to matter. This consciously leaves out any sort of memoization of compiled regular expressions (which upstream Nix also has) for the sake of expediency - in the future we should implement that so we don't have to compile the same regular expression multiple times. Change-Id: I5b718635831ec83397940e417a9047c4342b6fa1 Reviewed-on: https://cl.tvl.fyi/c/depot/+/6989 Tested-by: BuildkiteCI Reviewed-by: Adam Joseph <adam@westernsemico.com> Reviewed-by: tazjin <tazjin@tvl.su>
This commit is contained in:
parent
5eb89be682
commit
03a3189a3d
6 changed files with 96 additions and 0 deletions
33
corp/tvixbolt/Cargo.lock
generated
33
corp/tvixbolt/Cargo.lock
generated
|
@ -2,6 +2,15 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
|
@ -310,6 +319,12 @@ dependencies = [
|
|||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.6.5"
|
||||
|
@ -399,6 +414,23 @@ dependencies = [
|
|||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244"
|
||||
|
||||
[[package]]
|
||||
name = "rnix"
|
||||
version = "0.11.0-dev"
|
||||
|
@ -580,6 +612,7 @@ dependencies = [
|
|||
"codemap-diagnostic",
|
||||
"dirs",
|
||||
"path-clean",
|
||||
"regex",
|
||||
"rnix",
|
||||
"rowan",
|
||||
"serde_json",
|
||||
|
|
12
tvix/eval/Cargo.lock
generated
12
tvix/eval/Cargo.lock
generated
|
@ -2,6 +2,15 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
|
@ -859,6 +868,8 @@ version = "1.6.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
|
@ -1208,6 +1219,7 @@ dependencies = [
|
|||
"path-clean",
|
||||
"pretty_assertions",
|
||||
"proptest",
|
||||
"regex",
|
||||
"rnix",
|
||||
"rowan",
|
||||
"rustyline",
|
||||
|
|
|
@ -25,6 +25,7 @@ proptest = { version = "1.0.0", default_features = false, features = ["std", "al
|
|||
test-strategy = { version = "0.2.1", optional = true }
|
||||
clap = { version = "3.2.22", optional = true, features = ["derive", "env"] }
|
||||
serde_json = "1.0.86"
|
||||
regex = "1.6.0"
|
||||
|
||||
# rnix has not been released in a while (as of 2022-09-23), we will
|
||||
# use it from git.
|
||||
|
|
|
@ -7,6 +7,8 @@ use std::cmp;
|
|||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::path::PathBuf;
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
use crate::{
|
||||
errors::ErrorKind,
|
||||
value::{Builtin, CoercionKind, NixAttrs, NixList, NixString, Value},
|
||||
|
@ -381,6 +383,24 @@ fn pure_builtins() -> Vec<Builtin> {
|
|||
.map(|list| Value::List(NixList::from(list)))
|
||||
.map_err(Into::into)
|
||||
}),
|
||||
// `builtins.match regex str`: returns a list holding the capture groups of
// `regex` when it matches `str`, or `null` when it does not. A group that did
// not participate in the match is represented as `null`.
Builtin::new(
    "match",
    &[true, true],
    |mut args: Vec<Value>, _: &mut VM| {
        // Arguments are popped from the back, so the subject string comes
        // off first and the pattern second.
        let s = args.pop().unwrap().to_str()?;
        let re = args.pop().unwrap().to_str()?;
        // Anchor the *whole* pattern. Without the non-capturing group a
        // top-level alternation such as `a|b` would become `^a|b$`, which
        // accepts any string merely starting with `a` or ending with `b`.
        // `(?:...)` does not introduce a capture group, so the group
        // numbering seen by the caller is unchanged.
        //
        // NOTE(review): an invalid pattern still panics on this `unwrap`;
        // it should be turned into a catchable evaluation error once a
        // suitable `ErrorKind` variant exists.
        let re: Regex = Regex::new(&format!("^(?:{})$", re.as_str())).unwrap();
        match re.captures(&s) {
            Some(caps) => Ok(caps
                .iter()
                // Group 0 is the overall match; only explicit groups are
                // returned.
                .skip(1)
                .map(|grp| grp.map(|g| Value::from(g.as_str())).unwrap_or(Value::Null))
                .collect::<Vec<Value>>()
                .into()),
            None => Ok(Value::Null),
        }
    },
),
|
||||
Builtin::new(
|
||||
"mul",
|
||||
&[false, false],
|
||||
|
|
1
tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.exp
Normal file
1
tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.exp
Normal file
|
@ -0,0 +1 @@
|
|||
[ true true false true true true true false false true false [ "foobar" ] [ "FOO" ] [ "/path/to/" "/path/to" "foobar" "nix" ] [ null null "foobar" "cc" ] ]
|
29
tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.nix
Normal file
29
tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.nix
Normal file
|
@ -0,0 +1,29 @@
|
|||
# Test expression for `builtins.match`: evaluates to a list of boolean
# match checks followed by the raw results of several `match` calls.
with builtins;
|
||||
|
||||
let
|
||||
|
||||
# `matches pat s` is true when `match pat s` yields anything other than null.
matches = pat: s: match pat s != null;
|
||||
|
||||
# `match` partially applied to a file-name pattern: an optional directory
# prefix ending in `/`, a base name without slashes, and a `nix` or `cc`
# extension.
splitFN = match "((.*)/)?([^/]*)\\.(nix|cc)";
|
||||
|
||||
in
|
||||
|
||||
[
|
||||
# Boolean checks via `matches`.
(matches "foobar" "foobar")
|
||||
(matches "fo*" "f")
|
||||
(matches "fo+" "f")
|
||||
(matches "fo*" "fo")
|
||||
(matches "fo*" "foo")
|
||||
(matches "fo+" "foo")
|
||||
(matches "fo{1,2}" "foo")
|
||||
(matches "fo{1,2}" "fooo")
|
||||
(matches "fo*" "foobar")
|
||||
(matches "[[:space:]]+([^[:space:]]+)[[:space:]]+" " foo ")
|
||||
(matches "[[:space:]]+([[:upper:]]+)[[:space:]]+" " foo ")
|
||||
|
||||
# Direct `match` calls, whose results are included in the output as-is.
(match "(.*)\\.nix" "foobar.nix")
|
||||
(match "[[:space:]]+([[:upper:]]+)[[:space:]]+" " FOO ")
|
||||
|
||||
# File-name splitting via `splitFN`, with and without a directory prefix.
(splitFN "/path/to/foobar.nix")
|
||||
(splitFN "foobar.cc")
|
||||
]
|
Loading…
Reference in a new issue