Add a primop for regular expression pattern matching

The function ‘builtins.match’ takes a POSIX extended regular
expression and an arbitrary string. It returns ‘null’ if the string
does not match the regular expression. Otherwise, it returns a list
containing substring matches corresponding to parenthesis groups in
the regex. The regex must match the entire string (i.e. the pattern is
implicitly wrapped as "^(<pat>)$").  For example:

  match "foo" "foobar" => null
  match "foo" "foo" => []
  match "f(o+)(.*)" "foooobar" => ["oooo" "bar"]
  match "(.*/)?([^/]*)" "/dir/file.nix" => ["/dir/" "file.nix"]
  match "(.*/)?([^/]*)" "file.nix" => [null "file.nix"]

The following example finds all regular files with extension .nix or
.patch underneath the current directory:

  let

    findFiles = pat: dir: concatLists (mapAttrsToList (name: type:
      if type == "directory" then
        findFiles pat (dir + "/" + name)
      else if type == "regular" && match pat name != null then
        [(dir + "/" + name)]
      else []) (readDir dir));

  in findFiles ".*\\.(nix|patch)" (toString ./.)
This commit is contained in:
Eelco Dolstra 2014-11-25 11:47:06 +01:00
parent 4e340a983f
commit 976df480c9
5 changed files with 84 additions and 5 deletions

View file

@ -1,13 +1,16 @@
#include "regex.hh"
#include "types.hh"
#include <algorithm>
#include <vector>
namespace nix {
/* Compile 'pattern' as a POSIX extended regular expression (REG_EXTENDED).
   When 'subs' is true, REG_NOSUB is not passed, so sub-expression matches
   can be reported, and nrParens is set from the pattern text; when 'subs'
   is false, nrParens is 0.  A non-zero regcomp() result is thrown as an
   exception whose message includes the pattern and showError(err).

   NOTE(review): this span appears to interleave the lines removed and the
   lines added by the change (two signatures, two regcomp() calls, two
   throws) -- confirm which ones are live before editing. */
Regex::Regex(const string & pattern)
Regex::Regex(const string & pattern, bool subs)
{
/* Patterns must match the entire string. */
int err = regcomp(&preg, ("^(" + pattern + ")$").c_str(), REG_NOSUB | REG_EXTENDED);
if (err) throw Error(format("compiling pattern %1%: %2%") % pattern % showError(err));
int err = regcomp(&preg, ("^(" + pattern + ")$").c_str(), (subs ? 0 : REG_NOSUB) | REG_EXTENDED);
if (err) throw RegexError(format("compiling pattern %1%: %2%") % pattern % showError(err));
/* This counts every '(' character in the pattern text, including any
   that are escaped or sit inside a bracket expression, so it is an upper
   bound on the number of groups rather than an exact count. */
nrParens = subs ? std::count(pattern.begin(), pattern.end(), '(') : 0;
}
Regex::~Regex()
@ -23,6 +26,20 @@ bool Regex::matches(const string & s)
throw Error(format("matching string %1%: %2%") % s % showError(err));
}
/* Match 's' against the compiled pattern and, on success, store the
   text captured by each parenthesised group in 'subs'.

   Keys of 'subs' are zero-based group indices.  A group that did not
   take part in the match gets no entry, and entries already present in
   'subs' are left alone.  Returns true on a match and false when
   regexec() reports REG_NOMATCH; any other regexec() failure is thrown
   as Error. */
bool Regex::matches(const string & s, Subs & subs)
{
    /* Slot 0 holds the overall match and slot 1 the group that the
       constructor wraps around the pattern, so the caller's groups
       start at slot 2.  The slots live on the heap because their number
       is derived from the pattern text: a variable-length stack array
       is not standard C++ and could overflow the stack for a pattern
       containing very many '(' characters. */
    std::vector<regmatch_t> pmatch(nrParens + 2);

    int err = regexec(&preg, s.c_str(), pmatch.size(), &pmatch[0], 0);
    if (err == REG_NOMATCH) return false;
    if (err)
        throw Error(format("matching string %1%: %2%") % s % showError(err));

    for (unsigned int n = 2; n < pmatch.size(); ++n)
        /* An end offset of -1 marks a group that did not participate. */
        if (pmatch[n].rm_eo != -1)
            subs[n - 2] = string(s, pmatch[n].rm_so, pmatch[n].rm_eo - pmatch[n].rm_so);
    return true;
}
string Regex::showError(int err)
{
char buf[256];

View file

@ -5,16 +5,23 @@
#include <sys/types.h>
#include <regex.h>
#include <map>
namespace nix {
/* Exception type derived from Error; the Regex constructor throws it
   when a pattern fails to compile. */
MakeError(RegexError, Error)
/* Wrapper around a compiled POSIX extended regular expression.  The
   constructor anchors the pattern so that it has to match the entire
   input string. */
class Regex
{
public:
/* Compile 'pattern'.  With 'subs' set, sub-expression matches are
   recorded so that the two-argument matches() can report them.
   NOTE(review): both a one-argument declaration and a two-argument one
   with a default appear here; if both were kept, a call passing only a
   pattern would be ambiguous.  Presumably the first is the line the
   change removes -- confirm. */
Regex(const string & pattern);
Regex(const string & pattern, bool subs = false);
~Regex();
/* Test 's' against the pattern. */
bool matches(const string & s);
/* Captured groups: zero-based group index -> matched text.  Groups that
   did not take part in the match have no entry. */
typedef std::map<unsigned int, string> Subs;
/* Test 's' against the pattern and, on success, fill 'subs' with the
   text matched by each group. */
bool matches(const string & s, Subs & subs);
private:
/* Number of '(' characters in the pattern when sub-expression matching
   was requested at construction, otherwise 0. */
unsigned nrParens;
/* The compiled expression as produced by regcomp(). */
regex_t preg;
/* Turn an error code returned by regcomp()/regexec() into a message. */
string showError(int err);
};