* Replaced the SDF parser by a substantially faster Bison/Flex
parser (roughly 80x faster). The absolutely latest version of Bison (1.875c) is required for reentrant GLR support, as well as a recent version of Flex (say, 2.5.31). Note that most Unix distributions ship with the prehistoric Flex 2.5.4, which doesn't support reentrancy.
This commit is contained in:
parent
abd1878b26
commit
c5baaafae6
6 changed files with 261 additions and 252 deletions
|
@ -1,20 +1,22 @@
|
|||
noinst_LIBRARIES = libexpr.a
|
||||
|
||||
libexpr_a_SOURCES = nixexpr.cc nixexpr.hh parser.cc parser.hh \
|
||||
eval.cc eval.hh primops.cc primops.hh nix.sdf
|
||||
eval.cc eval.hh primops.cc primops.hh \
|
||||
lexer-tab.c lexer-tab.h parser-tab.c parser-tab.h
|
||||
|
||||
AM_CXXFLAGS = \
|
||||
-I.. -I../../externals/inst/include -I../libutil -I../libstore
|
||||
AM_CFLAGS = \
|
||||
-I../../externals/inst/include
|
||||
|
||||
|
||||
# Parse table generation.
|
||||
# Parser generation.
|
||||
|
||||
parser.o: parse-table.h
|
||||
parser-tab.c parser-tab.h: parser.y
|
||||
../grammartest/inst/bin/bison -v -o parser-tab.c parser.y -d
|
||||
|
||||
parse-table.h: nix.tbl
|
||||
../bin2c/bin2c nixParseTable < $< > $@ || (rm $@ && exit 1)
|
||||
lexer-tab.c lexer-tab.h: lexer.l
|
||||
flex --outfile lexer-tab.c --header-file=lexer-tab.h lexer.l
|
||||
|
||||
%.tbl: %.sdf
|
||||
../../externals/inst/bin/sdf2table -s -i $< -o $@
|
||||
|
||||
CLEANFILES = parse-table.h nix.tbl
|
||||
CLEANFILES =
|
||||
|
|
|
@ -137,6 +137,8 @@ Expr evalExpr2(EvalState & state, Expr e)
|
|||
/* Any encountered variables must be undeclared or primops. */
|
||||
if (atMatch(m, e) >> "Var" >> s1) {
|
||||
if (s1 == "null") return primNull(state);
|
||||
if (s1 == "true") return ATmake("Bool(True)");
|
||||
if (s1 == "false") return ATmake("Bool(False)");
|
||||
return e;
|
||||
}
|
||||
|
||||
|
|
78
src/libexpr/lexer.l
Normal file
78
src/libexpr/lexer.l
Normal file
|
@ -0,0 +1,78 @@
|
|||
%option reentrant bison-bridge bison-locations
|
||||
%option noyywrap
|
||||
%option never-interactive
|
||||
|
||||
|
||||
%{
|
||||
#include <string.h>
|
||||
#include <aterm2.h>
|
||||
#include "parser-tab.h"
|
||||
|
||||
static void initLoc(YYLTYPE * loc)
|
||||
{
|
||||
loc->first_line = 1;
|
||||
loc->first_column = 1;
|
||||
}
|
||||
|
||||
static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
|
||||
{
|
||||
while (len--) {
|
||||
switch (*s++) {
|
||||
case '\n':
|
||||
++loc->first_line;
|
||||
loc->first_column = 1;
|
||||
break;
|
||||
default:
|
||||
++loc->first_column;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define YY_USER_INIT initLoc(yylloc)
|
||||
#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng);
|
||||
|
||||
%}
|
||||
|
||||
|
||||
ID [a-zA-Z\_][a-zA-Z0-9\_\']*
|
||||
INT [0-9]+
|
||||
STR \"[^\n\"]*\"
|
||||
PATH [a-zA-Z0-9\.\_\-\+]*(\/[a-zA-Z0-9\.\_\-\+]+)+
|
||||
URI [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']*
|
||||
|
||||
|
||||
%%
|
||||
|
||||
|
||||
if { return IF; }
|
||||
then { return THEN; }
|
||||
else { return ELSE; }
|
||||
assert { return ASSERT; }
|
||||
let { return LET; }
|
||||
rec { return REC; }
|
||||
|
||||
\=\= { return EQ; }
|
||||
\!\= { return NEQ; }
|
||||
\&\& { return AND; }
|
||||
\|\| { return OR; }
|
||||
\-\> { return IMPL; }
|
||||
|
||||
{ID} { yylval->t = ATmake("<str>", yytext); return ID; /* !!! alloc */ }
|
||||
{INT} { return INT; }
|
||||
{STR} { int len = strlen(yytext);
|
||||
yytext[len - 1] = 0;
|
||||
yylval->t = ATmake("<str>", yytext + 1);
|
||||
yytext[len - 1] = '\"';
|
||||
return STR; /* !!! alloc */
|
||||
}
|
||||
{PATH} { yylval->t = ATmake("<str>", yytext); return PATH; /* !!! alloc */ }
|
||||
{URI} { yylval->t = ATmake("<str>", yytext); return URI; /* !!! alloc */ }
|
||||
|
||||
[ \t\n]+ /* eat up whitespace */
|
||||
\#[^\n]* /* single-line comments */
|
||||
\/\*(.|\n)*\*\/ /* long comments */
|
||||
|
||||
. return yytext[0];
|
||||
|
||||
|
||||
%%
|
|
@ -1,131 +0,0 @@
|
|||
definition
|
||||
|
||||
module Main
|
||||
imports Fix
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% Top level syntax.
|
||||
|
||||
module Fix
|
||||
imports Fix-Exprs Fix-Layout
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% Expressions.
|
||||
|
||||
module Fix-Exprs
|
||||
imports Fix-Lexicals
|
||||
exports
|
||||
sorts Expr Formal Bind Binds ExprList
|
||||
context-free syntax
|
||||
|
||||
Id -> Expr {cons("Var")}
|
||||
|
||||
Int -> Expr {cons("Int")}
|
||||
|
||||
Str -> Expr {cons("Str")}
|
||||
|
||||
Uri -> Expr {cons("Uri")}
|
||||
|
||||
Path -> Expr {cons("Path")}
|
||||
|
||||
"(" Expr ")" -> Expr {bracket}
|
||||
|
||||
Expr Expr -> Expr {cons("Call"), left}
|
||||
|
||||
"{" {Formal ","}* "}" ":" Expr -> Expr {cons("Function")}
|
||||
Id -> Formal {cons("NoDefFormal")}
|
||||
Id "?" Expr -> Formal {cons("DefFormal")}
|
||||
|
||||
"assert" Expr ";" Expr -> Expr {cons("Assert")}
|
||||
|
||||
"rec" "{" Binds "}" -> Expr {cons("Rec")}
|
||||
"let" "{" Binds "}" -> Expr {cons("LetRec")}
|
||||
"{" Binds "}" -> Expr {cons("Attrs")}
|
||||
|
||||
Bind* -> Binds
|
||||
Id "=" Expr ";" -> Bind {cons("Bind")}
|
||||
|
||||
"[" ExprList "]" -> Expr {cons("List")}
|
||||
"" -> ExprList {cons("ExprNil")}
|
||||
Expr ExprList -> ExprList {cons("ExprCons")}
|
||||
|
||||
Expr "." Id -> Expr {cons("Select")}
|
||||
|
||||
"if" Expr "then" Expr "else" Expr -> Expr {cons("If")}
|
||||
|
||||
Expr "==" Expr -> Expr {cons("OpEq"), non-assoc}
|
||||
Expr "!=" Expr -> Expr {cons("OpNEq"), non-assoc}
|
||||
|
||||
"!" Expr -> Expr {cons("OpNot")}
|
||||
Expr "&&" Expr -> Expr {cons("OpAnd"), right}
|
||||
Expr "||" Expr -> Expr {cons("OpOr"), right}
|
||||
Expr "->" Expr -> Expr {cons("OpImpl"), right}
|
||||
|
||||
context-free priorities
|
||||
|
||||
Expr "." Id -> Expr
|
||||
> Expr ExprList -> ExprList
|
||||
> Expr Expr -> Expr
|
||||
> "!" Expr -> Expr
|
||||
> Expr "==" Expr -> Expr
|
||||
> Expr "!=" Expr -> Expr
|
||||
> Expr "&&" Expr -> Expr
|
||||
> Expr "||" Expr -> Expr
|
||||
> Expr "->" Expr -> Expr
|
||||
> "assert" Expr ";" Expr -> Expr
|
||||
> "{" {Formal ","}* "}" ":" Expr -> Expr
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% Lexical syntax.
|
||||
|
||||
module Fix-Lexicals
|
||||
exports
|
||||
sorts Id Int Str Path PathComp Uri
|
||||
lexical syntax
|
||||
[a-zA-Z\_][a-zA-Z0-9\_\']* -> Id
|
||||
"rec" -> Id {reject}
|
||||
"let" -> Id {reject}
|
||||
"if" -> Id {reject}
|
||||
"then" -> Id {reject}
|
||||
"else" -> Id {reject}
|
||||
"assert" -> Id {reject}
|
||||
|
||||
[0-9]+ -> Int
|
||||
|
||||
"\"" ~[\n\"]* "\"" -> Str
|
||||
|
||||
"." ("/" PathComp)+ -> Path
|
||||
".." ("/" PathComp)+ -> Path
|
||||
("/" PathComp)+ -> Path
|
||||
[a-zA-Z0-9\.\_\-\+]+ -> PathComp
|
||||
|
||||
[a-zA-Z] [a-zA-Z0-9\+\-\.]* ":" [a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']* -> Uri
|
||||
|
||||
lexical restrictions
|
||||
Id -/- [a-zA-Z0-9\_\']
|
||||
Int -/- [0-9]
|
||||
Path -/- [a-zA-Z0-9\.\_\-\+\/]
|
||||
Uri -/- [a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% Layout.
|
||||
|
||||
module Fix-Layout
|
||||
exports
|
||||
sorts HashComment Asterisk Comment
|
||||
lexical syntax
|
||||
[\ \t\n] -> LAYOUT
|
||||
HashComment -> LAYOUT
|
||||
Comment -> LAYOUT
|
||||
"#" ~[\n]* -> HashComment
|
||||
"/*" ( ~[\*] | Asterisk )* "*/" -> Comment
|
||||
[\*] -> Asterisk
|
||||
lexical restrictions
|
||||
Asterisk -/- [\/]
|
||||
HashComment -/- ~[\n]
|
||||
context-free restrictions
|
||||
LAYOUT? -/- [\ \t\n]
|
|
@ -5,133 +5,63 @@
|
|||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
extern "C" {
|
||||
#include <sglr.h>
|
||||
#include <asfix2.h>
|
||||
}
|
||||
|
||||
#include "aterm.hh"
|
||||
#include "parser.hh"
|
||||
#include "parse-table.h"
|
||||
|
||||
|
||||
/* Cleanup cleans up an imploded parse tree into an actual abstract
|
||||
syntax tree that we can evaluate. It removes quotes around
|
||||
strings, converts integer literals into actual integers, and
|
||||
absolutises paths relative to the directory containing the input
|
||||
file. */
|
||||
struct Cleanup : TermFun
|
||||
struct ParseData
|
||||
{
|
||||
Expr result;
|
||||
string basePath;
|
||||
|
||||
virtual ATerm operator () (ATerm e)
|
||||
{
|
||||
checkInterrupt();
|
||||
|
||||
ATMatcher m;
|
||||
string s;
|
||||
|
||||
if (atMatch(m, e) >> "Str" >> s)
|
||||
return ATmake("Str(<str>)",
|
||||
string(s, 1, s.size() - 2).c_str());
|
||||
|
||||
if (atMatch(m, e) >> "Path" >> s)
|
||||
return ATmake("Path(<str>)", absPath(s, basePath).c_str());
|
||||
|
||||
if (atMatch(m, e) >> "Int" >> s) {
|
||||
istringstream s2(s);
|
||||
int n;
|
||||
s2 >> n;
|
||||
return ATmake("Int(<int>)", n);
|
||||
}
|
||||
|
||||
if (atMatch(m, e) >> "Var" >> "true")
|
||||
return ATmake("Bool(True)");
|
||||
|
||||
if (atMatch(m, e) >> "Var" >> "false")
|
||||
return ATmake("Bool(False)");
|
||||
|
||||
if (atMatch(m, e) >> "ExprNil")
|
||||
return (ATerm) ATempty;
|
||||
|
||||
ATerm e1;
|
||||
ATermList e2;
|
||||
if (atMatch(m, e) >> "ExprCons" >> e1 >> e2)
|
||||
return (ATerm) ATinsert(e2, e1);
|
||||
|
||||
return e;
|
||||
}
|
||||
string location;
|
||||
string error;
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
|
||||
#include "parser-tab.h"
|
||||
#include "lexer-tab.h"
|
||||
|
||||
/* Callbacks for getting from C to C++. Due to a (small) bug in the
|
||||
GLR code of Bison we cannot currently compile the parser as C++
|
||||
code. */
|
||||
|
||||
void setParseResult(ParseData * data, ATerm t)
|
||||
{
|
||||
data->result = t;
|
||||
}
|
||||
|
||||
ATerm absParsedPath(ParseData * data, ATerm t)
|
||||
{
|
||||
return string2ATerm(absPath(aterm2String(t), data->basePath).c_str());
|
||||
}
|
||||
|
||||
void parseError(ParseData * data, char * error, int line, int column)
|
||||
{
|
||||
data->error = (format("%1%, at line %2%, column %3%, of %4%")
|
||||
% error % line % column % data->location).str();
|
||||
}
|
||||
|
||||
int yyparse(yyscan_t scanner, ParseData * data);
|
||||
}
|
||||
|
||||
|
||||
static Expr parse(const char * text, const string & location,
|
||||
const Path & basePath)
|
||||
{
|
||||
/* Initialise the SDF libraries. */
|
||||
static bool initialised = false;
|
||||
static ATerm parseTable = 0;
|
||||
static language lang = 0;
|
||||
yyscan_t scanner;
|
||||
ParseData data;
|
||||
data.basePath = basePath;
|
||||
data.location = location;
|
||||
|
||||
if (!initialised) {
|
||||
PT_initMEPTApi();
|
||||
PT_initAsFix2Api();
|
||||
SGinitParser(ATfalse);
|
||||
|
||||
ATprotect(&parseTable);
|
||||
parseTable = ATreadFromBinaryString(
|
||||
(char *) nixParseTable, sizeof nixParseTable);
|
||||
if (!parseTable)
|
||||
throw Error(format("cannot construct parse table term"));
|
||||
|
||||
ATprotect(&lang);
|
||||
lang = ATmake("Nix");
|
||||
if (!SGopenLanguageFromTerm("nix-parse", lang, parseTable))
|
||||
throw Error(format("cannot open language"));
|
||||
|
||||
SG_STARTSYMBOL_ON();
|
||||
SG_OUTPUT_ON();
|
||||
SG_ASFIX2ME_ON();
|
||||
SG_AMBIGUITY_ERROR_ON();
|
||||
SG_FILTER_OFF();
|
||||
|
||||
initialised = true;
|
||||
}
|
||||
|
||||
/* Parse it. */
|
||||
ATerm result = SGparseString(lang, "Expr", (char *) text);
|
||||
if (!result)
|
||||
throw SysError(format("parse failed in `%1%'") % location);
|
||||
if (SGisParseError(result))
|
||||
throw Error(format("parse error in `%1%': %2%")
|
||||
% location % result);
|
||||
|
||||
/* Implode it. */
|
||||
PT_ParseTree tree = PT_makeParseTreeFromTerm(result);
|
||||
if (!tree)
|
||||
throw Error(format("cannot create parse tree"));
|
||||
yylex_init(&scanner);
|
||||
yy_scan_string(text, scanner);
|
||||
int res = yyparse(scanner, &data);
|
||||
yylex_destroy(scanner);
|
||||
|
||||
ATerm imploded = PT_implodeParseTree(tree,
|
||||
ATtrue,
|
||||
ATtrue,
|
||||
ATtrue,
|
||||
ATtrue,
|
||||
ATtrue,
|
||||
ATtrue,
|
||||
ATfalse,
|
||||
ATtrue,
|
||||
ATtrue,
|
||||
ATtrue,
|
||||
ATfalse);
|
||||
if (!imploded)
|
||||
throw Error(format("cannot implode parse tree"));
|
||||
if (res) throw Error(data.error);
|
||||
|
||||
printMsg(lvlVomit, format("imploded parse tree of `%1%': %2%")
|
||||
% location % imploded);
|
||||
|
||||
/* Finally, clean it up. */
|
||||
Cleanup cleanup;
|
||||
cleanup.basePath = basePath;
|
||||
return bottomupRewrite(cleanup, imploded);
|
||||
return data.result;
|
||||
}
|
||||
|
||||
|
||||
|
@ -171,7 +101,7 @@ Expr parseExprFromFile(Path path)
|
|||
readFull(fd, (unsigned char *) text, st.st_size);
|
||||
text[st.st_size] = 0;
|
||||
|
||||
return parse(text, path, dirOf(path));
|
||||
return parse(text, "`" + path + "'", dirOf(path));
|
||||
}
|
||||
|
||||
|
||||
|
|
128
src/libexpr/parser.y
Normal file
128
src/libexpr/parser.y
Normal file
|
@ -0,0 +1,128 @@
|
|||
%glr-parser
|
||||
%pure-parser
|
||||
%locations
|
||||
%error-verbose
|
||||
%parse-param { yyscan_t scanner }
|
||||
%parse-param { void * data }
|
||||
%lex-param { yyscan_t scanner }
|
||||
|
||||
%{
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <aterm2.h>
|
||||
|
||||
#include "parser-tab.h"
|
||||
#include "lexer-tab.h"
|
||||
|
||||
void setParseResult(void * data, ATerm t);
|
||||
void parseError(void * data, char * error, int line, int column);
|
||||
ATerm absParsedPath(void * data, ATerm t);
|
||||
|
||||
void yyerror(YYLTYPE * loc, yyscan_t scanner, void * data, char * s)
|
||||
{
|
||||
parseError(data, s, loc->first_line, loc->first_column);
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
%union {
|
||||
ATerm t;
|
||||
ATermList ts;
|
||||
}
|
||||
|
||||
%type <t> start expr expr_function expr_assert expr_op
|
||||
%type <t> expr_app expr_select expr_simple bind formal
|
||||
%type <ts> binds expr_list formals
|
||||
%token <t> ID INT STR PATH URI
|
||||
%token IF THEN ELSE ASSERT LET REC EQ NEQ AND OR IMPL
|
||||
|
||||
%nonassoc IMPL
|
||||
%left OR
|
||||
%left AND
|
||||
%nonassoc EQ NEQ
|
||||
%left NEG
|
||||
|
||||
%%
|
||||
|
||||
start: expr { setParseResult(data, $1); };
|
||||
|
||||
expr: expr_function;
|
||||
|
||||
expr_function
|
||||
: '{' formals '}' ':' expr_function
|
||||
{ $$ = ATmake("Function(<term>, <term>)", $2, $5); }
|
||||
| expr_assert
|
||||
;
|
||||
|
||||
expr_assert
|
||||
: ASSERT expr ';' expr_assert
|
||||
{ $$ = ATmake("Assert(<term>, <term>)", $2, $4); }
|
||||
| expr_op
|
||||
;
|
||||
|
||||
expr_op
|
||||
: '!' expr_op %prec NEG { $$ = ATmake("OpNot(<term>)", $2); }
|
||||
| expr_op EQ expr_op { $$ = ATmake("OpEq(<term>, <term>)", $1, $3); }
|
||||
| expr_op NEQ expr_op { $$ = ATmake("OpNEq(<term>, <term>)", $1, $3); }
|
||||
| expr_op AND expr_op { $$ = ATmake("OpAnd(<term>, <term>)", $1, $3); }
|
||||
| expr_op OR expr_op { $$ = ATmake("OpOr(<term>, <term>)", $1, $3); }
|
||||
| expr_op IMPL expr_op { $$ = ATmake("OpImpl(<term>, <term>)", $1, $3); }
|
||||
| expr_app
|
||||
;
|
||||
|
||||
expr_app
|
||||
: expr_app expr_select
|
||||
{ $$ = ATmake("Call(<term>, <term>)", $1, $2); }
|
||||
| expr_select { $$ = $1; }
|
||||
;
|
||||
|
||||
expr_select
|
||||
: expr_select '.' ID
|
||||
{ $$ = ATmake("Select(<term>, <term>)", $1, $3); }
|
||||
| expr_simple { $$ = $1; }
|
||||
;
|
||||
|
||||
expr_simple
|
||||
: ID { $$ = ATmake("Var(<term>)", $1); }
|
||||
| STR { $$ = ATmake("Str(<term>)", $1); }
|
||||
| PATH { $$ = ATmake("Path(<term>)", absParsedPath(data, $1)); }
|
||||
| URI { $$ = ATmake("Uri(<term>)", $1); }
|
||||
| '(' expr ')' { $$ = $2; }
|
||||
| LET '{' binds '}' { $$ = ATmake("LetRec(<term>)", $3); }
|
||||
| REC '{' binds '}' { $$ = ATmake("Rec(<term>)", $3); }
|
||||
| '{' binds '}' { $$ = ATmake("Attrs(<term>)", $2); }
|
||||
| '[' expr_list ']' { $$ = ATmake("List(<term>)", $2); }
|
||||
| IF expr THEN expr ELSE expr
|
||||
{ $$ = ATmake("If(<term>, <term>, <term>)", $2, $4, $6); }
|
||||
;
|
||||
|
||||
binds
|
||||
: binds bind { $$ = ATinsert($1, $2); }
|
||||
| { $$ = ATempty; }
|
||||
;
|
||||
|
||||
bind
|
||||
: ID '=' expr ';'
|
||||
{ $$ = ATmake("Bind(<term>, <term>)", $1, $3); }
|
||||
;
|
||||
|
||||
expr_list
|
||||
: expr_select expr_list { $$ = ATinsert($2, $1); }
|
||||
/* yes, this is right-recursive, but it doesn't matter since
|
||||
otherwise we would need ATreverse which requires unbounded
|
||||
stack space */
|
||||
| { $$ = ATempty; }
|
||||
;
|
||||
|
||||
formals
|
||||
: formal ',' formals { $$ = ATinsert($3, $1); } /* idem - right recursive */
|
||||
| formal { $$ = ATinsert(ATempty, $1); }
|
||||
;
|
||||
|
||||
formal
|
||||
: ID { $$ = ATmake("NoDefFormal(<term>)", $1); }
|
||||
| ID '?' expr { $$ = ATmake("DefFormal(<term>, <term>)", $1, $3); }
|
||||
;
|
||||
|
||||
%%
|
Loading…
Reference in a new issue