chore(3p/lisp): import npg source tarball
Used http://wcp.sdf-eu.org/software/npg-20150517T144652.tbz
(sha256 42e88f6067128fbdb3a3d578371c9b0ee2a34f1d36daf80be8a520094132d828).
There is no upstream repository and no release since 2015, so importing
the source seems to make a lot of sense. Since we can't subtree this,
any depot-related changes are made in a separate CL -- this is only the
source import.

Change-Id: I64c984ca0a84b9e48c6f496577ffccce1d7bdceb
Reviewed-on: https://cl.tvl.fyi/c/depot/+/3377
Tested-by: BuildkiteCI
Reviewed-by: grfn <grfn@gws.fyi>
parent 2e08324484
commit 8e45aace13

10 changed files with 3609 additions and 0 deletions
third_party/lisp/npg/src/common.lisp (vendored, new file, 79 lines)

@@ -0,0 +1,79 @@
;;; common.lisp --- common stuff

;;; Copyright (C) 2003-2006, 2009 by Walter C. Pelissero

;;; Author: Walter C. Pelissero <walter@pelissero.de>
;;; Project: NPG a Naive Parser Generator

#+cmu (ext:file-comment "$Module: common.lisp $")

;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 2.1
;;; of the License, or (at your option) any later version.
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Lesser General Public License for more details.
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA

(in-package :naive-parser-generator)

(eval-when (:compile-toplevel :load-toplevel)
  (defstruct grammar
    rules
    keywords
    equal-p)

  (defstruct rule
    name
    productions)

  (defstruct (production (:conc-name prod-))
    tokens
    (tokens-length 0 :type fixnum)
    action)

  (defstruct token
    type      ; type of token (identifier, number, ...)
    value     ; its actual value
    position) ; line/column in the input stream
  ) ; eval-when

(defmethod print-object ((obj rule) stream)
  (format stream "#R(~A)" (rule-name obj)))

(defmethod print-object ((obj production) stream)
  (format stream "#P(action: ~S)" (prod-action obj)))

(defmethod print-object ((obj token) stream)
  (format stream "#T:~A=~S" (token-type obj) (token-value obj)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(declaim (inline make-rules-table find-rule add-rule))

(defun make-rules-table ()
  (make-hash-table))

(defun find-rule (rule-name rules)
  (gethash rule-name rules))

(defun add-rule (rule-name rule rules)
  (setf (gethash rule-name rules) rule))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(declaim (inline make-keywords-table find-keyword add-keyword))

(defun make-keywords-table ()
  (make-hash-table :test 'equal))

(defun find-keyword (keyword-name keywords)
  (gethash keyword-name keywords))

(defun add-keyword (keyword keywords)
  (setf (gethash keyword keywords) t))
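
To illustrate how the structures and tables defined in common.lisp fit together, here is a small hypothetical REPL sketch; the EXPRESSION rule name, the "+" keyword and the token contents are invented for illustration and are not part of the imported source:

(in-package :naive-parser-generator)

(let ((rules (make-rules-table))        ; rule table, keyed by rule name
      (keywords (make-keywords-table))) ; keyword table, keyed by string (EQUAL test)
  (add-rule 'expression (make-rule :name 'expression) rules)
  (add-keyword "+" keywords)
  (values (find-rule 'expression rules)  ; => #R(EXPRESSION), per PRINT-OBJECT above
          (find-keyword "+" keywords)    ; => T
          (make-token :type 'number :value "42" :position 1)))  ; prints as #T:NUMBER="42"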

third_party/lisp/npg/src/define.lisp (vendored, new file, 408 lines)

@@ -0,0 +1,408 @@
;;; define.lisp --- grammar rules definition

;;; Copyright (C) 2003-2006, 2009 by Walter C. Pelissero

;;; Author: Walter C. Pelissero <walter@pelissero.de>
;;; Project: NPG a Naive Parser Generator

#+cmu (ext:file-comment "$Module: define.lisp $")

;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 2.1
;;; of the License, or (at your option) any later version.
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Lesser General Public License for more details.
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA

(in-package :naive-parser-generator)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defvar *smart-default-reduction* t
  "If true the default reductions take only the non-static tokens -
those that are not declared as strings in the grammar.")

;; These two are filled with DEFRULE.
(defvar *rules* (make-rules-table))
(defvar *keywords* (make-keywords-table))

(defun make-action-arguments (tokens)
  "Given a list of tokens making up a production, return three values:
the list of variables for the function reducing this production, those
that are non static and their unambiguous user-friendly names."
  (flet ((unique (sym list)
           (if (not (assoc sym list))
               sym
               (loop
                  for i of-type fixnum from 2
                  for x = (intern (format nil "~:@(~A~)~A" sym i))
                  while (assoc x list)
                  finally (return x)))))
    (loop
       for tok in tokens
       for i of-type fixnum from 1
       for arg = (intern (format nil "$~A" i) (find-package #.*package*))
       collect arg into args
       unless (const-terminal-p tok)
       collect arg into vars
       and when (symbolp tok)
       collect (list (unique tok named-vars) arg) into named-vars
       when (and (listp tok)
                 (symbolp (cadr tok)))
       collect (list (unique (cadr tok) named-vars) arg) into named-vars
       finally
       (return (values args vars named-vars)))))

(defun make-action-function (name tokens action)
  "Create a function with name NAME, arguments derived from TOKENS and
body ACTION. Return it's definition."
  (let ((function
         (multiple-value-bind (args vars named-vars)
             (make-action-arguments tokens)
           `(lambda ,args
              (declare (ignorable ,@args))
              (let (($vars (list ,@vars))
                    ($all (list ,@args))
                    ,@named-vars
                    ($alist (list ,@(mapcar #'(lambda (v)
                                                `(cons ',(intern (symbol-name (car v)))
                                                       ,(cadr v)))
                                            named-vars))))
                (declare (ignorable $vars $all $alist ,@(mapcar #'car named-vars)))
                (flet ((make-object (&optional type args)
                         (apply #'make-instance (or type ',name)
                                (append args $alist))))
                  ,action))))))
    (when *compile-print*
      (if *compile-verbose*
          (format t "; Compiling ~S:~% ~S~%" name function)
          (format t "; Compiling ~S~%" name)))
    (compile name function)))

(defun define-rule (name productions)
  "Accept a rule in EBNF-like syntax, translate it into a sexp and a
call to INSERT-RULE-IN-CURRENT-GRAMMAR."
  (flet ((transform (productions)
           (loop
              for tok in productions
              with prod = nil
              with action = nil
              with phase = nil
              with new-prods = nil
              while tok
              do (cond ((eq tok :=)
                        (push (list (nreverse prod) action) new-prods)
                        (setf prod nil
                              action nil
                              phase :prod))
                       ((eq tok :reduce)
                        (setf phase :action))
                       ((eq tok :tag)
                        (setf phase :tag))
                       ((eq phase :tag)
                        (setf action `(cons ,tok $vars)))
                       ((eq phase :action)
                        (setf action tok))
                       ((eq phase :prod)
                        (push tok prod)))
              finally
              (return (cdr (nreverse (cons (list (nreverse prod) action) new-prods)))))))
    (insert-rule-in-current-grammar name (transform productions))))

(defmacro defrule (name &rest productions)
  "Wrapper macro for DEFINE-RULE."
  `(define-rule ',name ',productions))

(defun make-optional-rule (token)
  "Make a rule for a possibly missing (non)terminal (? syntax) and
return it."
  (insert-rule-in-current-grammar
   (gensym (concatenate 'string "OPT-"
                        (if (rule-p token)
                            (symbol-name (rule-name token))
                            (string-upcase token))))
   `(((,token)) (())))

(defun make-alternative-rule (tokens)
  "Make a rule for a list of alternatives (\"or\" syntax) and return it."
  (insert-rule-in-current-grammar
   (gensym "ALT")
   (mapcar #'(lambda (alternative)
               `((,alternative)))
           tokens)))

(defun make-nonempty-list-rule (token &optional separator)
  "Make a rule for a non-empty list (+ syntax) and return it."
  (let ((rule-name (gensym (concatenate 'string "NELST-"
                                        (if (rule-p token)
                                            (symbol-name (rule-name token))
                                            (string-upcase token))))))
    (insert-rule-in-current-grammar
     rule-name
     (if separator
         `(((,token ,separator ,rule-name)
            (cons $1 $3))
           ((,token) ,#'list))
         `(((,token ,rule-name)
            (cons $1 $2))
           ((,token) ,#'list))))))

(defun make-list-rule (token &optional separator)
  "Make a rule for a possibly empty list (* syntax) return it."
  (make-optional-rule (make-nonempty-list-rule token separator)))

(defun const-terminal-p (object)
  (or (stringp object)
      (keywordp object)))

(defun expand-production-token (tok)
  "Translate token of the type NAME? or NAME* or NAME+ into (? NAME)
or (* NAME) or (+ NAME). This is used by the DEFRULE macro."
  (if (symbolp tok)
      (let* ((name (symbol-name tok))
             (last (char name (1- (length name))))
             ;; this looks silly but we need to make sure that we
             ;; return symbols interned in this package, no one else
             (op (cadr (assoc last '((#\? ?) (#\+ +) (#\* *))))))
        (if (and (> (length name) 1) op)
            (list op
                  (intern (subseq name 0 (1- (length name)))))
            tok))
      tok))

(defun EBNF-to-SEBNF (tokens)
  "Take a production as a list of TOKENS and expand it. This turns a
EBNF syntax into a sexp-based EBNF syntax or SEBNF."
  (loop
     for tok in tokens
     for token = (expand-production-token tok)
     with new-tokens = '()
     do (cond ((member token '(* + ?))
               (setf (car new-tokens)
                     (list token (car new-tokens))))
              (t
               (push token new-tokens)))
     finally (return (nreverse new-tokens))))

(defun SEBNF-to-BNF (tokens)
  "Take a production in SEBNF (Symbolic Extended BNF) syntax and turn
it into BNF. The production is simplified but the current grammar is
populated with additional rules."
  (flet ((make-complex-token-rule (tok)
           (ecase (car tok)
             (* (apply #'make-list-rule (cdr tok)))
             (+ (apply #'make-nonempty-list-rule (cdr tok)))
             (? (make-optional-rule (cadr tok)))
             (or (make-alternative-rule (cdr tok))))))
    (loop
       for token in tokens
       with new-tokens = '()
       with keywords = '()
       do (cond ((listp token)
                 (push (make-complex-token-rule token) new-tokens))
                (t
                 (push token new-tokens)
                 (when (const-terminal-p token)
                   (push token keywords))))
       finally (return (values (nreverse new-tokens) keywords)))))

(defun make-default-action-function (name tokens)
  "Create a sexp to be used as default action in case one is not
supplied in the production. This is usually a quite sensible
one. That is, only the non-constant tokens are returned in a
list and in case only a variable token is available that one is
returned (not included in a list). If all the tokens are
constant, then all of them are returned in a list."
  (cond ((null tokens)
         ;; if the production matched the empty list (no tokens) we
         ;; return always nil, that is the function LIST applied to no
         ;; arguments
         #'list)
        ((null (cdr tokens))
         ;; if the production matches just one token we simply return
         ;; that
         #'identity)
        (*smart-default-reduction*
         ;; If we are required to be "smart" then create a function
         ;; that simply returns the non static tokens of the
         ;; production. If the production doesn't have nonterminal,
         ;; then return all the tokens. If the production has only
         ;; one argument then return that one only.
         (make-action-function name tokens '(cond
                                             ((null $vars) $all)
                                             ((null (cdr $vars)) (car $vars))
                                             (t $vars))))
        (t
         ;; in all the other cases we return all the token matching
         ;; the production
         #'list)))

(defun make-production-from-descr (name production-description)
  "Take a production NAME and its description in the form of a sexp
and return a production structure object together with a list of used
keywords."
  (destructuring-bind (tokens &optional action) production-description
    (let ((expanded-tokens (EBNF-to-SEBNF tokens)))
      (multiple-value-bind (production-tokens keywords)
          (sebnf-to-bnf expanded-tokens)
        (let ((funct
               (cond ((not action)
                      (make-default-action-function name expanded-tokens))
                     ((or (listp action)
                          ;; the case when the action is simply to
                          ;; return a token (ie $2) or a constant value
                          (symbolp action))
                      (make-action-function name expanded-tokens action))
                     ((functionp action)
                      action)
                     (t ; action is a constant
                      #'(lambda (&rest args)
                          (declare (ignore args))
                          action)))))
          (values
           ;; Make a promise instead of actually resolving the
           ;; nonterminals. This avoids endless recursion.
           (make-production :tokens production-tokens
                            :tokens-length (length production-tokens)
                            :action funct)
           keywords))))))

(defun remove-immediate-left-recursivity (rule)
  "Turn left recursive rules of the type
A -> A x | y
into
A -> y A2
A2 -> x A2 | E
where E is the empty production."
  (let ((name (rule-name rule))
        (productions (rule-productions rule)))
    (loop
       for prod in productions
       for tokens = (prod-tokens prod)
       ;; when immediately left recursive
       when (eq (car tokens) rule)
       collect prod into left-recursive
       else
       collect prod into non-left-recursive
       finally
       ;; found any left recursive production?
       (when left-recursive
         (warn "rule ~S is left recursive" name)
         (let ((new-rule (make-rule :name (gensym "REWRITE"))))
           ;; A -> y A2
           (setf (rule-productions rule)
                 (mapcar #'(lambda (p)
                             (let ((tokens (prod-tokens p))
                                   (action (prod-action p)))
                               (make-production :tokens (append tokens (list new-rule))
                                                :tokens-length (1+ (prod-tokens-length p))
                                                :action #'(lambda (&rest args)
                                                            (let ((f-A2 (car (last args)))
                                                                  (head (butlast args)))
                                                              (funcall f-A2 (apply action head)))))))
                         non-left-recursive))
           ;; A2 -> x A2 | E
           (setf (rule-productions new-rule)
                 (append
                  (mapcar #'(lambda (p)
                              (let ((tokens (prod-tokens p))
                                    (action (prod-action p)))
                                (make-production :tokens (append (cdr tokens) (list new-rule))
                                                 :tokens-length (prod-tokens-length p)
                                                 :action #'(lambda (&rest args)
                                                             (let ((f-A2 (car (last args)))
                                                                   (head (butlast args)))
                                                               #'(lambda (x)
                                                                   (funcall f-A2 (apply action x head))))))))
                          left-recursive)
                  (list
                   (make-production :tokens nil
                                    :tokens-length 0
                                    :action #'(lambda () #'(lambda (arg) arg)))))))))))

(defun remove-left-recursivity-from-rules (rules)
  (loop
     for rule being each hash-value in rules
     do
     ;; More to be done here. For now only the trivial immediate left
     ;; recursivity is removed -wcp18/11/03.
     (remove-immediate-left-recursivity rule)))

(defun resolve-all-nonterminals (rules)
  (loop
     for rule being each hash-value in rules
     do (loop
           for production in (rule-productions rule)
           do (setf (prod-tokens production)
                    (resolve-nonterminals (prod-tokens production) rules)))))

(defun make-rule-productions (rule-name production-descriptions)
  "Return a production object that belongs to RULE-NAME made according
to PRODUCTION-DESCRIPTIONS. See also MAKE-PRODUCTION-FROM-DESCR."
  (loop
     for descr in production-descriptions
     for i of-type fixnum from 1 by 1
     for prod-name = (intern (format nil "~:@(~A~)-PROD~A" rule-name i))
     with productions = '()
     with keywords = '()
     do (progn
          (multiple-value-bind (production keyws)
              (make-production-from-descr prod-name descr)
            (push production productions)
            (setf keywords (append keyws keywords))))
     finally (return
               (values (nreverse productions) keywords))))

(defun create-rule (name production-descriptions)
  "Return a new rule object together with a list of keywords making up
the production definitions."
  (multiple-value-bind (productions keywords)
      (make-rule-productions name production-descriptions)
    (values (make-rule :name name :productions productions)
            keywords)))

(defun insert-rule-in-current-grammar (name productions)
  "Add rule to the current grammar and its keywords to the keywords
hash table. You don't want to use this directly. See DEFRULE macro
instead."
  (when (find-rule name *rules*)
    (error "redefining rule ~A" name))
  (multiple-value-bind (rule keywords)
      (create-rule name productions)
    (add-rule name rule *rules*)
    (dolist (term keywords)
      (add-keyword term *keywords*))
    rule))

(defun resolve-nonterminals (tokens rules)
  "Given a list of production tokens, try to expand the nonterminal
ones with their respective rule from the the RULES pool."
  (flet ((resolve-symbol (sym)
           (or (find-rule sym rules)
               sym)))
    (mapcar #'(lambda (tok)
                (if (symbolp tok)
                    (resolve-symbol tok)
                    tok))
            tokens)))

(defun reset-grammar ()
  "Empty the current grammar from any existing rule."
  (setf *rules* (make-rules-table)
        *keywords* (make-keywords-table)))

(defun generate-grammar (&optional (equal-p #'string-equal))
  "Return a GRAMMAR structure suitable for the PARSE function, using
the current rules. EQUAL-P, if present, is a function to be used to
match the input tokens; it defaults to STRING-EQUAL."
  (resolve-all-nonterminals *rules*)
  (remove-left-recursivity-from-rules *rules*)
  (make-grammar :rules *rules*
                :keywords *keywords*
                :equal-p equal-p))
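
As a hypothetical sketch of the DEFRULE surface syntax handled by DEFINE-RULE above (the SUM and NUMBER names and the arithmetic actions are invented for illustration and are not part of the imported source):

(in-package :naive-parser-generator)

(reset-grammar)

;; Each := starts a production; :reduce supplies the action, in which $1, $2, ...
;; are bound to the matched values (constant terminals such as "+" are excluded
;; from $vars but still available positionally).
(defrule sum
  := number "+" sum :reduce (+ $1 $3)
  := number         :reduce $1)

;; Resolve nonterminals, rewrite left recursion and produce a GRAMMAR object.
(defparameter *example-grammar* (generate-grammar))

Here NUMBER is never defined as a rule, so it stays a plain symbol and the runtime parser matches it against the TOKEN-TYPE of incoming tokens, while "+" is recorded as a constant keyword terminal.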

third_party/lisp/npg/src/package.lisp (vendored, new file, 50 lines)

@@ -0,0 +1,50 @@
;;; package.lisp --- backtracking parser package definition

;;; Copyright (C) 2003-2006, 2009 by Walter C. Pelissero

;;; Author: Walter C. Pelissero <walter@pelissero.de>
;;; Project: NPG a Naive Parser Generator

#+cmu (ext:file-comment "$Module: package.lisp $")

;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 2.1
;;; of the License, or (at your option) any later version.
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Lesser General Public License for more details.
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA

(in-package :cl-user)

(defpackage :naive-parser-generator
  (:nicknames :npg)
  (:use :common-lisp)
  (:export
   #:parse                      ; The Parser
   #:reset-grammar
   #:generate-grammar
   #:print-grammar-figures
   #:grammar-keyword-p
   #:keyword
   #:grammar
   #:make-token
   #:token-value
   #:token-type
   #:token-position
   #:later-position
   #:defrule                    ; to define grammars
   #:deftoken                   ; to define a lexer
   #:input-cursor-mixin
   #:copy-input-cursor-slots
   #:dup-input-cursor
   #:read-next-tokens
   #:end-of-input
   #:? #:+ #:* #:or
   #:$vars #:$all #:$alist
   #:$1 #:$2 #:$3 #:$4 #:$5 #:$6 #:$7 #:$8 #:$9 #:$10))
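
Client code is expected to go through the exported symbols above, e.g. via the :NPG nickname; a minimal hypothetical smoke test (the grammar here is simply empty):

(npg:reset-grammar)
(npg:print-grammar-figures (npg:generate-grammar))  ; prints zero rules/terminals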

third_party/lisp/npg/src/parser.lisp (vendored, new file, 234 lines)

@@ -0,0 +1,234 @@
;;; parser.lisp --- runtime parser

;;; Copyright (C) 2003-2006, 2009 by Walter C. Pelissero

;;; Author: Walter C. Pelissero <walter@pelissero.de>
;;; Project: NPG a Naive Parser Generator

#+cmu (ext:file-comment "$Module: parser.lisp $")

;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 2.1
;;; of the License, or (at your option) any later version.
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Lesser General Public License for more details.
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA

;;; Commentary:
;;;
;;; This is the runtime part of the parser. The code that is
;;; responsible to execute the parser defined with the primitives
;;; found in define.lisp.

(in-package :naive-parser-generator)

(defvar *debug* nil
  "Either nil or a stream where to write the debug informations.")
#+debug (declaim (fixnum *maximum-recursion-depth*))
#+debug (defvar *maximum-recursion-depth* 1000
  "Maximum depth the parser is allowed to recursively call itself.
This is the only way for the parser to detect a loop in the grammar.
Tune this if your grammar is unusually complex.")

(declaim (inline reduce-production))
(defun reduce-production (production arguments)
  "Apply PRODUCTION's action on ARGUMENTS. This has the effect of
\"reducing\" the production."
  (when *debug*
    (format *debug* "reducing ~S on ~S~%" production arguments))
  (flet ((safe-token-value (token)
           (if (token-p token)
               (token-value token)
               token)))
    (apply (prod-action production) (mapcar #'safe-token-value arguments))))

(defgeneric later-position (pos1 pos2)
  (:documentation
   "Compare two file postions and return true if POS1 is later than
POS2 in the input stream."))

;; This is meant to be overloaded in the lexer
(defmethod later-position ((pos1 integer) (pos2 integer))
  (> pos1 pos2))

;; this looks silly but turns out to be useful (see below)
(defmethod later-position (pos1 pos2)
  (and (eq pos1 :eof) (not (eq pos2 :eof))))

(defgeneric read-next-tokens (tokens-source)
  (:documentation "Read next token from a lexical analysed stream. The nature of
TOKENS-SOURCE is implementation dependent and any lexical analyzer is
supposed to specialise this method."))

;; This is the actual parser. the algorithm is pretty
;; straightforward, the execution of the reductions a bit less. Error
;; recovery is rather clumsy.

(defun parse (grammar start tokenizer)
  "Match a GRAMMAR against the list of input tokens coming from TOKENIZER.
Return the reduced values according to the nonterminal actions. Raise
an error on failure."
  (declare (type grammar grammar)
           (type symbol start))
  (labels
      ((match-token (expected token)
         (when *debug*
           (format *debug* "match-token ~S ~S -> " expected token))
         (let ((res (cond ((symbolp expected)
                           ;; non-costant terminal (like identifiers)
                           (eq expected (token-type token)))
                          ((and (stringp expected)
                                (stringp (token-value token)))
                           ;; string costant terminal
                           (funcall (the function (grammar-equal-p grammar)) expected (token-value token)))
                          ((functionp expected)
                           ;; custom equality predicate (must be able
                           ;; to deal with token objects)
                           (funcall expected token))
                          ;; all the rest
                          (t (equal expected (token-value token))))))
           (when *debug*
             (format *debug* "~Amatched~%" (if res "" "not ")))
           res))
       (match (expected matched #+debug depth)
         (declare (list expected matched)
                  #+debug (fixnum depth))
         (let ((first-expected (car expected)))
           (cond #+debug ((> depth *maximum-recursion-depth*)
                          (error "endless recursion on ~A ~A at ~A expecting ~S"
                                 (token-type (car matched)) (token-value (car matched))
                                 (token-position (car matched)) expected))
                 ((eq first-expected :any)
                  (match (cdr expected) (cdr matched) #+debug depth))
                 ;; This is a trick to obtain partial parses. When we
                 ;; reach this expected token we assume we succeeded
                 ;; the parsing and return the remaining tokens as
                 ;; part of the match.
                 ((eq first-expected :rest)
                  ;; we could be at the end of input so we check this
                  (unless (cdr matched)
                    (setf (cdr matched) (list :rest)))
                  (list nil nil))
                 ((rule-p first-expected)
                  ;; If it's a rule, then we try to match all its
                  ;; productions. We return the first that succeeds.
                  (loop
                     for production in (rule-productions first-expected)
                     for production-tokens of-type list = (prod-tokens production)
                     with last-error-position = nil
                     with last-error = nil
                     for (error-position error-descr) =
                       (progn
                         (when *debug*
                           (format *debug* "trying to match ~A: ~S~%"
                                   (rule-name first-expected) production-tokens))
                         (match (append production-tokens (cdr expected)) matched #+debug (1+ depth)))
                     do (cond ((not error-position)
                               (return (let ((args-count (prod-tokens-length production)))
                                         (setf (cdr matched)
                                               (cons (reduce-production
                                                      production
                                                      (subseq (the list (cdr matched)) 0 args-count))
                                                     (nthcdr (1+ args-count) matched)))
                                         (list nil nil))))
                              ((or (not last-error)
                                   (later-position error-position last-error-position))
                               (setf last-error-position error-position
                                     last-error error-descr)))
                     ;; if everything fails return the "best" error
                     finally (return (list last-error-position
                                           (if *debug*
                                               #'(lambda ()
                                                   (format nil "~A, trying to match ~A"
                                                           (funcall (the function last-error))
                                                           (rule-name first-expected)))
                                               last-error)))))
                 (t
                  ;; if necessary load the next tokens
                  (when (null (cdr matched))
                    (setf (cdr matched) (read-next-tokens tokenizer)))
                  (cond ((and (or (null expected) (eq first-expected :eof))
                              (null (cdr matched)))
                         ;; This point is reached only once for each complete
                         ;; parsing. The expected tokens and the input
                         ;; tokens have been exhausted at the same time.
                         ;; Hence we succeeded the parsing.
                         (setf (cdr matched) (list :eof))
                         (list nil nil))
                        ((null expected)
                         ;; Garbage at end of parsing. This may mean that we
                         ;; have considered a production completed too soon.
                         (list (token-position (car matched))
                               #'(lambda ()
                                   "garbage at end of parsing")))
                        ((null (cdr matched))
                         ;; EOF error
                         (list :eof
                               #'(lambda ()
                                   (format nil "end of input expecting ~S" expected))))
                        (t ;; normal token
                         (let ((first-token (cadr matched)))
                           (if (match-token first-expected first-token)
                               (match (cdr expected) (cdr matched) #+debug depth)
                               ;; failed: we return the error
                               (list (token-position first-token)
                                     #'(lambda ()
                                         (format nil "expected ~S but got ~S ~S"
                                                 first-expected (token-type first-token)
                                                 (token-value first-token)))))))))))))
    (declare (inline match-token))
    (let ((result (list :head)))
      (destructuring-bind (error-position error)
          (match (list (find-rule start (grammar-rules grammar))) result #+debug 0)
        (when error-position
          (error "~A at ~A~%" (funcall (the function error)) error-position))
        (cadr result)))))

(defgeneric terminals-in-grammar (grammar-or-hashtable)
  (:documentation
   "Find non constant terminal symbols in GRAMMAR."))

(defmethod terminals-in-grammar ((grammar hash-table))
  (loop
     for rule being each hash-value of grammar
     with terminals = '()
     do (loop
           for prod in (rule-productions rule)
           do (loop
                 for tok in (prod-tokens prod)
                 when (symbolp tok)
                 do (pushnew tok terminals)))
     finally (return terminals)))

(defmethod terminals-in-grammar ((grammar grammar))
  (terminals-in-grammar (grammar-rules grammar)))

(defun print-grammar-figures (grammar &optional (stream *standard-output*))
  (format stream "rules: ~A~%constant terminals: ~A~%variable terminals: ~S~%"
          (hash-table-count (grammar-rules grammar))
          (hash-table-count (grammar-keywords grammar))
          (terminals-in-grammar (grammar-rules grammar))))


(defun grammar-keyword-p (keyword grammar)
  "Check if KEYWORD is part of this grammar."
  (find-keyword keyword (grammar-keywords grammar)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defvar *grammars* (make-hash-table))

(defun find-grammar (name)
  (gethash name *grammars*))

(defun delete-grammar (name)
  (remhash name *grammars*))

(defun add-grammar (name grammar)
  (setf (gethash name *grammars*) grammar))
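
To show how the pieces are wired together at runtime, here is a hypothetical end-to-end sketch: READ-NEXT-TOKENS is specialised on a toy token source, and PARSE is run against the SUM grammar sketched after define.lisp (LIST-TOKENIZER, the token types and the expected result are invented for illustration):

(in-package :naive-parser-generator)

;; A toy token source: hands PARSE one pre-built token at a time,
;; returning NIL at end of input.
(defclass list-tokenizer ()
  ((tokens :initarg :tokens :accessor list-tokenizer-tokens)))

(defmethod read-next-tokens ((tokenizer list-tokenizer))
  (let ((tok (pop (list-tokenizer-tokens tokenizer))))
    (when tok
      (list tok))))

(parse *example-grammar* 'sum
       (make-instance 'list-tokenizer
                      :tokens (list (make-token :type 'number :value 1 :position 1)
                                    (make-token :type 'punctuation :value "+" :position 2)
                                    (make-token :type 'number :value 2 :position 3))))
;; => 3, via the (+ $1 $3) reduction sketched earlier (hypothetical result).

The "+" token matches the constant terminal through the grammar's EQUAL-P (STRING-EQUAL by default), while the NUMBER tokens match the variable terminal by TOKEN-TYPE.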