437efa7686
Checked in the sources for this because it is tracked upstream in CVS and I can't be bothered to deal with that right now.
702 lines
31 KiB
Common Lisp
702 lines
31 KiB
Common Lisp
;;;; -*- mode: lisp -*-
|
|
;;;;
|
|
;;;; $Id: xml.lisp,v 1.15 2006/01/19 20:00:06 scaekenberghe Exp $
|
|
;;;;
|
|
;;;; This is a Common Lisp implementation of a basic but usable XML parser.
|
|
;;;; The parser is non-validating and not complete (no PI).
|
|
;;;; Namespace and entities are handled.
|
|
;;;; The API into the parser is a pure functional parser hook model that comes from SSAX,
|
|
;;;; see also http://pobox.com/~oleg/ftp/Scheme/xml.html or http://ssax.sourceforge.net
|
|
;;;; Different DOM models are provided, an XSML, an LXML and a xml-element struct based one.
|
|
;;;;
|
|
;;;; Copyright (C) 2002, 2003, 2004, 2005, 2006 Sven Van Caekenberghe, Beta Nine BVBA.
|
|
;;;;
|
|
;;;; You are granted the rights to distribute and use this software
|
|
;;;; as governed by the terms of the Lisp Lesser General Public License
|
|
;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
|
|
|
|
(in-package :s-xml)
|
|
|
|
;;; (tazjin): moved up here because something was wonky with the
|
|
;;; definition order
|
|
(defvar *ignore-namespaces* nil
|
|
"When t, namespaces are ignored like in the old version of S-XML")
|
|
|
|
;;; error reporting
|
|
|
|
(define-condition xml-parser-error (error)
|
|
((message :initarg :message :reader xml-parser-error-message)
|
|
(args :initarg :args :reader xml-parser-error-args)
|
|
(stream :initarg :stream :reader xml-parser-error-stream :initform nil))
|
|
(:report (lambda (condition stream)
|
|
(format stream
|
|
"XML parser ~?~@[ near stream position ~d~]."
|
|
(xml-parser-error-message condition)
|
|
(xml-parser-error-args condition)
|
|
(and (xml-parser-error-stream condition)
|
|
(file-position (xml-parser-error-stream condition))))))
|
|
(:documentation "Thrown by the XML parser to indicate errorneous input"))
|
|
|
|
(setf (documentation 'xml-parser-error-message 'function)
|
|
"Get the message from an XML parser error"
|
|
(documentation 'xml-parser-error-args 'function)
|
|
"Get the error arguments from an XML parser error"
|
|
(documentation 'xml-parser-error-stream 'function)
|
|
"Get the stream from an XML parser error")
|
|
|
|
(defun parser-error (message &optional args stream)
|
|
(make-condition 'xml-parser-error
|
|
:message message
|
|
:args args
|
|
:stream stream))
|
|
|
|
;; attribute parsing hooks
|
|
;; this is a bit complicated, refer to the mailing lists for a more detailed explanation
|
|
|
|
(defun parse-attribute-name (string)
|
|
"Default parser for the attribute name"
|
|
(declare (special *namespaces*))
|
|
(resolve-identifier string *namespaces* t))
|
|
|
|
(defun parse-attribute-value (name string)
|
|
"Default parser for the attribute value"
|
|
(declare (ignore name)
|
|
(special *ignore-namespace*))
|
|
(if *ignore-namespaces*
|
|
(copy-seq string)
|
|
string))
|
|
|
|
(defparameter *attribute-name-parser* #'parse-attribute-name
|
|
"Called to compute interned attribute name from a buffer that will be reused")
|
|
|
|
(defparameter *attribute-value-parser* #'parse-attribute-value
|
|
"Called to compute an element of an attribute list from a buffer that will be reused")
|
|
|
|
;;; utilities
|
|
|
|
(defun whitespace-char-p (char)
|
|
"Is char an XML whitespace character ?"
|
|
(declare (type character char))
|
|
(or (char= char #\space)
|
|
(char= char #\tab)
|
|
(char= char #\return)
|
|
(char= char #\linefeed)))
|
|
|
|
(defun identifier-char-p (char)
|
|
"Is char an XML identifier character ?"
|
|
(declare (type character char))
|
|
(or (and (char<= #\A char) (char<= char #\Z))
|
|
(and (char<= #\a char) (char<= char #\z))
|
|
(and (char<= #\0 char) (char<= char #\9))
|
|
(char= char #\-)
|
|
(char= char #\_)
|
|
(char= char #\.)
|
|
(char= char #\:)))
|
|
|
|
(defun skip-whitespace (stream)
|
|
"Skip over XML whitespace in stream, return first non-whitespace
|
|
character which was peeked but not read, return nil on eof"
|
|
(loop
|
|
(let ((char (peek-char nil stream nil #\Null)))
|
|
(declare (type character char))
|
|
(if (whitespace-char-p char)
|
|
(read-char stream)
|
|
(return char)))))
|
|
|
|
(defun make-extendable-string (&optional (size 10))
|
|
"Make an extendable string which is a one-dimensional character
|
|
array which is adjustable and has a fill pointer"
|
|
(make-array size
|
|
:element-type 'character
|
|
:adjustable t
|
|
:fill-pointer 0))
|
|
|
|
(defun print-string-xml (string stream &key (start 0) end)
|
|
"Write the characters of string to stream using basic XML conventions"
|
|
(loop for offset upfrom start below (or end (length string))
|
|
for char = (char string offset)
|
|
do (case char
|
|
(#\& (write-string "&" stream))
|
|
(#\< (write-string "<" stream))
|
|
(#\> (write-string ">" stream))
|
|
(#\" (write-string """ stream))
|
|
((#\newline #\return #\tab) (write-char char stream))
|
|
(t (if (and (<= 32 (char-code char))
|
|
(<= (char-code char) 126))
|
|
(write-char char stream)
|
|
(progn
|
|
(write-string "&#x" stream)
|
|
(write (char-code char) :stream stream :base 16)
|
|
(write-char #\; stream)))))))
|
|
|
|
(defun make-standard-entities ()
|
|
"A hashtable mapping XML entity names to their replacement strings,
|
|
filled with the standard set"
|
|
(let ((entities (make-hash-table :test #'equal)))
|
|
(setf (gethash "amp" entities) (string #\&)
|
|
(gethash "quot" entities) (string #\")
|
|
(gethash "apos" entities) (string #\')
|
|
(gethash "lt" entities) (string #\<)
|
|
(gethash "gt" entities) (string #\>)
|
|
(gethash "nbsp" entities) (string #\space))
|
|
entities))
|
|
|
|
(defun resolve-entity (stream extendable-string entities entity)
|
|
"Read and resolve an XML entity from stream, positioned after the '&' entity marker,
|
|
accepting &name; &#DEC; and &#xHEX; formats,
|
|
destructively modifying string, which is also returned,
|
|
destructively modifying entity, incorrect entity formats result in errors"
|
|
(declare (type (vector character) entity))
|
|
(loop
|
|
(let ((char (read-char stream nil #\Null)))
|
|
(declare (type character char))
|
|
(cond ((char= char #\Null) (error (parser-error "encountered eof before end of entity")))
|
|
((char= #\; char) (return))
|
|
(t (vector-push-extend char entity)))))
|
|
(if (char= (char entity 0) #\#)
|
|
(let ((code (if (char= (char entity 1) #\x)
|
|
(parse-integer entity :start 2 :radix 16 :junk-allowed t)
|
|
(parse-integer entity :start 1 :radix 10 :junk-allowed t))))
|
|
(when (null code)
|
|
(error (parser-error "encountered incorrect entity &~s;" (list entity) stream)))
|
|
(vector-push-extend (code-char code) extendable-string))
|
|
(let ((value (gethash entity entities)))
|
|
(if value
|
|
(loop :for char :across value
|
|
:do (vector-push-extend char extendable-string))
|
|
(error (parser-error "encountered unknown entity &~s;" (list entity) stream)))))
|
|
extendable-string)
|
|
|
|
;;; namespace support
|
|
|
|
(defclass xml-namespace ()
|
|
((uri :documentation "The URI used to identify this namespace"
|
|
:accessor get-uri
|
|
:initarg :uri)
|
|
(prefix :documentation "The preferred prefix assigned to this namespace"
|
|
:accessor get-prefix
|
|
:initarg :prefix
|
|
:initform nil)
|
|
(package :documentation "The Common Lisp package where this namespace's symbols are interned"
|
|
:accessor get-package
|
|
:initarg :package
|
|
:initform nil))
|
|
(:documentation "Describes an XML namespace and how it is handled"))
|
|
|
|
(setf (documentation 'get-uri 'function)
|
|
"The URI used to identify this namespace"
|
|
(documentation 'get-prefix 'function)
|
|
"The preferred prefix assigned to this namespace"
|
|
(documentation 'get-package 'function)
|
|
"The Common Lisp package where this namespace's symbols are interned")
|
|
|
|
(defmethod print-object ((object xml-namespace) stream)
|
|
(print-unreadable-object (object stream :type t :identity t)
|
|
(format stream "~A - ~A" (get-prefix object) (get-uri object))))
|
|
|
|
(defvar *local-namespace* (make-instance 'xml-namespace
|
|
:uri "local"
|
|
:prefix ""
|
|
:package (find-package :keyword))
|
|
"The local (global default) XML namespace")
|
|
|
|
(defvar *xml-namespace* (make-instance 'xml-namespace
|
|
:uri "http://www.w3.org/XML/1998/namespace"
|
|
:prefix "xml"
|
|
:package (or (find-package :xml)
|
|
(make-package :xml :nicknames '("XML"))))
|
|
"REC-xml-names-19990114 says the prefix xml is bound to the namespace http://www.w3.org/XML/1998/namespace.")
|
|
|
|
(defvar *known-namespaces* (list *local-namespace* *xml-namespace*)
|
|
"The list of known/defined namespaces")
|
|
|
|
(defvar *namespaces* `(("xml" . ,*xml-namespace*) ("" . ,*local-namespace*))
|
|
"Ordered list of (prefix . XML-namespace) bindings currently in effect - special variable")
|
|
|
|
(defun find-namespace (uri)
|
|
"Find a registered XML namespace identified by uri"
|
|
(find uri *known-namespaces* :key #'get-uri :test #'string-equal))
|
|
|
|
(defun register-namespace (uri prefix package)
|
|
"Register a new or redefine an existing XML namespace defined by uri with prefix and package"
|
|
(let ((namespace (find-namespace uri)))
|
|
(if namespace
|
|
(setf (get-prefix namespace) prefix
|
|
(get-package namespace) (find-package package))
|
|
(push (setf namespace (make-instance 'xml-namespace
|
|
:uri uri
|
|
:prefix prefix
|
|
:package (find-package package)))
|
|
*known-namespaces*))
|
|
namespace))
|
|
|
|
(defun find-namespace-binding (prefix namespaces)
|
|
"Find the XML namespace currently bound to prefix in the namespaces bindings"
|
|
(cdr (assoc prefix namespaces :test #'string-equal)))
|
|
|
|
(defun split-identifier (identifier)
|
|
"Split an identifier 'prefix:name' and return (values prefix name)"
|
|
(when (symbolp identifier)
|
|
(setf identifier (symbol-name identifier)))
|
|
(let ((colon-position (position #\: identifier :test #'char=)))
|
|
(if colon-position
|
|
(values (subseq identifier 0 colon-position)
|
|
(subseq identifier (1+ colon-position)))
|
|
(values nil identifier))))
|
|
|
|
(defvar *require-existing-symbols* nil
|
|
"If t, each XML identifier must exist as symbol already")
|
|
|
|
(defvar *auto-export-symbols* t
|
|
"If t, export newly interned symbols form their packages")
|
|
|
|
(defun resolve-identifier (identifier namespaces &optional as-attribute)
|
|
"Resolve the string identifier in the list of namespace bindings"
|
|
(if *ignore-namespaces*
|
|
(intern identifier :keyword)
|
|
(flet ((intern-symbol (string package) ; intern string as a symbol in package
|
|
(if *require-existing-symbols*
|
|
(let ((symbol (find-symbol string package)))
|
|
(or symbol
|
|
(error "Symbol ~s does not exist in ~s" string package)))
|
|
(let ((symbol (intern string package)))
|
|
(when (and *auto-export-symbols*
|
|
(not (eql package (find-package :keyword))))
|
|
(export symbol package))
|
|
symbol))))
|
|
(multiple-value-bind (prefix name)
|
|
(split-identifier identifier)
|
|
(if (or (null prefix) (string= prefix "xmlns"))
|
|
(if as-attribute
|
|
(intern (if (string= prefix "xmlns") identifier name) (get-package *local-namespace*))
|
|
(let ((default-namespace (find-namespace-binding "" namespaces)))
|
|
(intern-symbol name (get-package default-namespace))))
|
|
(let ((namespace (find-namespace-binding prefix namespaces)))
|
|
(if namespace
|
|
(intern-symbol name (get-package namespace))
|
|
(error "namespace not found for prefix ~s" prefix))))))))
|
|
|
|
(defvar *auto-create-namespace-packages* t
|
|
"If t, new packages will be created for namespaces, if needed, named by the prefix")
|
|
|
|
(defun new-namespace (uri &optional prefix)
|
|
"Register a new namespace for uri and prefix, creating a package if necessary"
|
|
(if prefix
|
|
(register-namespace uri
|
|
prefix
|
|
(or (find-package prefix)
|
|
(if *auto-create-namespace-packages*
|
|
(make-package prefix :nicknames `(,(string-upcase prefix)))
|
|
(error "Cannot find or create package ~s" prefix))))
|
|
(let ((unique-name (loop :for i :upfrom 0
|
|
:do (let ((name (format nil "ns-~d" i)))
|
|
(when (not (find-package name))
|
|
(return name))))))
|
|
(register-namespace uri
|
|
unique-name
|
|
(if *auto-create-namespace-packages*
|
|
(make-package (string-upcase unique-name) :nicknames `(,unique-name))
|
|
(error "Cannot create package ~s" unique-name))))))
|
|
|
|
(defun extend-namespaces (attributes namespaces)
|
|
"Given possible 'xmlns[:prefix]' attributes, extend the namespaces bindings"
|
|
(unless *ignore-namespaces*
|
|
(let (default-namespace-uri)
|
|
(loop :for (key . value) :in attributes
|
|
:do (if (string= key "xmlns")
|
|
(setf default-namespace-uri value)
|
|
(multiple-value-bind (prefix name)
|
|
(split-identifier key)
|
|
(when (string= prefix "xmlns")
|
|
(let* ((uri value)
|
|
(prefix name)
|
|
(namespace (find-namespace uri)))
|
|
(unless namespace
|
|
(setf namespace (new-namespace uri prefix)))
|
|
(push `(,prefix . ,namespace) namespaces))))))
|
|
(when default-namespace-uri
|
|
(let ((namespace (find-namespace default-namespace-uri)))
|
|
(unless namespace
|
|
(setf namespace (new-namespace default-namespace-uri)))
|
|
(push `("" . ,namespace) namespaces)))))
|
|
namespaces)
|
|
|
|
(defun print-identifier (identifier stream &optional as-attribute)
|
|
"Print identifier on stream using namespace conventions"
|
|
(declare (ignore as-attribute) (special *namespaces*))
|
|
(if *ignore-namespaces*
|
|
(princ identifier stream)
|
|
(if (symbolp identifier)
|
|
(let ((package (symbol-package identifier))
|
|
(name (symbol-name identifier)))
|
|
(let* ((namespace (find package *known-namespaces* :key #'get-package))
|
|
(prefix (or (car (find namespace *namespaces* :key #'cdr))
|
|
(get-prefix namespace))))
|
|
(if (string= prefix "")
|
|
(princ name stream)
|
|
(format stream "~a:~a" prefix name))))
|
|
(princ identifier stream))))
|
|
|
|
;;; the parser state
|
|
|
|
(defclass xml-parser-state ()
|
|
((entities :documentation "A hashtable mapping XML entity names to their replacement stings"
|
|
:accessor get-entities
|
|
:initarg :entities
|
|
:initform (make-standard-entities))
|
|
(seed :documentation "The user seed object"
|
|
:accessor get-seed
|
|
:initarg :seed
|
|
:initform nil)
|
|
(buffer :documentation "The main reusable character buffer"
|
|
:accessor get-buffer
|
|
:initform (make-extendable-string))
|
|
(mini-buffer :documentation "The secondary, smaller reusable character buffer"
|
|
:accessor get-mini-buffer
|
|
:initform (make-extendable-string))
|
|
(new-element-hook :documentation "Called when new element starts"
|
|
;; Handle the start of a new xml element with name and attributes,
|
|
;; receiving seed from previous element (sibling or parent)
|
|
;; return seed to be used for first child (content)
|
|
;; or directly to finish-element-hook
|
|
:accessor get-new-element-hook
|
|
:initarg :new-element-hook
|
|
:initform #'(lambda (name attributes seed)
|
|
(declare (ignore name attributes))
|
|
seed))
|
|
(finish-element-hook :documentation "Called when element ends"
|
|
;; Handle the end of an xml element with name and attributes,
|
|
;; receiving parent-seed, the seed passed to us when this element started,
|
|
;; i.e. passed to our corresponding new-element-hook
|
|
;; and receiving seed from last child (content)
|
|
;; or directly from new-element-hook
|
|
;; return final seed for this element to next element (sibling or parent)
|
|
:accessor get-finish-element-hook
|
|
:initarg :finish-element-hook
|
|
:initform #'(lambda (name attributes parent-seed seed)
|
|
(declare (ignore name attributes parent-seed))
|
|
seed))
|
|
(text-hook :documentation "Called when text is found"
|
|
;; Handle text in string, found as contents,
|
|
;; receiving seed from previous element (sibling or parent),
|
|
;; return final seed for this element to next element (sibling or parent)
|
|
:accessor get-text-hook
|
|
:initarg :text-hook
|
|
:initform #'(lambda (string seed)
|
|
(declare (ignore string))
|
|
seed)))
|
|
(:documentation "The XML parser state passed along all code making up the parser"))
|
|
|
|
(setf (documentation 'get-seed 'function)
|
|
"Get the initial user seed of an XML parser state"
|
|
(documentation 'get-entities 'function)
|
|
"Get the entities hashtable of an XML parser state"
|
|
(documentation 'get-new-element-hook 'function)
|
|
"Get the new element hook of an XML parser state"
|
|
(documentation 'get-finish-element-hook 'function)
|
|
"Get the finish element hook of an XML parser state"
|
|
(documentation 'get-text-hook 'function)
|
|
"Get the text hook of an XML parser state")
|
|
|
|
#-allegro
|
|
(setf (documentation '(setf get-seed) 'function)
|
|
"Set the initial user seed of an XML parser state"
|
|
(documentation '(setf get-entities) 'function)
|
|
"Set the entities hashtable of an XML parser state"
|
|
(documentation '(setf get-new-element-hook) 'function)
|
|
"Set the new element hook of an XML parser state"
|
|
(documentation '(setf get-finish-element-hook) 'function)
|
|
"Set the finish element hook of an XML parser state"
|
|
(documentation '(setf get-text-hook) 'function)
|
|
"Set the text hook of an XML parser state")
|
|
|
|
(defmethod get-mini-buffer :after ((state xml-parser-state))
|
|
"Reset and return the reusable mini buffer"
|
|
(with-slots (mini-buffer) state
|
|
(setf (fill-pointer mini-buffer) 0)))
|
|
|
|
(defmethod get-buffer :after ((state xml-parser-state))
|
|
"Reset and return the main reusable buffer"
|
|
(with-slots (buffer) state
|
|
(setf (fill-pointer buffer) 0)))
|
|
|
|
;;; parser support
|
|
|
|
(defun parse-whitespace (stream extendable-string)
|
|
"Read and collect XML whitespace from stream in string which is
|
|
destructively modified, return first non-whitespace character which
|
|
was peeked but not read, return #\Null on eof"
|
|
(declare (type (vector character) extendable-string))
|
|
(loop
|
|
(let ((char (peek-char nil stream nil #\Null)))
|
|
(declare (type character char))
|
|
(if (whitespace-char-p char)
|
|
(vector-push-extend (read-char stream) extendable-string)
|
|
(return char)))))
|
|
|
|
(defun parse-string (stream state string)
|
|
"Read and return an XML string from stream, delimited by either
|
|
single or double quotes, the stream is expected to be on the opening
|
|
delimiter, at the end the closing delimiter is also read, entities
|
|
are resolved, eof before end of string is an error"
|
|
(declare (type (vector character) string))
|
|
(let ((delimiter (read-char stream nil #\Null))
|
|
(char #\Null))
|
|
(declare (type character delimiter char))
|
|
(unless (or (char= delimiter #\') (char= delimiter #\"))
|
|
(error (parser-error "expected string delimiter" nil stream)))
|
|
(loop
|
|
(setf char (read-char stream nil #\Null))
|
|
(cond ((char= char #\Null) (error (parser-error "encountered eof before end of string")))
|
|
((char= char delimiter) (return))
|
|
((char= char #\&) (resolve-entity stream string (get-entities state) (get-mini-buffer state)))
|
|
(t (vector-push-extend char string))))
|
|
string))
|
|
|
|
(defun parse-text (stream state extendable-string)
|
|
"Read and collect XML text from stream in string which is
|
|
destructively modified, the text ends with a '<', which is peeked and
|
|
returned, entities are resolved, eof is considered an error"
|
|
(declare (type (vector character) extendable-string))
|
|
(let ((char #\Null))
|
|
(declare (type character char))
|
|
(loop
|
|
(setf char (peek-char nil stream nil #\Null))
|
|
(when (char= char #\Null) (error (parser-error "encountered unexpected eof in text")))
|
|
(when (char= char #\<) (return))
|
|
(read-char stream)
|
|
(if (char= char #\&)
|
|
(resolve-entity stream extendable-string (get-entities state) (get-mini-buffer state))
|
|
(vector-push-extend char extendable-string)))
|
|
char))
|
|
|
|
(defun parse-identifier (stream identifier)
|
|
"Read and returns an XML identifier from stream, positioned at the
|
|
start of the identifier, ending with the first non-identifier
|
|
character, which is peeked, the identifier is written destructively
|
|
into identifier which is also returned"
|
|
(declare (type (vector character) identifier))
|
|
(loop
|
|
(let ((char (read-char stream nil #\Null)))
|
|
(declare (type character char))
|
|
(cond ((identifier-char-p char)
|
|
(vector-push-extend char identifier))
|
|
(t
|
|
(when (char/= char #\Null) (unread-char char stream))
|
|
(return identifier))))))
|
|
|
|
(defun skip-comment (stream)
|
|
"Skip an XML comment in stream, positioned after the opening '<!--',
|
|
consumes the closing '-->' sequence, unexpected eof or a malformed
|
|
closing sequence result in a error"
|
|
(let ((dashes-to-read 2))
|
|
(loop
|
|
(if (zerop dashes-to-read) (return))
|
|
(let ((char (read-char stream nil #\Null)))
|
|
(declare (type character char))
|
|
(if (char= char #\Null)
|
|
(error (parser-error "encountered unexpected eof for comment")))
|
|
(if (char= char #\-)
|
|
(decf dashes-to-read)
|
|
(setf dashes-to-read 2)))))
|
|
(if (char/= (read-char stream nil #\Null) #\>)
|
|
(error (parser-error "expected > ending comment" nil stream))))
|
|
|
|
(defun read-cdata (stream state string)
|
|
"Reads in the CDATA and calls the callback for CDATA if it exists"
|
|
;; we already read the <![CDATA[ stuff
|
|
;; continue to read until we hit ]]>
|
|
(let ((char #\space)
|
|
(last-3-characters (list #\[ #\A #\T))
|
|
(pattern (list #\> #\] #\])))
|
|
(declare (type character char))
|
|
(loop
|
|
(setf char (read-char stream nil #\Null))
|
|
(when (char= char #\Null) (error (parser-error "encountered unexpected eof in text")))
|
|
(push char last-3-characters)
|
|
(setf (cdddr last-3-characters) nil)
|
|
(cond
|
|
((equal last-3-characters
|
|
pattern)
|
|
(setf (fill-pointer string)
|
|
(- (fill-pointer string) 2))
|
|
(setf (get-seed state)
|
|
(funcall (get-text-hook state)
|
|
(copy-seq string)
|
|
(get-seed state)))
|
|
(return-from read-cdata))
|
|
(t
|
|
(vector-push-extend char string))))))
|
|
|
|
(defun skip-special-tag (stream state)
|
|
"Skip an XML special tag (comments and processing instructions) in
|
|
stream, positioned after the opening '<', unexpected eof is an error"
|
|
;; opening < has been read, consume ? or !
|
|
(read-char stream)
|
|
(let ((char (read-char stream nil #\Null)))
|
|
(declare (type character char))
|
|
;; see if we are dealing with a comment
|
|
(when (char= char #\-)
|
|
(setf char (read-char stream nil #\Null))
|
|
(when (char= char #\-)
|
|
(skip-comment stream)
|
|
(return-from skip-special-tag)))
|
|
;; maybe we are dealing with CDATA?
|
|
(when (and (char= char #\[)
|
|
(loop :for pattern :across "CDATA["
|
|
:for char = (read-char stream nil #\Null)
|
|
:when (char= char #\Null) :do
|
|
(error (parser-error "encountered unexpected eof in cdata"))
|
|
:always (char= char pattern)))
|
|
(read-cdata stream state (get-buffer state))
|
|
(return-from skip-special-tag))
|
|
;; loop over chars, dealing with strings (skipping their content)
|
|
;; and counting opening and closing < and > chars
|
|
(let ((taglevel 1)
|
|
(string-delimiter #\Null))
|
|
(declare (type character string-delimiter))
|
|
(loop
|
|
(when (zerop taglevel) (return))
|
|
(setf char (read-char stream nil #\Null))
|
|
(when (char= char #\Null)
|
|
(error (parser-error "encountered unexpected eof for special (! or ?) tag" nil stream)))
|
|
(if (char/= string-delimiter #\Null)
|
|
;; inside a string we only look for a closing string delimiter
|
|
(when (char= char string-delimiter)
|
|
(setf string-delimiter #\Null))
|
|
;; outside a string we count < and > and watch out for strings
|
|
(cond ((or (char= char #\') (char= char #\")) (setf string-delimiter char))
|
|
((char= char #\<) (incf taglevel))
|
|
((char= char #\>) (decf taglevel))))))))
|
|
|
|
;;; the XML parser proper
|
|
|
|
(defun parse-xml-element-attributes (stream state)
|
|
"Parse XML element attributes from stream positioned after the tag
|
|
identifier, returning the attributes as an assoc list, ending at
|
|
either a '>' or a '/' which is peeked and also returned"
|
|
(declare (special *namespaces*))
|
|
(let ((char #\Null) attributes)
|
|
(declare (type character char))
|
|
(loop
|
|
;; skip whitespace separating items
|
|
(setf char (skip-whitespace stream))
|
|
;; start tag attributes ends with > or />
|
|
(when (or (char= char #\>) (char= char #\/)) (return))
|
|
;; read the attribute key
|
|
(let ((key (let ((string (parse-identifier stream (get-mini-buffer state))))
|
|
(if *ignore-namespaces*
|
|
(funcall *attribute-name-parser* string)
|
|
(copy-seq string)))))
|
|
;; skip separating whitespace
|
|
(setf char (skip-whitespace stream))
|
|
;; require = sign (and consume it if present)
|
|
(if (char= char #\=)
|
|
(read-char stream)
|
|
(error (parser-error "expected =" nil stream)))
|
|
;; skip separating whitespace
|
|
(skip-whitespace stream)
|
|
;; read the attribute value as a string
|
|
(push (cons key (let ((string (parse-string stream state (get-buffer state))))
|
|
(if *ignore-namespaces*
|
|
(funcall *attribute-value-parser* key string)
|
|
(copy-seq string))))
|
|
attributes)))
|
|
;; return attributes peek char ending loop
|
|
(values attributes char)))
|
|
|
|
(defun parse-xml-element (stream state)
|
|
"Parse and return an XML element from stream, positioned after the opening '<'"
|
|
(declare (special *namespaces*))
|
|
;; opening < has been read
|
|
(when (char= (peek-char nil stream nil #\Null) #\!)
|
|
(skip-special-tag stream state)
|
|
(return-from parse-xml-element))
|
|
(let ((char #\Null) buffer open-tag parent-seed has-children)
|
|
(declare (type character char))
|
|
(setf parent-seed (get-seed state))
|
|
;; read tag name (no whitespace between < and name ?)
|
|
(setf open-tag (copy-seq (parse-identifier stream (get-mini-buffer state))))
|
|
;; tag has been read, read attributes if any
|
|
(multiple-value-bind (attributes peeked-char)
|
|
(parse-xml-element-attributes stream state)
|
|
(let ((*namespaces* (extend-namespaces attributes *namespaces*)))
|
|
(setf open-tag (resolve-identifier open-tag *namespaces*))
|
|
(unless *ignore-namespaces*
|
|
(dolist (attribute attributes)
|
|
(setf (car attribute) (funcall *attribute-name-parser* (car attribute))
|
|
(cdr attribute) (funcall *attribute-value-parser* (car attribute) (cdr attribute)))))
|
|
(setf (get-seed state) (funcall (get-new-element-hook state)
|
|
open-tag attributes (get-seed state)))
|
|
(setf char peeked-char)
|
|
(when (char= char #\/)
|
|
;; handle solitary tag of the form <tag .. />
|
|
(read-char stream)
|
|
(setf char (read-char stream nil #\Null))
|
|
(if (char= #\> char)
|
|
(progn
|
|
(setf (get-seed state) (funcall (get-finish-element-hook state)
|
|
open-tag attributes parent-seed (get-seed state)))
|
|
(return-from parse-xml-element))
|
|
(error (parser-error "expected >" nil stream))))
|
|
;; consume >
|
|
(read-char stream)
|
|
(loop
|
|
(setf buffer (get-buffer state))
|
|
;; read whitespace into buffer
|
|
(setf char (parse-whitespace stream buffer))
|
|
;; see what ended the whitespace scan
|
|
(cond ((char= char #\Null) (error (parser-error "encountered unexpected eof handling ~a"
|
|
(list open-tag))))
|
|
((char= char #\<)
|
|
;; consume the <
|
|
(read-char stream)
|
|
(if (char= (peek-char nil stream nil #\Null) #\/)
|
|
(progn
|
|
;; handle the matching closing tag </tag> and done
|
|
;; if we read whitespace as this (leaf) element's contents, it is significant
|
|
(when (and (not has-children) (plusp (length buffer)))
|
|
(setf (get-seed state) (funcall (get-text-hook state)
|
|
(copy-seq buffer) (get-seed state))))
|
|
(read-char stream)
|
|
(let ((close-tag (resolve-identifier (parse-identifier stream (get-mini-buffer state))
|
|
*namespaces*)))
|
|
(unless (eq open-tag close-tag)
|
|
(error (parser-error "found <~a> not matched by </~a> but by <~a>"
|
|
(list open-tag open-tag close-tag) stream)))
|
|
(unless (char= (read-char stream nil #\Null) #\>)
|
|
(error (parser-error "expected >" nil stream)))
|
|
(setf (get-seed state) (funcall (get-finish-element-hook state)
|
|
open-tag attributes parent-seed (get-seed state))))
|
|
(return))
|
|
;; handle child tag and loop, no hooks to call here
|
|
;; whitespace between child elements is skipped
|
|
(progn
|
|
(setf has-children t)
|
|
(parse-xml-element stream state))))
|
|
(t
|
|
;; no child tag, concatenate text to whitespace in buffer
|
|
;; handle text content and loop
|
|
(setf char (parse-text stream state buffer))
|
|
(setf (get-seed state) (funcall (get-text-hook state)
|
|
(copy-seq buffer) (get-seed state))))))))))
|
|
|
|
(defun start-parse-xml (stream &optional (state (make-instance 'xml-parser-state)))
|
|
"Parse and return a toplevel XML element from stream, using parser state"
|
|
(loop
|
|
(let ((char (skip-whitespace stream)))
|
|
(when (char= char #\Null) (return-from start-parse-xml))
|
|
;; skip whitespace until start tag
|
|
(unless (char= char #\<)
|
|
(error (parser-error "expected <" nil stream)))
|
|
(read-char stream) ; consume peeked char
|
|
(setf char (peek-char nil stream nil #\Null))
|
|
(if (or (char= char #\!) (char= char #\?))
|
|
;; deal with special tags
|
|
(skip-special-tag stream state)
|
|
(progn
|
|
;; read the main element
|
|
(parse-xml-element stream state)
|
|
(return-from start-parse-xml (get-seed state)))))))
|
|
|
|
;;;; eof
|