9d0b4818ce
Having #+cmu all over the place suggests that we maintain CMUCL support or test with CMUCL which is not the case. Change-Id: Ia0828cb1ac48e49acdee6fef7a0fa2c04c1805b3 Reviewed-on: https://cl.tvl.fyi/c/depot/+/5068 Tested-by: BuildkiteCI Reviewed-by: sterni <sternenseemann@systemli.org>
300 lines
10 KiB
Common Lisp
300 lines
10 KiB
Common Lisp
;;; address.lisp --- e-mail address parser
|
|
|
|
;;; Copyright (C) 2007, 2008, 2009 by Walter C. Pelissero
|
|
;;; Copyright (C) 2022 The TVL Authors
|
|
|
|
;;; Author: Walter C. Pelissero <walter@pelissero.de>
|
|
;;; Project: mime4cl
|
|
|
|
;;; This library is free software; you can redistribute it and/or
|
|
;;; modify it under the terms of the GNU Lesser General Public License
|
|
;;; as published by the Free Software Foundation; either version 2.1
|
|
;;; of the License, or (at your option) any later version.
|
|
;;; This library is distributed in the hope that it will be useful,
|
|
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
;;; Lesser General Public License for more details.
|
|
;;; You should have received a copy of the GNU Lesser General Public
|
|
;;; License along with this library; if not, write to the Free
|
|
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
;;; 02111-1307 USA
|
|
|
|
;;; Although not MIME specific, this parser is often useful together
|
|
;;; with the MIME primitives. It should be able to parse the address
|
|
;;; syntax described in RFC2822 excluding the obsolete syntax (see
|
|
;;; RFC822). Have a look at the test suite to get an idea of what
|
|
;;; kind of addresses it can parse.
|
|
|
|
(in-package :mime4cl)
|
|
|
|
(defstruct (mailbox (:conc-name mbx-))
|
|
description
|
|
user
|
|
host
|
|
domain)
|
|
|
|
(defstruct (mailbox-group (:conc-name mbxg-))
|
|
name
|
|
mailboxes)
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(defun write-mailbox-domain-name (addr &optional (stream *standard-output*))
|
|
(when (eq :internet (mbx-domain addr))
|
|
(write-char #\[ stream))
|
|
(write-string (mbx-host addr) stream)
|
|
(when (eq :internet (mbx-domain addr))
|
|
(write-char #\] stream))
|
|
(when (stringp (mbx-domain addr))
|
|
(write-char #\. stream)
|
|
(write-string (mbx-domain addr) stream)))
|
|
|
|
(defun write-mailbox-address (addr &optional (stream *standard-output*))
|
|
(write-string (mbx-user addr) stream)
|
|
(when (mbx-host addr)
|
|
(write-char #\@ stream)
|
|
(write-mailbox-domain-name addr stream)))
|
|
|
|
(defmethod mbx-domain-name ((MBX mailbox))
|
|
"Return the complete domain name string of MBX, in the form
|
|
\"host.domain\"."
|
|
(with-output-to-string (out)
|
|
(write-mailbox-domain-name mbx out)))
|
|
|
|
(defmethod mbx-address ((mbx mailbox))
|
|
"Return the e-mail address string of MBX, in the form
|
|
\"user@host.domain\"."
|
|
(with-output-to-string (out)
|
|
(write-mailbox-address mbx out)))
|
|
|
|
(defun write-mailbox (addr &optional (stream *standard-output*))
|
|
(awhen (mbx-description addr)
|
|
(write it :stream stream :readably t)
|
|
(write-string " <" stream))
|
|
(write-mailbox-address addr stream)
|
|
(awhen (mbx-description addr)
|
|
(write-char #\> stream)))
|
|
|
|
(defun write-mailbox-group (grp &optional (stream *standard-output*))
|
|
(write-string (mbxg-name grp) stream)
|
|
(write-string ": " stream)
|
|
(loop
|
|
for mailboxes on (mbxg-mailboxes grp)
|
|
for mailbox = (car mailboxes)
|
|
do (write-mailbox mailbox stream)
|
|
unless (endp (cdr mailboxes))
|
|
do (write-string ", " stream))
|
|
(write-char #\; stream))
|
|
|
|
(defmethod print-object ((mbx mailbox) stream)
|
|
(if (or *print-readably* *print-escape*)
|
|
(call-next-method)
|
|
(write-mailbox mbx stream)))
|
|
|
|
(defmethod print-object ((grp mailbox-group) stream)
|
|
(if (or *print-readably* *print-escape*)
|
|
(call-next-method)
|
|
(write-mailbox-group grp stream)))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(defun parser-make-mailbox (description address-list)
|
|
(make-mailbox :description description
|
|
:user (car address-list)
|
|
:host (cadr address-list)
|
|
:domain (when (cddr address-list)
|
|
(string-concat (cddr address-list) "."))))
|
|
|
|
|
|
(defun populate-grammar ()
|
|
(defrule address-list
|
|
:= (+ address ","))
|
|
|
|
(defrule address
|
|
:= mailbox
|
|
:= group)
|
|
|
|
(defrule mailbox
|
|
:= display-name? angle-addr comment?
|
|
:reduce (parser-make-mailbox (or display-name comment) angle-addr)
|
|
:= addr-spec comment?
|
|
:reduce (parser-make-mailbox comment addr-spec))
|
|
|
|
(defrule angle-addr
|
|
:= "<" addr-spec ">")
|
|
|
|
(defrule group
|
|
:= display-name ":" mailbox-list ";"
|
|
:reduce (make-mailbox-group :name display-name :mailboxes mailbox-list))
|
|
|
|
(defrule display-name
|
|
:= phrase
|
|
:reduce (string-concat phrase " "))
|
|
|
|
(defrule phrase
|
|
:= word+)
|
|
|
|
(defrule word
|
|
:= atext
|
|
:= string)
|
|
|
|
(defrule mailbox-list
|
|
:= (+ mailbox ","))
|
|
|
|
(defrule addr-spec
|
|
:= local-part "@" domain :reduce (cons local-part domain))
|
|
|
|
(defrule local-part
|
|
:= dot-atom :reduce (string-concat dot-atom ".")
|
|
:= string)
|
|
|
|
(defrule domain
|
|
:= dot-atom
|
|
:= domain-literal :reduce (list domain-literal :internet))
|
|
|
|
;; actually, according to the RFC, dot-atoms don't allow spaces in
|
|
;; between but these rules do
|
|
(defrule dot-atom
|
|
:= (+ atom "."))
|
|
|
|
(defrule atom
|
|
:= atext+
|
|
:reduce (apply #'concatenate 'string atext)))
|
|
|
|
(deflazy define-grammar
|
|
(let ((*package* #.*package*)
|
|
(*compile-print* (when npg::*debug* t)))
|
|
(reset-grammar)
|
|
(format t "~&creating e-mail address grammar...~%")
|
|
(populate-grammar)
|
|
(let ((grammar (npg:generate-grammar #'string=)))
|
|
(reset-grammar)
|
|
(npg:print-grammar-figures grammar)
|
|
grammar)))
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; The lexical analyser
|
|
|
|
(defstruct cursor
|
|
stream
|
|
(position 0))
|
|
|
|
(defun read-delimited-string (stream end-char &key nesting-start-char (escape-char #\\))
|
|
(labels ((collect ()
|
|
(with-output-to-string (out)
|
|
(loop
|
|
for c = (read-char stream nil)
|
|
while (and c (not (char= c end-char)))
|
|
do (cond ((char= c escape-char)
|
|
(awhen (read-char stream nil)
|
|
(write-char it out)))
|
|
((and nesting-start-char
|
|
(char= c nesting-start-char))
|
|
(write-char nesting-start-char out)
|
|
(write-string (collect) out)
|
|
(write-char end-char out))
|
|
(t (write-char c out)))))))
|
|
(collect)))
|
|
|
|
|
|
(defun read-string (cursor)
|
|
(make-token :type 'string
|
|
:value (read-delimited-string (cursor-stream cursor) #\")
|
|
:position (incf (cursor-position cursor))))
|
|
|
|
(defun read-domain-literal (cursor)
|
|
(make-token :type 'domain-literal
|
|
:value (read-delimited-string (cursor-stream cursor) #\])
|
|
:position (incf (cursor-position cursor))))
|
|
|
|
(defun read-comment (cursor)
|
|
(make-token :type 'comment
|
|
:value (read-delimited-string (cursor-stream cursor) #\) :nesting-start-char #\()
|
|
:position (incf (cursor-position cursor))))
|
|
|
|
(declaim (inline atom-component-p))
|
|
(defun atom-component-p (c)
|
|
(declare (type character c))
|
|
(not (find c " ()\"[]@.<>:;,")))
|
|
|
|
(defun read-atext (first-character cursor)
|
|
(be string (with-output-to-string (out)
|
|
(write-char first-character out)
|
|
(loop
|
|
for c = (read-char (cursor-stream cursor) nil)
|
|
while (and c (atom-component-p c))
|
|
do (write-char c out)
|
|
finally (when c
|
|
(unread-char c (cursor-stream cursor)))))
|
|
(make-token :type 'atext
|
|
:value string
|
|
:position (incf (cursor-position cursor)))))
|
|
|
|
(defmethod read-next-tokens ((cursor cursor))
|
|
(flet ((make-keyword (c)
|
|
(make-token :type 'keyword
|
|
:value (string c)
|
|
:position (incf (cursor-position cursor)))))
|
|
(be in (cursor-stream cursor)
|
|
(loop
|
|
for c = (read-char in nil)
|
|
while c
|
|
unless (whitespace-p c)
|
|
return (list
|
|
(cond ((char= #\( c)
|
|
(read-comment cursor))
|
|
((char= #\" c)
|
|
(read-string cursor))
|
|
((char= #\[ c)
|
|
(read-domain-literal cursor))
|
|
((find c "@.<>:;,")
|
|
(make-keyword c))
|
|
(t
|
|
;; anything else is considered a text atom even
|
|
;; though it's just a single character
|
|
(read-atext c cursor))))))))
|
|
|
|
(defun analyse-string (string)
|
|
"Return the list of tokens produced by a lexical analysis of
|
|
STRING. These are the tokens that would be seen by the parser."
|
|
(with-input-from-string (stream string)
|
|
(be cursor (make-cursor :stream stream)
|
|
(loop
|
|
for tokens = (read-next-tokens cursor)
|
|
until (endp tokens)
|
|
append tokens))))
|
|
|
|
(defun mailboxes-only (list-of-mailboxes-and-groups)
|
|
"Return a flat list of MAILBOX-ADDRESSes from
|
|
LIST-OF-MAILBOXES-AND-GROUPS, which is the kind of list returned
|
|
by PARSE-ADDRESSES. This turns out to be useful when your
|
|
program is not interested in mailbox groups and expects the user
|
|
addresses only."
|
|
(mapcan #'(lambda (mbx)
|
|
(if (typep mbx 'mailbox-group)
|
|
(mbxg-mailboxes mbx)
|
|
(list mbx)))
|
|
list-of-mailboxes-and-groups))
|
|
|
|
(defun parse-addresses (string &key no-groups)
|
|
"Parse STRING and return a list of MAILBOX-ADDRESSes or
|
|
MAILBOX-GROUPs. If STRING is unparsable return NIL. If
|
|
NO-GROUPS is true, return a flat list of mailboxes throwing away
|
|
the group containers, if any."
|
|
(be grammar (force define-grammar)
|
|
(with-input-from-string (stream string)
|
|
(be* cursor (make-cursor :stream stream)
|
|
mailboxes (ignore-errors ; ignore parsing errors
|
|
(parse grammar 'address-list cursor))
|
|
(if no-groups
|
|
(mailboxes-only mailboxes)
|
|
mailboxes)))))
|
|
|
|
(defun debug-addresses (string)
|
|
"More or less like PARSE-ADDRESSES, but don't ignore parsing errors."
|
|
(be grammar (force define-grammar)
|
|
(with-input-from-string (stream string)
|
|
(be cursor (make-cursor :stream stream)
|
|
(parse grammar 'address-list cursor)))))
|
|
|