feat: move mblog header handling into mime4cl

Accessing the headers of a MIME message feels like something mime4cl
should handle. We implemented this ad hoc in mblog before in order to
not need to worry about doing it in a sensible way. Now we introduce a
decent-ish interface for getting a header from a MIME message,
mime-message-header-values:

* It returns a list because MIME message headers may appear multiple
  times.

* It decodes RFC2047 only upon request, as you may want to be stricter
  about parsing certain fields.

* It checks header name equality case insensitively.

The code for decoding the RFC2047 string is retained and still uses
babel for doing the actual decoding.

Change-Id: I58bbbe4b46dbded04160b481a28a40d14775673d
Reviewed-on: https://cl.tvl.fyi/c/depot/+/5150
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
This commit is contained in:
sterni 2022-02-01 00:01:59 +01:00
parent 81c47da91c
commit 5bc73de59d
7 changed files with 43 additions and 37 deletions

View file

@ -6,6 +6,7 @@ depot.nix.buildLisp.library {
name = "mime4cl";
deps = [
depot.third_party.lisp.babel
depot.third_party.lisp.sclf
depot.third_party.lisp.npg
depot.third_party.lisp.trivial-gray-streams

View file

@ -644,7 +644,7 @@ method of RFC2047 and return a sequence of bytes."
(vector-push-extend (char-code c) output-sequence)))
finally (return output-sequence)))
(defun decode-RFC2047-string (encoding string &key (start 0) (end (length string)))
(defun decode-RFC2047-part (encoding string &key (start 0) (end (length string)))
"Decode STRING according to RFC2047 and return a sequence of
bytes."
(gcase (encoding string-equal)
@ -674,10 +674,24 @@ sequence, a charset string indicating the original coding."
(push (subseq text previous-end start)
result))
(setf previous-end (+ end 2))
(push (cons (decode-RFC2047-string encoding text :start (1+ second-?) :end end)
(push (cons (decode-RFC2047-part encoding text :start (1+ second-?) :end end)
charset)
result))
finally (unless (= previous-end (length text))
(push (subseq text previous-end (length text))
result))
(return (nreverse result))))
(defun decode-RFC2047 (text)
"Decode TEXT into a fully decoded string. Whenever a non ASCII part is
encountered, try to decode it using babel, otherwise signal an error."
(flet ((decode-part (part)
(etypecase part
(cons (babel:octets-to-string
(car part)
:encoding (babel-encodings:get-character-encoding
(intern (string-upcase (cdr part)) 'keyword))))
(string part))))
(apply #'concatenate
(cons 'string
(mapcar #'decode-part (mime:parse-RFC2047-text text))))))

View file

@ -622,6 +622,20 @@ found in STREAM."
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defun mime-message-header-values (name message &key decode)
"Return all values of the header with NAME in MESSAGE, optionally decoding
it according to RFC2047 if :DECODE is T."
(loop ;; A header may occur multiple times
for header in (mime-message-headers message)
;; MIME Headers should be case insensitive
;; https://stackoverflow.com/a/6143644
when (string-equal (car header) name)
collect (if decode
(decode-RFC2047 (cdr header))
(cdr header))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defvar *lazy-mime-decode* t
"If true don't decode mime bodies in memory.")

View file

@ -30,6 +30,8 @@
#:process-wait
#:process-alive-p
#:run-program)
(:import-from :babel :octets-to-string)
(:import-from :babel-encodings :get-character-encoding)
(:export #:*lazy-mime-decode*
#:print-mime-part
#:read-mime-message
@ -61,6 +63,7 @@
#:mime-type-string
#:mime-type-parameters
#:mime-message-headers
#:mime-message-header-values
#:mime=
#:find-mime-part-by-path
#:find-mime-part-by-id

View file

@ -19,7 +19,6 @@
}
depot.lisp.klatre
depot.third_party.lisp.alexandria
depot.third_party.lisp.babel
depot.third_party.lisp.closure-html
depot.third_party.lisp.cl-date-time-parser
depot.third_party.lisp.cl-who

View file

@ -19,15 +19,8 @@
surrounds them with angle brackets for a MIME header"
(concatenate 'string "<" cid ">"))
;; TODO(sterni): move into mime4cl
(defun find-mime-message-header (header-name message)
(when-let ((header (assoc header-name
(mime:mime-message-headers message)
:test #'string-equal)))
(cdr header)))
(defun find-mime-message-date (message)
(when-let ((date-string (find-mime-message-header "Date" message)))
(when-let ((date-string (car (mime:mime-message-header-values "Date" message))))
(date-time-parser:parse-date-time date-string)))
;;; main implementation
@ -65,24 +58,10 @@
(defun apple-note-p (msg)
"Checks X-Uniform-Type-Identifier of a MIME:MIME-MESSAGE
to determine if a given mime message claims to be an Apple Note."
(when-let (uniform-id (assoc "X-Uniform-Type-Identifier"
(mime:mime-message-headers msg)
:test #'string-equal))
(string-equal (cdr uniform-id) "com.apple.mail-note")))
(defun decode-RFC2047-to-string (input)
(apply
#'concatenate
(cons 'string
(mapcar
(lambda (el)
(etypecase el
(cons (babel:octets-to-string
(car el)
:encoding (babel-encodings:get-character-encoding
(intern (string-upcase (cdr el)) 'keyword))))
(string el)))
(mime:parse-RFC2047-text input)))))
(when-let (uniform-id (car (mime:mime-message-header-values
"X-Uniform-Type-Identifier"
msg)))
(string-equal uniform-id "com.apple.mail-note")))
(defun make-apple-note (msg)
(check-type msg mime-message)
@ -91,12 +70,10 @@
(error "Passed message is not an Apple Note according to headers"))
(let ((text-part (mime:find-mime-text-part msg))
(subject (when-let ((val (find-mime-message-header "Subject" msg)))
;; TODO(sterni): mime4cl should do this
(decode-RFC2047-to-string val)))
(uuid (when-let ((val (find-mime-message-header
"X-Universally-Unique-Identifier"
msg)))
(subject (car (mime:mime-message-header-values "Subject" msg :decode t)))
(uuid (when-let ((val (car (mime:mime-message-header-values
"X-Universally-Unique-Identifier"
msg))))
(string-downcase val)))
(time (find-mime-message-date msg)))
;; The idea here is that we don't need to check a lot manually, instead

View file

@ -8,8 +8,6 @@
(defpackage :note
(:use
:common-lisp
:babel
:babel-encodings
:closure-html
:cl-date-time-parser
:mime4cl)