feat(sterni/mblog): convert apple note mime msgs to html

For now mblog only contains the mnote-html executable which takes a mime
message from a maildir and prints the equivalent HTML fragment to
stdout. It is intended to work with the mblaze(7) utilities,
i. e. mnote-html resolves all `object` tags to proper `img` inclusions
with the correct filename, so mshow(1)'s -x version can supply the
needed image files. A note created using Apple's Notes app (tested with
the iOS version) can be converted in a viewable HTML file like this:

    $ mnote-html path/to/msg > fragment.html
    $ mshow -x path/to/msg
    $ cat <(echo "<!DOCTYPE html>") fragment.html > document.html
    $ xdg-open document.html

Note that only the limited feature set of Apple Notes when using the
IMAP backend is supported. The iCloud-based one has more (quite neat)
features, but its notes can only accessed via an internal API as far as
I know.

This CLI is a bit impractical due to the big startup overhead of loading
the lisp image. mblog should be become a fully fletched static site
generator in the future, but this is a good starting point and providing
the mnote-html tool is certainly useful.

Change-Id: Iee6d1558e939b932da1e70ca2d2ae75638d855df
Reviewed-on: https://cl.tvl.fyi/c/depot/+/3271
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
This commit is contained in:
sterni 2021-08-02 15:13:05 +02:00
parent 7f31562acf
commit 8e156e6b86
5 changed files with 250 additions and 0 deletions

View file

@ -0,0 +1,17 @@
(in-package :mblog)
(declaim (optimize (safety 3)))
(defparameter +synopsis+ "mnote-html FILE [FILE [ ... ]]")
;; TODO(sterni): handle relevant conditions
(defun main ()
(let* ((args (uiop:command-line-arguments))
(help-p (or (not args)
(find-if (lambda (x)
(member x '("-h" "--help" "--usage")
:test #'string=))
args))))
(if help-p (format *error-output* "Usage: ~A~%" +synopsis+)
(loop for arg in args
do (apple-note-html-fragment
(mime:mime-message (pathname arg)) *standard-output*)))))

View file

@ -0,0 +1,31 @@
{ depot, pkgs, ... }:
depot.nix.buildLisp.program {
name = "mnote-html";
srcs = [
./packages.lisp
./transformer.lisp
./note.lisp
./cli.lisp
];
deps = [
{
sbcl = depot.nix.buildLisp.bundled "uiop";
default = depot.nix.buildLisp.bundled "asdf";
}
depot.third_party.lisp.alexandria
depot.third_party.lisp.closure-html
depot.third_party.lisp.cl-who
depot.third_party.lisp.mime4cl
];
main = "mblog:main";
# due to sclf
brokenOn = [
"ccl"
"ecl"
];
}

View file

@ -0,0 +1,60 @@
(in-package :mblog)
(declaim (optimize (safety 3)))
;;; util
(defun html-escape-stream (in out)
"Escape characters read from stream IN and write them to
stream OUT escaped using WHO:ESCAPE-CHAR-MINIMAL."
(loop for char = (read-char in nil nil)
while char
do (write-string (who:escape-char-minimal char) out)))
(defun cid-header-value (cid)
"Takes a Content-ID as present in Apple Notes' <object> tags and properly
surrounds them with angle brackets for a MIME header"
(concatenate 'string "<" cid ">"))
;;; main implementation
;; TODO(sterni): make this a “parser” instead of a predicate
(defun apple-note-p (msg)
"Checks X-Uniform-Type-Identifier of a MIME:MIME-MESSAGE
to determine if a given mime message is an Apple Note."
(when-let (uniform-id (assoc "X-Uniform-Type-Identifier"
(mime:mime-message-headers msg)
:test #'string=))
(string= (cdr uniform-id) "com.apple.mail-note")))
(defun apple-note-html-fragment (msg out)
"Takes a MIME:MIME-MESSAGE and writes its text content as HTML to
the OUT stream. The <object> tags are resolved to <img> which
refer to the respective attachment's filename as a relative path,
but extraction of the attachments must be done separately. The
surrounding <html> and <body> tags are stripped and <head>
discarded completely, so only a fragment which can be included
in custom templates remains."
(let ((text (find-mime-text-part msg)))
(cond
;; Sanity checking of the note
((not (apple-note-p msg))
(error "Unsupported or missing X-Uniform-Type-Identifier"))
((not text) (error "Malformed Apple Note: no text part"))
;; notemap creates text/plain notes we need to handle properly.
;; Additionally we *could* check X-Mailer which notemap sets
((string= (mime:mime-subtype text) "plain")
(html-escape-stream (mime:mime-body-stream text :binary nil) out))
;; Notes.app creates text/html parts
((string= (mime:mime-subtype text) "html")
(closure-html:parse
(mime:mime-body-stream text)
(make-instance
'apple-note-transformer
:cid-lookup
(lambda (cid)
(when-let* ((part (mime:find-mime-part-by-id msg (cid-header-value cid)))
(file (mime:mime-part-file-name part)))
file))
:next-handler
(closure-html:make-character-stream-sink out))))
(t (error "Malformed Apple Note: unknown mime type")))))

View file

@ -0,0 +1,15 @@
(defpackage :mblog
(:use
:common-lisp
:mime4cl
:closure-html
:who
:uiop)
(:shadow :with-html-output) ; conflict between closure-html and who
(:import-from
:alexandria
:when-let*
:when-let
:starts-with-subseq
:ends-with-subseq)
(:export :main))

View file

@ -0,0 +1,127 @@
(in-package :mblog)
(declaim (optimize (safety 3)))
;; Throw away these tags and all of their children
(defparameter +discard-tags-with-children+ '("HEAD"))
;; Only “strip” these tags and leave their content as is
(defparameter +discard-tags-only+ '("BODY" "HTML"))
;; This is basically the same as cxml's PROXY-HANDLER.
;; Couldn't be bothered to make a BROADCAST-HANDLER because I
;; only need to pass through to one handler. It accepts every
;; event and passes it on to NEXT-HANDLER. This is useful for
;; subclassing mostly where an event can be modified or passed
;; on as is via CALL-NEXT-METHOD.
(defclass hax-proxy-handler (hax:default-handler)
((next-handler
:initarg :next-handler
:accessor proxy-next-handler)))
;; Define the trivial handlers which just call themselves for NEXT-HANDLER
(macrolet ((def-proxy-handler (name (&rest args))
`(defmethod ,name ((h hax-proxy-handler) ,@args)
(,name (proxy-next-handler h) ,@args))))
(def-proxy-handler hax:start-document (name p-id s-id))
(def-proxy-handler hax:end-document ())
(def-proxy-handler hax:start-element (name attrs))
(def-proxy-handler hax:end-element (name))
(def-proxy-handler hax:characters (data))
(def-proxy-handler hax:unescaped (data))
(def-proxy-handler hax:comment (data)))
(defclass apple-note-transformer (hax-proxy-handler)
((cid-lookup
:initarg :cid-lookup
:initform (lambda (cid) nil)
:accessor transformer-cid-lookup)
(discard-until
:initarg :discard-until
:initform nil
:accessor transformer-discard-until)
(depth
:initarg :depth
:initform 0
:accessor transformer-depth))
(:documentation
"HAX handler that strips unnecessary tags from the HTML of a com.apple.mail-note
and resolves references to attachments to IMG tags."))
;; Define the “boring” handlers which just call the next method (i. e. the next
;; handler) unless discard-until is not nil in which case the event is dropped.
(macrolet ((def-filter-handler (name (&rest args))
`(defmethod ,name ((h apple-note-transformer) ,@args)
(when (not (transformer-discard-until h))
(call-next-method)))))
(def-filter-handler hax:start-document (name p-id s-id))
(def-filter-handler hax:end-document ())
(def-filter-handler hax:characters (data))
(def-filter-handler hax:unescaped (data))
(def-filter-handler hax:comment (data)))
(defun parse-content-id (attrlist)
(when-let (data (find-if (lambda (x)
(string= (hax:attribute-name x) "DATA"))
attrlist))
(multiple-value-bind (starts-with-cid-p suffix)
(starts-with-subseq "cid:" (hax:attribute-value data)
:return-suffix t :test #'char=)
(if starts-with-cid-p suffix data))))
(defmethod hax:start-element ((handler apple-note-transformer) name attrs)
(with-accessors ((discard-until transformer-discard-until)
(next-handler proxy-next-handler)
(cid-lookup transformer-cid-lookup)
(depth transformer-depth))
handler
(cond
;; If we are discarding, any started element is dropped,
;; since the end-condition only is reached via END-ELEMENT.
(discard-until nil)
;; If we are not discarding any outer elements, we can set
;; up a new discard condition if we encounter an appropriate
;; element.
((member name +discard-tags-with-children+ :test #'string=)
(setf discard-until (cons name depth)))
;; Only drop this event, must be mirrored in END-ELEMENT to
;; avoid invalidly nested HTML.
((member name +discard-tags-only+ :test #'string=) nil)
;; If we encounter an object tag, we drop it and its contents,
;; but only after inspecting its attributes and emitting new
;; events representing an img tag which includes the respective
;; attachment via its filename.
((string= name "OBJECT")
(progn
(setf discard-until (cons "OBJECT" depth))
;; TODO(sterni): check type and only resolve images, raise error
;; otherwise. We should only encounter images anyways, since
;; other types are only supported for iCloud which doesn't seem
;; to use IMAP for sync these days.
(when-let* ((cid (parse-content-id attrs))
(file (apply cid-lookup (list cid)))
(src (hax:make-attribute "SRC" file)))
(hax:start-element next-handler "IMG" (list src))
(hax:end-element next-handler "IMG"))))
;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on.
(t (call-next-method)))
(setf depth (1+ depth))))
(defmethod hax:end-element ((handler apple-note-transformer) name)
(with-accessors ((discard-until transformer-discard-until)
(depth transformer-depth))
handler
(setf depth (1- depth))
(cond
;; If we are discarding and encounter the same tag again at the same
;; depth, we can stop, but still have to discard the current tag.
((and discard-until
(string= (car discard-until) name)
(= (cdr discard-until) depth))
(setf discard-until nil))
;; In all other cases, we drop properly.
(discard-until nil)
;; Mirrored tag stripping as in START-ELEMENT
((member name +discard-tags-only+ :test #'string=) nil)
;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on.
(t (call-next-method)))))