tvl-depot/web/panettone/src/inline-markdown.lisp
sterni a2ecd53f58 chore: clean up some obsolete TODOs
* //3p/nix: probably not worth investing time into this anymore

* //users/sterni/emacs: The emoji problem disappeared by itself with a
  newer emacs version, however a different one remains…

* //web/panettone: If we ever want to change the behavior, we should
  just decide the behavior statically instead of using conditions and
  restarts, as we only call it in one place, so making different
  decisions depending on call sites is not really a use case we have.

Change-Id: Iff9d439ce356db41ce34d690fb7b6a01822022fa
Reviewed-on: https://cl.tvl.fyi/c/depot/+/5223
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
Autosubmit: sterni <sternenseemann@systemli.org>
2022-02-04 15:55:19 +00:00

127 lines
5.2 KiB
Common Lisp

(in-package :panettone.inline-markdown)
(declaim (optimize (safety 3)))
(define-constant +inline-markup-types+
'(("~~" :del)
("*" :em)
("`" :code))
:test #'equal)
(defun next-token (mkdn &optional (escaped nil))
"Parses and returns the next token from the beginning of
an inline markdown string which is not altered. The resulting
tokens are either :normal (normal text), :special (syntactically
significant) or :escaped (escaped using \\). If the string is
empty, a pseudo-token named :endofinput is returned. Return value
is a list where the first element is the token type, the second
the token content and optionally the third the markup type."
; special tokens are syntactically significant characters
; or strings for our inline markdown subset. “normal” tokens
; the strings in between
(let* ((special-toks #.'(cons (list "\\" :escape) +inline-markup-types+))
(toks (loop
for tok in special-toks
for pos = (search (car tok) mkdn)
when pos collect (cons tok pos)))
(next-tok
(unless (null toks)
(reduce (lambda (a b) (if (< (cdr a) (cdr b)) a b)) toks))))
(cond
; end of input
((= (length mkdn) 0) (list :endofinput ""))
; no special tokens, just return entire string
((null next-tok) (list :normal mkdn))
; special token, but not at the beginning of the string
; so we return everything until the special token as
; a string
((> (cdr next-tok) 0) (list :normal (subseq mkdn 0 (cdr next-tok))))
; \ at the beginning of the string: we get the next
; token and mark it as escaped unless we are already
; escaping in which case we just return the backslash
; as a special token
((eq (cadr (car next-tok)) :escape)
(if escaped
(list :special "\\")
(list :escaped
(next-token (subseq mkdn 1) t))))
; any other special token at the beginning of the string
; here we also pass the markup type as a third list element
; to prevent unnecessesary lookups
(t (list :special
(subseq mkdn 0 (length (car (car next-tok))))
(cadr (car next-tok)))))))
(defun token-length (tok-type tok-str)
"Returns the string length consumed by a call
to next-token returning the given token type and string."
(check-type tok-type symbol)
(if (eq tok-type :escaped)
; backslash + length of escaped token
(progn
(check-type tok-str list)
(1+ (token-length (car tok-str) (cadr tok-str))))
(progn
(check-type tok-str string)
(length tok-str))))
(defun write-tag (tag pos &optional (target *standard-output*))
"Wrapper around who:convert-tag-to-string-list to
only output a single :opening or :closing tag."
(check-type tag symbol)
(check-type pos symbol)
(let
((index
(cond
((eq pos :opening) 0)
((eq pos :closing) 3)
(t (error 'simple-type-error)))))
(dolist
(tag-part (subseq
(who:convert-tag-to-string-list tag nil nil nil)
index (+ index 3)))
(write-string tag-part target))))
(defun render-inline-markdown (s &optional (target *standard-output*) (in :normal))
"Render inline markdown, a subset of markdown safe to render
inside inline elements. The resulting html is directly written
to a specified stream or *standard-output* to integrate well
with cl-who."
(check-type s string)
(check-type target stream)
(loop
for (tok-type tok-str tok-markup) = (next-token s)
do (setq s (subseq s (token-length tok-type tok-str)))
when (eq tok-type :endofinput)
return ""
when (eq tok-type :normal)
do (write-string (who:escape-string tok-str) target)
when (eq tok-type :escaped)
do (progn
; if normal tokens are escaped we treat the \ as if it were \\
;
; TODO(sterni): maybe also use the :normal behavior in :code except for #\`.
(when (eq (car tok-str) :normal)
(write-char #\\ target))
(write-string (who:escape-string (cadr tok-str)) target))
when (eq tok-type :special)
do (cond
; we are on the outer level and encounter a special token:
; render surrounding tags and call ourselves to render
; inner content.
((eq in :normal)
(progn
(write-tag tok-markup :opening target)
(setq s (render-inline-markdown s target tok-markup))
(write-tag tok-markup :closing target)))
; we are on the inner level and encounter the token that initiated
; our markup again, meaning we need to return to the outer level.
; we return the remaining string to be consumed.
((eq in tok-markup) (return s))
; remaining case: we are on the inner level and encounter different markup.
; we don't support nested markup for simplicity reasons, so instead we
; just render any nested markdown tokens as if they were escaped. This
; only eliminates the slight use case for nesting :em inside :del, but
; shouldn't be too bad. As a side effect this is the precise behavior
; we want for :code.
(t (write-string (who:escape-string tok-str) target)))))