feat(cheddar): Render code blocks in Markdown via syntect

Implements fully static (i.e. no JavaScript!) highlighting of code
blocks when rendering Markdown.

This works by walking through the Comrak AST and replacing any code
blocks with pre-rendered HTML blocks.

Syntaxes are chosen based on the "block info", which is the string
users put after the block's opening fence. This can be either the
(case-insensitive) name of a syntax or a file extension associated
with the desired syntax.

The theme is set to "InspiredGitHub", which imitates GitHub's highlighting.
This commit is contained in:
Vincent Ambo 2019-12-21 15:57:02 +00:00
parent 671dbc7f72
commit f904951384

View file

@ -1,4 +1,5 @@
use comrak::{markdown_to_html, ComrakOptions}; use comrak::nodes::{AstNode, NodeValue, NodeHtmlBlock};
use comrak::{Arena, parse_document, format_html, ComrakOptions};
use lazy_static::lazy_static; use lazy_static::lazy_static;
use std::env; use std::env;
use std::ffi::OsStr; use std::ffi::OsStr;
@ -9,19 +10,36 @@ use std::path::Path;
use syntect::dumps::from_binary; use syntect::dumps::from_binary;
use syntect::easy::HighlightLines; use syntect::easy::HighlightLines;
use syntect::highlighting::ThemeSet; use syntect::highlighting::ThemeSet;
use syntect::parsing::SyntaxSet; use syntect::parsing::{SyntaxSet, SyntaxReference};
use syntect::html::{ use syntect::html::{
append_highlighted_html_for_styled_line,
start_highlighted_html_snippet,
IncludeBackground, IncludeBackground,
append_highlighted_html_for_styled_line,
highlighted_html_for_string,
start_highlighted_html_snippet,
}; };
// NOTE(review): the rows below fuse the pre- and post-change columns of the
// diff; the added comments describe the post-change globals.
// Set up syntaxes as a lazy_static. Initialisation might not be lazy_static! {
// Load syntaxes & themes lazily. Initialisation might not be
// required in the case of Markdown rendering (if there's no code // required in the case of Markdown rendering (if there's no code
// blocks within the document). // blocks within the document).
lazy_static! {
// Syntax definitions deserialised from a binary dump that is embedded at
// compile time; the dump's path is taken from the BAT_SYNTAXES environment
// variable when building.
static ref SYNTAXES: SyntaxSet = from_binary(include_bytes!(env!("BAT_SYNTAXES"))); static ref SYNTAXES: SyntaxSet = from_binary(include_bytes!(env!("BAT_SYNTAXES")));
// syntect's default theme set; the renderers in this file look up the
// "InspiredGitHub" theme in it.
static ref THEMES: ThemeSet = ThemeSet::load_defaults();
// Configure Comrak's Markdown rendering with all the bells &
// whistles!
static ref MD_OPTS: ComrakOptions = ComrakOptions{
ext_strikethrough: true,
ext_tagfilter: true,
ext_table: true,
ext_autolink: true,
ext_tasklist: true,
ext_header_ids: Some(String::new()), // yyeeesss!
ext_footnotes: true,
ext_description_lists: true,
unsafe_: true, // required for tagfilter
..ComrakOptions::default()
};
} }
fn args_extension() -> Option<String> { fn args_extension() -> Option<String> {
@ -44,27 +62,66 @@ fn should_continue(res: &io::Result<usize>) -> bool {
} }
} }
// Pre-order traversal helper, taken from the Comrak documentation:
// calls `f` on `node` first and then recurses into each of the node's
// children in order.
fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F)
where
    F: Fn(&'a AstNode<'a>),
{
    f(node);
    node.children().for_each(|child| iter_nodes(child, f));
}
// Syntax names in the syntax list are capitalised inconsistently, so an
// exact comparison against a code block's info string frequently misses.
//
// Instead, look the syntax up by name while ignoring ASCII case. The list
// is searched from the back, so the last matching entry wins.
fn find_syntax_case_insensitive(info: &str) -> Option<&'static SyntaxReference> {
    SYNTAXES
        .syntaxes()
        .iter()
        .rfind(|syntax| syntax.name.eq_ignore_ascii_case(info))
}
// Renders Markdown read from stdin as HTML written to stdout.
//
// The input is parsed into a Comrak AST, every code block node is replaced
// by an HTML block pre-rendered with syntect, and the modified tree is then
// formatted as HTML.
//
// Panics if stdin cannot be read or if `format_html` returns an error.
//
// NOTE(review): the rows below fuse the pre- and post-change columns of the
// diff; this description covers the post-change code.
fn format_markdown() { fn format_markdown() {
let document = {
let mut buffer = String::new(); let mut buffer = String::new();
let stdin = io::stdin(); let stdin = io::stdin();
let mut stdin = stdin.lock(); let mut stdin = stdin.lock();
stdin.read_to_string(&mut buffer).expect("failed to read stdin"); stdin.read_to_string(&mut buffer).expect("failed to read stdin");
buffer
// Markdown rendering is configurd with most of the bells &
// whistles here:
let opts = ComrakOptions{
ext_strikethrough: true,
ext_tagfilter: true,
ext_table: true,
ext_autolink: true,
ext_tasklist: true,
ext_header_ids: Some(String::new()), // yyeeesss!
ext_footnotes: true,
ext_description_lists: true,
..ComrakOptions::default()
}; };
print!("{}", markdown_to_html(&buffer, &opts)); let arena = Arena::new();
let root = parse_document(&arena, &document, &MD_OPTS);
// Syntax highlighting is implemented by traversing the arena and
// replacing all code blocks with HTML blocks rendered by syntect.
iter_nodes(root, &|node| {
// Borrowed mutably so that the node's value can be overwritten below.
let mut ast = node.data.borrow_mut();
match &ast.value {
NodeValue::CodeBlock(code_block) => {
let theme = &THEMES.themes["InspiredGitHub"];
// Resolve the syntax from the fence info: first as a syntax name
// (ignoring ASCII case), then as a file extension, and finally
// fall back to plain text.
let info = String::from_utf8_lossy(&code_block.info);
let syntax = find_syntax_case_insensitive(&info)
.or_else(|| SYNTAXES.find_syntax_by_extension(&info))
.unwrap_or_else(|| SYNTAXES.find_syntax_plain_text());
let code = String::from_utf8_lossy(&code_block.literal);
let rendered = highlighted_html_for_string(
&code, &SYNTAXES, syntax, theme,
);
let block = NodeHtmlBlock {
block_type: 1, // It's unclear what behaviour is toggled by this
literal: rendered.into_bytes(),
};
// Swap the code block for the pre-rendered HTML in place.
ast.value = NodeValue::HtmlBlock(block);
},
// All other node kinds are left untouched.
_ => (),
};
});
format_html(root, &MD_OPTS, &mut io::stdout())
.expect("Markdown rendering failed");
} }
fn format_code(extension: String) { fn format_code(extension: String) {
@ -76,8 +133,7 @@ fn format_code(extension: String) {
let mut read_result = stdin.read_line(&mut linebuf); let mut read_result = stdin.read_line(&mut linebuf);
// Set up the highlighter // Set up the highlighter
let ts = ThemeSet::load_defaults(); let theme = &THEMES.themes["InspiredGitHub"];
let theme = &ts.themes["InspiredGitHub"];
let syntax = SYNTAXES.find_syntax_by_extension(&extension) let syntax = SYNTAXES.find_syntax_by_extension(&extension)
.or_else(|| SYNTAXES.find_syntax_by_first_line(&linebuf)) .or_else(|| SYNTAXES.find_syntax_by_first_line(&linebuf))