2020-06-19 04:39:53 +02:00
|
|
|
use clap::{App, Arg};
|
2020-01-11 06:06:36 +01:00
|
|
|
use comrak::arena_tree::Node;
|
2020-06-19 04:39:53 +02:00
|
|
|
use comrak::nodes::{Ast, AstNode, NodeCodeBlock, NodeHtmlBlock, NodeValue};
|
|
|
|
use comrak::{format_html, parse_document, Arena, ComrakOptions};
|
2019-12-21 15:45:11 +01:00
|
|
|
use lazy_static::lazy_static;
|
2020-06-20 03:57:31 +02:00
|
|
|
use rouille::try_or_400;
|
|
|
|
use rouille::Response;
|
|
|
|
use serde::Deserialize;
|
2020-01-11 06:06:36 +01:00
|
|
|
use std::cell::RefCell;
|
2020-06-15 01:19:36 +02:00
|
|
|
use std::collections::HashMap;
|
2019-12-21 05:20:15 +01:00
|
|
|
use std::env;
|
|
|
|
use std::ffi::OsStr;
|
2020-06-19 04:39:53 +02:00
|
|
|
use std::io;
|
2019-12-21 05:20:15 +01:00
|
|
|
use std::io::BufRead;
|
2020-06-19 04:35:05 +02:00
|
|
|
use std::io::Write;
|
2019-12-21 05:20:15 +01:00
|
|
|
use std::path::Path;
|
2019-12-21 05:55:10 +01:00
|
|
|
use syntect::dumps::from_binary;
|
2019-12-21 15:45:11 +01:00
|
|
|
use syntect::easy::HighlightLines;
|
2020-06-20 03:57:31 +02:00
|
|
|
use syntect::highlighting::{Theme, ThemeSet};
|
2020-06-19 04:39:53 +02:00
|
|
|
use syntect::parsing::{SyntaxReference, SyntaxSet};
|
2019-12-21 17:18:26 +01:00
|
|
|
use syntect::util::LinesWithEndings;
|
2020-06-20 03:57:31 +02:00
|
|
|
use serde_json::json;
|
2019-12-21 05:20:15 +01:00
|
|
|
|
|
|
|
use syntect::html::{
|
2020-06-19 04:39:53 +02:00
|
|
|
append_highlighted_html_for_styled_line, start_highlighted_html_snippet, IncludeBackground,
|
2019-12-21 05:20:15 +01:00
|
|
|
};
|
|
|
|
|
2019-12-21 15:09:12 +01:00
|
|
|
lazy_static! {
    // Syntax definitions and themes are only initialised on first
    // access. A Markdown document without any code blocks never
    // touches them, so that work is skipped entirely in that case.
    //
    // The syntax set is embedded into the binary at compile time from
    // the file named by the BAT_SYNTAXES environment variable.
    static ref SYNTAXES: SyntaxSet = from_binary(include_bytes!(env!("BAT_SYNTAXES")));
    static ref THEMES: ThemeSet = ThemeSet::load_defaults();

    // Comrak's Markdown rendering options, with all extensions that
    // cheddar makes use of switched on.
    static ref MD_OPTS: ComrakOptions = ComrakOptions {
        ext_strikethrough: true,
        ext_tagfilter: true,
        ext_table: true,
        ext_autolink: true,
        ext_tasklist: true,
        ext_header_ids: Some(String::new()), // header IDs, with an empty string as the value
        ext_footnotes: true,
        ext_description_lists: true,
        unsafe_: true, // required for tagfilter
        ..ComrakOptions::default()
    };

    // Maps specific filenames to syntax names, for cases in which
    // detection by extension or other heuristics picks the wrong
    // language.
    static ref FILENAME_OVERRIDES: HashMap<&'static str, &'static str> = {
        // rules.pl is the canonical name of the submit rule file in
        // Gerrit, which is written in Prolog.
        [("rules.pl", "Prolog")].iter().cloned().collect()
    };
}
|
|
|
|
|
2019-12-21 17:18:26 +01:00
|
|
|
// Opening tag written in front of highlighted code blocks that are
// rendered inside of Markdown documents.
//
// The inline style emulates GitHub's look (subtle background hue and
// some padding around the code).
const BLOCK_PRE: &str = "<pre style=\"background-color:#f6f8fa;padding:16px;\">\n";
|
|
|
|
|
2019-12-21 05:20:15 +01:00
|
|
|
// Decides whether a line-reading loop should keep going based on the
// result of the most recent read: only a successful read of at least
// one byte continues the loop. Both end-of-input (zero bytes) and
// read errors stop it.
fn should_continue(res: &io::Result<usize>) -> bool {
    res.as_ref().map(|&bytes_read| bytes_read > 0).unwrap_or(false)
}
|
|
|
|
|
2019-12-21 16:57:02 +01:00
|
|
|
// This function is taken from the Comrak documentation.
|
2020-06-19 04:39:53 +02:00
|
|
|
fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F)
|
|
|
|
where
|
|
|
|
F: Fn(&'a AstNode<'a>),
|
|
|
|
{
|
2019-12-21 16:57:02 +01:00
|
|
|
f(node);
|
|
|
|
for c in node.children() {
|
|
|
|
iter_nodes(c, f);
|
|
|
|
}
|
|
|
|
}
|
2019-12-21 15:45:11 +01:00
|
|
|
|
2019-12-21 16:57:02 +01:00
|
|
|
// Many of the syntaxes in the syntax list have random capitalisations, which
|
|
|
|
// means that name matching for the block info of a code block in HTML fails.
|
|
|
|
//
|
|
|
|
// Instead, try finding a syntax match by comparing case insensitively (for
|
|
|
|
// ASCII characters, anyways).
|
|
|
|
fn find_syntax_case_insensitive(info: &str) -> Option<&'static SyntaxReference> {
|
2020-01-11 03:17:19 +01:00
|
|
|
// TODO(tazjin): memoize this lookup
|
2020-06-19 04:39:53 +02:00
|
|
|
SYNTAXES
|
|
|
|
.syntaxes()
|
|
|
|
.iter()
|
|
|
|
.rev()
|
|
|
|
.find(|&s| info.eq_ignore_ascii_case(&s.name))
|
2019-12-21 16:57:02 +01:00
|
|
|
}
|
|
|
|
|
2020-01-11 03:17:19 +01:00
|
|
|
// Replaces code-block inside of a Markdown AST with HTML blocks rendered by
|
|
|
|
// syntect. This enables static (i.e. no JavaScript) syntax highlighting, even
|
|
|
|
// of complex languages.
|
|
|
|
fn highlight_code_block(code_block: &NodeCodeBlock) -> NodeValue {
|
|
|
|
let theme = &THEMES.themes["InspiredGitHub"];
|
|
|
|
let info = String::from_utf8_lossy(&code_block.info);
|
|
|
|
|
|
|
|
let syntax = find_syntax_case_insensitive(&info)
|
|
|
|
.or_else(|| SYNTAXES.find_syntax_by_extension(&info))
|
|
|
|
.unwrap_or_else(|| SYNTAXES.find_syntax_plain_text());
|
|
|
|
|
|
|
|
let code = String::from_utf8_lossy(&code_block.literal);
|
|
|
|
|
|
|
|
let rendered = {
|
|
|
|
// Write the block preamble manually to get exactly the
|
|
|
|
// desired layout:
|
|
|
|
let mut hl = HighlightLines::new(syntax, theme);
|
|
|
|
let mut buf = BLOCK_PRE.to_string();
|
|
|
|
|
|
|
|
for line in LinesWithEndings::from(&code) {
|
|
|
|
let regions = hl.highlight(line, &SYNTAXES);
|
2020-06-19 04:39:53 +02:00
|
|
|
append_highlighted_html_for_styled_line(®ions[..], IncludeBackground::No, &mut buf);
|
2020-01-11 03:17:19 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
buf.push_str("</pre>");
|
|
|
|
buf
|
|
|
|
};
|
|
|
|
|
|
|
|
let block = NodeHtmlBlock {
|
|
|
|
block_type: 1, // It's unclear what behaviour is toggled by this
|
|
|
|
literal: rendered.into_bytes(),
|
|
|
|
};
|
|
|
|
|
|
|
|
NodeValue::HtmlBlock(block)
|
|
|
|
}
|
|
|
|
|
2020-01-11 06:06:36 +01:00
|
|
|
// Supported callout elements. Each of them is rendered with its own
// distinct CSS class.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Callout {
    Todo,
    Warning,
    Question,
    Tip,
}
|
|
|
|
|
|
|
|
// Determine whether the first child of the supplied node contains a text that
|
|
|
|
// should cause a callout section to be rendered.
|
|
|
|
fn has_callout<'a>(node: &Node<'a, RefCell<Ast>>) -> Option<Callout> {
|
|
|
|
match node.first_child().map(|c| c.data.borrow()) {
|
|
|
|
Some(child) => match &child.value {
|
|
|
|
NodeValue::Text(text) => {
|
|
|
|
if text.starts_with("TODO".as_bytes()) {
|
2020-06-19 04:39:53 +02:00
|
|
|
return Some(Callout::Todo);
|
2020-01-11 06:06:36 +01:00
|
|
|
} else if text.starts_with("WARNING".as_bytes()) {
|
2020-06-19 04:39:53 +02:00
|
|
|
return Some(Callout::Warning);
|
2020-01-11 06:06:36 +01:00
|
|
|
} else if text.starts_with("QUESTION".as_bytes()) {
|
2020-06-19 04:39:53 +02:00
|
|
|
return Some(Callout::Question);
|
2020-01-11 06:06:36 +01:00
|
|
|
} else if text.starts_with("TIP".as_bytes()) {
|
2020-06-19 04:39:53 +02:00
|
|
|
return Some(Callout::Tip);
|
2020-01-11 06:06:36 +01:00
|
|
|
}
|
|
|
|
|
2020-06-19 04:39:53 +02:00
|
|
|
return None;
|
|
|
|
}
|
2020-01-11 06:06:36 +01:00
|
|
|
_ => return None,
|
|
|
|
},
|
|
|
|
_ => return None,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn format_callout_paragraph(callout: Callout) -> NodeValue {
|
|
|
|
let class = match callout {
|
|
|
|
Callout::Todo => "cheddar-todo",
|
|
|
|
Callout::Warning => "cheddar-warning",
|
|
|
|
Callout::Question => "cheddar-question",
|
|
|
|
Callout::Tip => "cheddar-tip",
|
|
|
|
};
|
|
|
|
|
|
|
|
NodeValue::HtmlBlock(NodeHtmlBlock {
|
|
|
|
block_type: 1,
|
|
|
|
literal: format!("<p class=\"cheddar-callout {}\">", class).into_bytes(),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2020-06-19 04:35:05 +02:00
|
|
|
fn format_markdown<R: BufRead, W: Write>(reader: &mut R, writer: &mut W) {
|
2019-12-21 16:57:02 +01:00
|
|
|
let document = {
|
|
|
|
let mut buffer = String::new();
|
2020-06-19 04:39:53 +02:00
|
|
|
reader
|
|
|
|
.read_to_string(&mut buffer)
|
|
|
|
.expect("reading should work");
|
2020-06-19 04:35:05 +02:00
|
|
|
drop(reader);
|
2019-12-21 16:57:02 +01:00
|
|
|
buffer
|
2019-12-21 15:45:11 +01:00
|
|
|
};
|
|
|
|
|
2019-12-21 16:57:02 +01:00
|
|
|
let arena = Arena::new();
|
|
|
|
let root = parse_document(&arena, &document, &MD_OPTS);
|
|
|
|
|
2020-01-11 06:06:36 +01:00
|
|
|
// This node must exist with a lifetime greater than that of the parsed AST
|
|
|
|
// in case that callouts are encountered (otherwise insertion into the tree
|
|
|
|
// is not possible).
|
|
|
|
let p_close = Node::new(RefCell::new(Ast {
|
|
|
|
start_line: 0, // TODO(tazjin): hrmm
|
|
|
|
content: vec![],
|
|
|
|
open: false,
|
|
|
|
last_line_blank: false,
|
|
|
|
value: NodeValue::HtmlBlock(NodeHtmlBlock {
|
|
|
|
block_type: 1,
|
|
|
|
literal: "</p>".as_bytes().to_vec(),
|
|
|
|
}),
|
|
|
|
}));
|
|
|
|
|
2019-12-21 16:57:02 +01:00
|
|
|
// Syntax highlighting is implemented by traversing the arena and
|
|
|
|
// replacing all code blocks with HTML blocks rendered by syntect.
|
|
|
|
iter_nodes(root, &|node| {
|
|
|
|
let mut ast = node.data.borrow_mut();
|
2020-01-11 03:17:19 +01:00
|
|
|
let new = match &ast.value {
|
|
|
|
NodeValue::CodeBlock(code) => Some(highlight_code_block(code)),
|
2020-06-19 04:39:53 +02:00
|
|
|
NodeValue::Paragraph => {
|
|
|
|
if let Some(callout) = has_callout(node) {
|
|
|
|
node.insert_after(&p_close);
|
|
|
|
Some(format_callout_paragraph(callout))
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
2020-01-11 03:17:19 +01:00
|
|
|
_ => None,
|
2019-12-21 16:57:02 +01:00
|
|
|
};
|
2020-01-11 03:17:19 +01:00
|
|
|
|
|
|
|
if let Some(new_value) = new {
|
|
|
|
ast.value = new_value
|
|
|
|
}
|
2019-12-21 16:57:02 +01:00
|
|
|
});
|
|
|
|
|
2020-06-19 04:39:53 +02:00
|
|
|
format_html(root, &MD_OPTS, writer).expect("Markdown rendering failed");
|
2019-12-21 15:09:12 +01:00
|
|
|
}
|
2019-12-21 05:20:15 +01:00
|
|
|
|
2020-06-20 03:57:31 +02:00
|
|
|
fn find_syntax_for_file(filename: &str) -> &'static SyntaxReference {
|
|
|
|
return (*FILENAME_OVERRIDES)
|
|
|
|
.get(filename)
|
|
|
|
.and_then(|name| SYNTAXES.find_syntax_by_name(name))
|
|
|
|
.or_else(|| {
|
|
|
|
Path::new(filename)
|
|
|
|
.extension()
|
|
|
|
.and_then(OsStr::to_str)
|
|
|
|
.and_then(|s| SYNTAXES.find_syntax_by_extension(s))
|
|
|
|
})
|
|
|
|
.unwrap_or_else(|| SYNTAXES.find_syntax_plain_text());
|
|
|
|
}
|
|
|
|
|
|
|
|
fn format_code<R: BufRead, W: Write>(
|
|
|
|
theme: &Theme,
|
|
|
|
reader: &mut R,
|
|
|
|
writer: &mut W,
|
|
|
|
filename: &str,
|
|
|
|
) {
|
2019-12-21 05:20:15 +01:00
|
|
|
let mut linebuf = String::new();
|
|
|
|
|
|
|
|
// Get the first line, we might need it for syntax identification.
|
2020-06-19 04:35:05 +02:00
|
|
|
let mut read_result = reader.read_line(&mut linebuf);
|
2020-06-20 03:57:31 +02:00
|
|
|
let syntax = find_syntax_for_file(filename);
|
2019-12-21 15:09:12 +01:00
|
|
|
|
2019-12-21 05:20:15 +01:00
|
|
|
let mut hl = HighlightLines::new(syntax, theme);
|
|
|
|
let (mut outbuf, bg) = start_highlighted_html_snippet(theme);
|
|
|
|
|
|
|
|
// Rather than using the `lines` iterator, read each line manually
|
|
|
|
// and maintain buffer state.
|
|
|
|
//
|
|
|
|
// This is done because the syntax highlighter requires trailing
|
|
|
|
// newlines to be efficient, and those are stripped in the lines
|
|
|
|
// iterator.
|
|
|
|
while should_continue(&read_result) {
|
2019-12-21 15:09:12 +01:00
|
|
|
let regions = hl.highlight(&linebuf, &SYNTAXES);
|
2019-12-21 05:20:15 +01:00
|
|
|
|
|
|
|
append_highlighted_html_for_styled_line(
|
|
|
|
®ions[..],
|
|
|
|
IncludeBackground::IfDifferent(bg),
|
|
|
|
&mut outbuf,
|
|
|
|
);
|
|
|
|
|
|
|
|
// immediately output the current state to avoid keeping
|
|
|
|
// things in memory
|
2020-06-19 04:35:05 +02:00
|
|
|
write!(writer, "{}", outbuf).expect("write should not fail");
|
2019-12-21 05:20:15 +01:00
|
|
|
|
|
|
|
// merry go round again
|
|
|
|
linebuf.clear();
|
|
|
|
outbuf.clear();
|
2020-06-19 04:35:05 +02:00
|
|
|
read_result = reader.read_line(&mut linebuf);
|
2019-12-21 05:20:15 +01:00
|
|
|
}
|
|
|
|
|
2020-06-19 04:35:05 +02:00
|
|
|
writeln!(writer, "</pre>").expect("write should not fail");
|
2019-12-21 05:20:15 +01:00
|
|
|
}
|
2019-12-21 15:09:12 +01:00
|
|
|
|
2020-06-20 03:57:31 +02:00
|
|
|
// Starts a Sourcegraph-compatible syntax highlighting server. This
|
|
|
|
// replaces the 'syntect_server' component of Sourcegraph.
|
|
|
|
fn highlighting_server(listen: &str) {
|
|
|
|
println!("Starting syntax highlighting server on '{}'", listen);
|
|
|
|
#[derive(Deserialize)]
|
|
|
|
struct SourcegraphQuery {
|
|
|
|
filepath: String,
|
|
|
|
theme: String,
|
|
|
|
code: String,
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sourcegraph only uses a single endpoint, so we don't attempt to
|
|
|
|
// deal with routing here for now.
|
|
|
|
rouille::start_server(listen, move |request| {
|
|
|
|
let query: SourcegraphQuery = try_or_400!(rouille::input::json_input(request));
|
|
|
|
println!("Handling highlighting request for '{}'", query.filepath);
|
|
|
|
let mut buf: Vec<u8> = Vec::new();
|
|
|
|
|
|
|
|
// We don't use syntect with the sourcegraph themes bundled
|
|
|
|
// currently, so let's fall back to something that is kind of
|
|
|
|
// similar (tm).
|
|
|
|
let theme = &THEMES.themes[match query.theme.as_str() {
|
|
|
|
"Sourcegraph (light)" => "Solarized (light)",
|
|
|
|
_ => "Solarized (dark)",
|
|
|
|
}];
|
|
|
|
|
|
|
|
format_code(
|
|
|
|
theme,
|
|
|
|
&mut query.code.as_bytes(),
|
|
|
|
&mut buf,
|
|
|
|
&query.filepath,
|
|
|
|
);
|
|
|
|
|
|
|
|
Response::json(&json!({
|
|
|
|
"is_plaintext": false,
|
|
|
|
"data": String::from_utf8_lossy(&buf)
|
|
|
|
}))
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2019-12-21 15:09:12 +01:00
|
|
|
fn main() {
|
2020-06-17 02:41:34 +02:00
|
|
|
// Parse the command-line flags passed to cheddar to determine
|
|
|
|
// whether it is running in about-filter mode (`--about-filter`)
|
|
|
|
// and what file extension has been supplied.
|
|
|
|
let matches = App::new("cheddar")
|
|
|
|
.about("TVL's syntax highlighter")
|
2020-06-19 04:39:53 +02:00
|
|
|
.arg(
|
|
|
|
Arg::with_name("about-filter")
|
|
|
|
.help("Run as a cgit about-filter (renders Markdown)")
|
|
|
|
.long("about-filter")
|
|
|
|
.takes_value(false),
|
|
|
|
)
|
2020-06-20 03:57:31 +02:00
|
|
|
.arg(
|
|
|
|
Arg::with_name("sourcegraph-server")
|
|
|
|
.help("Run as a Sourcegraph compatible web-server")
|
|
|
|
.long("sourcegraph-server")
|
|
|
|
.takes_value(false),
|
|
|
|
)
|
|
|
|
.arg(
|
|
|
|
Arg::with_name("listen")
|
|
|
|
.help("Address to listen on")
|
|
|
|
.long("listen")
|
|
|
|
.takes_value(true),
|
|
|
|
)
|
2020-06-19 04:39:53 +02:00
|
|
|
.arg(Arg::with_name("filename").help("File to render").index(1))
|
2020-06-17 02:41:34 +02:00
|
|
|
.get_matches();
|
|
|
|
|
2020-06-20 03:57:31 +02:00
|
|
|
if matches.is_present("sourcegraph-server") {
|
|
|
|
highlighting_server(
|
|
|
|
matches
|
|
|
|
.value_of("listen")
|
|
|
|
.expect("Listening address is required for server mode"),
|
|
|
|
);
|
|
|
|
return;
|
2020-06-17 02:41:34 +02:00
|
|
|
}
|
|
|
|
|
2020-06-20 03:57:31 +02:00
|
|
|
let filename = matches.value_of("filename").expect("filename is required");
|
2020-01-20 00:46:04 +01:00
|
|
|
|
2020-06-19 04:35:05 +02:00
|
|
|
let stdin = io::stdin();
|
|
|
|
let mut in_handle = stdin.lock();
|
|
|
|
|
|
|
|
let stdout = io::stdout();
|
|
|
|
let mut out_handle = stdout.lock();
|
|
|
|
|
2020-06-20 03:57:31 +02:00
|
|
|
if matches.is_present("about-filter") && filename.ends_with(".md") {
|
|
|
|
format_markdown(&mut in_handle, &mut out_handle);
|
|
|
|
} else {
|
|
|
|
format_code(
|
|
|
|
&THEMES.themes["InspiredGitHub"],
|
|
|
|
&mut in_handle,
|
|
|
|
&mut out_handle,
|
|
|
|
filename,
|
|
|
|
);
|
2019-12-21 15:09:12 +01:00
|
|
|
}
|
|
|
|
}
|