refactor(tvix/eval): rewrite xml emitter to be simple-stupid
In order to be compatible with the nix XML generator, it’s easier to generate the XML directly, instead of going through a library which we have to bend to do what we need. Removes dependency on `xml-rs`, which came with a full XML parser that we didn’t use. Only takes a tiny bit of code for the XML escaping, somewhat simplified. I add a little escaping value, to make sure we have the same behaviour as nix proper. Interestingly enough, we never need to escape XML attribute names, because the `builtins.toXML` format encodes user-defined values as attribute keys only. So we only escape attribute values. Fixes: https://b.tvl.fyi/issues/399 Change-Id: If4d407d324864b3bb9aa3160e2ec6889f7727127 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11697 Tested-by: BuildkiteCI Reviewed-by: flokli <flokli@flokli.de> Autosubmit: Profpatsch <mail@profpatsch.de>
This commit is contained in:
parent
e7be342256
commit
5b2ba0efa1
9 changed files with 205 additions and 89 deletions
7
tvix/Cargo.lock
generated
7
tvix/Cargo.lock
generated
|
@ -4167,7 +4167,6 @@ dependencies = [
|
|||
"test-strategy",
|
||||
"toml",
|
||||
"tvix-eval-builtin-macros",
|
||||
"xml-rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -4844,12 +4843,6 @@ dependencies = [
|
|||
"rustix",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xml-rs"
|
||||
version = "0.8.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fcb9cbac069e033553e8bb871be2fbdffcab578eb25bd0f7c508cedc6dcd75a"
|
||||
|
||||
[[package]]
|
||||
name = "xz2"
|
||||
version = "0.1.7"
|
||||
|
|
|
@ -13222,10 +13222,6 @@ rec {
|
|||
packageId = "tvix-eval-builtin-macros";
|
||||
rename = "builtin-macros";
|
||||
}
|
||||
{
|
||||
name = "xml-rs";
|
||||
packageId = "xml-rs";
|
||||
}
|
||||
];
|
||||
devDependencies = [
|
||||
{
|
||||
|
@ -16041,18 +16037,6 @@ rec {
|
|||
};
|
||||
resolvedDefaultFeatures = [ "default" "unsupported" ];
|
||||
};
|
||||
"xml-rs" = rec {
|
||||
crateName = "xml-rs";
|
||||
version = "0.8.19";
|
||||
edition = "2021";
|
||||
crateBin = [ ];
|
||||
sha256 = "0nnpvk3fv32hgh7vs9gbg2swmzxx5yz73f4b7rak7q39q2x9rjqg";
|
||||
libName = "xml";
|
||||
authors = [
|
||||
"Vladimir Matveev <vmatveev@citrine.cc>"
|
||||
];
|
||||
|
||||
};
|
||||
"xz2" = rec {
|
||||
crateName = "xz2";
|
||||
version = "0.1.7";
|
||||
|
|
|
@ -30,7 +30,6 @@ smol_str = "0.2.0"
|
|||
tabwriter = "1.2"
|
||||
test-strategy = { version = "0.2.1", optional = true }
|
||||
toml = "0.6.0"
|
||||
xml-rs = "0.8.4"
|
||||
sha2 = "0.10.8"
|
||||
sha1 = "0.10.6"
|
||||
md-5 = "0.10.6"
|
||||
|
|
|
@ -3,59 +3,43 @@
|
|||
//! things in nixpkgs rely on.
|
||||
|
||||
use bstr::ByteSlice;
|
||||
use std::borrow::Cow;
|
||||
use std::{io::Write, rc::Rc};
|
||||
use xml::writer::events::XmlEvent;
|
||||
use xml::writer::EmitterConfig;
|
||||
use xml::writer::EventWriter;
|
||||
|
||||
use crate::{ErrorKind, Value};
|
||||
|
||||
/// Recursively serialise a value to XML. The value *must* have been
|
||||
/// deep-forced before being passed to this function.
|
||||
pub fn value_to_xml<W: Write>(mut writer: W, value: &Value) -> Result<(), ErrorKind> {
|
||||
let config = EmitterConfig {
|
||||
perform_indent: true,
|
||||
pad_self_closing: true,
|
||||
|
||||
// Nix uses single-quotes *only* in the document declaration,
|
||||
// so we need to write it manually.
|
||||
write_document_declaration: false,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Write a literal document declaration, using C++-Nix-style
|
||||
// single quotes.
|
||||
writeln!(writer, "<?xml version='1.0' encoding='utf-8'?>")?;
|
||||
|
||||
let mut writer = EventWriter::new_with_config(writer, config);
|
||||
let mut emitter = XmlEmitter::new(writer);
|
||||
|
||||
writer.write(XmlEvent::start_element("expr"))?;
|
||||
value_variant_to_xml(&mut writer, value)?;
|
||||
writer.write(XmlEvent::end_element())?;
|
||||
|
||||
// Unwrap the writer to add the final newline that C++ Nix adds.
|
||||
writeln!(writer.into_inner())?;
|
||||
emitter.write_open_tag("expr", &[])?;
|
||||
value_variant_to_xml(&mut emitter, value)?;
|
||||
emitter.write_closing_tag("expr")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_typed_value<W: Write, V: ToString>(
|
||||
w: &mut EventWriter<W>,
|
||||
name: &str,
|
||||
w: &mut XmlEmitter<W>,
|
||||
name_unescaped: &str,
|
||||
value: V,
|
||||
) -> Result<(), ErrorKind> {
|
||||
w.write(XmlEvent::start_element(name).attr("value", &value.to_string()))?;
|
||||
w.write(XmlEvent::end_element())?;
|
||||
w.write_self_closing_tag(name_unescaped, &[("value", &value.to_string())])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Result<(), ErrorKind> {
|
||||
fn value_variant_to_xml<W: Write>(w: &mut XmlEmitter<W>, value: &Value) -> Result<(), ErrorKind> {
|
||||
match value {
|
||||
Value::Thunk(t) => return value_variant_to_xml(w, &t.value()),
|
||||
|
||||
Value::Null => {
|
||||
w.write(XmlEvent::start_element("null"))?;
|
||||
w.write(XmlEvent::end_element())
|
||||
w.write_open_tag("null", &[])?;
|
||||
w.write_closing_tag("null")?;
|
||||
}
|
||||
|
||||
Value::Bool(b) => return write_typed_value(w, "bool", b),
|
||||
|
@ -65,50 +49,46 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu
|
|||
Value::Path(p) => return write_typed_value(w, "path", p.to_string_lossy()),
|
||||
|
||||
Value::List(list) => {
|
||||
w.write(XmlEvent::start_element("list"))?;
|
||||
w.write_open_tag("list", &[])?;
|
||||
|
||||
for elem in list.into_iter() {
|
||||
value_variant_to_xml(w, elem)?;
|
||||
}
|
||||
|
||||
w.write(XmlEvent::end_element())
|
||||
w.write_closing_tag("list")?;
|
||||
}
|
||||
|
||||
Value::Attrs(attrs) => {
|
||||
w.write(XmlEvent::start_element("attrs"))?;
|
||||
w.write_open_tag("attrs", &[])?;
|
||||
|
||||
for elem in attrs.iter() {
|
||||
w.write(XmlEvent::start_element("attr").attr("name", &elem.0.to_str_lossy()))?;
|
||||
w.write_open_tag("attr", &[("name", &elem.0.to_str_lossy())])?;
|
||||
value_variant_to_xml(w, elem.1)?;
|
||||
w.write(XmlEvent::end_element())?;
|
||||
w.write_closing_tag("attr")?;
|
||||
}
|
||||
|
||||
w.write(XmlEvent::end_element())
|
||||
w.write_closing_tag("attrs")?;
|
||||
}
|
||||
|
||||
Value::Closure(c) => {
|
||||
w.write(XmlEvent::start_element("function"))?;
|
||||
w.write_open_tag("function", &[])?;
|
||||
|
||||
match &c.lambda.formals {
|
||||
Some(formals) => {
|
||||
let mut attrspat = XmlEvent::start_element("attrspat");
|
||||
let mut attrs: Vec<(&str, &str)> = Vec::with_capacity(2);
|
||||
if formals.ellipsis {
|
||||
attrspat = attrspat.attr("ellipsis", "1");
|
||||
attrs.push(("ellipsis", "1"));
|
||||
}
|
||||
if let Some(ref name) = &formals.name {
|
||||
attrspat = attrspat.attr("name", name.as_str());
|
||||
attrs.push(("name", name.as_str()));
|
||||
}
|
||||
|
||||
w.write(attrspat)?;
|
||||
|
||||
w.write_open_tag("attrspat", &attrs)?;
|
||||
for arg in formals.arguments.iter() {
|
||||
w.write(
|
||||
XmlEvent::start_element("attr").attr("name", &arg.0.to_str_lossy()),
|
||||
)?;
|
||||
w.write(XmlEvent::end_element())?;
|
||||
w.write_self_closing_tag("attr", &[("name", &arg.0.to_str_lossy())])?;
|
||||
}
|
||||
|
||||
w.write(XmlEvent::end_element())?;
|
||||
w.write_closing_tag("attrspat")?;
|
||||
}
|
||||
None => {
|
||||
// TODO(tazjin): tvix does not currently persist function
|
||||
|
@ -120,17 +100,16 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu
|
|||
// If we don't want to persist the data, we can re-parse the
|
||||
// AST from the spans of the lambda's bytecode and figure it
|
||||
// out that way, but it needs some investigating.
|
||||
w.write(XmlEvent::start_element("varpat").attr("name", /* fake: */ "x"))?;
|
||||
w.write(XmlEvent::end_element())?;
|
||||
w.write_self_closing_tag("varpat", &[("name", /* fake: */ "x")])?;
|
||||
}
|
||||
}
|
||||
|
||||
w.write(XmlEvent::end_element())
|
||||
w.write_closing_tag("function")?;
|
||||
}
|
||||
|
||||
Value::Builtin(_) => {
|
||||
w.write(XmlEvent::start_element("unevaluated"))?;
|
||||
w.write(XmlEvent::end_element())
|
||||
w.write_open_tag("unevaluated", &[])?;
|
||||
w.write_closing_tag("unevaluated")?;
|
||||
}
|
||||
|
||||
Value::AttrNotFound
|
||||
|
@ -148,7 +127,174 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu
|
|||
Value::Catchable(_) => {
|
||||
panic!("tvix bug: value_to_xml() called on a value which had not been deep-forced")
|
||||
}
|
||||
}?;
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// A simple-stupid XML emitter, which implements only the subset needed for byte-by-byte compat with C++ nix’ `builtins.toXML`.
|
||||
struct XmlEmitter<W> {
|
||||
/// The current indentation
|
||||
cur_indent: usize,
|
||||
writer: W,
|
||||
}
|
||||
|
||||
impl<W: Write> XmlEmitter<W> {
|
||||
pub fn new(writer: W) -> Self {
|
||||
XmlEmitter {
|
||||
cur_indent: 0,
|
||||
writer,
|
||||
}
|
||||
}
|
||||
|
||||
/// Write an open tag with the given name (which is not escaped!)
|
||||
/// and attributes (Keys are not escaped! Only attribute values are.)
|
||||
pub fn write_open_tag(
|
||||
&mut self,
|
||||
name_unescaped: &str,
|
||||
attrs: &[(&str, &str)],
|
||||
) -> std::io::Result<()> {
|
||||
self.add_indent()?;
|
||||
self.writer.write_all(b"<")?;
|
||||
self.writer.write_all(name_unescaped.as_bytes())?;
|
||||
self.write_attrs_escape_vals(attrs)?;
|
||||
self.writer.write_all(b">\n")?;
|
||||
self.cur_indent += 2;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write a self-closing open tag with the given name (which is not escaped!)
|
||||
/// and attributes (Keys are not escaped! Only attribute values are.)
|
||||
pub fn write_self_closing_tag(
|
||||
&mut self,
|
||||
name_unescaped: &str,
|
||||
attrs: &[(&str, &str)],
|
||||
) -> std::io::Result<()> {
|
||||
self.add_indent()?;
|
||||
self.writer.write_all(b"<")?;
|
||||
self.writer.write_all(name_unescaped.as_bytes())?;
|
||||
self.write_attrs_escape_vals(attrs)?;
|
||||
self.writer.write_all(b" />\n")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write a closing tag with the given name (which is not escaped!)
|
||||
pub fn write_closing_tag(&mut self, name_unescaped: &str) -> std::io::Result<()> {
|
||||
self.cur_indent -= 2;
|
||||
self.add_indent()?;
|
||||
self.writer.write_all(b"</")?;
|
||||
self.writer.write_all(name_unescaped.as_bytes())?;
|
||||
self.writer.write_all(b">\n")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn add_indent(&mut self) -> std::io::Result<()> {
|
||||
self.writer.write_all(&b" ".repeat(self.cur_indent))
|
||||
}
|
||||
|
||||
/// Write an attribute list
|
||||
fn write_attrs_escape_vals(&mut self, attrs: &[(&str, &str)]) -> std::io::Result<()> {
|
||||
for (name, val) in attrs {
|
||||
self.writer.write_all(b" ")?;
|
||||
self.writer.write_all(name.as_bytes())?;
|
||||
self.writer.write_all(br#"=""#)?;
|
||||
self.writer
|
||||
.write_all(Self::escape_attr_value(val).as_bytes())?;
|
||||
self.writer.write_all(b"\"")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Escape the given attribute value, making sure we only actually clone the string if we needed to replace something.
|
||||
fn escape_attr_value(s: &str) -> Cow<str> {
|
||||
let mut last_escape: usize = 0;
|
||||
let mut res: Cow<str> = Cow::Borrowed("");
|
||||
// iterating via char_indices gives us the ability to index the original string slice at character boundaries
|
||||
for (idx, c) in s.char_indices() {
|
||||
match Self::should_escape_char(c) {
|
||||
None => {}
|
||||
Some(new) => {
|
||||
// add characters since the last escape we did
|
||||
res += &s[last_escape..idx];
|
||||
// add the escaped value
|
||||
res += new;
|
||||
last_escape = idx + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
// we did not need to escape anything, so borrow original string
|
||||
if last_escape == 0 {
|
||||
Cow::Borrowed(s)
|
||||
} else {
|
||||
// add the remaining characters
|
||||
res += &s[last_escape..];
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
fn should_escape_char(c: char) -> Option<&'static str> {
|
||||
match c {
|
||||
'<' => Some("<"),
|
||||
'>' => Some(">"),
|
||||
'"' => Some("""),
|
||||
'\'' => Some("'"),
|
||||
'&' => Some("&"),
|
||||
'\n' => Some("
"),
|
||||
'\r' => Some("
"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use bytes::buf::Writer;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use crate::builtins::to_xml::XmlEmitter;
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[test]
|
||||
fn xml_gen() {
|
||||
let mut buf = Vec::new();
|
||||
let mut x = XmlEmitter::new(&mut buf);
|
||||
x.write_open_tag("hello", &[("hi", "it’s me"), ("no", "<escape>")])
|
||||
.unwrap();
|
||||
x.write_self_closing_tag("self-closing", &[("tag", "yay")])
|
||||
.unwrap();
|
||||
x.write_closing_tag("hello").unwrap();
|
||||
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&buf).unwrap(),
|
||||
r##"<hello hi="it’s me" no="<escape>">
|
||||
<self-closing tag="yay" />
|
||||
</hello>
|
||||
"##
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn xml_escape() {
|
||||
match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("ab<>c&de") {
|
||||
Cow::Owned(s) => assert_eq!(s, "ab<>c&de".to_string(), "escape stuff"),
|
||||
Cow::Borrowed(s) => panic!("s should be owned {}", s),
|
||||
}
|
||||
match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("") {
|
||||
Cow::Borrowed(s) => assert_eq!(s, "", "empty escape is borrowed"),
|
||||
Cow::Owned(s) => panic!("s should be borrowed {}", s),
|
||||
}
|
||||
match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("hi!ŷbla") {
|
||||
Cow::Borrowed(s) => assert_eq!(s, "hi!ŷbla", "no escape is borrowed"),
|
||||
Cow::Owned(s) => panic!("s should be borrowed {}", s),
|
||||
}
|
||||
match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("hi!<ŷ>bla") {
|
||||
Cow::Owned(s) => assert_eq!(
|
||||
s,
|
||||
"hi!<ŷ>bla".to_string(),
|
||||
"multi-byte chars are correctly used"
|
||||
),
|
||||
Cow::Borrowed(s) => panic!("s should be owned {}", s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,7 +10,6 @@ use std::{fmt::Debug, fmt::Display, num::ParseIntError};
|
|||
use codemap::{File, Span};
|
||||
use codemap_diagnostic::{ColorConfig, Diagnostic, Emitter, Level, SpanLabel, SpanStyle};
|
||||
use smol_str::SmolStr;
|
||||
use xml::writer::Error as XmlError;
|
||||
|
||||
use crate::spans::ToSpan;
|
||||
use crate::value::{CoercionKind, NixString};
|
||||
|
@ -194,9 +193,6 @@ pub enum ErrorKind {
|
|||
/// Invalid UTF-8 was encoutered somewhere
|
||||
Utf8,
|
||||
|
||||
/// Errors while serialising to XML.
|
||||
Xml(Rc<XmlError>),
|
||||
|
||||
/// Variant for errors that bubble up to eval from other Tvix
|
||||
/// components.
|
||||
TvixError(Rc<dyn error::Error>),
|
||||
|
@ -248,7 +244,6 @@ impl error::Error for Error {
|
|||
errors.first().map(|e| e as &dyn error::Error)
|
||||
}
|
||||
ErrorKind::IO { error, .. } => Some(error.as_ref()),
|
||||
ErrorKind::Xml(error) => Some(error.as_ref()),
|
||||
ErrorKind::TvixError(error) => Some(error.as_ref()),
|
||||
_ => None,
|
||||
}
|
||||
|
@ -285,12 +280,6 @@ impl From<bstr::FromUtf8Error> for ErrorKind {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<XmlError> for ErrorKind {
|
||||
fn from(err: XmlError) -> Self {
|
||||
Self::Xml(Rc::new(err))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<io::Error> for ErrorKind {
|
||||
fn from(e: io::Error) -> Self {
|
||||
ErrorKind::IO {
|
||||
|
@ -506,8 +495,6 @@ to a missing value in the attribute set(s) included via `with`."#,
|
|||
write!(f, "Invalid UTF-8 in string")
|
||||
}
|
||||
|
||||
ErrorKind::Xml(error) => write!(f, "failed to serialise to XML: {error}"),
|
||||
|
||||
ErrorKind::TvixError(inner_error) => {
|
||||
write!(f, "{inner_error}")
|
||||
}
|
||||
|
@ -823,7 +810,6 @@ impl Error {
|
|||
| ErrorKind::JsonError(_)
|
||||
| ErrorKind::NotSerialisableToJson(_)
|
||||
| ErrorKind::FromTomlError(_)
|
||||
| ErrorKind::Xml(_)
|
||||
| ErrorKind::Utf8
|
||||
| ErrorKind::TvixError(_)
|
||||
| ErrorKind::TvixBug { .. }
|
||||
|
@ -870,7 +856,6 @@ impl Error {
|
|||
ErrorKind::UnexpectedArgument { .. } => "E031",
|
||||
ErrorKind::RelativePathResolution(_) => "E032",
|
||||
ErrorKind::DivisionByZero => "E033",
|
||||
ErrorKind::Xml(_) => "E034",
|
||||
ErrorKind::FromTomlError(_) => "E035",
|
||||
ErrorKind::NotSerialisableToJson(_) => "E036",
|
||||
ErrorKind::UnexpectedContext => "E037",
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<expr>
|
||||
<attrs>
|
||||
</attrs>
|
||||
</expr>
|
1
tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.nix
Normal file
1
tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.nix
Normal file
|
@ -0,0 +1 @@
|
|||
{ }
|
1
tvix/eval/src/tests/tvix_tests/eval-okay-toxml.exp
Normal file
1
tvix/eval/src/tests/tvix_tests/eval-okay-toxml.exp
Normal file
|
@ -0,0 +1 @@
|
|||
"<?xml version='1.0' encoding='utf-8'?>\n<expr>\n <attrs>\n <attr name=\"&-{\">\n <string value=\";&"\" />\n </attr>\n <attr name=\"a\">\n <string value=\"s\" />\n </attr>\n </attrs>\n</expr>\n"
|
2
tvix/eval/src/tests/tvix_tests/eval-okay-toxml.nix
Normal file
2
tvix/eval/src/tests/tvix_tests/eval-okay-toxml.nix
Normal file
|
@ -0,0 +1,2 @@
|
|||
# Check some corner cases regarding escaping.
|
||||
builtins.toXML { a = "s"; "&-{" = ";&\""; }
|
Loading…
Reference in a new issue