XmlEmitter gains a NixContext field, and `write_typed_value` extends it with all context elements present in the passed value. Once all serialization is done, an `into_context()` function returns the collected context, so we can construct a NixString with context. Tests for this live in tvix-glue, as we use builtins.derivation, which is not present in the tvix-eval crate. Fixes b/398. Change-Id: I85feaaa17b753885f8a017a54e419ec4e602af21 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11704 Tested-by: BuildkiteCI Reviewed-by: flokli <flokli@flokli.de> Autosubmit: flokli <flokli@flokli.de> Reviewed-by: Alyssa Ross <hi@alyssa.is>
321 lines
11 KiB
Rust
321 lines
11 KiB
Rust
//! This module implements `builtins.toXML`, which is a serialisation
//! of value information as well as internal tvix state that several
//! things in nixpkgs rely on.
|
||
|
||
use bstr::ByteSlice;
|
||
use std::borrow::Cow;
|
||
use std::{io::Write, rc::Rc};
|
||
|
||
use crate::{ErrorKind, NixContext, NixContextElement, Value};
|
||
|
||
/// Recursively serialise a value to XML. The value *must* have been
|
||
/// deep-forced before being passed to this function.
|
||
/// On success, returns the NixContext.
|
||
pub fn value_to_xml<W: Write>(mut writer: W, value: &Value) -> Result<NixContext, ErrorKind> {
|
||
// Write a literal document declaration, using C++-Nix-style
|
||
// single quotes.
|
||
writeln!(writer, "<?xml version='1.0' encoding='utf-8'?>")?;
|
||
|
||
let mut emitter = XmlEmitter::new(writer);
|
||
|
||
emitter.write_open_tag("expr", &[])?;
|
||
value_variant_to_xml(&mut emitter, value)?;
|
||
emitter.write_closing_tag("expr")?;
|
||
|
||
Ok(emitter.into_context())
|
||
}
|
||
|
||
fn write_typed_value<W: Write, V: ToString>(
|
||
w: &mut XmlEmitter<W>,
|
||
name_unescaped: &str,
|
||
value: V,
|
||
) -> Result<(), ErrorKind> {
|
||
w.write_self_closing_tag(name_unescaped, &[("value", &value.to_string())])?;
|
||
Ok(())
|
||
}
|
||
|
||
fn value_variant_to_xml<W: Write>(w: &mut XmlEmitter<W>, value: &Value) -> Result<(), ErrorKind> {
|
||
match value {
|
||
Value::Thunk(t) => return value_variant_to_xml(w, &t.value()),
|
||
|
||
Value::Null => {
|
||
w.write_open_tag("null", &[])?;
|
||
w.write_closing_tag("null")?;
|
||
}
|
||
|
||
Value::Bool(b) => return write_typed_value(w, "bool", b),
|
||
Value::Integer(i) => return write_typed_value(w, "int", i),
|
||
Value::Float(f) => return write_typed_value(w, "float", f),
|
||
Value::String(s) => {
|
||
if let Some(context) = s.context() {
|
||
w.extend_context(context.iter().cloned());
|
||
}
|
||
return write_typed_value(w, "string", s.to_str()?);
|
||
}
|
||
Value::Path(p) => return write_typed_value(w, "path", p.to_string_lossy()),
|
||
|
||
Value::List(list) => {
|
||
w.write_open_tag("list", &[])?;
|
||
|
||
for elem in list.into_iter() {
|
||
value_variant_to_xml(w, elem)?;
|
||
}
|
||
|
||
w.write_closing_tag("list")?;
|
||
}
|
||
|
||
Value::Attrs(attrs) => {
|
||
w.write_open_tag("attrs", &[])?;
|
||
|
||
for elem in attrs.iter() {
|
||
w.write_open_tag("attr", &[("name", &elem.0.to_str_lossy())])?;
|
||
value_variant_to_xml(w, elem.1)?;
|
||
w.write_closing_tag("attr")?;
|
||
}
|
||
|
||
w.write_closing_tag("attrs")?;
|
||
}
|
||
|
||
Value::Closure(c) => {
|
||
w.write_open_tag("function", &[])?;
|
||
|
||
match &c.lambda.formals {
|
||
Some(formals) => {
|
||
let mut attrs: Vec<(&str, &str)> = Vec::with_capacity(2);
|
||
if formals.ellipsis {
|
||
attrs.push(("ellipsis", "1"));
|
||
}
|
||
if let Some(ref name) = &formals.name {
|
||
attrs.push(("name", name.as_str()));
|
||
}
|
||
|
||
w.write_open_tag("attrspat", &attrs)?;
|
||
for arg in formals.arguments.iter() {
|
||
w.write_self_closing_tag("attr", &[("name", &arg.0.to_str_lossy())])?;
|
||
}
|
||
|
||
w.write_closing_tag("attrspat")?;
|
||
}
|
||
None => {
|
||
// TODO(tazjin): tvix does not currently persist function
|
||
// argument names anywhere (whereas we do for formals, as
|
||
// that is required for other runtime behaviour). Because of
|
||
// this the implementation here is fake, always returning
|
||
// the same argument name.
|
||
//
|
||
// If we don't want to persist the data, we can re-parse the
|
||
// AST from the spans of the lambda's bytecode and figure it
|
||
// out that way, but it needs some investigating.
|
||
w.write_self_closing_tag("varpat", &[("name", /* fake: */ "x")])?;
|
||
}
|
||
}
|
||
|
||
w.write_closing_tag("function")?;
|
||
}
|
||
|
||
Value::Builtin(_) => {
|
||
w.write_open_tag("unevaluated", &[])?;
|
||
w.write_closing_tag("unevaluated")?;
|
||
}
|
||
|
||
Value::AttrNotFound
|
||
| Value::Blueprint(_)
|
||
| Value::DeferredUpvalue(_)
|
||
| Value::UnresolvedPath(_)
|
||
| Value::Json(..)
|
||
| Value::FinaliseRequest(_) => {
|
||
return Err(ErrorKind::TvixBug {
|
||
msg: "internal value variant encountered in builtins.toXML",
|
||
metadata: Some(Rc::new(value.clone())),
|
||
})
|
||
}
|
||
|
||
Value::Catchable(_) => {
|
||
panic!("tvix bug: value_to_xml() called on a value which had not been deep-forced")
|
||
}
|
||
};
|
||
|
||
Ok(())
|
||
}
|
||
|
||
/// A simple-stupid XML emitter, which implements only the subset needed for byte-by-byte compat with C++ nix’ `builtins.toXML`.
|
||
struct XmlEmitter<W> {
|
||
/// The current indentation
|
||
cur_indent: usize,
|
||
writer: W,
|
||
context: NixContext,
|
||
}
|
||
|
||
impl<W: Write> XmlEmitter<W> {
|
||
pub fn new(writer: W) -> Self {
|
||
XmlEmitter {
|
||
cur_indent: 0,
|
||
writer,
|
||
context: Default::default(),
|
||
}
|
||
}
|
||
|
||
/// Write an open tag with the given name (which is not escaped!)
|
||
/// and attributes (Keys are not escaped! Only attribute values are.)
|
||
pub fn write_open_tag(
|
||
&mut self,
|
||
name_unescaped: &str,
|
||
attrs: &[(&str, &str)],
|
||
) -> std::io::Result<()> {
|
||
self.add_indent()?;
|
||
self.writer.write_all(b"<")?;
|
||
self.writer.write_all(name_unescaped.as_bytes())?;
|
||
self.write_attrs_escape_vals(attrs)?;
|
||
self.writer.write_all(b">\n")?;
|
||
self.cur_indent += 2;
|
||
Ok(())
|
||
}
|
||
|
||
/// Write a self-closing open tag with the given name (which is not escaped!)
|
||
/// and attributes (Keys are not escaped! Only attribute values are.)
|
||
pub fn write_self_closing_tag(
|
||
&mut self,
|
||
name_unescaped: &str,
|
||
attrs: &[(&str, &str)],
|
||
) -> std::io::Result<()> {
|
||
self.add_indent()?;
|
||
self.writer.write_all(b"<")?;
|
||
self.writer.write_all(name_unescaped.as_bytes())?;
|
||
self.write_attrs_escape_vals(attrs)?;
|
||
self.writer.write_all(b" />\n")?;
|
||
Ok(())
|
||
}
|
||
|
||
/// Write a closing tag with the given name (which is not escaped!)
|
||
pub fn write_closing_tag(&mut self, name_unescaped: &str) -> std::io::Result<()> {
|
||
self.cur_indent -= 2;
|
||
self.add_indent()?;
|
||
self.writer.write_all(b"</")?;
|
||
self.writer.write_all(name_unescaped.as_bytes())?;
|
||
self.writer.write_all(b">\n")?;
|
||
Ok(())
|
||
}
|
||
|
||
#[inline]
|
||
fn add_indent(&mut self) -> std::io::Result<()> {
|
||
self.writer.write_all(&b" ".repeat(self.cur_indent))
|
||
}
|
||
|
||
/// Write an attribute list
|
||
fn write_attrs_escape_vals(&mut self, attrs: &[(&str, &str)]) -> std::io::Result<()> {
|
||
for (name, val) in attrs {
|
||
self.writer.write_all(b" ")?;
|
||
self.writer.write_all(name.as_bytes())?;
|
||
self.writer.write_all(br#"=""#)?;
|
||
self.writer
|
||
.write_all(Self::escape_attr_value(val).as_bytes())?;
|
||
self.writer.write_all(b"\"")?;
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
/// Escape the given attribute value, making sure we only actually clone the string if we needed to replace something.
|
||
fn escape_attr_value(s: &str) -> Cow<str> {
|
||
let mut last_escape: usize = 0;
|
||
let mut res: Cow<str> = Cow::Borrowed("");
|
||
// iterating via char_indices gives us the ability to index the original string slice at character boundaries
|
||
for (idx, c) in s.char_indices() {
|
||
match Self::should_escape_char(c) {
|
||
None => {}
|
||
Some(new) => {
|
||
// add characters since the last escape we did
|
||
res += &s[last_escape..idx];
|
||
// add the escaped value
|
||
res += new;
|
||
last_escape = idx + 1;
|
||
}
|
||
}
|
||
}
|
||
// we did not need to escape anything, so borrow original string
|
||
if last_escape == 0 {
|
||
Cow::Borrowed(s)
|
||
} else {
|
||
// add the remaining characters
|
||
res += &s[last_escape..];
|
||
res
|
||
}
|
||
}
|
||
|
||
fn should_escape_char(c: char) -> Option<&'static str> {
|
||
match c {
|
||
'<' => Some("<"),
|
||
'>' => Some(">"),
|
||
'"' => Some("""),
|
||
'\'' => Some("'"),
|
||
'&' => Some("&"),
|
||
'\n' => Some("
"),
|
||
'\r' => Some("
"),
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
/// Extends the existing context with more context elements.
|
||
fn extend_context<T>(&mut self, iter: T)
|
||
where
|
||
T: IntoIterator<Item = NixContextElement>,
|
||
{
|
||
self.context.extend(iter)
|
||
}
|
||
|
||
/// Consumes [Self] and returns the [NixContext] collected.
|
||
fn into_context(self) -> NixContext {
|
||
self.context
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use bytes::buf::Writer;
    use pretty_assertions::assert_eq;

    use crate::builtins::to_xml::XmlEmitter;
    use std::borrow::Cow;

    /// Tags are nested with two spaces of indentation per level, and
    /// attribute values (but not names) are escaped.
    #[test]
    fn xml_gen() {
        let mut buf = Vec::new();
        let mut x = XmlEmitter::new(&mut buf);
        x.write_open_tag("hello", &[("hi", "it’s me"), ("no", "<escape>")])
            .unwrap();
        x.write_self_closing_tag("self-closing", &[("tag", "yay")])
            .unwrap();
        x.write_closing_tag("hello").unwrap();

        assert_eq!(
            std::str::from_utf8(&buf).unwrap(),
            r##"<hello hi="it’s me" no="&lt;escape&gt;">
  <self-closing tag="yay" />
</hello>
"##
        );
    }

    /// Escaping allocates only when something had to be replaced, and slices
    /// the input correctly around multi-byte characters.
    #[test]
    fn xml_escape() {
        match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("ab<>c&de") {
            Cow::Owned(s) => assert_eq!(s, "ab&lt;&gt;c&amp;de".to_string(), "escape stuff"),
            Cow::Borrowed(s) => panic!("s should be owned {}", s),
        }
        match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("") {
            Cow::Borrowed(s) => assert_eq!(s, "", "empty escape is borrowed"),
            Cow::Owned(s) => panic!("s should be borrowed {}", s),
        }
        match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("hi!ŷbla") {
            Cow::Borrowed(s) => assert_eq!(s, "hi!ŷbla", "no escape is borrowed"),
            Cow::Owned(s) => panic!("s should be borrowed {}", s),
        }
        match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("hi!<ŷ>bla") {
            Cow::Owned(s) => assert_eq!(
                s,
                "hi!&lt;ŷ&gt;bla".to_string(),
                "multi-byte chars are correctly used"
            ),
            Cow::Borrowed(s) => panic!("s should be owned {}", s),
        }
    }
}
|