feat(tvix/eval): use lexical-core to format float

Apparently our naive implementation of float formatting, which simply
used {:.5}, and trimmed trailing "0" strings not sufficient.

It wrongly trimmed numbers with zeroes but no decimal point, like
`10000` got trimmed to `1`.

Nix uses `std::to_string` on the double, which according to
https://en.cppreference.com/w/cpp/string/basic_string/to_string
is equivalent to `std::sprintf(buf, "%f", value)`.

https://en.cppreference.com/w/cpp/io/c/fprintf mentions this is treated
like this:

> Precision specifies the exact number of digits to appear after
> the decimal point character. The default precision is 6. In the
> alternative implementation decimal point character is written even if
> no digits follow it. For infinity and not-a-number conversion style
> see notes.

This doesn't seem to be the case though, and Nix uses scientific
notation in some cases.

There's a whole bunch of strategies to determine which is a more compact
notation, and which notation should be used for a given number.
https://github.com/rust-lang/rust/issues/24556 provides some pointers
into various rabbit holes for those interested.

This gist seems to be that currently a different formatting is not
exposed in rust directly, at least not for public consumption.

There is the
[lexical-core](https://github.com/Alexhuszagh/rust-lexical) crate
though, which provides a way to format floats with various strategies
and formats.

Change our implementation of `TotalDisplay` for the `Value::Float` case
to use that. We still need to do some post-processing, because Nix
always adds the sign in scientific notation (and there's no way to
configure lexical-core to do that), and lexical-core in some cases keeps
the trailing zeros.

Even with all that in place, there as a difference in `eval-okay-
fromjson.nix` (from tvix-tests), which I couldn't get to work. I updated
the fixture to a less problematic number.

With this, the testsuite passes again, and does for the upcoming CL
introducing builtins.fromTOML, and enabling the nix testsuite bits for
it, too.

Change-Id: Ie6fba5619e1d9fd7ce669a51594658b029057acc
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7922
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: tazjin <tazjin@tvl.su>
This commit is contained in:
Florian Klink 2023-01-24 19:27:20 +01:00 committed by clbot
parent 192dac5a74
commit 1facd889bb
7 changed files with 483 additions and 4 deletions

View file

@ -333,6 +333,70 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lexical-core"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46"
dependencies = [
"lexical-parse-float",
"lexical-parse-integer",
"lexical-util",
"lexical-write-float",
"lexical-write-integer",
]
[[package]]
name = "lexical-parse-float"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f"
dependencies = [
"lexical-parse-integer",
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-parse-integer"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-util"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc"
dependencies = [
"static_assertions",
]
[[package]]
name = "lexical-write-float"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862"
dependencies = [
"lexical-util",
"lexical-write-integer",
"static_assertions",
]
[[package]]
name = "lexical-write-integer"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]]
name = "libc"
version = "0.2.134"
@ -594,6 +658,12 @@ dependencies = [
"serde",
]
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "syn"
version = "1.0.101"
@ -657,6 +727,8 @@ dependencies = [
"codemap-diagnostic",
"dirs",
"imbl",
"lazy_static",
"lexical-core",
"path-clean",
"regex",
"rnix",

72
tvix/Cargo.lock generated
View file

@ -1036,6 +1036,70 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lexical-core"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46"
dependencies = [
"lexical-parse-float",
"lexical-parse-integer",
"lexical-util",
"lexical-write-float",
"lexical-write-integer",
]
[[package]]
name = "lexical-parse-float"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f"
dependencies = [
"lexical-parse-integer",
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-parse-integer"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]]
name = "lexical-util"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc"
dependencies = [
"static_assertions",
]
[[package]]
name = "lexical-write-float"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862"
dependencies = [
"lexical-util",
"lexical-write-integer",
"static_assertions",
]
[[package]]
name = "lexical-write-integer"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446"
dependencies = [
"lexical-util",
"static_assertions",
]
[[package]]
name = "libc"
version = "0.2.137"
@ -1900,6 +1964,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "str-buf"
version = "1.0.6"
@ -2410,6 +2480,8 @@ dependencies = [
"dirs",
"imbl",
"itertools",
"lazy_static",
"lexical-core",
"path-clean",
"pretty_assertions",
"proptest",

View file

@ -2997,6 +2997,236 @@ rec {
"spin_no_std" = [ "spin" ];
};
};
"lexical-core" = rec {
crateName = "lexical-core";
version = "0.8.5";
edition = "2018";
sha256 = "0ihf0x3vrk25fq3bv9q35m0xax0wmvwkh0j0pjm2yk4ddvh5vpic";
authors = [
"Alex Huszagh <ahuszagh@gmail.com>"
];
dependencies = [
{
name = "lexical-parse-float";
packageId = "lexical-parse-float";
optional = true;
usesDefaultFeatures = false;
}
{
name = "lexical-parse-integer";
packageId = "lexical-parse-integer";
optional = true;
usesDefaultFeatures = false;
}
{
name = "lexical-util";
packageId = "lexical-util";
usesDefaultFeatures = false;
}
{
name = "lexical-write-float";
packageId = "lexical-write-float";
optional = true;
usesDefaultFeatures = false;
}
{
name = "lexical-write-integer";
packageId = "lexical-write-integer";
optional = true;
usesDefaultFeatures = false;
}
];
features = {
"compact" = [ "lexical-write-integer/compact" "lexical-write-float/compact" "lexical-parse-integer/compact" "lexical-parse-float/compact" ];
"default" = [ "std" "write-integers" "write-floats" "parse-integers" "parse-floats" ];
"f128" = [ "lexical-util/f128" "lexical-parse-float/f128" "lexical-write-float/f128" ];
"f16" = [ "lexical-util/f16" "lexical-parse-float/f16" "lexical-write-float/f16" ];
"format" = [ "lexical-util/format" "lexical-parse-integer/format" "lexical-parse-float/format" "lexical-write-integer/format" "lexical-write-float/format" ];
"lexical-parse-float" = [ "dep:lexical-parse-float" ];
"lexical-parse-integer" = [ "dep:lexical-parse-integer" ];
"lexical-write-float" = [ "dep:lexical-write-float" ];
"lexical-write-integer" = [ "dep:lexical-write-integer" ];
"lint" = [ "lexical-util/lint" "lexical-write-integer/lint" "lexical-write-float/lint" "lexical-parse-integer/lint" "lexical-parse-float/lint" ];
"nightly" = [ "lexical-write-integer/nightly" "lexical-write-float/nightly" "lexical-parse-integer/nightly" "lexical-parse-float/nightly" ];
"parse-floats" = [ "lexical-parse-float" "parse" "floats" ];
"parse-integers" = [ "lexical-parse-integer" "parse" "integers" ];
"power-of-two" = [ "lexical-util/power-of-two" "lexical-write-integer/power-of-two" "lexical-write-float/power-of-two" "lexical-parse-integer/power-of-two" "lexical-parse-float/power-of-two" ];
"radix" = [ "lexical-util/radix" "lexical-write-integer/radix" "lexical-write-float/radix" "lexical-parse-integer/radix" "lexical-parse-float/radix" ];
"safe" = [ "lexical-write-integer/safe" "lexical-write-float/safe" "lexical-parse-integer/safe" "lexical-parse-float/safe" ];
"std" = [ "lexical-util/std" "lexical-write-integer/std" "lexical-write-float/std" "lexical-parse-integer/std" "lexical-parse-float/std" ];
"write-floats" = [ "lexical-write-float" "write" "floats" ];
"write-integers" = [ "lexical-write-integer" "write" "integers" ];
};
resolvedDefaultFeatures = [ "default" "floats" "format" "integers" "lexical-parse-float" "lexical-parse-integer" "lexical-write-float" "lexical-write-integer" "parse" "parse-floats" "parse-integers" "std" "write" "write-floats" "write-integers" ];
};
"lexical-parse-float" = rec {
crateName = "lexical-parse-float";
version = "0.8.5";
edition = "2018";
sha256 = "0py0gp8hlzcrlvjqmqlpl2v1as65iiqxq2xsabxvhc01pmg3lfv8";
authors = [
"Alex Huszagh <ahuszagh@gmail.com>"
];
dependencies = [
{
name = "lexical-parse-integer";
packageId = "lexical-parse-integer";
usesDefaultFeatures = false;
}
{
name = "lexical-util";
packageId = "lexical-util";
usesDefaultFeatures = false;
features = [ "parse-floats" ];
}
{
name = "static_assertions";
packageId = "static_assertions";
}
];
features = {
"compact" = [ "lexical-util/compact" "lexical-parse-integer/compact" ];
"default" = [ "std" ];
"f128" = [ "lexical-util/f128" ];
"f16" = [ "lexical-util/f16" ];
"format" = [ "lexical-util/format" "lexical-parse-integer/format" ];
"lint" = [ "lexical-util/lint" "lexical-parse-integer/lint" ];
"nightly" = [ "lexical-parse-integer/nightly" ];
"power-of-two" = [ "lexical-util/power-of-two" "lexical-parse-integer/power-of-two" ];
"radix" = [ "lexical-util/radix" "lexical-parse-integer/radix" "power-of-two" ];
"safe" = [ "lexical-parse-integer/safe" ];
"std" = [ "lexical-util/std" "lexical-parse-integer/std" ];
};
resolvedDefaultFeatures = [ "format" "std" ];
};
"lexical-parse-integer" = rec {
crateName = "lexical-parse-integer";
version = "0.8.6";
edition = "2018";
sha256 = "1sayji3mpvb2xsjq56qcq3whfz8px9a6fxk5v7v15hyhbr4982bd";
authors = [
"Alex Huszagh <ahuszagh@gmail.com>"
];
dependencies = [
{
name = "lexical-util";
packageId = "lexical-util";
usesDefaultFeatures = false;
features = [ "parse-integers" ];
}
{
name = "static_assertions";
packageId = "static_assertions";
}
];
features = {
"compact" = [ "lexical-util/compact" ];
"default" = [ "std" ];
"format" = [ "lexical-util/format" ];
"lint" = [ "lexical-util/lint" ];
"power-of-two" = [ "lexical-util/power-of-two" ];
"radix" = [ "lexical-util/radix" "power-of-two" ];
"std" = [ "lexical-util/std" ];
};
resolvedDefaultFeatures = [ "format" "std" ];
};
"lexical-util" = rec {
crateName = "lexical-util";
version = "0.8.5";
edition = "2018";
sha256 = "1z73qkv7yxhsbc4aiginn1dqmsj8jarkrdlyxc88g2gz2vzvjmaj";
authors = [
"Alex Huszagh <ahuszagh@gmail.com>"
];
dependencies = [
{
name = "static_assertions";
packageId = "static_assertions";
}
];
features = {
"default" = [ "std" ];
"f128" = [ "floats" ];
"f16" = [ "floats" ];
"parse-floats" = [ "parse" "floats" ];
"parse-integers" = [ "parse" "integers" ];
"radix" = [ "power-of-two" ];
"write-floats" = [ "write" "floats" ];
"write-integers" = [ "write" "integers" ];
};
resolvedDefaultFeatures = [ "floats" "format" "integers" "parse" "parse-floats" "parse-integers" "std" "write" "write-floats" "write-integers" ];
};
"lexical-write-float" = rec {
crateName = "lexical-write-float";
version = "0.8.5";
edition = "2018";
sha256 = "0qk825l0csvnksh9sywb51996cjc2bylq6rxjaiha7sqqjhvmjmc";
authors = [
"Alex Huszagh <ahuszagh@gmail.com>"
];
dependencies = [
{
name = "lexical-util";
packageId = "lexical-util";
usesDefaultFeatures = false;
features = [ "write-floats" ];
}
{
name = "lexical-write-integer";
packageId = "lexical-write-integer";
usesDefaultFeatures = false;
}
{
name = "static_assertions";
packageId = "static_assertions";
}
];
features = {
"compact" = [ "lexical-util/compact" "lexical-write-integer/compact" ];
"default" = [ "std" ];
"f128" = [ "lexical-util/f128" ];
"f16" = [ "lexical-util/f16" ];
"format" = [ "lexical-util/format" ];
"lint" = [ "lexical-util/lint" "lexical-write-integer/lint" ];
"nightly" = [ "lexical-write-integer/nightly" ];
"power-of-two" = [ "lexical-util/power-of-two" "lexical-write-integer/power-of-two" ];
"radix" = [ "lexical-util/radix" "lexical-write-integer/radix" "power-of-two" ];
"safe" = [ "lexical-write-integer/safe" ];
"std" = [ "lexical-util/std" "lexical-write-integer/std" ];
};
resolvedDefaultFeatures = [ "format" "std" ];
};
"lexical-write-integer" = rec {
crateName = "lexical-write-integer";
version = "0.8.5";
edition = "2018";
sha256 = "0ii4hmvqrg6pd4j9y1pkhkp0nw2wpivjzmljh6v6ca22yk8z7dp1";
authors = [
"Alex Huszagh <ahuszagh@gmail.com>"
];
dependencies = [
{
name = "lexical-util";
packageId = "lexical-util";
usesDefaultFeatures = false;
features = [ "write-integers" ];
}
{
name = "static_assertions";
packageId = "static_assertions";
}
];
features = {
"compact" = [ "lexical-util/compact" ];
"default" = [ "std" ];
"format" = [ "lexical-util/format" ];
"lint" = [ "lexical-util/lint" ];
"power-of-two" = [ "lexical-util/power-of-two" ];
"radix" = [ "lexical-util/radix" "power-of-two" ];
"std" = [ "lexical-util/std" ];
};
resolvedDefaultFeatures = [ "format" "std" ];
};
"libc" = rec {
crateName = "libc";
version = "0.2.137";
@ -5464,6 +5694,16 @@ rec {
features = { };
resolvedDefaultFeatures = [ "all" ];
};
"static_assertions" = rec {
crateName = "static_assertions";
version = "1.1.0";
edition = "2015";
sha256 = "0gsl6xmw10gvn3zs1rv99laj5ig7ylffnh71f9l34js4nr4r7sx2";
authors = [
"Nikolai Vazquez"
];
features = { };
};
"str-buf" = rec {
crateName = "str-buf";
version = "1.0.6";
@ -7176,6 +7416,15 @@ rec {
packageId = "imbl";
features = [ "serde" ];
}
{
name = "lazy_static";
packageId = "lazy_static";
}
{
name = "lexical-core";
packageId = "lexical-core";
features = [ "format" "parse-floats" ];
}
{
name = "path-clean";
packageId = "path-clean";

View file

@ -15,6 +15,8 @@ codemap = "0.1.3"
codemap-diagnostic = "0.1.1"
dirs = "4.0.0"
imbl = { version = "2.0", features = [ "serde" ] }
lazy_static = "1.4.0"
lexical-core = { version = "0.8.5", features = ["format", "parse-floats"] }
path-clean = "0.1"
proptest = { version = "1.0.0", default_features = false, features = ["std", "alloc", "break-dead-code", "tempfile"], optional = true }
regex = "1.6.0"

View file

@ -1 +1 @@
[ { Image = { Animated = false; Height = 600; IDs = [ 116 943 234 38793 true false null -100 ]; Latitude = 37.7668; Longitude = -122.3959; Thumbnail = { Height = 125; Url = "http://www.example.com/image/481989943"; Width = 100; }; Title = "View from 15th Floor"; Width = 800; }; } { name = "a"; value = "b"; } [ 1 2 3 4 ] ]
[ { Image = { Animated = false; Height = 600; IDs = [ 116 943 234 38793 true false null -100 ]; Latitude = 37.7668; Longitude = -122.396; Thumbnail = { Height = 125; Url = "http://www.example.com/image/481989943"; Width = 100; }; Title = "View from 15th Floor"; Width = 800; }; } { name = "a"; value = "b"; } [ 1 2 3 4 ] ]

View file

@ -15,7 +15,7 @@
"Animated" : false,
"IDs": [116, 943, 234, 38793, true ,false,null, -100],
"Latitude": 37.7668,
"Longitude": -122.3959
"Longitude": -122.396
}
}
'')

View file

@ -1,11 +1,13 @@
//! This module implements the backing representation of runtime
//! values in the Nix language.
use std::cmp::Ordering;
use std::num::{NonZeroI32, NonZeroUsize};
use std::ops::Deref;
use std::path::PathBuf;
use std::rc::Rc;
use std::{cell::Ref, fmt::Display};
use lexical_core::format::CXX_LITERAL;
use serde::{Deserialize, Serialize};
#[cfg(feature = "arbitrary")]
@ -32,6 +34,8 @@ pub use thunk::Thunk;
use self::thunk::ThunkSet;
use lazy_static::lazy_static;
#[warn(variant_size_differences)]
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(untagged)]
@ -72,6 +76,17 @@ pub enum Value {
UnresolvedPath(PathBuf),
}
lazy_static! {
static ref WRITE_FLOAT_OPTIONS: lexical_core::WriteFloatOptions =
lexical_core::WriteFloatOptionsBuilder::new()
.trim_floats(true)
.round_mode(lexical_core::write_float_options::RoundMode::Round)
.positive_exponent_break(Some(NonZeroI32::new(5).unwrap()))
.max_significant_digits(Some(NonZeroUsize::new(6).unwrap()))
.build()
.unwrap();
}
// Helper macros to generate the to_*/as_* macros while accounting for
// thunks.
@ -514,9 +529,78 @@ impl TotalDisplay for Value {
Value::Builtin(builtin) => builtin.fmt(f),
// Nix prints floats with a maximum precision of 5 digits
// only.
// only. Except when it decides to use scientific notation
// (with a + after the `e`, and zero-padded to 0 digits)
Value::Float(num) => {
write!(f, "{}", format!("{:.5}", num).trim_end_matches(['.', '0']))
let mut buf = [b'0'; lexical_core::BUFFER_SIZE];
let mut s = lexical_core::write_with_options::<f64, { CXX_LITERAL }>(
num.clone(),
&mut buf,
&WRITE_FLOAT_OPTIONS,
);
// apply some postprocessing on the buffer. If scientific
// notation is used (we see an `e`), and the next character is
// a digit, add the missing `+` sign.)
let mut new_s = Vec::with_capacity(s.len());
if s.contains(&b'e') {
for (i, c) in s.iter().enumerate() {
// encountered `e`
if c == &b'e' {
// next character is a digit (so no negative exponent)
if s.len() > i && s[i + 1].is_ascii_digit() {
// copy everything from the start up to (including) the e
new_s.extend_from_slice(&s[0..=i]);
// add the missing '+'
new_s.push(b'+');
// check for the remaining characters.
// If it's only one, we need to prepend a trailing zero
if s.len() == i + 2 {
new_s.push(b'0');
}
new_s.extend_from_slice(&s[i + 1..]);
break;
}
}
}
// if we modified the scientific notation, flip the reference
if new_s.len() != 0 {
s = &mut new_s
}
}
// else, if this is not scientific notation, and there's a
// decimal point, make sure we really drop trailing zeroes.
// In some cases, lexical_core doesn't.
else if s.contains(&b'.') {
for (i, c) in s.iter().enumerate() {
// at `.``
if c == &b'.' {
// trim zeroes from the right side.
let frac = String::from_utf8_lossy(&s[i + 1..]);
let frac_no_trailing_zeroes = frac.trim_end_matches("0");
if frac.len() != frac_no_trailing_zeroes.len() {
// we managed to strip something, construct new_s
if frac_no_trailing_zeroes.is_empty() {
// if frac_no_trailing_zeroes is empty, the fractional part was all zeroes, so we can drop the decimal point as well
new_s.extend_from_slice(&s[0..=i - 1]);
} else {
// else, assemble the rest of the string
new_s.extend_from_slice(&s[0..=i]);
new_s.extend_from_slice(frac_no_trailing_zeroes.as_bytes());
}
// flip the reference
s = &mut new_s;
break;
}
}
}
}
write!(f, "{}", format!("{}", String::from_utf8_lossy(&s)))
}
// internal types