tvl-depot/users/sterni/nix/utf8/tests/default.nix
Profpatsch eb41eef612 chore(nix): move rustSimple from users.Profpatsch.writers
I think it’s solid enough to use in a wider context.

Change-Id: If53e8bbb6b90fa88d73fb42730db470e822ea182
Reviewed-on: https://cl.tvl.fyi/c/depot/+/3055
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
Reviewed-by: lukegb <lukegb@tvl.fyi>
2021-04-24 10:23:55 +00:00

121 lines
4 KiB
Nix
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{ depot, pkgs, lib, ... }:
let
inherit (pkgs)
runCommandLocal
;
inherit (depot.nix.runTestsuite)
runTestsuite
it
assertEq
assertThrows
assertDoesNotThrow
;
inherit (depot.nix.writers)
rustSimple
;
inherit (depot.users.sterni.nix)
int
utf8
string
char
;
# Reference decoder used to cross-check utf8.decode: a small Rust program
# built with rustSimple. It reads all of stdin into a String and prints the
# numeric value of every char, separated by spaces and wrapped in "[ " / "]".
rustDecoder =
  let
    # Every line of the program is kept at the same indentation so that the
    # indented string evaluates to exactly the same Rust source text.
    decoderSource = ''
      use std::io::{self, Read};
      fn main() -> std::io::Result<()> {
      let mut buffer = String::new();
      io::stdin().read_to_string(&mut buffer)?;
      print!("[ ");
      for c in buffer.chars() {
      print!("{} ", u32::from(c));
      }
      print!("]");
      Ok(())
      }
    '';
  in
  rustSimple { name = "utf8-decode"; } decoderSource;
# Decode the string `s` with the Rust reference decoder.
#
# Builds a local derivation that pipes `s` into rustDecoder and stores the
# program's output in $out, then imports that output file as a Nix
# expression and returns the resulting value.
rustDecode = s:
  let
    # Store path names only accept a restricted ASCII character set
    # (letters, digits and "+-._?="), while `s` is an arbitrary test string
    # containing spaces, punctuation and non-ASCII text. The derivation is
    # therefore named after a hash of the input rather than the input itself.
    drvName = "utf8-${builtins.hashString "sha256" s}-decoded";
    expr = runCommandLocal drvName { } ''
      printf '%s' ${lib.escapeShellArg s} | ${rustDecoder} > $out
    '';
  in
  import expr;
# Run utf8.decode on a byte sequence given as a list of hexadecimal strings,
# e.g. hexDecode [ "C0" "AF" ].
hexDecode = l:
  let
    # Each hex string is converted with int.fromHex ...
    byteValues = builtins.map int.fromHex l;
    # ... and the resulting list is turned into a string via string.fromBytes.
    rawString = string.fromBytes byteValues;
  in
  utf8.decode rawString;
# Inputs on which utf8.decode is expected to throw (empty input, a truncated
# sequence, and ill-formed sequences), plus one well-formed four-byte
# sequence as a positive counter-example.
testFailures = it "checks UTF-8 decoding failures" [
  (assertThrows "empty bytestring throws" (utf8.decode ""))
  (assertThrows "truncated UTF-8 string throws" (hexDecode [ "F0" "9F" ]))
  # examples from The Unicode Standard
  (assertThrows "ill-formed: C0 AF" (hexDecode [ "C0" "AF" ]))
  (assertThrows "ill-formed: E0 9F 80" (hexDecode [ "E0" "9F" "80" ]))
  # F4 80 83 92 encodes U+1000D2, i.e. 1048786 in decimal.
  (assertEq "well-formed: F4 80 83 92" (hexDecode [ "F4" "80" "83" "92" ]) [ 1048786 ])
];
# For each sample string, the result of string.toBytes must equal the result
# of utf8.decode.
testAscii =
  let
    asciiSamples = [
      "foo bar"
      "hello\nworld"
      "carriage\r\nreturn"
      "1238398494829304 []<><>({})[]!!)"
      (string.take 127 char.allChars)
    ];

    # One assertion per sample, labelled with the sample itself.
    checkSample = s:
      assertEq "ASCII decoding is equal to UTF-8 decoding for \"${s}\""
        (string.toBytes s)
        (utf8.decode s);
  in
  it "checks decoding of ascii strings" (builtins.map checkSample asciiSamples);
# Assorted non-ASCII sample strings. testDecoding decodes each entry with
# utf8.decode and compares the result against rustDecode.
# NOTE(review): some entries look empty or nearly empty here; confirm that
# the intended characters are still present in the file.
randomUnicode = [
"🥰👨👨👧👦🐈👩🏽🦰"
# https://kermitproject.org/utf8.html
# NOTE(review): if this entry really is the empty string, it conflicts with
# testFailures, which expects utf8.decode "" to throw — verify.
""
"An preost wes on leoden, Laȝamon was ihoten"
"Sîne klâwen durh die wolken sint geslagen,"
"Τ γλσσα μο δωσαν λληνικ"
"На берегу пустынных волн"
" "
"ி ிி ிி ி , "
" ಿ ಿ "
];
# https://kermitproject.org/utf8.html
# Sample sentences, each prefixed with a label naming its language or
# script. testDecoding decodes each entry with utf8.decode and compares the
# result against rustDecode.
# NOTE(review): several entries contain only the label and punctuation
# (e.g. "Georgian: ."); confirm the non-Latin text was not lost.
glassSentences = [
"Euro Symbol: ."
"Greek: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα."
"Íslenska / Icelandic: Ég get etið gler án þess að meiða mig."
"Polish: Mogę jeść szkło, i mi nie szkodzi."
"Romanian: Pot să mănânc sticlă și ea nu mă rănește."
"Ukrainian: Я можу їсти шкло, й воно мені не пошкодить."
"Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։"
"Georgian: ."
"Hindi: , ."
"Hebrew(2): אני יכול לאכול זכוכית וזה לא מזיק לי."
"Yiddish(2): איך קען עסן גלאָז און עס טוט מיר נישט װײ."
"Arabic(2): أنا قادر على أكل الزجاج و هذا لا يؤلمني."
"Japanese: "
"Thai: "
];
# Cross-check: every sample string must decode to the same value with
# utf8.decode as with the Rust reference decoder (rustDecode).
testDecoding =
  let
    # Both inputs are flat lists of strings, so concatenating them yields
    # the same list as flattening a list containing the two.
    samples = glassSentences ++ randomUnicode;

    compareWithRust = s:
      assertEq "Decoding of ${s} is correct" (utf8.decode s) (rustDecode s);
  in
  it "checks decoding of UTF-8 strings against Rust's String"
    (builtins.map compareWithRust samples);
in
# Result of this file: the "nix.utf8" test suite, made up of the three test
# groups defined above, in this order.
runTestsuite "nix.utf8" [ testFailures testAscii testDecoding ]