tvl-depot/users/sterni/nix/utf8/tests/default.nix
sterni 8d4b2f3d54 refactor(sterni): use pkgs over third_party to import from nixpkgs
This should ease migrating to a distinction between depot.third_party
and pkgs (as in nixpkgs) in the future.

Ref cl/2910, b/108.

Change-Id: I53a854071fddd7c0d0526cc4c5b16998202082c6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2913
Tested-by: BuildkiteCI
Reviewed-by: tazjin <mail@tazj.in>
2021-04-10 11:40:18 +00:00

121 lines
4.1 KiB
Nix
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{ depot, pkgs, lib, ... }:
let
inherit (pkgs)
runCommandLocal
;
inherit (depot.nix.runTestsuite)
runTestsuite
it
assertEq
assertThrows
assertDoesNotThrow
;
inherit (depot.users.Profpatsch.writers)
rustSimple
;
inherit (depot.users.sterni.nix)
int
utf8
string
char
;
# Reference decoder built with `rustSimple`, used by `rustDecode` below to
# cross-check `utf8.decode`.
#
# The program reads all of stdin into a Rust `String` (so the read fails on
# input that is not valid UTF-8) and prints the scalar value of every `char`
# in decimal, separated by spaces and wrapped in `[ ` … `]`. The output is
# therefore text that parses as a Nix list of integers, e.g. `[ 102 111 111 ]`.
rustDecoder = rustSimple {
name = "utf8-decode";
} ''
use std::io::{self, Read};
fn main() -> std::io::Result<()> {
let mut buffer = String::new();
io::stdin().read_to_string(&mut buffer)?;
print!("[ ");
for c in buffer.chars() {
print!("{} ", u32::from(c));
}
print!("]");
Ok(())
}
'';
# Decode the string `s` with the reference Rust decoder and return the result
# as a Nix list of code points.
#
# `s` is piped through `rustDecoder` inside a local derivation; the decoder
# prints a Nix list literal to `$out`, which is then imported
# (import-from-derivation).
rustDecode = s:
  let
    # The derivation name ends up in the store path, which only permits
    # [A-Za-z0-9+-._?=] and at most 211 characters. The test samples contain
    # spaces, punctuation and non-ASCII text, so they cannot be used as a
    # name directly. Hashing the input yields a legal name that is still
    # unique per sample.
    name = "utf8-decoded-${builtins.hashString "sha256" s}";
    expr = runCommandLocal name { } ''
      printf '%s' ${lib.escapeShellArg s} | ${rustDecoder} > $out
    '';
  in
  import expr;
# Run `utf8.decode` on a byte sequence written as a list of hex strings,
# e.g. `hexDecode [ "F0" "9F" ]`. This lets the tests spell out byte
# sequences (including ill-formed ones) explicitly.
hexDecode = hexBytes:
  let
    # Convert every hex string to its integer byte value first …
    bytes = builtins.map int.fromHex hexBytes;
  in
  # … then assemble the bytes into a string and decode it.
  utf8.decode (string.fromBytes bytes);
# Checks that `utf8.decode` rejects input it must not accept: the empty
# string, a truncated multi-byte sequence and ill-formed byte sequences.
# One well-formed four-byte sequence is included as a positive control.
testFailures = it "checks UTF-8 decoding failures" [
  (assertThrows "empty bytestring throws" (utf8.decode ""))
  (assertThrows "truncated UTF-8 string throws" (hexDecode [ "F0" "9F" ]))
  # examples from The Unicode Standard
  (assertThrows "ill-formed: C0 AF" (hexDecode [ "C0" "AF" ]))
  (assertThrows "ill-formed: E0 9F 80" (hexDecode [ "E0" "9F" "80" ]))
  # F4 80 83 92 encodes U+1000D2, i.e. 1048786 in decimal.
  (assertEq "well-formed: F4 80 83 92" (hexDecode [ "F4" "80" "83" "92" ]) [ 1048786 ])
];
# For every ASCII sample, the list produced by `string.toBytes` must equal
# the list produced by `utf8.decode`.
testAscii =
  let
    asciiSamples = [
      "foo bar"
      "hello\nworld"
      "carriage\r\nreturn"
      "1238398494829304 []<><>({})[]!!)"
      # presumably the first 127 entries of `char.allChars` are exactly the
      # ASCII characters — verify against the `char` library
      (string.take 127 char.allChars)
    ];

    # Build one assertion comparing the raw bytes with the decoded result.
    mkCase = sample:
      assertEq
        "ASCII decoding is equal to UTF-8 decoding for \"${sample}\""
        (string.toBytes sample)
        (utf8.decode sample);
  in
  it "checks decoding of ascii strings" (builtins.map mkCase asciiSamples);
# Assorted non-ASCII sample strings that `testDecoding` feeds to both
# `utf8.decode` and the Rust reference decoder.
#
# NOTE(review): some entries look empty or blank in this view; they may
# contain characters that do not render (combining marks, joiners, scripts
# missing from the font). Do not trim or "clean up" these literals. If an
# entry really is `""`, it would make `utf8.decode` throw, since the empty
# string is expected to be rejected — confirm against the raw file.
randomUnicode = [
"🥰👨👨👧👦🐈👩🏽🦰"
# https://kermitproject.org/utf8.html
""
"An preost wes on leoden, Laȝamon was ihoten"
"Sîne klâwen durh die wolken sint geslagen,"
"Τ γλσσα μο δωσαν λληνικ"
"На берегу пустынных волн"
" "
"ி ிி ிி ி , "
" ಿ ಿ "
];
# https://kermitproject.org/utf8.html
# Sample sentences in various languages and scripts, each prefixed with a
# label; `testDecoding` decodes every entry and compares the result with the
# Rust reference decoder.
#
# NOTE(review): a few entries show only the label here (e.g. "Japanese: ");
# the sentence text may consist of characters that do not render in this
# view — keep the literals exactly as they are.
glassSentences = [
"Euro Symbol: ."
"Greek: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα."
"Íslenska / Icelandic: Ég get etið gler án þess að meiða mig."
"Polish: Mogę jeść szkło, i mi nie szkodzi."
"Romanian: Pot să mănânc sticlă și ea nu mă rănește."
"Ukrainian: Я можу їсти шкло, й воно мені не пошкодить."
"Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։"
"Georgian: ."
"Hindi: , ."
"Hebrew(2): אני יכול לאכול זכוכית וזה לא מזיק לי."
"Yiddish(2): איך קען עסן גלאָז און עס טוט מיר נישט װײ."
"Arabic(2): أنا قادر على أكل الزجاج و هذا لا يؤلمني."
"Japanese: "
"Thai: "
];
# Differential test: every sample string must decode to the same list with
# `utf8.decode` as with the Rust reference decoder (`rustDecode`).
testDecoding =
  let
    # All sample strings, glass sentences first.
    samples = lib.flatten [
      glassSentences
      randomUnicode
    ];

    # One assertion per sample; the Rust result is the expected value.
    matchesRust = sample:
      assertEq "Decoding of ${sample} is correct"
        (utf8.decode sample)
        (rustDecode sample);
  in
  it "checks decoding of UTF-8 strings against Rust's String"
    (builtins.map matchesRust samples);
in
# Result of this file: the "nix.utf8" test suite, made up of the three test
# groups defined above.
runTestsuite "nix.utf8" [
testFailures
testAscii
testDecoding
]