feat(sterni/nix/utf8): allow decoding the empty string

Change-Id: I8de9cd28c822ac5befbcd16e118440cd13cd86e9
This commit is contained in:
sterni 2021-11-22 22:20:16 +01:00
parent 8615322bc8
commit ab92c42f59
2 changed files with 3 additions and 3 deletions

View file

@ -201,10 +201,10 @@ let
# filter out all iteration steps without a codepoint value # filter out all iteration steps without a codepoint value
codepoint != null codepoint != null
# if we are at the iteration step of the input string, throw # if we are at the iteration step of a non-empty input string, throw
# an error if no codepoint was returned, as it indicates an incomplete # an error if no codepoint was returned, as it indicates an incomplete
# UTF-8 sequence. # UTF-8 sequence.
|| (stringIndex == stringLength - 1 && throw earlyEndMsg) || (stringLength > 0 && stringIndex == stringLength - 1 && throw earlyEndMsg)
) iterResult ) iterResult
); );

View file

@ -56,7 +56,6 @@ let
utf8.decode (string.fromBytes (builtins.map int.fromHex l)); utf8.decode (string.fromBytes (builtins.map int.fromHex l));
testFailures = it "checks UTF-8 decoding failures" [ testFailures = it "checks UTF-8 decoding failures" [
(assertThrows "emtpy bytestring throws" (utf8.decode ""))
(assertThrows "truncated UTF-8 string throws" (hexDecode [ "F0" "9F" ])) (assertThrows "truncated UTF-8 string throws" (hexDecode [ "F0" "9F" ]))
# examples from The Unicode Standard # examples from The Unicode Standard
(assertThrows "ill-formed: C0 AF" (hexDecode [ "C0" "AF" ])) (assertThrows "ill-formed: C0 AF" (hexDecode [ "C0" "AF" ]))
@ -75,6 +74,7 @@ let
]); ]);
randomUnicode = [ randomUnicode = [
"" # empty string should yield empty list
"🥰👨👨👧👦🐈👩🏽🦰" "🥰👨👨👧👦🐈👩🏽🦰"
# https://kermitproject.org/utf8.html # https://kermitproject.org/utf8.html
"" ""