diff --git a/users/sterni/nix/utf8/default.nix b/users/sterni/nix/utf8/default.nix index c89263cd8..c4a3e8eb0 100644 --- a/users/sterni/nix/utf8/default.nix +++ b/users/sterni/nix/utf8/default.nix @@ -201,10 +201,10 @@ let # filter out all iteration steps without a codepoint value codepoint != null - # if we are at the iteration step of the input string, throw + # if we are at the last iteration step of a non-empty input string, throw # an error if no codepoint was returned, as it indicates an incomplete # UTF-8 sequence. - || (stringIndex == stringLength - 1 && throw earlyEndMsg) + || (stringLength > 0 && stringIndex == stringLength - 1 && throw earlyEndMsg) ) iterResult ); diff --git a/users/sterni/nix/utf8/tests/default.nix b/users/sterni/nix/utf8/tests/default.nix index ed38bd124..2f8054fad 100644 --- a/users/sterni/nix/utf8/tests/default.nix +++ b/users/sterni/nix/utf8/tests/default.nix @@ -56,7 +56,6 @@ let utf8.decode (string.fromBytes (builtins.map int.fromHex l)); testFailures = it "checks UTF-8 decoding failures" [ - (assertThrows "emtpy bytestring throws" (utf8.decode "")) (assertThrows "truncated UTF-8 string throws" (hexDecode [ "F0" "9F" ])) # examples from The Unicode Standard (assertThrows "ill-formed: C0 AF" (hexDecode [ "C0" "AF" ])) @@ -75,6 +74,7 @@ let ]); randomUnicode = [ + "" # empty string should yield empty list "🥰👨‍👨‍👧‍👦🐈‍⬛👩🏽‍🦰" # https://kermitproject.org/utf8.html "ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ"