tvl-depot/users/sterni/nix/url/default.nix
sterni a5f2b446aa feat(sterni/nix/url): implement urldecoding
We use builtins.split directly as it should be a bit more efficient than
lib.splitString. Also, the fact that it returns a list for every regex
match is useful for updating the state while parsing the tokens:

* The tokens are obtained by splitting the string at every '%'
* Every time we see a boundary (that is, a list in the list returned
  by builtins.split), we know that the first two chars of
  the next string are a percent-encoded character.

One implementation flaw is that it will currently crash if it encounters
malformed URLs (since int.fromHex crashes if it encounters any non-hex-digit
characters), and that it accepts some malformed urlencoding like
"foo %A".

Change-Id: I90d08d7a71b16b4f4a4879214abd7aeff46c20c8
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2744
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
2021-04-01 13:09:46 +00:00

81 lines
1.6 KiB
Nix

{ depot, lib, ... }:
let
inherit (depot.users.sterni.nix)
char
int
string
flow
;
reserved = c:
  let
    # Characters treated as reserved delimiters; this is the union of the
    # gen-delims and sub-delims sets of RFC 3986, section 2.2.
    reservedChars = [
      "!" "#" "$" "&" "'" "(" ")"
      "*" "+" "," "/" ":" ";" "="
      "?" "@" "[" "]"
    ];
  in
  # true iff the single-character string `c` is one of the reserved characters
  builtins.elem c reservedChars;
unreserved = c:
  let
    # Non-alphanumeric characters that never need to be percent-encoded.
    unreservedMarks = [ "-" "_" "." "~" ];
  in
  # A character is unreserved if char.asciiAlphaNum accepts it or if it is
  # one of the four marks above.
  char.asciiAlphaNum c || builtins.elem c unreservedMarks;
percentEncode = c:
  let
    # Hexadecimal rendering of the character's ordinal as produced by
    # int.toHex.
    hex = int.toHex (char.ord c);
    # Passed through string.fit with width 2, fill char "0" and side "left";
    # presumably this left-pads single-digit values with a zero — confirm
    # against string.fit.
    padded = string.fit {
      width = 2;
      char = "0";
      side = "left";
    } hex;
  in
  # Unreserved characters are returned unchanged, everything else becomes
  # "%" followed by the padded hex value. The bindings above are lazy and
  # therefore only evaluated when the character actually needs encoding.
  if unreserved c
  then c
  else "%" + padded;
encode = { leaveReserved ? false }: s:
  let
    # A character is passed through untouched only when the caller set
    # leaveReserved and the character is in the reserved set.
    keepVerbatim = c: leaveReserved && reserved c;
    # Everything else is handed to percentEncode, which itself leaves
    # unreserved characters alone.
    encodeChar = c:
      if keepVerbatim c
      then c
      else percentEncode c;
  in
  # Encode the string character by character and join the results.
  lib.concatMapStrings encodeChar (lib.stringToCharacters s);
# Decode a percent-encoded string: every "%XX" (XX being two hexadecimal
# digits) is replaced by the character with that ordinal, all other text
# is copied verbatim.
#
# Throws a descriptive error if a '%' is not followed by exactly two
# hexadecimal digits (e.g. "foo %A", "100%" or "%zz"), instead of silently
# accepting the input or failing somewhere inside int.fromHex.
decode = s:
  let
    # builtins.split returns the text between matches as strings and one
    # list per regex match, so every list in `tokens` marks the position
    # of a '%' in the input.
    tokens = builtins.split "%" s;

    # Decode the text following a '%': its first two characters must be
    # hex digits forming the encoded character, the remainder is literal
    # text up to the next '%'.
    decodeEscape = rest:
      let
        digits = string.take 2 rest;
      in
      if builtins.match "[0-9A-Fa-f]{2}" digits == null
      then builtins.throw
        "url.decode: malformed percent-encoding \"%${digits}\" in \"${s}\""
      else char.chr (int.fromHex digits) + string.drop 2 rest;

    # Fold step. The state records the decoded output so far and whether
    # the previous token was a '%' boundary.
    decodeStep =
      { result ? ""
      , inPercent ? false
      }: token:
      if builtins.isList token
      then {
        # boundary: the next string token starts with the two hex digits
        inherit result;
        inPercent = true;
      }
      else if inPercent
      then {
        inPercent = false;
        result = result + decodeEscape token;
      }
      else {
        # plain text before the first '%'
        result = result + token;
      };
  in
  (builtins.foldl' decodeStep {} tokens).result;
in {
  # Public interface of this file: the percent-encoder and its inverse.
  inherit encode decode;
}