aed86ea4b3
Haskell implementation of the netncode parser & generators. Change-Id: I904df752e78641110af2857e71d8206bf432d427 Reviewed-on: https://cl.tvl.fyi/c/depot/+/6114 Tested-by: BuildkiteCI Reviewed-by: Profpatsch <mail@profpatsch.de>
399 lines
14 KiB
Haskell
399 lines
14 KiB
Haskell
{-# LANGUAGE AllowAmbiguousTypes #-}
|
||
{-# LANGUAGE DeriveFunctor #-}
|
||
{-# LANGUAGE DerivingStrategies #-}
|
||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||
{-# LANGUAGE LambdaCase #-}
|
||
{-# LANGUAGE OverloadedStrings #-}
|
||
{-# LANGUAGE QuasiQuotes #-}
|
||
{-# LANGUAGE RecordWildCards #-}
|
||
{-# LANGUAGE ScopedTypeVariables #-}
|
||
{-# LANGUAGE TemplateHaskell #-}
|
||
{-# LANGUAGE TypeApplications #-}
|
||
|
||
module Netencode where
|
||
|
||
import Control.Applicative (many)
|
||
import qualified Data.Attoparsec.ByteString as Atto
|
||
import qualified Data.Attoparsec.ByteString.Char8 as Atto.Char
|
||
import qualified Data.ByteString as ByteString
|
||
import Data.ByteString.Builder (Builder)
|
||
import qualified Data.ByteString.Builder as Builder
|
||
import qualified Data.ByteString.Lazy as ByteString.Lazy
|
||
import Data.Coerce (coerce)
|
||
import Data.Fix (Fix (Fix))
|
||
import qualified Data.Fix as Fix
|
||
import Data.Functor.Classes (Eq1 (liftEq))
|
||
import Data.Int (Int16, Int32, Int64, Int8)
|
||
import Data.List.NonEmpty (nonEmpty)
|
||
import Data.Map.NonEmpty (NEMap)
|
||
import qualified Data.Map.NonEmpty as NEMap
|
||
import qualified Data.Semigroup as Semi
|
||
import Data.String (IsString)
|
||
import Data.Word (Word16, Word32, Word64)
|
||
import GHC.Exts (fromString)
|
||
import qualified Hedgehog as Hedge
|
||
import qualified Hedgehog.Gen as Gen
|
||
import qualified Hedgehog.Range as Range
|
||
import MyPrelude
|
||
import Text.Show.Deriving
|
||
import Prelude hiding (sum)
|
||
import Data.Maybe (fromMaybe)
|
||
|
||
data TF rec
|
||
= Unit
|
||
| N1 Bool
|
||
| N3 Word8
|
||
| N6 Word64
|
||
| I6 Int64
|
||
| Text Text
|
||
| Bytes ByteString
|
||
| Sum (Tag Text rec)
|
||
| Record (NEMap Text rec)
|
||
| List [rec]
|
||
deriving stock (Show, Eq, Functor)
|
||
|
||
instance Eq1 TF where
|
||
liftEq _ Unit Unit = True
|
||
liftEq _ (N1 b) (N1 b') = b == b'
|
||
liftEq _ (N3 w8) (N3 w8') = w8 == w8'
|
||
liftEq _ (N6 w64) (N6 w64') = w64 == w64'
|
||
liftEq _ (I6 i64) (I6 i64') = i64 == i64'
|
||
liftEq _ (Text t) (Text t') = t == t'
|
||
liftEq _ (Bytes b) (Bytes b') = b == b'
|
||
liftEq eq (Sum t) (Sum t') = eq (t & tagVal) (t' & tagVal)
|
||
liftEq eq (Record m) (Record m') = liftEq eq m m'
|
||
liftEq eq (List xs) (List xs') = liftEq eq xs xs'
|
||
liftEq _ _ _ = False
|
||
|
||
data Tag tag val = Tag
|
||
{ tagTag :: tag,
|
||
tagVal :: val
|
||
}
|
||
deriving stock (Show, Eq, Functor)
|
||
|
||
$(Text.Show.Deriving.deriveShow1 ''Tag)
|
||
$(Text.Show.Deriving.deriveShow1 ''TF)
|
||
|
||
newtype T = T (Fix TF)
|
||
deriving stock (Eq, Show)
|
||
|
||
unit :: T
|
||
unit = T $ Fix Unit
|
||
|
||
n1 :: Bool -> T
|
||
n1 = T . Fix . N1
|
||
|
||
n3 :: Word8 -> T
|
||
n3 = T . Fix . N3
|
||
|
||
n6 :: Word64 -> T
|
||
n6 = T . Fix . N6
|
||
|
||
i6 :: Int64 -> T
|
||
i6 = T . Fix . I6
|
||
|
||
text :: Text -> T
|
||
text = T . Fix . Text
|
||
|
||
bytes :: ByteString -> T
|
||
bytes = T . Fix . Bytes
|
||
|
||
tag :: Text -> T -> T
|
||
tag key val = T $ Fix $ Sum $ coerce @(Tag Text T) @(Tag Text (Fix TF)) $ Tag key val
|
||
|
||
record :: NEMap Text T -> T
|
||
record = T . Fix . Record . coerce @(NEMap Text T) @(NEMap Text (Fix TF))
|
||
|
||
list :: [T] -> T
|
||
list = T . Fix . List . coerce @[T] @([Fix TF])
|
||
|
||
netencodeEncodeStable :: T -> Builder
|
||
netencodeEncodeStable (T fix) = Fix.foldFix (netencodeEncodeStableF id) fix
|
||
|
||
-- Stable encoding of a netencode value. Record keys will be sorted lexicographically ascending.
|
||
netencodeEncodeStableF :: (rec -> Builder) -> TF rec -> Builder
|
||
netencodeEncodeStableF inner tf = builder go
|
||
where
|
||
-- TODO: directly pass in BL?
|
||
innerBL = fromBuilder . inner
|
||
go = case tf of
|
||
Unit -> "u,"
|
||
N1 False -> "n1:0,"
|
||
N1 True -> "n1:1,"
|
||
N3 w8 -> "n3:" <> fromBuilder (Builder.word8Dec w8) <> ","
|
||
N6 w64 -> "n6:" <> fromBuilder (Builder.word64Dec w64) <> ","
|
||
I6 i64 -> "i6:" <> fromBuilder (Builder.int64Dec i64) <> ","
|
||
Text t ->
|
||
let b = fromText t
|
||
in "t" <> builderLenDec b <> ":" <> b <> ","
|
||
Bytes b -> "b" <> builderLenDec (fromByteString b) <> ":" <> fromByteString b <> ","
|
||
Sum (Tag key val) -> encTag key val
|
||
Record m ->
|
||
-- NEMap uses Map internally, and that folds in lexicographic ascending order over the key.
|
||
-- Since these are `Text` in our case, this is stable.
|
||
let mBuilder = m & NEMap.foldMapWithKey encTag
|
||
in "{" <> builderLenDec mBuilder <> ":" <> mBuilder <> "}"
|
||
List xs ->
|
||
let xsBuilder = xs <&> innerBL & mconcat
|
||
in "[" <> builderLenDec xsBuilder <> ":" <> xsBuilder <> "]"
|
||
where
|
||
encTag key val =
|
||
let bKey = fromText key
|
||
in "<" <> builderLenDec bKey <> ":" <> bKey <> "|" <> innerBL val
|
||
|
||
-- | A builder that knows its own size in bytes
|
||
newtype BL = BL (Builder, Semi.Sum Natural)
|
||
deriving newtype (Monoid, Semigroup)
|
||
|
||
instance IsString BL where
|
||
fromString s =
|
||
BL
|
||
( fromString @Builder s,
|
||
fromString @ByteString s
|
||
& ByteString.length
|
||
& intToNatural
|
||
& fromMaybe 0
|
||
& Semi.Sum
|
||
)
|
||
|
||
builder :: BL -> Builder
|
||
builder (BL (b, _)) = b
|
||
|
||
builderLen :: BL -> Natural
|
||
builderLen (BL (_, len)) = Semi.getSum $ len
|
||
|
||
builderLenDec :: BL -> BL
|
||
builderLenDec (BL (_, len)) =
|
||
let b = Builder.intDec $ (len & Semi.getSum & fromIntegral @Natural @Int)
|
||
in b & fromBuilder
|
||
|
||
-- | Not efficient, goes back to a lazy bytestring to get the length
|
||
fromBuilder :: Builder -> BL
|
||
fromBuilder b =
|
||
BL
|
||
( b,
|
||
b
|
||
& Builder.toLazyByteString
|
||
& ByteString.Lazy.length
|
||
& fromIntegral @Int64 @Natural
|
||
& Semi.Sum
|
||
)
|
||
|
||
fromByteString :: ByteString -> BL
|
||
fromByteString b =
|
||
BL
|
||
( Builder.byteString b,
|
||
b
|
||
& ByteString.length
|
||
& fromIntegral @Int @Natural
|
||
& Semi.Sum
|
||
)
|
||
|
||
fromText :: Text -> BL
|
||
fromText t = t & textToBytesUtf8 & fromByteString
|
||
|
||
netencodeParser :: Atto.Parser T
|
||
netencodeParser = T <$> go
|
||
where
|
||
go = Fix <$> netencodeParserF go
|
||
|
||
netencodeParserF :: Atto.Parser rec -> Atto.Parser (TF rec)
|
||
netencodeParserF inner = do
|
||
typeTag <- Atto.Char.anyChar
|
||
case typeTag of
|
||
't' -> Text <$> textParser
|
||
'b' -> Bytes <$> bytesParser
|
||
'u' -> unitParser
|
||
'<' -> Sum <$> tagParser
|
||
'{' -> Record <$> recordParser
|
||
'[' -> List <$> listParser
|
||
'n' -> naturalParser
|
||
'i' -> I6 <$> intParser
|
||
c -> fail ([c] <> " is not a valid netencode tag")
|
||
where
|
||
bytesParser = do
|
||
len <- boundedDecimalFail Atto.<?> "bytes is missing a digit specifying the length"
|
||
_ <- Atto.Char.char ':' Atto.<?> "bytes did not have : after length"
|
||
bytes' <- Atto.take len
|
||
_ <- Atto.Char.char ',' Atto.<?> "bytes did not end with ,"
|
||
pure bytes'
|
||
|
||
textParser = do
|
||
len <- boundedDecimalFail Atto.<?> "text is missing a digit specifying the length"
|
||
_ <- Atto.Char.char ':' Atto.<?> "text did not have : after length"
|
||
text' <-
|
||
Atto.take len <&> bytesToTextUtf8 >>= \case
|
||
Left err -> fail [fmt|cannot decode text as utf8: {err & prettyError}|]
|
||
Right t -> pure t
|
||
_ <- Atto.Char.char ',' Atto.<?> "text did not end with ,"
|
||
pure text'
|
||
|
||
unitParser = do
|
||
_ <- Atto.Char.char ',' Atto.<?> "unit did not end with ,"
|
||
pure $ Unit
|
||
|
||
tagParser = do
|
||
len <- boundedDecimalFail Atto.<?> "tag is missing a digit specifying the length"
|
||
_ <- Atto.Char.char ':' Atto.<?> "tag did not have : after length"
|
||
tagTag <-
|
||
Atto.take len <&> bytesToTextUtf8 >>= \case
|
||
Left err -> fail [fmt|cannot decode tag key as utf8: {err & prettyError}|]
|
||
Right t -> pure t
|
||
_ <- Atto.Char.char '|' Atto.<?> "tag was missing the key/value separator (|)"
|
||
tagVal <- inner
|
||
pure $ Tag {..}
|
||
|
||
recordParser = do
|
||
-- TODO: the record does not use its inner length because we are descending into the inner parsers.
|
||
-- This is a smell! In theory it can be used to skip parsing the whole inner keys.
|
||
_len <- boundedDecimalFail Atto.<?> "record is missing a digit specifying the length"
|
||
_ <- Atto.Char.char ':' Atto.<?> "record did not have : after length"
|
||
record' <-
|
||
many (Atto.Char.char '<' >> tagParser) <&> nonEmpty >>= \case
|
||
Nothing -> fail "record is not allowed to have 0 elements"
|
||
Just tags ->
|
||
pure $
|
||
tags <&> (\t -> (t & tagTag, t & tagVal))
|
||
-- later keys are preferred if they are duplicates, according to the standard
|
||
& NEMap.fromList
|
||
_ <- Atto.Char.char '}' Atto.<?> "record did not end with }"
|
||
pure record'
|
||
|
||
listParser = do
|
||
-- TODO: the list does not use its inner length because we are descending into the inner parsers.
|
||
-- This is a smell! In theory it can be used to skip parsing the whole inner keys.
|
||
_len <- boundedDecimalFail Atto.<?> "list is missing a digit specifying the length"
|
||
_ <- Atto.Char.char ':' Atto.<?> "list did not have : after length"
|
||
-- TODO: allow empty lists?
|
||
list' <- many inner
|
||
_ <- Atto.Char.char ']' Atto.<?> "list did not end with ]"
|
||
pure list'
|
||
|
||
intParser = do
|
||
let p :: forall parseSize. (Bounded parseSize, Integral parseSize) => (Integer -> Atto.Parser Int64)
|
||
p n = do
|
||
_ <- Atto.Char.char ':' Atto.<?> [fmt|i{n & show} did not have : after length|]
|
||
isNegative <- Atto.option False (Atto.Char.char '-' <&> \_c -> True)
|
||
int <-
|
||
boundedDecimal @parseSize >>= \case
|
||
Nothing -> fail [fmt|cannot parse into i{n & show}, the number is too big (would overflow)|]
|
||
Just i ->
|
||
pure $
|
||
if isNegative
|
||
then -- TODO: this should alread be done in the decimal parser, @minBound@ cannot be parsed cause it’s one more than @(-maxBound)@!
|
||
(-i)
|
||
else i
|
||
_ <- Atto.Char.char ',' Atto.<?> [fmt|i{n & show} did not end with ,|]
|
||
pure $ fromIntegral @parseSize @Int64 int
|
||
digit <- Atto.Char.digit
|
||
case digit of
|
||
-- TODO: separate parser for i1 and i2 that makes sure the boundaries are right!
|
||
'1' -> p @Int8 1
|
||
'2' -> p @Int8 2
|
||
'3' -> p @Int8 3
|
||
'4' -> p @Int16 4
|
||
'5' -> p @Int32 5
|
||
'6' -> p @Int64 6
|
||
'7' -> fail [fmt|i parser only supports numbers up to size 6, was 7|]
|
||
'8' -> fail [fmt|i parser only supports numbers up to size 6, was 8|]
|
||
'9' -> fail [fmt|i parser only supports numbers up to size 6, was 9|]
|
||
o -> fail [fmt|i number with length {o & show} not possible|]
|
||
|
||
naturalParser = do
|
||
let p :: forall parseSize finalSize. (Bounded parseSize, Integral parseSize, Num finalSize) => (Integer -> Atto.Parser finalSize)
|
||
p n = do
|
||
_ <- Atto.Char.char ':' Atto.<?> [fmt|n{n & show} did not have : after length|]
|
||
int <-
|
||
boundedDecimal @parseSize >>= \case
|
||
Nothing -> fail [fmt|cannot parse into n{n & show}, the number is too big (would overflow)|]
|
||
Just i -> pure i
|
||
|
||
_ <- Atto.Char.char ',' Atto.<?> [fmt|n{n & show} did not end with ,|]
|
||
pure $ fromIntegral @parseSize @finalSize int
|
||
let b n = do
|
||
_ <- Atto.Char.char ':' Atto.<?> [fmt|n{n & show} did not have : after length|]
|
||
bool <-
|
||
(Atto.Char.char '0' >> pure False)
|
||
<|> (Atto.Char.char '1' >> pure True)
|
||
_ <- Atto.Char.char ',' Atto.<?> [fmt|n{n & show} did not end with ,|]
|
||
pure bool
|
||
|
||
digit <- Atto.Char.digit
|
||
case digit of
|
||
-- TODO: separate parser for n1 and n2 that makes sure the boundaries are right!
|
||
'1' -> N1 <$> b 1
|
||
'2' -> N3 <$> p @Word8 @Word8 2
|
||
'3' -> N3 <$> p @Word8 @Word8 3
|
||
'4' -> N6 <$> p @Word16 @Word64 4
|
||
'5' -> N6 <$> p @Word32 @Word64 5
|
||
'6' -> N6 <$> p @Word64 @Word64 6
|
||
'7' -> fail [fmt|n parser only supports numbers up to size 6, was 7|]
|
||
'8' -> fail [fmt|n parser only supports numbers up to size 6, was 8|]
|
||
'9' -> fail [fmt|n parser only supports numbers up to size 6, was 9|]
|
||
o -> fail [fmt|n number with length {o & show} not possible|]
|
||
|
||
-- | via https://www.extrema.is/blog/2021/10/20/parsing-bounded-integers
|
||
boundedDecimal :: forall a. (Bounded a, Integral a) => Atto.Parser (Maybe a)
|
||
boundedDecimal = do
|
||
i :: Integer <- decimal
|
||
pure $
|
||
if (i :: Integer) > fromIntegral (maxBound :: a)
|
||
then Nothing
|
||
else Just $ fromIntegral i
|
||
where
|
||
-- Copied from @Attoparsec.Text@ and adjusted to bytestring
|
||
decimal :: (Integral a2) => Atto.Parser a2
|
||
decimal = ByteString.foldl' step 0 <$> Atto.Char.takeWhile1 Atto.Char.isDigit
|
||
where
|
||
step a c = a * 10 + fromIntegral (c - 48)
|
||
{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Int) #-}
|
||
{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Int64) #-}
|
||
{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Word8) #-}
|
||
{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Word64) #-}
|
||
|
||
boundedDecimalFail :: Atto.Parser Int
|
||
boundedDecimalFail =
|
||
boundedDecimal >>= \case
|
||
Nothing -> fail "decimal out of range"
|
||
Just a -> pure a
|
||
|
||
genNetencode :: Hedge.MonadGen m => m T
|
||
genNetencode =
|
||
Gen.recursive
|
||
Gen.choice
|
||
[ -- these are bundled into one Gen, so that scalar elements get chosen less frequently, and the generator produces nicely nested examples
|
||
Gen.frequency
|
||
[ (1, pure unit),
|
||
(1, n1 <$> Gen.bool),
|
||
(1, n3 <$> Gen.element [0, 1, 5]),
|
||
(1, n6 <$> Gen.element [0, 1, 5]),
|
||
(1, i6 <$> Gen.element [-1, 1, 5]),
|
||
(2, text <$> Gen.text (Range.linear 1 10) Gen.lower),
|
||
(2, bytes <$> Gen.bytes (Range.linear 1 10))
|
||
]
|
||
]
|
||
[ do
|
||
key <- Gen.text (Range.linear 3 10) Gen.lower
|
||
val <- genNetencode
|
||
pure $ tag key val,
|
||
record
|
||
<$> ( let k = Gen.text (Range.linear 3 10) Gen.lower
|
||
v = genNetencode
|
||
in NEMap.insertMap <$> k <*> v
|
||
<*> ( (Gen.map (Range.linear 0 3)) $
|
||
(,) <$> k <*> v
|
||
)
|
||
)
|
||
]
|
||
|
||
prop_netencodeRoundtrip :: Hedge.Property
|
||
prop_netencodeRoundtrip = Hedge.property $ do
|
||
enc <- Hedge.forAll genNetencode
|
||
( Atto.parseOnly
|
||
netencodeParser
|
||
( netencodeEncodeStable enc
|
||
& Builder.toLazyByteString
|
||
& toStrictBytes
|
||
)
|
||
)
|
||
Hedge.=== (Right enc)
|