feat(users/Profpatsch/netencode): Add initial Haskell parser

A simple categorical parser that does not implement Monad, and does
not contain an `m` and some rudementary error message handling.

In the future I’d probably want to wrap everything in an additional
`m`, so that subparsers can somehow use `Selective` to throw errors
from within `m` that contain the parsing context if at all possible.
Hard to do without Monad, I have to say. Not even stuff like `StateT`
works without the inner `m` implementing `Monad`.

Change-Id: I1366eda606ddfb019637b09c82d8b0e30bd4e318
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7797
Tested-by: BuildkiteCI
Reviewed-by: Profpatsch <mail@profpatsch.de>
This commit is contained in:
Profpatsch 2023-01-08 23:41:17 +01:00
parent 8cdefc5b25
commit cd40585ea4
7 changed files with 169 additions and 45 deletions

View file

@ -1,4 +1,3 @@
{-# LANGUAGE GHC2021 #-}
{-# LANGUAGE ImplicitParams #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MagicHash #-}
@ -52,10 +51,10 @@ module MyPrelude
when,
unless,
guard,
ExceptT,
ExceptT (..),
runExceptT,
MonadError,
throwError,
MonadThrow,
throwM,
MonadIO,
liftIO,
MonadReader,
@ -79,6 +78,8 @@ module MyPrelude
traverseFold,
traverseFold1,
traverseFoldDefault,
MonadTrans,
lift,
-- * Data types
Coercible,
@ -145,15 +146,15 @@ where
import Control.Applicative ((<|>))
import Control.Category (Category, (>>>))
import Control.Monad (guard, join, unless, when)
import Control.Monad.Catch (MonadThrow (throwM))
import Control.Monad.Except
( ExceptT,
MonadError,
( ExceptT (..),
runExceptT,
throwError,
)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.Identity (Identity (Identity))
import Control.Monad.Reader (MonadReader, asks)
import Control.Monad.Trans (MonadTrans (lift))
import Data.Bifunctor (Bifunctor, bimap, first, second)
import Data.ByteString
( ByteString,

View file

@ -30,6 +30,7 @@ library
, profunctors
, containers
, error
, exceptions
, bytestring
, mtl
, hspec
@ -38,4 +39,4 @@ library
, nicify-lib
, ansi-terminal
, vector
default-language: Haskell2010
default-language: GHC2021

View file

@ -1,6 +1,7 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GHC2021 #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
@ -13,57 +14,56 @@
module Netencode where
import Control.Applicative (many)
import qualified Data.Attoparsec.ByteString as Atto
import qualified Data.Attoparsec.ByteString.Char8 as Atto.Char
import qualified Data.ByteString as ByteString
import Data.Attoparsec.ByteString qualified as Atto
import Data.Attoparsec.ByteString.Char8 qualified as Atto.Char
import Data.ByteString qualified as ByteString
import Data.ByteString.Builder (Builder)
import qualified Data.ByteString.Builder as Builder
import qualified Data.ByteString.Lazy as ByteString.Lazy
import Data.ByteString.Builder qualified as Builder
import Data.ByteString.Lazy qualified as ByteString.Lazy
import Data.Coerce (coerce)
import Data.Fix (Fix (Fix))
import qualified Data.Fix as Fix
import Data.Fix qualified as Fix
import Data.Functor.Classes (Eq1 (liftEq))
import Data.Int (Int16, Int32, Int64, Int8)
import Data.List.NonEmpty (nonEmpty)
import Data.Map.NonEmpty (NEMap)
import qualified Data.Map.NonEmpty as NEMap
import qualified Data.Semigroup as Semi
import Data.Map.NonEmpty qualified as NEMap
import Data.Maybe (fromMaybe)
import Data.Semigroup qualified as Semi
import Data.String (IsString)
import Data.Word (Word16, Word32, Word64)
import GHC.Exts (fromString)
import qualified Hedgehog as Hedge
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Range as Range
import Hedgehog qualified as Hedge
import Hedgehog.Gen qualified as Gen
import Hedgehog.Range qualified as Range
import MyPrelude
import Text.Show.Deriving
import Prelude hiding (sum)
import Data.Maybe (fromMaybe)
-- | Netencode type base functor.
--
-- Recursive elements have a @rec@.
data TF rec
= Unit
-- ^ Unit value
| N1 Bool
-- ^ Boolean (2^1)
| N3 Word8
-- ^ Byte (2^3)
| N6 Word64
-- ^ 64-bit Natural (2^6)
| I6 Int64
-- ^ 64-bit Integer (2^6)
| Text Text
-- ^ Unicode Text
| Bytes ByteString
-- ^ Arbitrary Bytestring
| Sum (Tag Text rec)
-- ^ A constructor of a(n open) Sum
| Record (NEMap Text rec)
-- ^ Record
| List [rec]
-- ^ List
= -- | Unit value
Unit
| -- | Boolean (2^1)
N1 Bool
| -- | Byte (2^3)
N3 Word8
| -- | 64-bit Natural (2^6)
N6 Word64
| -- | 64-bit Integer (2^6)
I6 Int64
| -- | Unicode Text
Text Text
| -- | Arbitrary Bytestring
Bytes ByteString
| -- | A constructor of a(n open) Sum
Sum (Tag Text rec)
| -- | Record
Record (NEMap Text rec)
| -- | List
List [rec]
deriving stock (Show, Eq, Functor)
instance Eq1 TF where
@ -90,7 +90,7 @@ $(Text.Show.Deriving.deriveShow1 ''Tag)
$(Text.Show.Deriving.deriveShow1 ''TF)
-- | The Netencode type
newtype T = T (Fix TF)
newtype T = T {unT :: Fix TF}
deriving stock (Eq, Show)
-- | Create a unit
@ -291,7 +291,8 @@ netencodeParserF inner = do
Nothing -> fail "record is not allowed to have 0 elements"
Just tags ->
pure $
tags <&> (\t -> (t & tagTag, t & tagVal))
tags
<&> (\t -> (t & tagTag, t & tagVal))
-- later keys are preferred if they are duplicates, according to the standard
& NEMap.fromList
_ <- Atto.Char.char '}' Atto.<?> "record did not end with }"
@ -421,7 +422,9 @@ genNetencode =
record
<$> ( let k = Gen.text (Range.linear 3 10) Gen.lower
v = genNetencode
in NEMap.insertMap <$> k <*> v
in NEMap.insertMap
<$> k
<*> v
<*> ( (Gen.map (Range.linear 0 3)) $
(,) <$> k <*> v
)

View file

@ -0,0 +1,112 @@
{-# LANGUAGE ApplicativeDo #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE GHC2021 #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE NoFieldSelectors #-}
{-# OPTIONS_GHC -Wall #-}
module Netencode.Parse where
import Control.Category qualified
import Control.Selective (Selective)
import Data.Error.Tree
import Data.Fix (Fix (..))
import Data.Functor.Compose
import Data.List qualified as List
import Data.Map.NonEmpty (NEMap)
import Data.Map.NonEmpty qualified as NEMap
import Data.Semigroupoid qualified as Semigroupiod
import Data.Semigroupoid qualified as Semigroupoid
import Data.Text qualified as Text
import MyPrelude
import Netencode qualified
import Prelude hiding (log)
newtype Parse from to
= -- TODO: the way @Context = [Text]@ has to be forwarded to everything is kinda shitty.
-- This is essentially just a difference list, and can probably be treated as a function in the output?
Parse (([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to))
deriving
(Functor, Applicative, Selective)
via ( Compose
( Compose
((->) ([Text], from))
(Validation (NonEmpty ErrorTree))
)
((,) [Text])
)
runParse :: Error -> Parse from to -> from -> Either ErrorTree to
runParse errMsg parser t =
(["$"], t)
& runParse' parser
<&> snd
& first (nestedMultiError errMsg)
& validationToEither
runParse' :: Parse from to -> ([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to)
runParse' (Parse f) from = f from
instance Semigroupoid Parse where
o p2 p1 = Parse $ \from -> case runParse' p1 from of
Failure err -> Failure err
Success to1 -> runParse' p2 to1
instance Category Parse where
(.) = Semigroupoid.o
id = Parse $ \t -> Success t
parseEither :: (([Text], from) -> Either ErrorTree ([Text], to)) -> Parse from to
parseEither f = Parse $ \from -> f from & eitherToListValidation
tAs :: (Netencode.TF (Fix Netencode.TF) -> Either ([Text] -> ErrorTree) to) -> Parse Netencode.T to
tAs f = parseEither ((\(context, Netencode.T (Fix tf)) -> f tf & bimap ($ context) (context,)))
key :: Text -> Parse (NEMap Text to) to
key name = parseEither $ \(context, rec) ->
rec
& NEMap.lookup name
& annotate (errorTreeContext (showContext context) [fmt|Key "{name}" does not exist|])
<&> (addContext name context,)
showContext :: [Text] -> Text
showContext context = context & List.reverse & Text.intercalate "."
addContext :: a -> [a] -> [a]
addContext = (:)
asText :: Parse Netencode.T Text
asText = tAs $ \case
Netencode.Text t -> pure t
other -> typeError "of text" other
asBytes :: Parse Netencode.T ByteString
asBytes = tAs $ \case
Netencode.Bytes b -> pure b
other -> typeError "of bytes" other
asRecord :: Parse Netencode.T (NEMap Text (Netencode.T))
asRecord = tAs $ \case
Netencode.Record rec -> pure (rec <&> Netencode.T)
other -> typeError "a record" other
typeError :: Text -> Netencode.TF ignored -> (Either ([Text] -> ErrorTree) b)
typeError should is = do
let otherS = is <&> (\_ -> ("" :: String)) & show
Left $ \context -> errorTreeContext (showContext context) [fmt|Value is not {should}, but a {otherS}|]
orThrowParseError ::
Parse (Either Error to) to
orThrowParseError = Parse $ \case
(context, Left err) ->
err
& singleError
& errorTreeContext (showContext context)
& singleton
& Failure
(context, Right to) -> Success (context, to)

View file

@ -18,6 +18,7 @@ let
src = depot.users.Profpatsch.exactSource ./. [
./netencode.cabal
./Netencode.hs
./Netencode/Parse.hs
];
libraryHaskellDepends = [

View file

@ -5,7 +5,9 @@ author: Profpatsch
maintainer: mail@profpatsch.de
library
exposed-modules: Netencode
exposed-modules:
Netencode,
Netencode.Parse
build-depends:
base >=4.15 && <5,
@ -16,5 +18,8 @@ library
data-fix,
bytestring,
attoparsec,
text,
semigroupoids,
selective
default-language: Haskell2010

View file

@ -37,6 +37,7 @@ pkgs.mkShell {
h.hspec-expectations-pretty-diff
depot.users.Profpatsch.my-prelude
depot.users.Profpatsch.netencode.netencode-hs
depot.users.Profpatsch.arglib.netencode.haskell
depot.users.Profpatsch.execline.exec-helpers-hs
]))