feat(users/Profpatsch): init jbovlaste sqlite
This is intended to convert the XML dump from https://jbovlaste.lojban.org/ to an sqlite database at one point. So far only XML parsing and some pretty printing Change-Id: I48c989a3109c8d513c812703fa7a8f2689a157ee Reviewed-on: https://cl.tvl.fyi/c/depot/+/8687 Tested-by: BuildkiteCI Reviewed-by: Profpatsch <mail@profpatsch.de> Autosubmit: Profpatsch <mail@profpatsch.de>
This commit is contained in:
parent
8c4730c433
commit
c2baefbecc
3 changed files with 213 additions and 0 deletions
110
users/Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs
Normal file
110
users/Profpatsch/jbovlaste-sqlite/JbovlasteSqlite.hs
Normal file
|
@ -0,0 +1,110 @@
|
||||||
|
{-# LANGUAGE QuasiQuotes #-}
|
||||||
|
|
||||||
|
module Main where
|
||||||
|
|
||||||
|
import Data.ByteString.Internal qualified as Bytes
|
||||||
|
import Data.Error.Tree
|
||||||
|
import Data.List qualified as List
|
||||||
|
import Data.Map.Strict qualified as Map
|
||||||
|
import Data.Text qualified as Text
|
||||||
|
import Data.Text.IO qualified as Text
|
||||||
|
import Label
|
||||||
|
import PossehlAnalyticsPrelude
|
||||||
|
import Text.XML (def)
|
||||||
|
import Text.XML qualified as Xml
|
||||||
|
|
||||||
|
main :: IO ()
|
||||||
|
main = do
|
||||||
|
f <- file
|
||||||
|
f.documentRoot
|
||||||
|
& filterElementsRec noUsers
|
||||||
|
& downTo (T2 (label @"maxdepth" 5) (label @"maxlistitems" 20))
|
||||||
|
& toTree
|
||||||
|
& prettyErrorTree
|
||||||
|
& Text.putStrLn
|
||||||
|
|
||||||
|
file :: IO Xml.Document
|
||||||
|
file = Xml.readFile def "./jbovlaste-en.xml"
|
||||||
|
|
||||||
|
-- | Filter XML elements recursively based on the given predicate
|
||||||
|
filterElementsRec :: (Xml.Element -> Bool) -> Xml.Element -> Xml.Element
|
||||||
|
filterElementsRec f el =
|
||||||
|
el
|
||||||
|
{ Xml.elementNodes =
|
||||||
|
mapMaybe
|
||||||
|
( \case
|
||||||
|
Xml.NodeElement el' ->
|
||||||
|
if f el'
|
||||||
|
then Just $ Xml.NodeElement $ filterElementsRec f el'
|
||||||
|
else Nothing
|
||||||
|
other -> Just other
|
||||||
|
)
|
||||||
|
el.elementNodes
|
||||||
|
}
|
||||||
|
|
||||||
|
-- | no <user> allowed
|
||||||
|
noUsers :: Xml.Element -> Bool
|
||||||
|
noUsers el = el.elementName.nameLocalName /= "user"
|
||||||
|
|
||||||
|
downTo :: (T2 "maxdepth" Int "maxlistitems" Int) -> Xml.Element -> Xml.Element
|
||||||
|
downTo n el =
|
||||||
|
if n.maxdepth > 0
|
||||||
|
then
|
||||||
|
el
|
||||||
|
{ Xml.elementNodes =
|
||||||
|
( do
|
||||||
|
let eleven = take (n.maxlistitems + 1) $ map down el.elementNodes
|
||||||
|
if List.length eleven == (n.maxlistitems + 1)
|
||||||
|
then eleven <> [Xml.NodeComment "snip!"]
|
||||||
|
else eleven
|
||||||
|
)
|
||||||
|
}
|
||||||
|
else el {Xml.elementNodes = [Xml.NodeComment "snip!"]}
|
||||||
|
where
|
||||||
|
down =
|
||||||
|
\case
|
||||||
|
Xml.NodeElement el' ->
|
||||||
|
Xml.NodeElement $
|
||||||
|
downTo
|
||||||
|
( T2
|
||||||
|
(label @"maxdepth" $ n.maxdepth - 1)
|
||||||
|
(label @"maxlistitems" n.maxlistitems)
|
||||||
|
)
|
||||||
|
el'
|
||||||
|
more -> more
|
||||||
|
|
||||||
|
toTree :: Xml.Element -> ErrorTree
|
||||||
|
toTree el = do
|
||||||
|
let outer =
|
||||||
|
if not $ null el.elementAttributes
|
||||||
|
then [fmt|<{name el.elementName}: {attrs el.elementAttributes}>|]
|
||||||
|
else [fmt|<{name el.elementName}>|]
|
||||||
|
|
||||||
|
case el.elementNodes & filter (not . isEmptyContent) & nonEmpty of
|
||||||
|
Nothing -> singleError (newError outer)
|
||||||
|
Just (n :| []) | not $ isElementNode n -> singleError $ errorContext outer (nodeErrorNoElement n)
|
||||||
|
Just nodes -> nestedMultiError (newError outer) (nodes <&> node)
|
||||||
|
where
|
||||||
|
isEmptyContent = \case
|
||||||
|
Xml.NodeContent c -> c & Text.all Bytes.isSpaceChar8
|
||||||
|
_ -> False
|
||||||
|
isElementNode = \case
|
||||||
|
Xml.NodeElement _ -> True
|
||||||
|
_ -> False
|
||||||
|
|
||||||
|
node :: Xml.Node -> ErrorTree
|
||||||
|
node = \case
|
||||||
|
Xml.NodeElement el' -> toTree el'
|
||||||
|
other -> singleError $ nodeErrorNoElement other
|
||||||
|
|
||||||
|
nodeErrorNoElement :: Xml.Node -> Error
|
||||||
|
nodeErrorNoElement = \case
|
||||||
|
Xml.NodeInstruction i -> [fmt|Instruction: {i & show}|]
|
||||||
|
Xml.NodeContent c -> [fmt|"{c & Text.replace "\"" "\\\""}"|]
|
||||||
|
Xml.NodeComment c -> [fmt|<!-- {c} -->|]
|
||||||
|
Xml.NodeElement _ -> error "NodeElement not allowed here"
|
||||||
|
|
||||||
|
name :: Xml.Name -> Text
|
||||||
|
name n = [fmt|{n.namePrefix & fromMaybe ""}{n.nameLocalName}|]
|
||||||
|
attrs :: Map Xml.Name Text -> Text
|
||||||
|
attrs a = a & Map.toList <&> (\(k, v) -> [fmt|{name k}={v}|]) & Text.intercalate ", " & \s -> [fmt|({s})|]
|
32
users/Profpatsch/jbovlaste-sqlite/default.nix
Normal file
32
users/Profpatsch/jbovlaste-sqlite/default.nix
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
{ depot, pkgs, lib, ... }:
|
||||||
|
|
||||||
|
let
|
||||||
|
# bins = depot.nix.getBins pkgs.sqlite ["sqlite3"];
|
||||||
|
|
||||||
|
jbovlaste-sqlite = pkgs.haskellPackages.mkDerivation {
|
||||||
|
pname = "jbovlaste-sqlite";
|
||||||
|
version = "0.1.0";
|
||||||
|
|
||||||
|
src = depot.users.Profpatsch.exactSource ./. [
|
||||||
|
./jbovlaste-sqlite.cabal
|
||||||
|
./JbovlasteSqlite.hs
|
||||||
|
];
|
||||||
|
|
||||||
|
libraryHaskellDepends = [
|
||||||
|
pkgs.haskellPackages.pa-prelude
|
||||||
|
pkgs.haskellPackages.pa-label
|
||||||
|
pkgs.haskellPackages.pa-error-tree
|
||||||
|
pkgs.haskellPackages.sqlite-simple
|
||||||
|
pkgs.haskellPackages.xml-conduit
|
||||||
|
depot.users.Profpatsch.arglib.netencode.haskell
|
||||||
|
depot.users.Profpatsch.netencode.netencode-hs
|
||||||
|
|
||||||
|
];
|
||||||
|
|
||||||
|
isExecutable = true;
|
||||||
|
isLibrary = false;
|
||||||
|
license = lib.licenses.mit;
|
||||||
|
};
|
||||||
|
|
||||||
|
in
|
||||||
|
jbovlaste-sqlite
|
71
users/Profpatsch/jbovlaste-sqlite/jbovlaste-sqlite.cabal
Normal file
71
users/Profpatsch/jbovlaste-sqlite/jbovlaste-sqlite.cabal
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
cabal-version: 3.0
|
||||||
|
name: jbovlaste-sqlite
|
||||||
|
version: 0.1.0.0
|
||||||
|
author: Profpatsch
|
||||||
|
maintainer: mail@profpatsch.de
|
||||||
|
|
||||||
|
common common-options
|
||||||
|
ghc-options:
|
||||||
|
-Wall
|
||||||
|
-Wno-type-defaults
|
||||||
|
-Wunused-packages
|
||||||
|
-Wredundant-constraints
|
||||||
|
-fwarn-missing-deriving-strategies
|
||||||
|
|
||||||
|
-- See https://downloads.haskell.org/ghc/latest/docs/users_guide/exts.html
|
||||||
|
-- for a description of all these extensions
|
||||||
|
default-extensions:
|
||||||
|
-- Infer Applicative instead of Monad where possible
|
||||||
|
ApplicativeDo
|
||||||
|
|
||||||
|
-- Allow literal strings to be Text
|
||||||
|
OverloadedStrings
|
||||||
|
|
||||||
|
-- Syntactic sugar improvements
|
||||||
|
LambdaCase
|
||||||
|
MultiWayIf
|
||||||
|
|
||||||
|
-- Makes the (deprecated) usage of * instead of Data.Kind.Type an error
|
||||||
|
NoStarIsType
|
||||||
|
|
||||||
|
-- Convenient and crucial to deal with ambiguous field names, commonly
|
||||||
|
-- known as RecordDotSyntax
|
||||||
|
OverloadedRecordDot
|
||||||
|
|
||||||
|
-- does not export record fields as functions, use OverloadedRecordDot to access instead
|
||||||
|
NoFieldSelectors
|
||||||
|
|
||||||
|
-- Record punning
|
||||||
|
RecordWildCards
|
||||||
|
|
||||||
|
-- Improved Deriving
|
||||||
|
DerivingStrategies
|
||||||
|
DerivingVia
|
||||||
|
|
||||||
|
-- Type-level strings
|
||||||
|
DataKinds
|
||||||
|
|
||||||
|
-- to enable the `type` keyword in import lists (ormolu uses this automatically)
|
||||||
|
ExplicitNamespaces
|
||||||
|
|
||||||
|
default-language: GHC2021
|
||||||
|
|
||||||
|
|
||||||
|
executable jbovlaste-sqlite
|
||||||
|
import: common-options
|
||||||
|
|
||||||
|
main-is: JbovlasteSqlite.hs
|
||||||
|
|
||||||
|
build-depends:
|
||||||
|
base >=4.15 && <5,
|
||||||
|
pa-prelude,
|
||||||
|
pa-label,
|
||||||
|
pa-error-tree,
|
||||||
|
my-prelude,
|
||||||
|
containers,
|
||||||
|
bytestring,
|
||||||
|
arglib-netencode,
|
||||||
|
netencode,
|
||||||
|
text,
|
||||||
|
sqlite-simple,
|
||||||
|
xml-conduit,
|
Loading…
Reference in a new issue