feat(users/Profpatsch): init jbovlaste sqlite

This is intended to convert the XML dump from
https://jbovlaste.lojban.org/ to an sqlite database at one point.

So far only XML parsing and some pretty printing

Change-Id: I48c989a3109c8d513c812703fa7a8f2689a157ee
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8687
Tested-by: BuildkiteCI
Reviewed-by: Profpatsch <mail@profpatsch.de>
Autosubmit: Profpatsch <mail@profpatsch.de>
This commit is contained in:
Profpatsch 2023-05-31 01:59:00 +02:00 committed by clbot
parent 8c4730c433
commit c2baefbecc
3 changed files with 213 additions and 0 deletions

View file

@ -0,0 +1,110 @@
{-# LANGUAGE QuasiQuotes #-}
module Main where
import Data.ByteString.Internal qualified as Bytes
import Data.Error.Tree
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Text qualified as Text
import Data.Text.IO qualified as Text
import Label
import PossehlAnalyticsPrelude
import Text.XML (def)
import Text.XML qualified as Xml
main :: IO ()
main = do
f <- file
f.documentRoot
& filterElementsRec noUsers
& downTo (T2 (label @"maxdepth" 5) (label @"maxlistitems" 20))
& toTree
& prettyErrorTree
& Text.putStrLn
file :: IO Xml.Document
file = Xml.readFile def "./jbovlaste-en.xml"
-- | Filter XML elements recursively based on the given predicate
filterElementsRec :: (Xml.Element -> Bool) -> Xml.Element -> Xml.Element
filterElementsRec f el =
el
{ Xml.elementNodes =
mapMaybe
( \case
Xml.NodeElement el' ->
if f el'
then Just $ Xml.NodeElement $ filterElementsRec f el'
else Nothing
other -> Just other
)
el.elementNodes
}
-- | no <user> allowed
noUsers :: Xml.Element -> Bool
noUsers el = el.elementName.nameLocalName /= "user"
downTo :: (T2 "maxdepth" Int "maxlistitems" Int) -> Xml.Element -> Xml.Element
downTo n el =
if n.maxdepth > 0
then
el
{ Xml.elementNodes =
( do
let eleven = take (n.maxlistitems + 1) $ map down el.elementNodes
if List.length eleven == (n.maxlistitems + 1)
then eleven <> [Xml.NodeComment "snip!"]
else eleven
)
}
else el {Xml.elementNodes = [Xml.NodeComment "snip!"]}
where
down =
\case
Xml.NodeElement el' ->
Xml.NodeElement $
downTo
( T2
(label @"maxdepth" $ n.maxdepth - 1)
(label @"maxlistitems" n.maxlistitems)
)
el'
more -> more
toTree :: Xml.Element -> ErrorTree
toTree el = do
let outer =
if not $ null el.elementAttributes
then [fmt|<{name el.elementName}: {attrs el.elementAttributes}>|]
else [fmt|<{name el.elementName}>|]
case el.elementNodes & filter (not . isEmptyContent) & nonEmpty of
Nothing -> singleError (newError outer)
Just (n :| []) | not $ isElementNode n -> singleError $ errorContext outer (nodeErrorNoElement n)
Just nodes -> nestedMultiError (newError outer) (nodes <&> node)
where
isEmptyContent = \case
Xml.NodeContent c -> c & Text.all Bytes.isSpaceChar8
_ -> False
isElementNode = \case
Xml.NodeElement _ -> True
_ -> False
node :: Xml.Node -> ErrorTree
node = \case
Xml.NodeElement el' -> toTree el'
other -> singleError $ nodeErrorNoElement other
nodeErrorNoElement :: Xml.Node -> Error
nodeErrorNoElement = \case
Xml.NodeInstruction i -> [fmt|Instruction: {i & show}|]
Xml.NodeContent c -> [fmt|"{c & Text.replace "\"" "\\\""}"|]
Xml.NodeComment c -> [fmt|<!-- {c} -->|]
Xml.NodeElement _ -> error "NodeElement not allowed here"
name :: Xml.Name -> Text
name n = [fmt|{n.namePrefix & fromMaybe ""}{n.nameLocalName}|]
attrs :: Map Xml.Name Text -> Text
attrs a = a & Map.toList <&> (\(k, v) -> [fmt|{name k}={v}|]) & Text.intercalate ", " & \s -> [fmt|({s})|]

View file

@ -0,0 +1,32 @@
{ depot, pkgs, lib, ... }:
let
# bins = depot.nix.getBins pkgs.sqlite ["sqlite3"];
jbovlaste-sqlite = pkgs.haskellPackages.mkDerivation {
pname = "jbovlaste-sqlite";
version = "0.1.0";
src = depot.users.Profpatsch.exactSource ./. [
./jbovlaste-sqlite.cabal
./JbovlasteSqlite.hs
];
libraryHaskellDepends = [
pkgs.haskellPackages.pa-prelude
pkgs.haskellPackages.pa-label
pkgs.haskellPackages.pa-error-tree
pkgs.haskellPackages.sqlite-simple
pkgs.haskellPackages.xml-conduit
depot.users.Profpatsch.arglib.netencode.haskell
depot.users.Profpatsch.netencode.netencode-hs
];
isExecutable = true;
isLibrary = false;
license = lib.licenses.mit;
};
in
jbovlaste-sqlite

View file

@ -0,0 +1,71 @@
cabal-version: 3.0
name: jbovlaste-sqlite
version: 0.1.0.0
author: Profpatsch
maintainer: mail@profpatsch.de
common common-options
ghc-options:
-Wall
-Wno-type-defaults
-Wunused-packages
-Wredundant-constraints
-fwarn-missing-deriving-strategies
-- See https://downloads.haskell.org/ghc/latest/docs/users_guide/exts.html
-- for a description of all these extensions
default-extensions:
-- Infer Applicative instead of Monad where possible
ApplicativeDo
-- Allow literal strings to be Text
OverloadedStrings
-- Syntactic sugar improvements
LambdaCase
MultiWayIf
-- Makes the (deprecated) usage of * instead of Data.Kind.Type an error
NoStarIsType
-- Convenient and crucial to deal with ambiguous field names, commonly
-- known as RecordDotSyntax
OverloadedRecordDot
-- does not export record fields as functions, use OverloadedRecordDot to access instead
NoFieldSelectors
-- Record punning
RecordWildCards
-- Improved Deriving
DerivingStrategies
DerivingVia
-- Type-level strings
DataKinds
-- to enable the `type` keyword in import lists (ormolu uses this automatically)
ExplicitNamespaces
default-language: GHC2021
executable jbovlaste-sqlite
import: common-options
main-is: JbovlasteSqlite.hs
build-depends:
base >=4.15 && <5,
pa-prelude,
pa-label,
pa-error-tree,
my-prelude,
containers,
bytestring,
arglib-netencode,
netencode,
text,
sqlite-simple,
xml-conduit,