2023-05-31 01:59:00 +02:00
|
|
|
{-# LANGUAGE QuasiQuotes #-}
|
2023-06-05 10:54:19 +02:00
|
|
|
{-# OPTIONS_GHC -Wno-orphans #-}
|
2023-05-31 01:59:00 +02:00
|
|
|
|
|
|
|
module Main where
|
|
|
|
|
2023-06-05 10:54:19 +02:00
|
|
|
import Conduit ((.|))
|
|
|
|
import Conduit qualified as Cond
|
2023-06-04 02:34:48 +02:00
|
|
|
import Control.Category qualified as Cat
|
2023-06-05 10:54:19 +02:00
|
|
|
import Control.Foldl qualified as Fold
|
2023-05-31 01:59:00 +02:00
|
|
|
import Data.ByteString.Internal qualified as Bytes
|
|
|
|
import Data.Error.Tree
|
2023-06-05 10:54:19 +02:00
|
|
|
import Data.Int (Int64)
|
2023-05-31 01:59:00 +02:00
|
|
|
import Data.List qualified as List
|
|
|
|
import Data.Map.Strict qualified as Map
|
2023-06-04 02:34:48 +02:00
|
|
|
import Data.Maybe (catMaybes)
|
2023-05-31 01:59:00 +02:00
|
|
|
import Data.Text qualified as Text
|
|
|
|
import Data.Text.IO qualified as Text
|
2023-06-05 10:54:19 +02:00
|
|
|
import Database.SQLite.Simple qualified as Sqlite
|
|
|
|
import Database.SQLite.Simple.FromField qualified as Sqlite
|
|
|
|
import Database.SQLite.Simple.QQ qualified as Sqlite
|
2023-06-04 02:34:48 +02:00
|
|
|
import FieldParser qualified as Field
|
2023-05-31 01:59:00 +02:00
|
|
|
import Label
|
2023-10-15 23:03:43 +02:00
|
|
|
import Parse
|
2023-05-31 01:59:00 +02:00
|
|
|
import PossehlAnalyticsPrelude
|
|
|
|
import Text.XML (def)
|
|
|
|
import Text.XML qualified as Xml
|
2023-06-05 10:54:19 +02:00
|
|
|
import Prelude hiding (init, maybe)
|
2023-05-31 01:59:00 +02:00
|
|
|
|
|
|
|
main :: IO ()
|
|
|
|
main = do
|
|
|
|
f <- file
|
|
|
|
f.documentRoot
|
2023-06-04 02:34:48 +02:00
|
|
|
& filterDown
|
2023-05-31 01:59:00 +02:00
|
|
|
& toTree
|
|
|
|
& prettyErrorTree
|
|
|
|
& Text.putStrLn
|
|
|
|
|
2023-06-05 10:54:19 +02:00
|
|
|
test :: IO ()
|
|
|
|
test = do
|
|
|
|
withEnv $ \env -> do
|
|
|
|
migrate env
|
|
|
|
f <- file
|
|
|
|
parseJbovlasteXml f
|
|
|
|
& \case
|
|
|
|
Left errs -> Text.putStrLn $ prettyErrorTree errs
|
|
|
|
Right valsi -> insertValsi env valsi
|
|
|
|
|
2023-06-04 02:34:48 +02:00
|
|
|
filterDown :: Xml.Element -> Xml.Element
|
|
|
|
filterDown el =
|
|
|
|
el
|
|
|
|
& filterElementsRec noUsers
|
|
|
|
& downTo (T2 (label @"maxdepth" 5) (label @"maxlistitems" 30))
|
|
|
|
|
|
|
|
data Valsi = Valsi
|
2023-06-05 10:54:19 +02:00
|
|
|
{ word :: Text,
|
|
|
|
definition :: Text,
|
2023-06-04 02:34:48 +02:00
|
|
|
definitionId :: Natural,
|
|
|
|
typ :: Text,
|
|
|
|
selmaho :: Maybe Text,
|
|
|
|
notes :: Maybe Text,
|
|
|
|
glosswords :: [T2 "word" Text "sense" (Maybe Text)],
|
|
|
|
keywords :: [T3 "word" Text "place" Natural "sense" (Maybe Text)]
|
|
|
|
}
|
|
|
|
deriving stock (Show)
|
|
|
|
|
2023-06-05 10:54:19 +02:00
|
|
|
insertValsi :: Env -> [Valsi] -> IO ()
|
|
|
|
insertValsi env vs = do
|
|
|
|
Sqlite.withTransaction env.envData $
|
|
|
|
do
|
|
|
|
valsiIds <-
|
|
|
|
Cond.yieldMany vs
|
|
|
|
.| Cond.mapMC
|
|
|
|
( \v ->
|
|
|
|
Sqlite.queryNamed
|
|
|
|
@(Sqlite.Only Int64)
|
|
|
|
env.envData
|
|
|
|
[Sqlite.sql|
|
|
|
|
INSERT INTO valsi
|
|
|
|
(word , definition , type , selmaho , notes )
|
|
|
|
VALUES
|
|
|
|
(:word, :definition, :type, :selmaho, :notes)
|
|
|
|
RETURNING (id)
|
|
|
|
|]
|
|
|
|
[ ":word" Sqlite.:= v.word,
|
|
|
|
":definition" Sqlite.:= v.definition,
|
|
|
|
":type" Sqlite.:= v.typ,
|
|
|
|
":selmaho" Sqlite.:= v.selmaho,
|
|
|
|
":notes" Sqlite.:= v.notes
|
|
|
|
]
|
|
|
|
>>= \case
|
|
|
|
[one] -> pure one
|
|
|
|
_ -> error "more or less than one result"
|
|
|
|
)
|
|
|
|
.| Cond.sinkList
|
|
|
|
& Cond.runConduit
|
|
|
|
for_ (zip valsiIds vs) $ \(Sqlite.Only vId, v) -> do
|
|
|
|
for_ v.glosswords $ \g -> do
|
|
|
|
Sqlite.executeNamed
|
|
|
|
env.envData
|
|
|
|
[Sqlite.sql|
|
|
|
|
INSERT INTO glosswords
|
|
|
|
(valsi_id , word , sense )
|
|
|
|
VALUES
|
|
|
|
(:valsi_id, :word, :sense)
|
|
|
|
|]
|
|
|
|
[ ":valsi_id" Sqlite.:= vId,
|
|
|
|
":word" Sqlite.:= g.word,
|
|
|
|
":sense" Sqlite.:= g.sense
|
|
|
|
]
|
|
|
|
for_ (zip valsiIds vs) $ \(Sqlite.Only vId, v) -> do
|
|
|
|
for_ v.keywords $ \g -> do
|
|
|
|
Sqlite.executeNamed
|
|
|
|
env.envData
|
|
|
|
[Sqlite.sql|
|
|
|
|
INSERT INTO keywords
|
|
|
|
(valsi_id , word , place , sense )
|
|
|
|
VALUES
|
|
|
|
(:valsi_id, :word, :place, :sense)
|
|
|
|
|]
|
|
|
|
[ ":valsi_id" Sqlite.:= vId,
|
|
|
|
":word" Sqlite.:= g.word,
|
|
|
|
":place" Sqlite.:= (g.place & fromIntegral @Natural @Int),
|
|
|
|
":sense" Sqlite.:= g.sense
|
|
|
|
]
|
|
|
|
|
2023-10-15 23:03:43 +02:00
|
|
|
migrate :: (HasField "envData" p Sqlite.Connection) => p -> IO ()
|
2023-06-05 10:54:19 +02:00
|
|
|
migrate env = do
|
|
|
|
let x q = Sqlite.execute env.envData q ()
|
|
|
|
x
|
|
|
|
[Sqlite.sql|
|
|
|
|
CREATE TABLE IF NOT EXISTS valsi (
|
|
|
|
id integer PRIMARY KEY,
|
|
|
|
word text NOT NULL,
|
|
|
|
definition text NOT NULL,
|
|
|
|
type text NOT NULL,
|
|
|
|
selmaho text NULL,
|
|
|
|
notes text NULL
|
|
|
|
)
|
|
|
|
|]
|
|
|
|
x
|
|
|
|
[Sqlite.sql|
|
|
|
|
CREATE TABLE IF NOT EXISTS glosswords (
|
|
|
|
id integer PRIMARY KEY,
|
|
|
|
valsi_id integer NOT NULL,
|
|
|
|
word text NOT NULL,
|
|
|
|
sense text NULL,
|
|
|
|
FOREIGN KEY(valsi_id) REFERENCES valsi(id)
|
|
|
|
)
|
|
|
|
|]
|
|
|
|
x
|
|
|
|
[Sqlite.sql|
|
|
|
|
CREATE TABLE IF NOT EXISTS keywords (
|
|
|
|
id integer PRIMARY KEY,
|
|
|
|
valsi_id integer NOT NULL,
|
|
|
|
word text NOT NULL,
|
|
|
|
place integer NOT NULL,
|
|
|
|
sense text NULL,
|
|
|
|
FOREIGN KEY(valsi_id) REFERENCES valsi(id)
|
|
|
|
)
|
|
|
|
|]
|
|
|
|
|
|
|
|
data Env = Env
|
|
|
|
{ envData :: Sqlite.Connection
|
|
|
|
}
|
|
|
|
|
|
|
|
withEnv :: (Env -> IO a) -> IO a
|
|
|
|
withEnv inner = do
|
|
|
|
withSqlite "./jbovlaste.sqlite" $ \envData -> inner Env {..}
|
|
|
|
|
|
|
|
withSqlite :: String -> (Sqlite.Connection -> IO a) -> IO a
|
|
|
|
withSqlite fileName inner = Sqlite.withConnection fileName $ \conn -> do
|
|
|
|
-- Sqlite.setTrace conn (Just (\msg -> Text.hPutStrLn IO.stderr [fmt|{fileName}: {msg}|]))
|
|
|
|
Sqlite.execute conn [Sqlite.sql|PRAGMA foreign_keys = ON|] ()
|
|
|
|
inner conn
|
2023-06-04 02:34:48 +02:00
|
|
|
|
2023-10-15 23:03:43 +02:00
|
|
|
parseJbovlasteXml :: (HasField "documentRoot" r Xml.Element) => r -> Either ErrorTree [Valsi]
|
2023-06-04 02:34:48 +02:00
|
|
|
parseJbovlasteXml xml =
|
|
|
|
xml.documentRoot
|
|
|
|
& runParse
|
2023-06-05 10:54:19 +02:00
|
|
|
"cannot parse jbovlaste.xml"
|
|
|
|
parser
|
|
|
|
where
|
|
|
|
parser =
|
|
|
|
(element "dictionary" <&> (.elementNodes) <&> mapMaybe nodeElementMay)
|
|
|
|
>>> ( find
|
|
|
|
( element "direction"
|
|
|
|
>>> do
|
|
|
|
(attribute "from" >>> exactly showToText "lojban")
|
|
|
|
*> (attribute "to" >>> exactly showToText "English")
|
|
|
|
*> Cat.id
|
|
|
|
)
|
|
|
|
<&> (\x -> x.elementNodes <&> nodeElementMay)
|
|
|
|
)
|
|
|
|
>>> (multiple (maybe valsi) <&> catMaybes)
|
|
|
|
valsi =
|
|
|
|
element "valsi"
|
|
|
|
>>> do
|
|
|
|
let subNodes =
|
|
|
|
( Cat.id
|
|
|
|
<&> (.elementNodes)
|
|
|
|
<&> mapMaybe nodeElementMay
|
2023-06-04 02:34:48 +02:00
|
|
|
)
|
2023-06-05 10:54:19 +02:00
|
|
|
|
|
|
|
let subElementContent elName =
|
|
|
|
subNodes
|
|
|
|
>>> ( (find (element elName))
|
|
|
|
<&> (.elementNodes)
|
|
|
|
)
|
|
|
|
>>> exactlyOne
|
|
|
|
>>> content
|
|
|
|
let optionalSubElementContent elName =
|
|
|
|
subNodes
|
|
|
|
>>> ((findAll (element elName) >>> zeroOrOne))
|
|
|
|
>>> (maybe (lmap (.elementNodes) exactlyOne >>> content))
|
|
|
|
|
|
|
|
word <- attribute "word"
|
|
|
|
typ <- attribute "type"
|
|
|
|
selmaho <- optionalSubElementContent "selmaho"
|
|
|
|
definition <- subElementContent "definition"
|
|
|
|
definitionId <- subElementContent "definitionid" >>> fieldParser Field.decimalNatural
|
|
|
|
notes <- optionalSubElementContent "notes"
|
|
|
|
glosswords <-
|
|
|
|
(subNodes >>> findAll (element "glossword"))
|
|
|
|
>>> ( multiple $ do
|
|
|
|
word' <- label @"word" <$> (attribute "word")
|
|
|
|
sense <- label @"sense" <$> (attributeMay "sense")
|
|
|
|
pure $ T2 word' sense
|
|
|
|
)
|
|
|
|
keywords <-
|
|
|
|
(subNodes >>> findAll (element "keyword"))
|
|
|
|
>>> ( multiple $ do
|
|
|
|
word' <- label @"word" <$> (attribute "word")
|
|
|
|
place <- label @"place" <$> (attribute "place" >>> fieldParser Field.decimalNatural)
|
|
|
|
sense <- label @"sense" <$> (attributeMay "sense")
|
|
|
|
pure $ T3 word' place sense
|
|
|
|
)
|
|
|
|
|
|
|
|
pure $ Valsi {..}
|
2023-06-04 02:34:48 +02:00
|
|
|
|
2023-05-31 01:59:00 +02:00
|
|
|
file :: IO Xml.Document
|
|
|
|
file = Xml.readFile def "./jbovlaste-en.xml"
|
|
|
|
|
|
|
|
-- | Filter XML elements recursively based on the given predicate
|
|
|
|
filterElementsRec :: (Xml.Element -> Bool) -> Xml.Element -> Xml.Element
|
|
|
|
filterElementsRec f el =
|
|
|
|
el
|
|
|
|
{ Xml.elementNodes =
|
|
|
|
mapMaybe
|
|
|
|
( \case
|
|
|
|
Xml.NodeElement el' ->
|
|
|
|
if f el'
|
|
|
|
then Just $ Xml.NodeElement $ filterElementsRec f el'
|
|
|
|
else Nothing
|
|
|
|
other -> Just other
|
|
|
|
)
|
|
|
|
el.elementNodes
|
|
|
|
}
|
|
|
|
|
|
|
|
-- | no <user> allowed
|
|
|
|
noUsers :: Xml.Element -> Bool
|
|
|
|
noUsers el = el.elementName.nameLocalName /= "user"
|
|
|
|
|
|
|
|
downTo :: (T2 "maxdepth" Int "maxlistitems" Int) -> Xml.Element -> Xml.Element
|
|
|
|
downTo n el =
|
|
|
|
if n.maxdepth > 0
|
|
|
|
then
|
|
|
|
el
|
|
|
|
{ Xml.elementNodes =
|
|
|
|
( do
|
|
|
|
let eleven = take (n.maxlistitems + 1) $ map down el.elementNodes
|
|
|
|
if List.length eleven == (n.maxlistitems + 1)
|
|
|
|
then eleven <> [Xml.NodeComment "snip!"]
|
|
|
|
else eleven
|
|
|
|
)
|
|
|
|
}
|
|
|
|
else el {Xml.elementNodes = [Xml.NodeComment "snip!"]}
|
|
|
|
where
|
|
|
|
down =
|
|
|
|
\case
|
|
|
|
Xml.NodeElement el' ->
|
|
|
|
Xml.NodeElement $
|
|
|
|
downTo
|
|
|
|
( T2
|
|
|
|
(label @"maxdepth" $ n.maxdepth - 1)
|
|
|
|
(label @"maxlistitems" n.maxlistitems)
|
|
|
|
)
|
|
|
|
el'
|
|
|
|
more -> more
|
|
|
|
|
|
|
|
toTree :: Xml.Element -> ErrorTree
|
|
|
|
toTree el = do
|
|
|
|
case el.elementNodes & filter (not . isEmptyContent) & nonEmpty of
|
2023-06-04 02:34:48 +02:00
|
|
|
Nothing -> singleError (newError (prettyXmlElement el))
|
|
|
|
Just (n :| []) | not $ isElementNode n -> singleError $ errorContext (prettyXmlElement el) (nodeErrorNoElement n)
|
|
|
|
Just nodes -> nestedMultiError (newError (prettyXmlElement el)) (nodes <&> node)
|
2023-05-31 01:59:00 +02:00
|
|
|
where
|
|
|
|
isEmptyContent = \case
|
|
|
|
Xml.NodeContent c -> c & Text.all Bytes.isSpaceChar8
|
|
|
|
_ -> False
|
|
|
|
isElementNode = \case
|
|
|
|
Xml.NodeElement _ -> True
|
|
|
|
_ -> False
|
|
|
|
|
|
|
|
node :: Xml.Node -> ErrorTree
|
|
|
|
node = \case
|
|
|
|
Xml.NodeElement el' -> toTree el'
|
|
|
|
other -> singleError $ nodeErrorNoElement other
|
|
|
|
|
|
|
|
nodeErrorNoElement :: Xml.Node -> Error
|
|
|
|
nodeErrorNoElement = \case
|
|
|
|
Xml.NodeInstruction i -> [fmt|Instruction: {i & show}|]
|
|
|
|
Xml.NodeContent c -> [fmt|"{c & Text.replace "\"" "\\\""}"|]
|
|
|
|
Xml.NodeComment c -> [fmt|<!-- {c} -->|]
|
|
|
|
Xml.NodeElement _ -> error "NodeElement not allowed here"
|
|
|
|
|
2023-06-04 02:34:48 +02:00
|
|
|
prettyXmlName :: Xml.Name -> Text
|
|
|
|
prettyXmlName n = [fmt|{n.namePrefix & fromMaybe ""}{n.nameLocalName}|]
|
|
|
|
|
|
|
|
prettyXmlElement :: Xml.Element -> Text
|
|
|
|
prettyXmlElement el =
|
|
|
|
if not $ null el.elementAttributes
|
|
|
|
then [fmt|<{prettyXmlName el.elementName}: {attrs el.elementAttributes}>|]
|
|
|
|
else [fmt|<{prettyXmlName el.elementName}>|]
|
|
|
|
where
|
2023-05-31 01:59:00 +02:00
|
|
|
attrs :: Map Xml.Name Text -> Text
|
2023-06-04 02:34:48 +02:00
|
|
|
attrs a = a & Map.toList <&> (\(k, v) -> [fmt|{prettyXmlName k}={v}|]) & Text.intercalate ", " & \s -> [fmt|({s})|]
|
|
|
|
|
|
|
|
nodeElementMay :: Xml.Node -> Maybe Xml.Element
|
|
|
|
nodeElementMay = \case
|
|
|
|
Xml.NodeElement el -> Just el
|
|
|
|
_ -> Nothing
|
|
|
|
|
|
|
|
element :: Text -> Parse Xml.Element Xml.Element
|
|
|
|
element name = Parse $ \(ctx, el) ->
|
|
|
|
if el.elementName.nameLocalName == name
|
|
|
|
then Success (ctx & addContext (prettyXmlName el.elementName), el)
|
|
|
|
else Failure $ singleton [fmt|Expected element named <{name}> but got {el & prettyXmlElement} at {showContext ctx}|]
|
|
|
|
|
|
|
|
content :: Parse Xml.Node Text
|
|
|
|
content = Parse $ \(ctx, node) -> case node of
|
|
|
|
Xml.NodeContent t -> Success (ctx, t)
|
|
|
|
-- TODO: give an example of the node content?
|
|
|
|
n -> Failure $ singleton [fmt|Expected a content node, but got a {n & nodeType} node, at {showContext ctx}|]
|
|
|
|
where
|
|
|
|
nodeType = \case
|
|
|
|
Xml.NodeContent _ -> "content" :: Text
|
|
|
|
Xml.NodeComment _ -> "comment"
|
|
|
|
Xml.NodeInstruction _ -> "instruction"
|
|
|
|
Xml.NodeElement _ -> "element"
|
|
|
|
|
|
|
|
attribute :: Text -> Parse Xml.Element Text
|
|
|
|
attribute name = Parse $ \(ctx, el) ->
|
|
|
|
case el.elementAttributes & Map.mapKeys (.nameLocalName) & Map.lookup name of
|
|
|
|
Just a -> Success (ctx & addContext [fmt|{{attr:{name}}}|], a)
|
|
|
|
Nothing -> Failure $ singleton [fmt|Attribute "{name}" missing at {showContext ctx}|]
|
|
|
|
|
|
|
|
attributeMay :: Text -> Parse Xml.Element (Maybe Text)
|
|
|
|
attributeMay name = Parse $ \(ctx, el) ->
|
|
|
|
case el.elementAttributes & Map.mapKeys (.nameLocalName) & Map.lookup name of
|
|
|
|
Just a -> Success (ctx & addContext [fmt|{{attr:{name}}}|], Just a)
|
|
|
|
Nothing -> Success (ctx, Nothing)
|
|
|
|
|
2023-06-05 10:54:19 +02:00
|
|
|
instance
|
|
|
|
( Sqlite.FromField t1,
|
|
|
|
Sqlite.FromField t2,
|
|
|
|
Sqlite.FromField t3
|
|
|
|
) =>
|
|
|
|
Sqlite.FromRow (T3 l1 t1 l2 t2 l3 t3)
|
|
|
|
where
|
|
|
|
fromRow = do
|
|
|
|
T3
|
|
|
|
<$> (label @l1 <$> Sqlite.field)
|
|
|
|
<*> (label @l2 <$> Sqlite.field)
|
|
|
|
<*> (label @l3 <$> Sqlite.field)
|
|
|
|
|
|
|
|
foldRows ::
|
|
|
|
forall row params b.
|
|
|
|
(Sqlite.FromRow row, Sqlite.ToRow params) =>
|
|
|
|
Sqlite.Connection ->
|
|
|
|
Sqlite.Query ->
|
|
|
|
params ->
|
|
|
|
Fold.Fold row b ->
|
|
|
|
IO b
|
|
|
|
foldRows conn qry params = Fold.purely f
|
|
|
|
where
|
|
|
|
f :: forall x. (x -> row -> x) -> x -> (x -> b) -> IO b
|
|
|
|
f acc init extract = do
|
|
|
|
x <- Sqlite.fold conn qry params init (\a r -> pure $ acc a r)
|
|
|
|
pure $ extract x
|