feat(xanthous): Add a generator for random english syllables

Add a new "speech" generator module, with the beginnings of the vague
definition of the phonotactics of the language (there's one in here for
English based on the wikipedia article for English phonology, but it's
less than ideal as it has generated words like "sprurlkt") and the
ability to generate random syllables of a language by picking an onset,
nucleus, and coda from the list for that language (within a range of the
number of allowed of each syllable part). This will be used down the
road to automatically generate utterances from various
non-english-speaking creatures (so the accuracy is less important, just
that it "feels real").

Change-Id: I7b81375ec595239c05c5c800cbde1a2a900e38ac
Reviewed-on: https://cl.tvl.fyi/c/depot/+/3202
Reviewed-by: grfn <grfn@gws.fyi>
Tested-by: BuildkiteCI
This commit is contained in:
Griffin Smith 2021-06-12 21:11:58 -04:00 committed by grfn
parent 006e5231e5
commit 2cfe4069bb
7 changed files with 216 additions and 26 deletions

View file

@ -33,6 +33,7 @@ dependencies:
- containers
- criterion
- data-default
- data-interval
- deepseq
- directory
- fgl

View file

@ -1,17 +1,17 @@
{ mkDerivation, aeson, array, async, base, bifunctors, brick
, checkers, classy-prelude, comonad, comonad-extras, constraints
, containers, criterion, data-default, deepseq, directory, fgl
, fgl-arbitrary, file-embed, filepath, generic-arbitrary
, generic-lens, groups, hgeometry, hgeometry-combinatorial, hpack
, JuicyPixels, lens, lens-properties, lib, lifted-async, linear
, megaparsec, mmorph, monad-control, MonadRandom, mtl
, optparse-applicative, parallel, parser-combinators, pointed
, QuickCheck, quickcheck-instances, quickcheck-text, random
, random-extras, random-fu, random-source, Rasterific
, raw-strings-qq, reflection, semigroupoids, semigroups, splitmix
, stache, streams, tasty, tasty-hunit, tasty-quickcheck, text
, text-zipper, tomland, transformers, vector, vty, witherable, yaml
, zlib
, containers, criterion, data-default, data-interval, deepseq
, directory, fgl, fgl-arbitrary, file-embed, filepath
, generic-arbitrary, generic-lens, groups, hgeometry
, hgeometry-combinatorial, hpack, JuicyPixels, lens
, lens-properties, lib, lifted-async, linear, megaparsec, mmorph
, monad-control, MonadRandom, mtl, optparse-applicative, parallel
, parser-combinators, pointed, QuickCheck, quickcheck-instances
, quickcheck-text, random, random-extras, random-fu, random-source
, Rasterific, raw-strings-qq, reflection, semigroupoids, semigroups
, splitmix, stache, streams, tasty, tasty-hunit, tasty-quickcheck
, text, text-zipper, tomland, transformers, vector, vty, witherable
, yaml, zlib
}:
mkDerivation {
pname = "xanthous";
@ -22,8 +22,8 @@ mkDerivation {
libraryHaskellDepends = [
aeson array async base bifunctors brick checkers classy-prelude
comonad comonad-extras constraints containers criterion
data-default deepseq directory fgl fgl-arbitrary file-embed
filepath generic-arbitrary generic-lens groups hgeometry
data-default data-interval deepseq directory fgl fgl-arbitrary
file-embed filepath generic-arbitrary generic-lens groups hgeometry
hgeometry-combinatorial JuicyPixels lens lifted-async linear
megaparsec mmorph monad-control MonadRandom mtl
optparse-applicative parallel parser-combinators pointed QuickCheck
@ -36,8 +36,8 @@ mkDerivation {
executableHaskellDepends = [
aeson array async base bifunctors brick checkers classy-prelude
comonad comonad-extras constraints containers criterion
data-default deepseq directory fgl fgl-arbitrary file-embed
filepath generic-arbitrary generic-lens groups hgeometry
data-default data-interval deepseq directory fgl fgl-arbitrary
file-embed filepath generic-arbitrary generic-lens groups hgeometry
hgeometry-combinatorial JuicyPixels lens lifted-async linear
megaparsec mmorph monad-control MonadRandom mtl
optparse-applicative parallel parser-combinators pointed QuickCheck
@ -49,8 +49,8 @@ mkDerivation {
testHaskellDepends = [
aeson array async base bifunctors brick checkers classy-prelude
comonad comonad-extras constraints containers criterion
data-default deepseq directory fgl fgl-arbitrary file-embed
filepath generic-arbitrary generic-lens groups hgeometry
data-default data-interval deepseq directory fgl fgl-arbitrary
file-embed filepath generic-arbitrary generic-lens groups hgeometry
hgeometry-combinatorial JuicyPixels lens lens-properties
lifted-async linear megaparsec mmorph monad-control MonadRandom mtl
optparse-applicative parallel parser-combinators pointed QuickCheck
@ -63,8 +63,8 @@ mkDerivation {
benchmarkHaskellDepends = [
aeson array async base bifunctors brick checkers classy-prelude
comonad comonad-extras constraints containers criterion
data-default deepseq directory fgl fgl-arbitrary file-embed
filepath generic-arbitrary generic-lens groups hgeometry
data-default data-interval deepseq directory fgl fgl-arbitrary
file-embed filepath generic-arbitrary generic-lens groups hgeometry
hgeometry-combinatorial JuicyPixels lens lifted-async linear
megaparsec mmorph monad-control MonadRandom mtl
optparse-applicative parallel parser-combinators pointed QuickCheck

View file

@ -0,0 +1,108 @@
{-# LANGUAGE TemplateHaskell #-}
--------------------------------------------------------------------------------
module Xanthous.Generators.Speech
( -- * Abstract phonotactics
Phonotactics(..)
-- ** Lenses
, onsets
, nuclei
, codas
, numOnsets
, numNuclei
, numCodas
-- ** Definitions for languages
, english
-- * Language generation
, syllable
) where
--------------------------------------------------------------------------------
import Xanthous.Prelude hiding (replicateM)
import Data.Interval (Interval)
import qualified Data.Interval as Interval
import Control.Monad.Random.Class (MonadRandom)
import Xanthous.Random (chooseRange, choose, ChooseElement (ChooseElement))
import Control.Monad (replicateM)
--------------------------------------------------------------------------------
newtype Phoneme = Phoneme Text
deriving stock (Show, Eq, Generic)
deriving anyclass (NFData)
deriving newtype (IsString, Semigroup, Monoid)
data Phonotactics = Phonotactics
{ _onsets :: [Phoneme]
, _nuclei :: [Phoneme]
, _codas :: [Phoneme]
, _numOnsets :: Interval Word
, _numNuclei :: Interval Word
, _numCodas :: Interval Word
}
deriving stock (Show, Eq, Generic)
deriving anyclass (NFData)
makeLenses ''Phonotactics
syllable :: MonadRandom m => Phonotactics -> m Text
syllable phonotactics = do
let genPart num choices = do
n <- fromIntegral . fromMaybe 0 <$> chooseRange (phonotactics ^. num)
fmap (fromMaybe mempty . mconcat)
. replicateM n
. choose . ChooseElement
$ phonotactics ^. choices
(Phoneme onset) <- genPart numOnsets onsets
(Phoneme nucleus) <- genPart numNuclei nuclei
(Phoneme coda) <- genPart numCodas codas
pure $ onset <> nucleus <> coda
--------------------------------------------------------------------------------
-- <https://en.wikipedia.org/wiki/English_phonology#Phonotactics>
english :: Phonotactics
english = Phonotactics
{ _onsets = [ "pl" , "bl" , "kl" , "gl" , "pr" , "br" , "tr" , "dr" , "kr"
, "gr" , "tw" , "dw" , "gw" , "kw" , "pw"
, "fl" , "sl" , {- "thl", -} "shl" {- , "vl" -}
, "p", "b", "t", "d", "k", "ɡ", "m", "n", "f", "v", "th", "s"
, "z", "h", "l", "w"
, "sp", "st", "sk"
, "sm", "sn"
, "sf", ""
, "spl", "skl", "spr", "str", "skr", "skw", "sm", "sp", "st", "sk"
]
, _nuclei = [ "a", "e", "i", "o", "u", "ur", "ar", "or", "ear", "are", "ure"
, "oa", "ee", "oo", "ei", "ie", "oi", "ou"
]
, _codas = [ "m", "n", "ng", "p", "t", "tsh", "k", "f", "sh", "s", "th", "x"
, "v", "z", "zh", "l", "r", "w"
, "lk", "lb", "lt", "ld", "ltsh", "ldsh", "lk"
, "rp", "rb", "rt", "rd", "rtsh", "rdsh", "rk", "rɡ"
, "lf", "lv", "lth", "ls", "lz", "lsh", "lth"
, "rf", "rv", "rth", "rs", "rz", "rth"
, "lm", "ln"
, "rm", "rn", "rl"
, "mp", "nt", "nd", "nth", "nsh", "ŋk"
, "mf", "ms", "mth", "nf", "nth", "ns", "nz", "ŋθ"
, "ft", "sp", "st", "sk"
, "fth"
, "pt", "kt"
, "pth", "ps", "th", "ts", "dth", "dz", "ks"
, "lpt", "lps", "lfth", "lts", "lst", "lkt", "lks"
, "rmth", "rpt", "rps", "rts", "rst", "rkt"
, "mpt", "mps", "ndth", "ŋkt", "ŋks", "ŋkth"
, "ksth", "kst"
]
, _numOnsets = Interval.singleton 1
, _numNuclei = Interval.singleton 1
, _numCodas = Interval.singleton 1
}

View file

@ -1,10 +1,10 @@
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE PackageImports #-}
{-# OPTIONS_GHC -Wno-orphans #-}
--------------------------------------------------------------------------------
{-# OPTIONS_GHC -Wno-deferred-type-errors #-}
module Xanthous.Orphans
( ppTemplate
) where
@ -31,6 +31,8 @@ import Linear
--------------------------------------------------------------------------------
import Xanthous.Util.JSON
import Xanthous.Util.QuickCheck
import qualified Data.Interval as Interval
import Data.Interval (Interval, Extended (..))
--------------------------------------------------------------------------------
instance forall s a.
@ -350,3 +352,23 @@ deriving newtype instance (Arbitrary s, CoArbitrary (m (a, s)))
deriving via (GenericArbitrary (V2 a)) instance Arbitrary a => Arbitrary (V2 a)
instance CoArbitrary a => CoArbitrary (V2 a)
instance Function a => Function (V2 a)
--------------------------------------------------------------------------------
instance Arbitrary r => Arbitrary (Extended r) where
arbitrary = oneof [ pure NegInf
, pure PosInf
, Finite <$> arbitrary
]
instance Arbitrary Interval.Boundary where
arbitrary = elements [ Interval.Open , Interval.Closed ]
instance (Ord r, Arbitrary r) => Arbitrary (Interval r) where
arbitrary = do
lower <- arbitrary
upper <- arbitrary
pure $ (if upper < lower then flip else id)
Interval.interval
lower
upper

View file

@ -11,9 +11,10 @@ module Xanthous.Random
, subRand
, chance
, chooseSubset
, chooseRange
) where
--------------------------------------------------------------------------------
import Xanthous.Prelude
import Xanthous.Prelude
--------------------------------------------------------------------------------
import Data.List.NonEmpty (NonEmpty(..))
import Control.Monad.Random.Class (MonadRandom(getRandomR, getRandom))
@ -25,6 +26,9 @@ import Data.Random.Distribution.Uniform
import Data.Random.Distribution.Uniform.Exclusive
import Data.Random.Sample
import qualified Data.Random.Source as DRS
import Data.Interval ( Interval, lowerBound', Extended (Finite)
, upperBound', Boundary (Closed)
)
--------------------------------------------------------------------------------
instance {-# INCOHERENT #-} (Monad m, MonadRandom m) => DRS.MonadRandom m where
@ -81,11 +85,13 @@ evenlyWeighted = Weighted . itoList
weightedBy :: Functor t => (a -> w) -> t a -> Weighted w t a
weightedBy weighting xs = Weighted $ (weighting &&& id) <$> xs
instance (Num w, Ord w, Distribution Uniform w, Excludable w) => Choose (Weighted w [] a) where
instance (Num w, Ord w, Distribution Uniform w, Excludable w)
=> Choose (Weighted w [] a) where
type RandomResult (Weighted w [] a) = Maybe a
choose (Weighted ws) = sample $ headMay <$> weightedSample 1 ws
instance (Num w, Ord w, Distribution Uniform w, Excludable w) => Choose (Weighted w NonEmpty a) where
instance (Num w, Ord w, Distribution Uniform w, Excludable w)
=> Choose (Weighted w NonEmpty a) where
type RandomResult (Weighted w NonEmpty a) = a
choose (Weighted ws) =
sample
@ -112,6 +118,33 @@ chooseSubset :: ( Num w, Ord w, Distribution Uniform w, Excludable w
) => w -> t a -> m (t a)
chooseSubset = filterA . const . chance
-- | Choose a random @n@ in the given interval
chooseRange
:: ( MonadRandom m
, Distribution Uniform n
, Enum n
, Bounded n, Show n, Ord n)
=> Interval n
-> m (Maybe n)
chooseRange int = traverse sample distribution
where
(lower, lowerBoundary) = lowerBound' int
lowerR = case lower of
Finite x -> if lowerBoundary == Closed
then x
else succ x
_ -> minBound
(upper, upperBoundary) = upperBound' int
upperR = case upper of
Finite x -> if upperBoundary == Closed
then x
else pred x
_ -> maxBound
distribution
| lowerR <= upperR = Just $ Uniform lowerR upperR
| otherwise = Nothing
--------------------------------------------------------------------------------
bools :: NonEmpty Bool

View file

@ -5,7 +5,10 @@ import Test.Prelude
--------------------------------------------------------------------------------
import Control.Monad.Random
--------------------------------------------------------------------------------
import Xanthous.Random
import Xanthous.Random
import Xanthous.Orphans ()
import qualified Data.Interval as Interval
import Data.Interval (Interval, Extended (Finite), (<=..<=))
--------------------------------------------------------------------------------
main :: IO ()
@ -18,6 +21,23 @@ test = testGroup "Xanthous.Random"
$ \(l :: [Int]) (Positive (r :: Double)) -> randomTest $ do
ss <- chooseSubset r l
pure $ all (`elem` l) ss
]
, testGroup "chooseRange"
[ testProperty "chooses in the range"
$ \(rng :: Interval Int) ->
not (Interval.null rng)
==> randomTest ( do
chooseRange rng >>= \case
Just r -> pure
. counterexample (show r)
$ r `Interval.member` rng
Nothing -> pure $ property Discard
)
, testProperty "nonEmpty range is never empty"
$ \ (lower :: Int) (NonZero diff) -> randomTest $ do
let upper = lower + diff
r <- chooseRange (Finite lower <=..<= Finite upper)
pure $ isJust r
]
]

View file

@ -4,7 +4,7 @@ cabal-version: 1.12
--
-- see: https://github.com/sol/hpack
--
-- hash: bba18b2b297d73ddcb0a2c365e597a183e6b612ad336e97ca06d9ce87b989656
-- hash: 3887c4e473843f80e65cb0ae8a1def8fc4871de33e9f425a08820e9a8942e99c
name: xanthous
version: 0.1.0.0
@ -71,6 +71,7 @@ library
Xanthous.Generators.Level.LevelContents
Xanthous.Generators.Level.Util
Xanthous.Generators.Level.Village
Xanthous.Generators.Speech
Xanthous.Messages
Xanthous.Messages.Template
Xanthous.Monad
@ -136,6 +137,7 @@ library
, containers
, criterion
, data-default
, data-interval
, deepseq
, directory
, fgl
@ -227,6 +229,7 @@ executable xanthous
Xanthous.Generators.Level.LevelContents
Xanthous.Generators.Level.Util
Xanthous.Generators.Level.Village
Xanthous.Generators.Speech
Xanthous.Messages
Xanthous.Messages.Template
Xanthous.Monad
@ -291,6 +294,7 @@ executable xanthous
, containers
, criterion
, data-default
, data-interval
, deepseq
, directory
, fgl
@ -412,6 +416,7 @@ test-suite test
, containers
, criterion
, data-default
, data-interval
, deepseq
, directory
, fgl
@ -520,6 +525,7 @@ benchmark benchmark
, containers
, criterion
, data-default
, data-interval
, deepseq
, directory
, fgl