feat(xanthous): Add a generator for random english syllables
Add a new "speech" generator module, with the beginnings of the vague definition of the phonotactics of the language (there's one in here for English based on the wikipedia article for English phonology, but it's less than ideal as it has generated words like "sprurlkt") and the ability to generate random syllables of a language by picking an onset, nucleus, and coda from the list for that language (within a range of the number of allowed of each syllable part). This will be used down the road to automatically generate utterances from various non-english-speaking creatures (so the accuracy is less important, just that it "feels real"). Change-Id: I7b81375ec595239c05c5c800cbde1a2a900e38ac Reviewed-on: https://cl.tvl.fyi/c/depot/+/3202 Reviewed-by: grfn <grfn@gws.fyi> Tested-by: BuildkiteCI
This commit is contained in:
parent
006e5231e5
commit
2cfe4069bb
7 changed files with 216 additions and 26 deletions
|
@ -33,6 +33,7 @@ dependencies:
|
|||
- containers
|
||||
- criterion
|
||||
- data-default
|
||||
- data-interval
|
||||
- deepseq
|
||||
- directory
|
||||
- fgl
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
{ mkDerivation, aeson, array, async, base, bifunctors, brick
|
||||
, checkers, classy-prelude, comonad, comonad-extras, constraints
|
||||
, containers, criterion, data-default, deepseq, directory, fgl
|
||||
, fgl-arbitrary, file-embed, filepath, generic-arbitrary
|
||||
, generic-lens, groups, hgeometry, hgeometry-combinatorial, hpack
|
||||
, JuicyPixels, lens, lens-properties, lib, lifted-async, linear
|
||||
, megaparsec, mmorph, monad-control, MonadRandom, mtl
|
||||
, optparse-applicative, parallel, parser-combinators, pointed
|
||||
, QuickCheck, quickcheck-instances, quickcheck-text, random
|
||||
, random-extras, random-fu, random-source, Rasterific
|
||||
, raw-strings-qq, reflection, semigroupoids, semigroups, splitmix
|
||||
, stache, streams, tasty, tasty-hunit, tasty-quickcheck, text
|
||||
, text-zipper, tomland, transformers, vector, vty, witherable, yaml
|
||||
, zlib
|
||||
, containers, criterion, data-default, data-interval, deepseq
|
||||
, directory, fgl, fgl-arbitrary, file-embed, filepath
|
||||
, generic-arbitrary, generic-lens, groups, hgeometry
|
||||
, hgeometry-combinatorial, hpack, JuicyPixels, lens
|
||||
, lens-properties, lib, lifted-async, linear, megaparsec, mmorph
|
||||
, monad-control, MonadRandom, mtl, optparse-applicative, parallel
|
||||
, parser-combinators, pointed, QuickCheck, quickcheck-instances
|
||||
, quickcheck-text, random, random-extras, random-fu, random-source
|
||||
, Rasterific, raw-strings-qq, reflection, semigroupoids, semigroups
|
||||
, splitmix, stache, streams, tasty, tasty-hunit, tasty-quickcheck
|
||||
, text, text-zipper, tomland, transformers, vector, vty, witherable
|
||||
, yaml, zlib
|
||||
}:
|
||||
mkDerivation {
|
||||
pname = "xanthous";
|
||||
|
@ -22,8 +22,8 @@ mkDerivation {
|
|||
libraryHaskellDepends = [
|
||||
aeson array async base bifunctors brick checkers classy-prelude
|
||||
comonad comonad-extras constraints containers criterion
|
||||
data-default deepseq directory fgl fgl-arbitrary file-embed
|
||||
filepath generic-arbitrary generic-lens groups hgeometry
|
||||
data-default data-interval deepseq directory fgl fgl-arbitrary
|
||||
file-embed filepath generic-arbitrary generic-lens groups hgeometry
|
||||
hgeometry-combinatorial JuicyPixels lens lifted-async linear
|
||||
megaparsec mmorph monad-control MonadRandom mtl
|
||||
optparse-applicative parallel parser-combinators pointed QuickCheck
|
||||
|
@ -36,8 +36,8 @@ mkDerivation {
|
|||
executableHaskellDepends = [
|
||||
aeson array async base bifunctors brick checkers classy-prelude
|
||||
comonad comonad-extras constraints containers criterion
|
||||
data-default deepseq directory fgl fgl-arbitrary file-embed
|
||||
filepath generic-arbitrary generic-lens groups hgeometry
|
||||
data-default data-interval deepseq directory fgl fgl-arbitrary
|
||||
file-embed filepath generic-arbitrary generic-lens groups hgeometry
|
||||
hgeometry-combinatorial JuicyPixels lens lifted-async linear
|
||||
megaparsec mmorph monad-control MonadRandom mtl
|
||||
optparse-applicative parallel parser-combinators pointed QuickCheck
|
||||
|
@ -49,8 +49,8 @@ mkDerivation {
|
|||
testHaskellDepends = [
|
||||
aeson array async base bifunctors brick checkers classy-prelude
|
||||
comonad comonad-extras constraints containers criterion
|
||||
data-default deepseq directory fgl fgl-arbitrary file-embed
|
||||
filepath generic-arbitrary generic-lens groups hgeometry
|
||||
data-default data-interval deepseq directory fgl fgl-arbitrary
|
||||
file-embed filepath generic-arbitrary generic-lens groups hgeometry
|
||||
hgeometry-combinatorial JuicyPixels lens lens-properties
|
||||
lifted-async linear megaparsec mmorph monad-control MonadRandom mtl
|
||||
optparse-applicative parallel parser-combinators pointed QuickCheck
|
||||
|
@ -63,8 +63,8 @@ mkDerivation {
|
|||
benchmarkHaskellDepends = [
|
||||
aeson array async base bifunctors brick checkers classy-prelude
|
||||
comonad comonad-extras constraints containers criterion
|
||||
data-default deepseq directory fgl fgl-arbitrary file-embed
|
||||
filepath generic-arbitrary generic-lens groups hgeometry
|
||||
data-default data-interval deepseq directory fgl fgl-arbitrary
|
||||
file-embed filepath generic-arbitrary generic-lens groups hgeometry
|
||||
hgeometry-combinatorial JuicyPixels lens lifted-async linear
|
||||
megaparsec mmorph monad-control MonadRandom mtl
|
||||
optparse-applicative parallel parser-combinators pointed QuickCheck
|
||||
|
|
108
users/grfn/xanthous/src/Xanthous/Generators/Speech.hs
Normal file
108
users/grfn/xanthous/src/Xanthous/Generators/Speech.hs
Normal file
|
@ -0,0 +1,108 @@
|
|||
{-# LANGUAGE TemplateHaskell #-}
|
||||
--------------------------------------------------------------------------------
|
||||
module Xanthous.Generators.Speech
|
||||
( -- * Abstract phonotactics
|
||||
Phonotactics(..)
|
||||
-- ** Lenses
|
||||
, onsets
|
||||
, nuclei
|
||||
, codas
|
||||
, numOnsets
|
||||
, numNuclei
|
||||
, numCodas
|
||||
|
||||
-- ** Definitions for languages
|
||||
, english
|
||||
|
||||
-- * Language generation
|
||||
, syllable
|
||||
|
||||
) where
|
||||
--------------------------------------------------------------------------------
|
||||
import Xanthous.Prelude hiding (replicateM)
|
||||
import Data.Interval (Interval)
|
||||
import qualified Data.Interval as Interval
|
||||
import Control.Monad.Random.Class (MonadRandom)
|
||||
import Xanthous.Random (chooseRange, choose, ChooseElement (ChooseElement))
|
||||
import Control.Monad (replicateM)
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
newtype Phoneme = Phoneme Text
|
||||
deriving stock (Show, Eq, Generic)
|
||||
deriving anyclass (NFData)
|
||||
deriving newtype (IsString, Semigroup, Monoid)
|
||||
|
||||
data Phonotactics = Phonotactics
|
||||
{ _onsets :: [Phoneme]
|
||||
, _nuclei :: [Phoneme]
|
||||
, _codas :: [Phoneme]
|
||||
, _numOnsets :: Interval Word
|
||||
, _numNuclei :: Interval Word
|
||||
, _numCodas :: Interval Word
|
||||
}
|
||||
deriving stock (Show, Eq, Generic)
|
||||
deriving anyclass (NFData)
|
||||
makeLenses ''Phonotactics
|
||||
|
||||
syllable :: MonadRandom m => Phonotactics -> m Text
|
||||
syllable phonotactics = do
|
||||
let genPart num choices = do
|
||||
n <- fromIntegral . fromMaybe 0 <$> chooseRange (phonotactics ^. num)
|
||||
fmap (fromMaybe mempty . mconcat)
|
||||
. replicateM n
|
||||
. choose . ChooseElement
|
||||
$ phonotactics ^. choices
|
||||
|
||||
(Phoneme onset) <- genPart numOnsets onsets
|
||||
(Phoneme nucleus) <- genPart numNuclei nuclei
|
||||
(Phoneme coda) <- genPart numCodas codas
|
||||
|
||||
pure $ onset <> nucleus <> coda
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- <https://en.wikipedia.org/wiki/English_phonology#Phonotactics>
|
||||
english :: Phonotactics
|
||||
english = Phonotactics
|
||||
{ _onsets = [ "pl" , "bl" , "kl" , "gl" , "pr" , "br" , "tr" , "dr" , "kr"
|
||||
, "gr" , "tw" , "dw" , "gw" , "kw" , "pw"
|
||||
|
||||
, "fl" , "sl" , {- "thl", -} "shl" {- , "vl" -}
|
||||
, "p", "b", "t", "d", "k", "ɡ", "m", "n", "f", "v", "th", "s"
|
||||
, "z", "h", "l", "w"
|
||||
|
||||
, "sp", "st", "sk"
|
||||
|
||||
, "sm", "sn"
|
||||
|
||||
, "sf", "sθ"
|
||||
|
||||
, "spl", "skl", "spr", "str", "skr", "skw", "sm", "sp", "st", "sk"
|
||||
]
|
||||
, _nuclei = [ "a", "e", "i", "o", "u", "ur", "ar", "or", "ear", "are", "ure"
|
||||
, "oa", "ee", "oo", "ei", "ie", "oi", "ou"
|
||||
]
|
||||
, _codas = [ "m", "n", "ng", "p", "t", "tsh", "k", "f", "sh", "s", "th", "x"
|
||||
, "v", "z", "zh", "l", "r", "w"
|
||||
|
||||
, "lk", "lb", "lt", "ld", "ltsh", "ldsh", "lk"
|
||||
, "rp", "rb", "rt", "rd", "rtsh", "rdsh", "rk", "rɡ"
|
||||
, "lf", "lv", "lth", "ls", "lz", "lsh", "lth"
|
||||
, "rf", "rv", "rth", "rs", "rz", "rth"
|
||||
, "lm", "ln"
|
||||
, "rm", "rn", "rl"
|
||||
, "mp", "nt", "nd", "nth", "nsh", "ŋk"
|
||||
, "mf", "ms", "mth", "nf", "nth", "ns", "nz", "ŋθ"
|
||||
, "ft", "sp", "st", "sk"
|
||||
, "fth"
|
||||
, "pt", "kt"
|
||||
, "pth", "ps", "th", "ts", "dth", "dz", "ks"
|
||||
, "lpt", "lps", "lfth", "lts", "lst", "lkt", "lks"
|
||||
, "rmth", "rpt", "rps", "rts", "rst", "rkt"
|
||||
, "mpt", "mps", "ndth", "ŋkt", "ŋks", "ŋkth"
|
||||
, "ksth", "kst"
|
||||
]
|
||||
, _numOnsets = Interval.singleton 1
|
||||
, _numNuclei = Interval.singleton 1
|
||||
, _numCodas = Interval.singleton 1
|
||||
}
|
|
@ -1,10 +1,10 @@
|
|||
{-# LANGUAGE RecordWildCards #-}
|
||||
{-# LANGUAGE StandaloneDeriving #-}
|
||||
{-# LANGUAGE UndecidableInstances #-}
|
||||
{-# LANGUAGE PatternSynonyms #-}
|
||||
{-# LANGUAGE PackageImports #-}
|
||||
{-# OPTIONS_GHC -Wno-orphans #-}
|
||||
--------------------------------------------------------------------------------
|
||||
{-# OPTIONS_GHC -Wno-deferred-type-errors #-}
|
||||
module Xanthous.Orphans
|
||||
( ppTemplate
|
||||
) where
|
||||
|
@ -31,6 +31,8 @@ import Linear
|
|||
--------------------------------------------------------------------------------
|
||||
import Xanthous.Util.JSON
|
||||
import Xanthous.Util.QuickCheck
|
||||
import qualified Data.Interval as Interval
|
||||
import Data.Interval (Interval, Extended (..))
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
instance forall s a.
|
||||
|
@ -350,3 +352,23 @@ deriving newtype instance (Arbitrary s, CoArbitrary (m (a, s)))
|
|||
deriving via (GenericArbitrary (V2 a)) instance Arbitrary a => Arbitrary (V2 a)
|
||||
instance CoArbitrary a => CoArbitrary (V2 a)
|
||||
instance Function a => Function (V2 a)
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
instance Arbitrary r => Arbitrary (Extended r) where
|
||||
arbitrary = oneof [ pure NegInf
|
||||
, pure PosInf
|
||||
, Finite <$> arbitrary
|
||||
]
|
||||
|
||||
instance Arbitrary Interval.Boundary where
|
||||
arbitrary = elements [ Interval.Open , Interval.Closed ]
|
||||
|
||||
instance (Ord r, Arbitrary r) => Arbitrary (Interval r) where
|
||||
arbitrary = do
|
||||
lower <- arbitrary
|
||||
upper <- arbitrary
|
||||
pure $ (if upper < lower then flip else id)
|
||||
Interval.interval
|
||||
lower
|
||||
upper
|
||||
|
|
|
@ -11,9 +11,10 @@ module Xanthous.Random
|
|||
, subRand
|
||||
, chance
|
||||
, chooseSubset
|
||||
, chooseRange
|
||||
) where
|
||||
--------------------------------------------------------------------------------
|
||||
import Xanthous.Prelude
|
||||
import Xanthous.Prelude
|
||||
--------------------------------------------------------------------------------
|
||||
import Data.List.NonEmpty (NonEmpty(..))
|
||||
import Control.Monad.Random.Class (MonadRandom(getRandomR, getRandom))
|
||||
|
@ -25,6 +26,9 @@ import Data.Random.Distribution.Uniform
|
|||
import Data.Random.Distribution.Uniform.Exclusive
|
||||
import Data.Random.Sample
|
||||
import qualified Data.Random.Source as DRS
|
||||
import Data.Interval ( Interval, lowerBound', Extended (Finite)
|
||||
, upperBound', Boundary (Closed)
|
||||
)
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
instance {-# INCOHERENT #-} (Monad m, MonadRandom m) => DRS.MonadRandom m where
|
||||
|
@ -81,11 +85,13 @@ evenlyWeighted = Weighted . itoList
|
|||
weightedBy :: Functor t => (a -> w) -> t a -> Weighted w t a
|
||||
weightedBy weighting xs = Weighted $ (weighting &&& id) <$> xs
|
||||
|
||||
instance (Num w, Ord w, Distribution Uniform w, Excludable w) => Choose (Weighted w [] a) where
|
||||
instance (Num w, Ord w, Distribution Uniform w, Excludable w)
|
||||
=> Choose (Weighted w [] a) where
|
||||
type RandomResult (Weighted w [] a) = Maybe a
|
||||
choose (Weighted ws) = sample $ headMay <$> weightedSample 1 ws
|
||||
|
||||
instance (Num w, Ord w, Distribution Uniform w, Excludable w) => Choose (Weighted w NonEmpty a) where
|
||||
instance (Num w, Ord w, Distribution Uniform w, Excludable w)
|
||||
=> Choose (Weighted w NonEmpty a) where
|
||||
type RandomResult (Weighted w NonEmpty a) = a
|
||||
choose (Weighted ws) =
|
||||
sample
|
||||
|
@ -112,6 +118,33 @@ chooseSubset :: ( Num w, Ord w, Distribution Uniform w, Excludable w
|
|||
) => w -> t a -> m (t a)
|
||||
chooseSubset = filterA . const . chance
|
||||
|
||||
-- | Choose a random @n@ in the given interval
|
||||
chooseRange
|
||||
:: ( MonadRandom m
|
||||
, Distribution Uniform n
|
||||
, Enum n
|
||||
, Bounded n, Show n, Ord n)
|
||||
=> Interval n
|
||||
-> m (Maybe n)
|
||||
chooseRange int = traverse sample distribution
|
||||
where
|
||||
(lower, lowerBoundary) = lowerBound' int
|
||||
lowerR = case lower of
|
||||
Finite x -> if lowerBoundary == Closed
|
||||
then x
|
||||
else succ x
|
||||
_ -> minBound
|
||||
(upper, upperBoundary) = upperBound' int
|
||||
upperR = case upper of
|
||||
Finite x -> if upperBoundary == Closed
|
||||
then x
|
||||
else pred x
|
||||
_ -> maxBound
|
||||
distribution
|
||||
| lowerR <= upperR = Just $ Uniform lowerR upperR
|
||||
| otherwise = Nothing
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
bools :: NonEmpty Bool
|
||||
|
|
|
@ -5,7 +5,10 @@ import Test.Prelude
|
|||
--------------------------------------------------------------------------------
|
||||
import Control.Monad.Random
|
||||
--------------------------------------------------------------------------------
|
||||
import Xanthous.Random
|
||||
import Xanthous.Random
|
||||
import Xanthous.Orphans ()
|
||||
import qualified Data.Interval as Interval
|
||||
import Data.Interval (Interval, Extended (Finite), (<=..<=))
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
main :: IO ()
|
||||
|
@ -18,6 +21,23 @@ test = testGroup "Xanthous.Random"
|
|||
$ \(l :: [Int]) (Positive (r :: Double)) -> randomTest $ do
|
||||
ss <- chooseSubset r l
|
||||
pure $ all (`elem` l) ss
|
||||
]
|
||||
, testGroup "chooseRange"
|
||||
[ testProperty "chooses in the range"
|
||||
$ \(rng :: Interval Int) ->
|
||||
not (Interval.null rng)
|
||||
==> randomTest ( do
|
||||
chooseRange rng >>= \case
|
||||
Just r -> pure
|
||||
. counterexample (show r)
|
||||
$ r `Interval.member` rng
|
||||
Nothing -> pure $ property Discard
|
||||
)
|
||||
, testProperty "nonEmpty range is never empty"
|
||||
$ \ (lower :: Int) (NonZero diff) -> randomTest $ do
|
||||
let upper = lower + diff
|
||||
r <- chooseRange (Finite lower <=..<= Finite upper)
|
||||
pure $ isJust r
|
||||
|
||||
]
|
||||
]
|
||||
|
|
|
@ -4,7 +4,7 @@ cabal-version: 1.12
|
|||
--
|
||||
-- see: https://github.com/sol/hpack
|
||||
--
|
||||
-- hash: bba18b2b297d73ddcb0a2c365e597a183e6b612ad336e97ca06d9ce87b989656
|
||||
-- hash: 3887c4e473843f80e65cb0ae8a1def8fc4871de33e9f425a08820e9a8942e99c
|
||||
|
||||
name: xanthous
|
||||
version: 0.1.0.0
|
||||
|
@ -71,6 +71,7 @@ library
|
|||
Xanthous.Generators.Level.LevelContents
|
||||
Xanthous.Generators.Level.Util
|
||||
Xanthous.Generators.Level.Village
|
||||
Xanthous.Generators.Speech
|
||||
Xanthous.Messages
|
||||
Xanthous.Messages.Template
|
||||
Xanthous.Monad
|
||||
|
@ -136,6 +137,7 @@ library
|
|||
, containers
|
||||
, criterion
|
||||
, data-default
|
||||
, data-interval
|
||||
, deepseq
|
||||
, directory
|
||||
, fgl
|
||||
|
@ -227,6 +229,7 @@ executable xanthous
|
|||
Xanthous.Generators.Level.LevelContents
|
||||
Xanthous.Generators.Level.Util
|
||||
Xanthous.Generators.Level.Village
|
||||
Xanthous.Generators.Speech
|
||||
Xanthous.Messages
|
||||
Xanthous.Messages.Template
|
||||
Xanthous.Monad
|
||||
|
@ -291,6 +294,7 @@ executable xanthous
|
|||
, containers
|
||||
, criterion
|
||||
, data-default
|
||||
, data-interval
|
||||
, deepseq
|
||||
, directory
|
||||
, fgl
|
||||
|
@ -412,6 +416,7 @@ test-suite test
|
|||
, containers
|
||||
, criterion
|
||||
, data-default
|
||||
, data-interval
|
||||
, deepseq
|
||||
, directory
|
||||
, fgl
|
||||
|
@ -520,6 +525,7 @@ benchmark benchmark
|
|||
, containers
|
||||
, criterion
|
||||
, data-default
|
||||
, data-interval
|
||||
, deepseq
|
||||
, directory
|
||||
, fgl
|
||||
|
|
Loading…
Reference in a new issue