1
- {-| Core reply generation facilities
1
+ {- | Core reply generation facilities
2
2
3
- These functions provide various ways to generate, filter, and
4
- restrict the final set of replies created in response to a phrase
5
- for a supplied 'Megahaskhal.Internal.Brain'.
3
+ These functions provide various ways to generate, filter, and restrict
4
+ the final set of replies created in response to a phrase for a supplied
5
+ 'Megahaskhal.Internal.Brain'.
6
+
7
+ Example usage to generate a response provided a brain:
8
+
9
+ > import qualified Data.Text as T
10
+ > import Megahaskhal (Brain)
11
+ > import Megahaskhal.Reply (generateReply, getWords)
12
+ >
13
+ > printReply :: Brain -> IO T.Text
14
+ > printReply brain = do
15
+ > phrase <- getLine
16
+ > generateReply brain (getWords phrase)
6
17
7
18
-}
8
19
@@ -16,9 +27,8 @@ module Megahaskhal.Reply (
16
27
-- * Pipe Components
17
28
-- $pipes
18
29
replyProducer
19
- , dropShortReply
20
- , dropLongReply
21
- , generateReplies
30
+ , collectTopReplies
31
+ , generateReply
22
32
23
33
-- * Replies
24
34
-- $replies
@@ -91,44 +101,34 @@ replyProducer brain phrase = do
91
101
lift $ setStdGen newGen
92
102
replyProducer brain phrase
93
103
94
- -- | Drop replies that are shorter than the length specified
95
- dropShortReply :: Int -> Pipe ScoredReply ScoredReply IO ()
96
- dropShortReply n = do
97
- repl <- await
98
- let repLen = T. length $ sReply repl
99
- if repLen > n
100
- then yield repl >> dropShortReply n
101
- else dropShortReply n
102
-
103
- -- | Drop replies that are longer than the length specified
104
- dropLongReply :: Int -> Pipe ScoredReply ScoredReply IO ()
105
- dropLongReply n = do
104
+ dropOutsideBounds :: Int -> Int -> Pipe ScoredReply ScoredReply IO ()
105
+ dropOutsideBounds l u = do
106
106
repl <- await
107
107
let repLen = T. length $ sReply repl
108
- if repLen < n
109
- then yield repl >> dropLongReply n
110
- else dropLongReply n
108
+ if l <= repLen && repLen <= u
109
+ then yield repl >> dropOutsideBounds l u
110
+ else dropOutsideBounds l u
111
111
112
112
-- | Collect top replies
113
113
collectTopReplies :: TopReplies -> Parser ScoredReply IO TopReplies
114
114
collectTopReplies tr = do
115
- reply <- draw
116
- case reply of
115
+ repl <- draw
116
+ case repl of
117
117
Nothing -> return tr
118
118
Just r -> collectTopReplies (addReply r tr)
119
119
120
120
-- | Generate a suitable amount of replies and return the highest scored
121
- generateReplies :: Brain -> [Text ] -> IO Text
122
- generateReplies brain phrase = do
123
- times <- getStdRandom (randomR (500 ,1500 ))
121
+ generateReply :: Brain -> [Text ] -> IO Text
122
+ generateReply brain phrase = do
123
+ times <- getStdRandom (randomR (800 ,1500 ))
124
124
replies <- evalStateT parser $
125
- producer >-> dropShortReply 25
126
- >-> dropLongReply 5000
125
+ producer >-> dropOutsideBounds 25 5000
127
126
>-> PL. take times
128
127
let cc = length $ allReplies replies
129
128
index <- getStdRandom (randomR (0 , max 0 (cc- 1 )))
130
129
let repl = allReplies replies !! index
131
130
return $ capitalizeSentence . sReply $ repl
131
+ -- return $ repl { sReply=(capitalizeSentence . sReply) repl }
132
132
where producer = replyProducer brain phrase
133
133
parser = collectTopReplies $ empty 20
134
134
@@ -251,16 +251,18 @@ processWords ctx dict order keywords replies usedKey symbol = do
251
251
replyWords = symbol: replies
252
252
newCtx = updateContext ctx order symbol
253
253
254
- -- Evaluate the 'surprise' factor of a given choice of reply words
255
- -- The surprise factor is based entirely around whether the chosen reply
256
- -- includes words used in the keywords that were supplied.
257
- -- For every word in the reply, the context is built up and iterated over
258
- -- regardless of if the word is a keyword. When a keyword is hit, a subloop
259
- -- runs for that portion over the entire context at that state determining if
260
- -- any of the tree's contain the symbol and updating the probability, at the
261
- -- end of which the entropy is updated.
254
+ {-| Evaluate the 'surprise' factor of a given choice of reply words
255
+
256
+ The surprise factor is based entirely around whether the chosen reply
257
+ includes words used in the keywords that were supplied. For every word
258
+ in the reply, the context is built up and iterated over regardless of
259
+ if the word is a keyword. When a keyword is hit, a subloop runs for
260
+ that portion over the entire context at that state determining if any
261
+ of the tree's contain the symbol and updating the probability, at the
262
+ end of which the entropy is updated.
263
+
264
+ -}
262
265
evaluateReply :: Brain -> Keywords -> Replies -> Float
263
- evaluateReply _ [] _ = 0
264
266
evaluateReply (Brain fTree bTree _ order _) keys repl
265
267
| num < 8 = entropy
266
268
| num < 16 = entropy / sqrt (num- 1 )
@@ -274,16 +276,16 @@ evaluateReply (Brain fTree bTree _ order _) keys repl
274
276
num = eNum bctx
275
277
entropy = eEntropy bctx
276
278
277
- - - entropy and num accumulator that retains updated context as each tree
278
- -- in the context is stepped through
279
+ { - entropy and num accumulator that retains updated context as each tree
280
+ in the context is stepped through -}
279
281
evalulateSymbols :: Order
280
282
-> Keywords
281
283
-> EContext -- ^ Accumulator
282
284
-> Int -- ^ Current symbol
283
285
-> EContext -- ^ Accumulated value
284
286
evalulateSymbols order keys acc@ (EContext accNum accEntropy ctx) symbol
285
- | symbol `elem` keys ||
286
- null keys = EContext (accNum + 1 ) newEntropy newctx
287
+ | symbol `elem` keys
288
+ || null keys = EContext (accNum + 1 ) newEntropy newctx
287
289
| otherwise = acc { eContext = newctx }
288
290
where
289
291
-- we always update the context with the symbol on each step, even
@@ -314,13 +316,17 @@ evaluateContext symbol (!count, !prob) tree =
314
316
{-| Transform a single @Text@ phrase into its component parts suitable to
315
317
be fed into a reply generating function.
316
318
317
- Rules for tokenization:
318
- Four character classes: alpha, digit, apostrophe, and other
319
- If the character class changed from the previous to current character, then
320
- it is a boundary. The only special case is alpha -> apostrophe -> alpha,
321
- which is not considered to be a boundary (it's considered to be alpha).
322
- If the last word is alphanumeric then add a last word of ".", otherwise
323
- replace the last word with "." unless it already ends with one of "!.?".
319
+ Rules for tokenization:
320
+ Four character classes: alpha, digit, apostrophe, and other
321
+
322
+ If the character class changed from the previous to current character,
323
+ then it is a boundary. The only special case is alpha -> apostrophe ->
324
+ alpha, which is not considered to be a boundary (it's considered to be
325
+ alpha).
326
+
327
+ If the last word is alphanumeric then add a last word of ".", otherwise
328
+ replace the last word with "." unless it already ends with one of
329
+ "!.?".
324
330
325
331
-}
326
332
getWords :: Text -> [Text ]
0 commit comments