Skip to content

Commit

Permalink
Add special treatment of clitics for grammar checkers
Browse files Browse the repository at this point in the history
- add lemma
- add POS Pcle
- remove initial ∑#
  • Loading branch information
snomos committed Nov 15, 2023
1 parent 404255b commit eaa9c30
Showing 1 changed file with 42 additions and 0 deletions.
42 changes: 42 additions & 0 deletions src/fst/clitics.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ LEXICON K_not_ge !!≈ * `@CODE@` - mainly referenced by numerals

LEXICON K_not_ge_cont
K-nai ;
! Non-grammar checker section, everything is just a clitic, no lemma, just tags:
+Qst+Use/-GC:∑#go ENDLEX ;
+Qst+Foc/son+Use/-GC:∑#goson ENDLEX ;
+Qst+Foc/s+Use/-GC:∑#gos ENDLEX ;
Expand All @@ -137,6 +138,25 @@ LEXICON K_not_ge_cont
+Foc/behan+Use/-GC+Use/NG:∑#be∑#han ENDLEX ;
+Foc/bason+Use/-GC+Use/NG:∑#ba∑#son ENDLEX ;
+Foc/beson+Use/-GC+Use/NG:∑#be∑#son ENDLEX ;
! Grammar checker section, everything will be a separate token, and needs a lemma:
! Entry shape: lemma+Pcle+<clitic tags>+Use/GC:surface. The upper (analysis)
! side starts with the clitic itself as lemma followed by +Pcle; the lower
! (surface) side is the bare clitic string.
! In contrast to the +Use/-GC entries of this lexicon, the lower side has no
! initial ∑# here. NOTE(review): presumably the ∑# is already inserted by the
! lexicon that continues into this one on the grammar checker path - confirm.
go+Pcle+Qst+Use/GC:go ENDLEX ;
goson+Pcle+Qst+Foc/son+Use/GC:goson ENDLEX ;
gos+Pcle+Qst+Foc/s+Use/GC:gos ENDLEX ;
mat+Pcle+Foc/mat+Use/GC:mat ENDLEX ;
mis+Pcle+Foc/mis+Use/GC:mis ENDLEX ;
ba+Pcle+Foc/ba+Use/GC:ba ENDLEX ;
be+Pcle+Foc/be+Use/GC:be ENDLEX ;
bat+Pcle+Foc/bat+Use/GC:bat ENDLEX ;
bai+Pcle+Foc/bai+Use/GC:bai ENDLEX ;
ban+Pcle+Foc/ban+Use/GC:ban ENDLEX ;
bas+Pcle+Foc/bas+Use/GC:bas ENDLEX ;
son+Pcle+Foc/son+Use/GC:son ENDLEX ; ! makkárson
! Compound clitics: the whole compound is the lemma, and the ∑# between the
! two parts is kept on the lower side. These entries also carry +Use/NG.
bahal+Pcle+Foc/bahal+Use/GC+Use/NG:ba∑#hal ENDLEX ;
behal+Pcle+Foc/behal+Use/GC+Use/NG:be∑#hal ENDLEX ;
bahan+Pcle+Foc/bahan+Use/GC+Use/NG:ba∑#han ENDLEX ;
behan+Pcle+Foc/behan+Use/GC+Use/NG:be∑#han ENDLEX ;
bason+Pcle+Foc/bason+Use/GC+Use/NG:ba∑#son ENDLEX ;
beson+Pcle+Foc/beson+Use/GC+Use/NG:be∑#son ENDLEX ;

LEXICON K-default-neg
! This lexicon is referenced directly by some verb stem lexicons (but really affix lexicons).
Expand All @@ -146,7 +166,10 @@ LEXICON K-default-neg

LEXICON K-default-neg_cont
! Either continue to K-default-only without adding anything, or attach the
! clitic ge (tagged +Foc/Neg-ge) and end the word.
K-default-only ;
! Non-grammar checker section, everything is just a clitic, no lemma, just tags:
+Foc/Neg-ge+Use/-GC:∑#ge ENDLEX ;
! Grammar checker section, everything will be a separate token, and needs a lemma:
! The entry has ge as lemma plus +Pcle, and no initial ∑# on the lower side.
ge+Pcle+Foc/Neg-ge+Use/GC:ge ENDLEX ;

LEXICON K-ge-neg
! This lexicon is referenced directly by some verb stem lexicons (but really affix lexicons).
Expand All @@ -156,7 +179,10 @@ LEXICON K-ge-neg

LEXICON K-ge-neg_cont
! Either continue to K-gen-han-only without adding anything, or attach the
! clitic ge (tagged +Foc/Neg-ge) and end the word.
K-gen-han-only ;
! Non-grammar checker section, everything is just a clitic, no lemma, just tags:
+Foc/Neg-ge+Use/-GC:∑#ge ENDLEX ; !
! Grammar checker section, everything will be a separate token, and needs a lemma:
! The entry has ge as lemma plus +Pcle, and no initial ∑# on the lower side.
ge+Pcle+Foc/Neg-ge+Use/GC:ge ENDLEX ; !

LEXICON K-son
! This lexicon is referenced directly by pronoun affix lexicons, and by adverb stems.
Expand All @@ -166,6 +192,7 @@ LEXICON K-son
< "+Use/GC":0 "@P.Pmatch.Loc@" 0:"∑" 0:"#" > K-son_cont ; !!≈ * `@CODE@` - the grammar checker case: force the clitics to always be treated as a separate token

LEXICON K-son_cont
! Non-grammar checker section, everything is just a clitic, no lemma, just tags:
+Foc/son+Use/-GC:∑#son ENDLEX ;
+Foc/hal+Use/-GC:∑#hal ENDLEX ; ! ! XXX Is this required?
+Foc/bat+Use/-GC:∑#bat ENDLEX ;
Expand All @@ -180,6 +207,21 @@ LEXICON K-son_cont
+Foc/behan+Use/-GC+Use/NG:∑#be∑#han ENDLEX ;
+Foc/bason+Use/-GC+Use/NG:∑#ba∑#son ENDLEX ;
+Foc/beson+Use/-GC+Use/NG:∑#be∑#son ENDLEX ;
! Grammar checker section, everything will be a separate token, and needs a lemma:
! Entry shape: lemma+Pcle+<clitic tags>+Use/GC:surface. The upper (analysis)
! side starts with the clitic itself as lemma followed by +Pcle.
! The lower side has no initial ∑#: the grammar checker entry in K-son that
! leads here already inserts ∑ and # before continuing to this lexicon.
son+Pcle+Foc/son+Use/GC:son ENDLEX ;
hal+Pcle+Foc/hal+Use/GC:hal ENDLEX ; ! ! XXX Is this required?
bat+Pcle+Foc/bat+Use/GC:bat ENDLEX ;
bai+Pcle+Foc/bai+Use/GC:bai ENDLEX ;
ban+Pcle+Foc/ban+Use/GC:ban ENDLEX ;
bas+Pcle+Foc/bas+Use/GC:bas ENDLEX ;
ba+Pcle+Foc/ba+Use/GC:ba ENDLEX ;
be+Pcle+Foc/be+Use/GC:be ENDLEX ;
! Compound clitics: the whole compound is the lemma, and the ∑# between the
! two parts is kept on the lower side. These entries also carry +Use/NG.
bahal+Pcle+Foc/bahal+Use/GC+Use/NG:ba∑#hal ENDLEX ;
behal+Pcle+Foc/behal+Use/GC+Use/NG:be∑#hal ENDLEX ;
bahan+Pcle+Foc/bahan+Use/GC+Use/NG:ba∑#han ENDLEX ;
behan+Pcle+Foc/behan+Use/GC+Use/NG:be∑#han ENDLEX ;
bason+Pcle+Foc/bason+Use/GC+Use/NG:ba∑#son ENDLEX ;
beson+Pcle+Foc/beson+Use/GC+Use/NG:be∑#son ENDLEX ;

! This is a Continuation Class for Interrogative Pronouns and Adverbs like
! gii(son), gosa(son), etc. These have now been added to the lexicon(s).
Expand Down

0 comments on commit eaa9c30

Please sign in to comment.