Skip to content

Commit

Permalink
Prepare for proper grammar checker tokenisation
Browse files Browse the repository at this point in the history
  • Loading branch information
snomos committed Nov 15, 2023
1 parent 9f01cc3 commit a7c6fc9
Showing 1 changed file with 57 additions and 36 deletions.
93 changes: 57 additions & 36 deletions src/fst/clitics.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -112,54 +112,75 @@ LEXICON K-ge-only-cont
!! The following lexicons are not referenced by the `K` lexicon, but are referenced directly in specific cases.

LEXICON K_not_ge !!≈ * `@CODE@` - mainly referenced by numerals
! This lexicon is referenced directly by numeral stems and noun affix lexicons.
! Entry point with three alternatives: stop without adding anything, or
! continue to K_not_ge_cont either directly or through the grammar checker
! prefix that inserts a token boundary first.
! No clitic: empty entry that continues straight to ENDLEX (defined outside this view).
ENDLEX ;
+Use/-GC: K_not_ge_cont ; !!≈ * `@CODE@` - regular clitic analysis, everywhere but in the grammar checker
! Regex entry: "+Use/GC" is paired with nothing on the lower side, the flag
! diacritic @P.Pmatch.Loc@ is set, and "∑" plus "#" are inserted on the lower side.
< "+Use/GC":0 "@P.Pmatch.Loc@" 0:"∑" 0:"#" > K_not_ge_cont ; !!≈ * `@CODE@` - the grammar checker case: force the clitics to always be treated as a separate token

LEXICON K_not_ge_cont
! Clitic entries reached from K_not_ge. Each entry pairs a tag string (upper
! side) with "∑#" + clitic (lower side) and then continues to ENDLEX.
! NOTE(review): every clitic below is listed twice, once tagged +Use/Circ and
! once tagged +Use/-GC. This looks like old and new lines of a diff shown
! together - confirm which set is meant to remain in the file.
! NOTE(review): the +Use/GC path into this lexicon already inserts "∑#" and
! the entries here add their own "∑#" and a +Use/-GC tag - confirm that the
! combination is intended.
! Continue to the K-nai lexicon (defined outside this view).
K-nai ;
+Qst+Use/Circ:∑#go ENDLEX ; !I removed the Circ symbol from these two, as they are common.
+Qst+Foc/son+Use/Circ:∑#goson ENDLEX ;
+Qst+Foc/s+Use/Circ:∑#gos ENDLEX ;
+Foc/mat+Use/Circ:∑#mat ENDLEX ;
+Foc/mis+Use/Circ:∑#mis ENDLEX ;
+Foc/ba+Use/Circ:∑#ba ENDLEX ;
+Foc/be+Use/Circ:∑#be ENDLEX ;
+Foc/bat+Use/Circ:∑#bat ENDLEX ;
+Foc/bai+Use/Circ:∑#bai ENDLEX ;
+Foc/ban+Use/Circ:∑#ban ENDLEX ;
+Foc/bas+Use/Circ:∑#bas ENDLEX ;
+Foc/son+Use/Circ:∑#son ENDLEX ; ! makkárson
! Two-part clitics: a second "∑#" separates the parts; additionally tagged +Use/NG.
+Foc/bahal+Use/Circ+Use/NG:∑#ba∑#hal ENDLEX ;
+Foc/behal+Use/Circ+Use/NG:∑#be∑#hal ENDLEX ;
+Foc/bahan+Use/Circ+Use/NG:∑#ba∑#han ENDLEX ;
+Foc/behan+Use/Circ+Use/NG:∑#be∑#han ENDLEX ;
+Foc/bason+Use/Circ+Use/NG:∑#ba∑#son ENDLEX ;
+Foc/beson+Use/Circ+Use/NG:∑#be∑#son ENDLEX ;
! Same clitic inventory as above, tagged +Use/-GC instead of +Use/Circ.
+Qst+Use/-GC:∑#go ENDLEX ; !I removed the Circ symbol from these two, as they are common.
+Qst+Foc/son+Use/-GC:∑#goson ENDLEX ;
+Qst+Foc/s+Use/-GC:∑#gos ENDLEX ;
+Foc/mat+Use/-GC:∑#mat ENDLEX ;
+Foc/mis+Use/-GC:∑#mis ENDLEX ;
+Foc/ba+Use/-GC:∑#ba ENDLEX ;
+Foc/be+Use/-GC:∑#be ENDLEX ;
+Foc/bat+Use/-GC:∑#bat ENDLEX ;
+Foc/bai+Use/-GC:∑#bai ENDLEX ;
+Foc/ban+Use/-GC:∑#ban ENDLEX ;
+Foc/bas+Use/-GC:∑#bas ENDLEX ;
+Foc/son+Use/-GC:∑#son ENDLEX ; ! makkárson
+Foc/bahal+Use/-GC+Use/NG:∑#ba∑#hal ENDLEX ;
+Foc/behal+Use/-GC+Use/NG:∑#be∑#hal ENDLEX ;
+Foc/bahan+Use/-GC+Use/NG:∑#ba∑#han ENDLEX ;
+Foc/behan+Use/-GC+Use/NG:∑#be∑#han ENDLEX ;
+Foc/bason+Use/-GC+Use/NG:∑#ba∑#son ENDLEX ;
+Foc/beson+Use/-GC+Use/NG:∑#be∑#son ENDLEX ;

LEXICON K-default-neg
! This lexicon is referenced directly by some verb stem lexicons (but really affix lexicons).
! Entry point with three alternatives: stop without adding anything, or
! continue to K-default-neg_cont either directly or through the grammar
! checker prefix that inserts a token boundary first.
! No clitic: empty entry that continues straight to ENDLEX (defined outside this view).
ENDLEX ;
+Use/-GC: K-default-neg_cont ; !!≈ * `@CODE@` - regular clitic analysis, everywhere but in the grammar checker
! Regex entry: "+Use/GC" is paired with nothing on the lower side, the flag
! diacritic @P.Pmatch.Loc@ is set, and "∑" plus "#" are inserted on the lower side.
< "+Use/GC":0 "@P.Pmatch.Loc@" 0:"∑" 0:"#" > K-default-neg_cont ; !!≈ * `@CODE@` - the grammar checker case: force the clitics to always be treated as a separate token

LEXICON K-default-neg_cont
! Continuations reached from K-default-neg: the K-default-only lexicon
! (defined outside this view) plus the "ge" clitic tagged +Foc/Neg-ge.
! NOTE(review): the "ge" entry appears twice, once with +Use/Circ and once
! with +Use/-GC; this looks like old and new diff lines shown together -
! confirm which one is meant to remain.
K-default-only ;
+Foc/Neg-ge+Use/Circ:∑#ge ENDLEX ;
+Foc/Neg-ge+Use/-GC:∑#ge ENDLEX ;

LEXICON K-ge-neg
! This lexicon is referenced directly by some verb stem lexicons (but really affix lexicons).
! Entry point with three alternatives: stop without adding anything, or
! continue to K-ge-neg_cont either directly or through the grammar checker
! prefix that inserts a token boundary first.
! No clitic: empty entry that continues straight to ENDLEX (defined outside this view).
ENDLEX ;
+Use/-GC: K-ge-neg_cont ; !!≈ * `@CODE@` - regular clitic analysis, everywhere but in the grammar checker
! Regex entry: "+Use/GC" is paired with nothing on the lower side, the flag
! diacritic @P.Pmatch.Loc@ is set, and "∑" plus "#" are inserted on the lower side.
< "+Use/GC":0 "@P.Pmatch.Loc@" 0:"∑" 0:"#" > K-ge-neg_cont ; !!≈ * `@CODE@` - the grammar checker case: force the clitics to always be treated as a separate token

LEXICON K-ge-neg_cont
! Continuations reached from K-ge-neg: the K-gen-han-only lexicon (defined
! outside this view) plus the "ge" clitic tagged +Foc/Neg-ge.
! NOTE(review): the "ge" entry appears twice, once with +Use/Circ and once
! with +Use/-GC; this looks like old and new diff lines shown together -
! confirm which one is meant to remain.
K-gen-han-only ;
+Foc/Neg-ge+Use/Circ:∑#ge ENDLEX ; !
+Foc/Neg-ge+Use/-GC:∑#ge ENDLEX ; !

LEXICON K-son
! NOTE(review): this span appears to mix the old and the new body of K-son
! from a diff view: the +Use/Circ clitic entries and two empty ENDLEX entries
! sit next to the new dispatch entries that continue to K-son_cont. Confirm
! which lines are meant to remain in the file.
! K ;
! Clitic entries listed directly in this lexicon, tagged +Use/Circ.
+Foc/son+Use/Circ:∑#son ENDLEX ;
+Foc/hal+Use/Circ:∑#hal ENDLEX ; ! ! XXX Is this required?
+Foc/bat+Use/Circ:∑#bat ENDLEX ;
+Foc/bai+Use/Circ:∑#bai ENDLEX ;
+Foc/ban+Use/Circ:∑#ban ENDLEX ;
+Foc/bas+Use/Circ:∑#bas ENDLEX ;
+Foc/ba+Use/Circ:∑#ba ENDLEX ;
+Foc/be+Use/Circ:∑#be ENDLEX ;
! Two-part clitics: a second "∑#" separates the parts; additionally tagged +Use/NG.
+Foc/bahal+Use/Circ+Use/NG:∑#ba∑#hal ENDLEX ;
+Foc/behal+Use/Circ+Use/NG:∑#be∑#hal ENDLEX ;
+Foc/bahan+Use/Circ+Use/NG:∑#ba∑#han ENDLEX ;
+Foc/behan+Use/Circ+Use/NG:∑#be∑#han ENDLEX ;
+Foc/bason+Use/Circ+Use/NG:∑#ba∑#son ENDLEX ;
+Foc/beson+Use/Circ+Use/NG:∑#be∑#son ENDLEX ;
! No clitic: empty entry that continues straight to ENDLEX (defined outside this view).
ENDLEX ;
! This lexicon is referenced directly by pronoun affix lexicons, and by adverb stems.

! NOTE(review): second empty ENDLEX entry in this lexicon - presumably one of
! the two is a removed diff line; verify.
ENDLEX ;
+Use/-GC: K-son_cont ; !!≈ * `@CODE@` - regular clitic analysis, everywhere but in the grammar checker
! Regex entry: "+Use/GC" is paired with nothing on the lower side, the flag
! diacritic @P.Pmatch.Loc@ is set, and "∑" plus "#" are inserted on the lower side.
< "+Use/GC":0 "@P.Pmatch.Loc@" 0:"∑" 0:"#" > K-son_cont ; !!≈ * `@CODE@` - the grammar checker case: force the clitics to always be treated as a separate token

LEXICON K-son_cont
! Clitic entries reached from K-son. Each entry pairs a tag string (upper
! side) with "∑#" + clitic (lower side) and then continues to ENDLEX.
! NOTE(review): the +Use/GC path into this lexicon already inserts "∑#" and
! the entries here add their own "∑#" and a +Use/-GC tag - confirm that the
! combination is intended.
+Foc/son+Use/-GC:∑#son ENDLEX ;
+Foc/hal+Use/-GC:∑#hal ENDLEX ; ! ! XXX Is this required?
+Foc/bat+Use/-GC:∑#bat ENDLEX ;
+Foc/bai+Use/-GC:∑#bai ENDLEX ;
+Foc/ban+Use/-GC:∑#ban ENDLEX ;
+Foc/bas+Use/-GC:∑#bas ENDLEX ;
+Foc/ba+Use/-GC:∑#ba ENDLEX ;
+Foc/be+Use/-GC:∑#be ENDLEX ;
! Two-part clitics: a second "∑#" separates the parts; additionally tagged +Use/NG.
+Foc/bahal+Use/-GC+Use/NG:∑#ba∑#hal ENDLEX ;
+Foc/behal+Use/-GC+Use/NG:∑#be∑#hal ENDLEX ;
+Foc/bahan+Use/-GC+Use/NG:∑#ba∑#han ENDLEX ;
+Foc/behan+Use/-GC+Use/NG:∑#be∑#han ENDLEX ;
+Foc/bason+Use/-GC+Use/NG:∑#ba∑#son ENDLEX ;
+Foc/beson+Use/-GC+Use/NG:∑#be∑#son ENDLEX ;

! This is a Continuation Class for Interrogative Pronouns and Adverbs like
! gii(son), gosa(son) etc.. These have now been added to the lexicon(s).
! I tentatively direct it to K, so that they have ∑#son plus the K clitics.
Expand Down

0 comments on commit a7c6fc9

Please sign in to comment.