Patch description (text before the first "diff --git" header is not part of the hunk):

The lexicons K_not_ge, K-default-neg, K-ge-neg and K-son are each split into an
entry lexicon and a new <name>_cont lexicon. Each entry lexicon keeps a bare
ENDLEX exit and gains two routes into <name>_cont: one tagged +Use/-GC, and one
regex entry that pairs +Use/GC with 0, passes @P.Pmatch.Loc@, and pairs 0 with
the symbols ∑ and #. In this hunk every clitic entry that carried +Use/Circ now
carries +Use/-GC instead.

NOTE(review): the entries visible in the *_cont lexicons still carry +Use/-GC and
their own ∑# prefix, so the route tagged +Use/GC appears to produce both tags and
a second ∑# for them. Confirm that this is intended or handled elsewhere.
NOTE(review): the comment "I removed the Circ symbol from these two" stays on the
go entry although no entry in this hunk has +Use/Circ afterwards. Confirm that it
is still accurate.

diff --git a/src/fst/clitics.lexc b/src/fst/clitics.lexc
index 8d99884b79..fd79b3db63 100644
--- a/src/fst/clitics.lexc
+++ b/src/fst/clitics.lexc
@@ -112,54 +112,75 @@ LEXICON K-ge-only-cont
 !! The following lexicons are not referenced by the `K` lexicon, but directly in specific cases.
 
 LEXICON K_not_ge !!≈ * `@CODE@` - mainly referenced by numerals
+! This lexicon is referenced directly by numeral stems and noun affix lexicons.
  ENDLEX ;
+ +Use/-GC: K_not_ge_cont ; !!≈ * `@CODE@` - regular clitic analysis, everywhere but in the grammar checker
+< "+Use/GC":0 "@P.Pmatch.Loc@" 0:"∑" 0:"#" > K_not_ge_cont ; !!≈ * `@CODE@` - the grammar checker case: force the clitics to always be treated as a separate token
+
+LEXICON K_not_ge_cont
  K-nai ;
- +Qst+Use/Circ:∑#go ENDLEX ; !I removed the Circ symbol from these two, as they are common.
- +Qst+Foc/son+Use/Circ:∑#goson ENDLEX ;
- +Qst+Foc/s+Use/Circ:∑#gos ENDLEX ;
- +Foc/mat+Use/Circ:∑#mat ENDLEX ;
- +Foc/mis+Use/Circ:∑#mis ENDLEX ;
- +Foc/ba+Use/Circ:∑#ba ENDLEX ;
- +Foc/be+Use/Circ:∑#be ENDLEX ;
- +Foc/bat+Use/Circ:∑#bat ENDLEX ;
- +Foc/bai+Use/Circ:∑#bai ENDLEX ;
- +Foc/ban+Use/Circ:∑#ban ENDLEX ;
- +Foc/bas+Use/Circ:∑#bas ENDLEX ;
- +Foc/son+Use/Circ:∑#son ENDLEX ; ! makkárson
- +Foc/bahal+Use/Circ+Use/NG:∑#ba∑#hal ENDLEX ;
- +Foc/behal+Use/Circ+Use/NG:∑#be∑#hal ENDLEX ;
- +Foc/bahan+Use/Circ+Use/NG:∑#ba∑#han ENDLEX ;
- +Foc/behan+Use/Circ+Use/NG:∑#be∑#han ENDLEX ;
- +Foc/bason+Use/Circ+Use/NG:∑#ba∑#son ENDLEX ;
- +Foc/beson+Use/Circ+Use/NG:∑#be∑#son ENDLEX ;
+ +Qst+Use/-GC:∑#go ENDLEX ; !I removed the Circ symbol from these two, as they are common.
+ +Qst+Foc/son+Use/-GC:∑#goson ENDLEX ;
+ +Qst+Foc/s+Use/-GC:∑#gos ENDLEX ;
+ +Foc/mat+Use/-GC:∑#mat ENDLEX ;
+ +Foc/mis+Use/-GC:∑#mis ENDLEX ;
+ +Foc/ba+Use/-GC:∑#ba ENDLEX ;
+ +Foc/be+Use/-GC:∑#be ENDLEX ;
+ +Foc/bat+Use/-GC:∑#bat ENDLEX ;
+ +Foc/bai+Use/-GC:∑#bai ENDLEX ;
+ +Foc/ban+Use/-GC:∑#ban ENDLEX ;
+ +Foc/bas+Use/-GC:∑#bas ENDLEX ;
+ +Foc/son+Use/-GC:∑#son ENDLEX ; ! makkárson
+ +Foc/bahal+Use/-GC+Use/NG:∑#ba∑#hal ENDLEX ;
+ +Foc/behal+Use/-GC+Use/NG:∑#be∑#hal ENDLEX ;
+ +Foc/bahan+Use/-GC+Use/NG:∑#ba∑#han ENDLEX ;
+ +Foc/behan+Use/-GC+Use/NG:∑#be∑#han ENDLEX ;
+ +Foc/bason+Use/-GC+Use/NG:∑#ba∑#son ENDLEX ;
+ +Foc/beson+Use/-GC+Use/NG:∑#be∑#son ENDLEX ;
 
 LEXICON K-default-neg
+! This lexicon is referenced directly by some verb stem lexicons (but really affix lexicons).
  ENDLEX ;
+ +Use/-GC: K-default-neg_cont ; !!≈ * `@CODE@` - regular clitic analysis, everywhere but in the grammar checker
+< "+Use/GC":0 "@P.Pmatch.Loc@" 0:"∑" 0:"#" > K-default-neg_cont ; !!≈ * `@CODE@` - the grammar checker case: force the clitics to always be treated as a separate token
+
+LEXICON K-default-neg_cont
  K-default-only ;
- +Foc/Neg-ge+Use/Circ:∑#ge ENDLEX ;
+ +Foc/Neg-ge+Use/-GC:∑#ge ENDLEX ;
 
 LEXICON K-ge-neg
+! This lexicon is referenced directly by some verb stem lexicons (but really affix lexicons).
  ENDLEX ;
+ +Use/-GC: K-ge-neg_cont ; !!≈ * `@CODE@` - regular clitic analysis, everywhere but in the grammar checker
+< "+Use/GC":0 "@P.Pmatch.Loc@" 0:"∑" 0:"#" > K-ge-neg_cont ; !!≈ * `@CODE@` - the grammar checker case: force the clitics to always be treated as a separate token
+
+LEXICON K-ge-neg_cont
  K-gen-han-only ;
- +Foc/Neg-ge+Use/Circ:∑#ge ENDLEX ; !
+ +Foc/Neg-ge+Use/-GC:∑#ge ENDLEX ; !
 
 LEXICON K-son
-! K ;
- +Foc/son+Use/Circ:∑#son ENDLEX ;
- +Foc/hal+Use/Circ:∑#hal ENDLEX ; ! ! XXX Is this required?
- +Foc/bat+Use/Circ:∑#bat ENDLEX ;
- +Foc/bai+Use/Circ:∑#bai ENDLEX ;
- +Foc/ban+Use/Circ:∑#ban ENDLEX ;
- +Foc/bas+Use/Circ:∑#bas ENDLEX ;
- +Foc/ba+Use/Circ:∑#ba ENDLEX ;
- +Foc/be+Use/Circ:∑#be ENDLEX ;
- +Foc/bahal+Use/Circ+Use/NG:∑#ba∑#hal ENDLEX ;
- +Foc/behal+Use/Circ+Use/NG:∑#be∑#hal ENDLEX ;
- +Foc/bahan+Use/Circ+Use/NG:∑#ba∑#han ENDLEX ;
- +Foc/behan+Use/Circ+Use/NG:∑#be∑#han ENDLEX ;
- +Foc/bason+Use/Circ+Use/NG:∑#ba∑#son ENDLEX ;
- +Foc/beson+Use/Circ+Use/NG:∑#be∑#son ENDLEX ;
- ENDLEX ;
+! This lexicon is referenced directly by pronoun affix lexicons, and by adverb stems.
+
+ ENDLEX ;
+ +Use/-GC: K-son_cont ; !!≈ * `@CODE@` - regular clitic analysis, everywhere but in the grammar checker
+< "+Use/GC":0 "@P.Pmatch.Loc@" 0:"∑" 0:"#" > K-son_cont ; !!≈ * `@CODE@` - the grammar checker case: force the clitics to always be treated as a separate token
+
+LEXICON K-son_cont
+ +Foc/son+Use/-GC:∑#son ENDLEX ;
+ +Foc/hal+Use/-GC:∑#hal ENDLEX ; ! ! XXX Is this required?
+ +Foc/bat+Use/-GC:∑#bat ENDLEX ;
+ +Foc/bai+Use/-GC:∑#bai ENDLEX ;
+ +Foc/ban+Use/-GC:∑#ban ENDLEX ;
+ +Foc/bas+Use/-GC:∑#bas ENDLEX ;
+ +Foc/ba+Use/-GC:∑#ba ENDLEX ;
+ +Foc/be+Use/-GC:∑#be ENDLEX ;
+ +Foc/bahal+Use/-GC+Use/NG:∑#ba∑#hal ENDLEX ;
+ +Foc/behal+Use/-GC+Use/NG:∑#be∑#hal ENDLEX ;
+ +Foc/bahan+Use/-GC+Use/NG:∑#ba∑#han ENDLEX ;
+ +Foc/behan+Use/-GC+Use/NG:∑#be∑#han ENDLEX ;
+ +Foc/bason+Use/-GC+Use/NG:∑#ba∑#son ENDLEX ;
+ +Foc/beson+Use/-GC+Use/NG:∑#be∑#son ENDLEX ;
+
 ! This is a Continuation Class for Interrogative Pronouns and Adverbs like
 ! gii(son), gosa(son) etc.. These have now been added to the lexicon(s).
 ! I tentatively direct it to K, so that they have ∑#son plus the K clitics.