From eaa9c30eb9772598680c824cd209b835ec79e372 Mon Sep 17 00:00:00 2001
From: Sjur N Moshagen
Date: Wed, 15 Nov 2023 11:37:09 +0200
Subject: [PATCH] Add special treatment of clitics for grammar checkers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- add lemma
- add POS Pcle
- remove initial ∑#
---
Review note (text between the three-dash line and the diff is not part of
the commit message): this patch was received with its line breaks collapsed.
The one-record-per-line structure below was rebuilt from the hunk line counts
and the diffstat total of 42 insertions. Positions of blank context lines and
all leading whitespace inside context/added lines are inferred, and the From:
header carries no e-mail address as received. TODO: confirm against commit
eaa9c30eb9 before applying; `git apply --ignore-whitespace` may be needed.

 src/fst/clitics.lexc | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/src/fst/clitics.lexc b/src/fst/clitics.lexc
index a0959cec3c..43314382dd 100644
--- a/src/fst/clitics.lexc
+++ b/src/fst/clitics.lexc
@@ -119,6 +119,7 @@ LEXICON K_not_ge !!≈ * `@CODE@` - mainly referenced by numerals
 
 LEXICON K_not_ge_cont
  K-nai ;
+! Non-grammar checker section, everything is just a clitic, no lemma, just tags:
  +Qst+Use/-GC:∑#go ENDLEX ;
  +Qst+Foc/son+Use/-GC:∑#goson ENDLEX ;
  +Qst+Foc/s+Use/-GC:∑#gos ENDLEX ;
@@ -137,6 +138,25 @@ LEXICON K_not_ge_cont
  +Foc/behan+Use/-GC+Use/NG:∑#be∑#han ENDLEX ;
  +Foc/bason+Use/-GC+Use/NG:∑#ba∑#son ENDLEX ;
  +Foc/beson+Use/-GC+Use/NG:∑#be∑#son ENDLEX ;
+! Grammar checker section, everything will be a separate token, and needs a lemma:
+ go+Pcle+Qst+Use/GC:go ENDLEX ;
+ goson+Pcle+Qst+Foc/son+Use/GC:goson ENDLEX ;
+ gos+Pcle+Qst+Foc/s+Use/GC:gos ENDLEX ;
+ mat+Pcle+Foc/mat+Use/GC:mat ENDLEX ;
+ mis+Pcle+Foc/mis+Use/GC:mis ENDLEX ;
+ ba+Pcle+Foc/ba+Use/GC:ba ENDLEX ;
+ be+Pcle+Foc/be+Use/GC:be ENDLEX ;
+ bat+Pcle+Foc/bat+Use/GC:bat ENDLEX ;
+ bai+Pcle+Foc/bai+Use/GC:bai ENDLEX ;
+ ban+Pcle+Foc/ban+Use/GC:ban ENDLEX ;
+ bas+Pcle+Foc/bas+Use/GC:bas ENDLEX ;
+ son+Pcle+Foc/son+Use/GC:son ENDLEX ; ! makkárson
+ bahal+Pcle+Foc/bahal+Use/GC+Use/NG:ba∑#hal ENDLEX ;
+ behal+Pcle+Foc/behal+Use/GC+Use/NG:be∑#hal ENDLEX ;
+ bahan+Pcle+Foc/bahan+Use/GC+Use/NG:ba∑#han ENDLEX ;
+ behan+Pcle+Foc/behan+Use/GC+Use/NG:be∑#han ENDLEX ;
+ bason+Pcle+Foc/bason+Use/GC+Use/NG:ba∑#son ENDLEX ;
+ beson+Pcle+Foc/beson+Use/GC+Use/NG:be∑#son ENDLEX ;
 
 LEXICON K-default-neg
 ! This lexicon is referenced directly by some verb stem lexicons (but really affix lexicons).
@@ -146,7 +166,10 @@ LEXICON K-default-neg
 
 LEXICON K-default-neg_cont
  K-default-only ;
+! Non-grammar checker section, everything is just a clitic, no lemma, just tags:
  +Foc/Neg-ge+Use/-GC:∑#ge ENDLEX ;
+! Grammar checker section, everything will be a separate token, and needs a lemma:
+ ge+Pcle+Foc/Neg-ge+Use/GC:ge ENDLEX ;
 
 LEXICON K-ge-neg
 ! This lexicon is referenced directly by some verb stem lexicons (but really affix lexicons).
@@ -156,7 +179,10 @@ LEXICON K-ge-neg
 
 LEXICON K-ge-neg_cont
  K-gen-han-only ;
+! Non-grammar checker section, everything is just a clitic, no lemma, just tags:
  +Foc/Neg-ge+Use/-GC:∑#ge ENDLEX ; !
+! Grammar checker section, everything will be a separate token, and needs a lemma:
+ ge+Pcle+Foc/Neg-ge+Use/GC:ge ENDLEX ; !
 
 LEXICON K-son
 ! This lexicon is referenced directly by pronoun affix lexicons, and by adverb stems.
@@ -166,6 +192,7 @@ LEXICON K-son
 < "+Use/GC":0 "@P.Pmatch.Loc@" 0:"∑" 0:"#" > K-son_cont ; !!≈ * `@CODE@` - the grammar checker case: force the clitics to always be treated as a separate token
 
 LEXICON K-son_cont
+! Non-grammar checker section, everything is just a clitic, no lemma, just tags:
  +Foc/son+Use/-GC:∑#son ENDLEX ;
  +Foc/hal+Use/-GC:∑#hal ENDLEX ; ! ! XXX Is this required?
  +Foc/bat+Use/-GC:∑#bat ENDLEX ;
@@ -180,6 +207,21 @@ LEXICON K-son_cont
  +Foc/behan+Use/-GC+Use/NG:∑#be∑#han ENDLEX ;
  +Foc/bason+Use/-GC+Use/NG:∑#ba∑#son ENDLEX ;
  +Foc/beson+Use/-GC+Use/NG:∑#be∑#son ENDLEX ;
+! Grammar checker section, everything will be a separate token, and needs a lemma:
+ son+Pcle+Foc/son+Use/GC:son ENDLEX ;
+ hal+Pcle+Foc/hal+Use/GC:hal ENDLEX ; ! ! XXX Is this required?
+ bat+Pcle+Foc/bat+Use/GC:bat ENDLEX ;
+ bai+Pcle+Foc/bai+Use/GC:bai ENDLEX ;
+ ban+Pcle+Foc/ban+Use/GC:ban ENDLEX ;
+ bas+Pcle+Foc/bas+Use/GC:bas ENDLEX ;
+ ba+Pcle+Foc/ba+Use/GC:ba ENDLEX ;
+ be+Pcle+Foc/be+Use/GC:be ENDLEX ;
+ bahal+Pcle+Foc/bahal+Use/GC+Use/NG:ba∑#hal ENDLEX ;
+ behal+Pcle+Foc/behal+Use/GC+Use/NG:be∑#hal ENDLEX ;
+ bahan+Pcle+Foc/bahan+Use/GC+Use/NG:ba∑#han ENDLEX ;
+ behan+Pcle+Foc/behan+Use/GC+Use/NG:be∑#han ENDLEX ;
+ bason+Pcle+Foc/bason+Use/GC+Use/NG:ba∑#son ENDLEX ;
+ beson+Pcle+Foc/beson+Use/GC+Use/NG:be∑#son ENDLEX ;
 
 ! This is a Continuation Class for Interrogative Pronouns and Adverbs like
 ! gii(son), gosa(son) etc.. These have now been added to the lexicon(s).