Skip to content

Commit

Permalink
Fix CHAT punctuation being added to the mor tier
Browse files Browse the repository at this point in the history
  • Loading branch information
Mentha7 committed Oct 6, 2021
1 parent 5da56ba commit 9a34fc1
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions chatconllu/conlluparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@
'final_sents',
'final_comments'
]
# Word forms that count as CHAT punctuation.  construct_tiers tests
# ``word.form in CHAT_PUNCT`` so that these tokens are not given an
# ``xpos|lemma`` entry (nor a bare-lemma entry) when the MOR tier is built.
CHAT_PUNCT = [
    '„', '‡', ',',  # double low-9 quotation mark, double dagger, comma
    '“', '”',       # left / right double quotation marks
    '‘', '’',       # left / right single quotation marks
]

def construct_mwe(sentence, tier):
# -------- construct tier for multi-word tokens--------
Expand Down Expand Up @@ -101,7 +110,7 @@ def construct_tiers(sentence, has_mor, has_gra, generate_mor=False, generate_gra
for v in word.misc['components']:
tmp = '+' + v.replace('@', '|').replace('^', '+') # reverse to MOR coding
m = '|'.join([word.xpos, tmp])
elif word.lemma and word.xpos and word.xpos != 'punct':
elif word.lemma and word.xpos and word.xpos != 'punct' and not word.form in CHAT_PUNCT:
m = '|'.join([word.xpos, word.lemma])
if 'feats' in word.misc.keys():
for f in word.misc['feats']:
Expand All @@ -110,7 +119,7 @@ def construct_tiers(sentence, has_mor, has_gra, generate_mor=False, generate_gra
if 'translation' in word.misc.keys():
for t in word.misc['translation']:
m += '=' + t
if word.lemma and re.match(PUNCT, word.lemma) and len(word.lemma)==1: # punctuation's mor is form
if word.lemma and re.match(PUNCT, word.lemma) and len(word.lemma)==1 and not word.form in CHAT_PUNCT: # punctuation's mor is form
m = word.lemma
if 'form' in word.misc.keys():
# print("has key1")
Expand Down Expand Up @@ -149,7 +158,7 @@ def construct_tiers(sentence, has_mor, has_gra, generate_mor=False, generate_gra
for v in word.misc['components']:
tmp = '+' + v.replace('@', '|').replace('^', '+')
m = '|'.join([word.xpos, tmp])
elif word.lemma and word.xpos != 'punct':
elif word.lemma and word.xpos != 'punct' and not word.form in CHAT_PUNCT:
m = '|'.join([word.xpos, word.lemma])
if 'feats' in word.misc.keys():
for f in word.misc['feats']:
Expand All @@ -158,7 +167,7 @@ def construct_tiers(sentence, has_mor, has_gra, generate_mor=False, generate_gra
if 'translation' in word.misc.keys():
for t in word.misc['translation']:
m += '=' + t
if word.lemma and re.match(PUNCT, word.lemma) and len(word.lemma)==1: # punctuations mor is form
if word.lemma and re.match(PUNCT, word.lemma) and len(word.lemma)==1 and not word.form in CHAT_PUNCT: # punctuations mor is form
m = word.lemma
if 'form' in word.misc.keys():
# print("has key2")
Expand Down

0 comments on commit 9a34fc1

Please sign in to comment.