-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregtest.json5
116 lines (116 loc) · 3.43 KB
/
regtest.json5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
{
// Named steps that `pipes` entries refer to by key. Every step has a `cmd`
// string; some additionally set `type: 'cg'`.
// NOTE(review): what `type: 'cg'` means is decided by the tool that reads this
// file — presumably it marks steps that are plain CG stream filters; confirm.
steps: {
// kal-tokenise with the pmhfst tokeniser, piped through cg-sort.
fst: {
cmd: '~/langtech/kal/tools/shellscripts/kal-tokenise ~/langtech/kal/tools/tokenisers/tokeniser-disamb-gt-desc.pmhfst | cg-sort',
},
// vislcg3 with the kal-pre1.cg3 grammar.
pre1: {
cmd: 'vislcg3 -g ~/langtech/kal/src/cg3/kal-pre1.cg3',
type: 'cg',
},
// kal-hybrid-split with the generator FST, piped through cg-sort.
hybrids: {
cmd: '~/langtech/kal/tools/shellscripts/kal-hybrid-split ~/langtech/kal/src/fst/generator-gt-desc.hfstol | cg-sort',
},
// kal-spell-heur, piped through cg-sort.
heur: {
cmd: '~/langtech/kal/tools/shellscripts/kal-spell-heur | cg-sort',
},
// Single shell pipeline chaining the same commands as `fst`, `pre1` and
// `hybrids`, with only one cg-sort at the very end.
pre: {
cmd: '~/langtech/kal/tools/shellscripts/kal-tokenise ~/langtech/kal/tools/tokenisers/tokeniser-disamb-gt-desc.pmhfst | vislcg3 -g ~/langtech/kal/src/cg3/kal-pre1.cg3 | ~/langtech/kal/tools/shellscripts/kal-hybrid-split ~/langtech/kal/src/fst/generator-gt-desc.hfstol | cg-sort',
},
// apply-sems.py invoked with --last.
'sems-l': {
cmd: '~/langtech/katersat/apply-sems.py --last',
type: 'cg',
},
// apply-sems.py invoked without flags. Not referenced by any entry in
// `pipes` in this file.
'sems-f': {
cmd: '~/langtech/katersat/apply-sems.py',
type: 'cg',
},
// vislcg3 with the kal-pre2.cg3 grammar.
pre2: {
cmd: 'vislcg3 -g ~/langtech/kal/src/cg3/kal-pre2.cg3',
type: 'cg',
},
// vislcg3 with the disambiguator.cg3 grammar.
morf: {
cmd: 'vislcg3 -g ~/langtech/kal/src/cg3/disambiguator.cg3',
type: 'cg',
},
// Single shell pipeline chaining the same commands as `fst`, `pre1`,
// `hybrids`, `pre2` and `morf`, with only one cg-sort at the very end.
'morf-c': {
cmd: '~/langtech/kal/tools/shellscripts/kal-tokenise ~/langtech/kal/tools/tokenisers/tokeniser-disamb-gt-desc.pmhfst | vislcg3 -g ~/langtech/kal/src/cg3/kal-pre1.cg3 | ~/langtech/kal/tools/shellscripts/kal-hybrid-split ~/langtech/kal/src/fst/generator-gt-desc.hfstol | vislcg3 -g ~/langtech/kal/src/cg3/kal-pre2.cg3 | vislcg3 -g ~/langtech/kal/src/cg3/disambiguator.cg3 | cg-sort',
},
// kal-lu-prefix with the generator FST, piped through cg-sort.
lu: {
cmd: '~/langtech/kal/tools/shellscripts/kal-lu-prefix ~/langtech/kal/src/fst/generator-gt-desc.hfstol | cg-sort',
},
// vislcg3 with the functions.cg3 grammar.
syntax: {
cmd: 'vislcg3 -g ~/langtech/kal/src/cg3/functions.cg3',
type: 'cg',
},
// vislcg3 with the dependency.cg3 grammar.
dep: {
cmd: 'vislcg3 -g ~/langtech/kal/src/cg3/dependency.cg3',
type: 'cg',
},
// Perl line filter, then cg-sort. The doubled backslashes are JSON5 string
// escapes; Perl receives single backslashes. Per input line it:
//   1. deletes every " a/b" item (space, then two runs of non-slash,
//      non-whitespace characters separated by one slash);
//   2. deletes every item that is a space, "i", one of [A-Z0-9], then
//      word characters;
//   3. replaces U+E020 with an ordinary space (U+0020);
//   4. repeatedly collapses an immediately doubled " DIRTALE<CAPS>" item
//      into a single one.
// NOTE(review): presumably this strips secondary tags before comparison —
// confirm the intent with the pipeline owner.
no2nd: {
cmd: "perl -wpne 's~ [^/\\s]+/[^/\\s]+~~g; s~ i[A-Z\\d]\\w*~~g; s~\\x{E020}~\\x{20}~g; while(s~( DIRTALE[A-Z]+)\\1~$1~g){}' | cg-sort",
},
// Same filter as `no2nd`, except that negative lookaheads keep items that
// start with "Sem" or "iSem" in substitutions 1 and 2.
'no2nd-s': {
cmd: "perl -wpne 's~ (?!i?Sem)[^/\\s]+/[^/\\s]+~~g; s~ i(?!Sem)[A-Z\\d]\\w*~~g; s~\\x{E020}~\\x{20}~g; while(s~( DIRTALE[A-Z]+)\\1~$1~g){}' | cg-sort",
},
},
// Named pipes. A pipe is either an array of keys from `steps`, or a single
// command string (the two gloss-* entries).
// NOTE(review): presumably array entries are run in the listed order and
// string entries are handed to an external script — confirm against the
// tool that consumes this file.
pipes: {
kal: ['fst', 'pre1', 'hybrids', 'pre2', 'morf', 'syntax', 'dep', 'no2nd'],
// Only the `fst` step.
fst: ['fst'],
// Uses the composite `pre` step, then `sems-l`, and ends with `no2nd-s`
// instead of `no2nd`.
'kal-sems-l': ['pre', 'sems-l', 'pre2', 'morf', 'syntax', 'dep', 'no2nd-s'],
// Uses the composite `morf-c` step, then `lu`.
'kal-lu': ['morf-c', 'lu', 'syntax', 'dep', 'no2nd'],
heur: ['fst', 'pre1', 'heur'],
// String-valued pipes: a script path followed by `--regtest --cmd`.
'gloss-k2d': '~/langtech/gloss/kal2dan/kal2dan.pl --regtest --cmd',
'gloss-k2e': '~/langtech/gloss/kal2eng/kal2eng.pl --regtest --cmd',
},
// Named corpus groups; each maps to an array of strings. Tests and
// `defaults` refer to these groups by key.
// NOTE(review): the entries look like glob patterns / corpus names
// ('*', 'heur/*', 'KW') — confirm how the consuming tool resolves them.
corpora: {
all: ['*'],
gloss: ['KW', 'KWx'],
heur: ['heur/*'],
fst: ['fst/*'],
},
// Default settings. `pipe` names a key in `pipes`, `test` names a key in
// `tests`, and `corpora` lists keys from the `corpora` section.
// NOTE(review): presumably these values apply to any test that does not set
// the same key itself; the meaning of `gold` and `git` is defined by the
// consuming tool — confirm there.
defaults: {
pipe: 'kal',
test: 'kal',
corpora: ['all'],
gold: false,
git: true,
env: [],
},
// Test definitions. Each test has a `desc` string and may set `pipe`,
// `corpora`, `gold`, `env` and `grep`. `pipe` values are keys in `pipes`;
// `corpora` values are keys in the `corpora` section.
// NOTE(review): keys a test omits are presumably taken from `defaults` —
// confirm against the consuming tool.
tests: {
// Sets only `desc`.
kal: {
desc: 'Default Kalaalisut analysis pipe',
},
fst: {
desc: 'FST only',
pipe: 'fst',
corpora: ['fst'],
},
// The only test that sets `env` and `grep`.
// NOTE(review): `grep` is presumably a regex used to select input for the
// "dynamic corpus" mentioned in `desc` — confirm.
'kal-lu': {
desc: 'Split LU/LI/etc to their own token (dynamic corpus)',
pipe: 'kal-lu',
env: ['KAL_LU_PREFIX=1'],
grep: ' (CONJ|ADV)-L',
},
'kal-sems-l': {
desc: 'Semantic annotation, only final semantics',
pipe: 'kal-sems-l',
},
'heur': {
desc: 'Speller and heuristic analysis of unknown words',
pipe: 'heur',
corpora: ['heur'],
},
// The two gloss tests use the string-valued gloss-* pipes, the `gloss`
// corpus group, and are the only tests that set `gold: true`.
'gloss-k2d': {
desc: 'Gloss kal2dan',
pipe: 'gloss-k2d',
corpora: ['gloss'],
gold: true,
},
'gloss-k2e': {
desc: 'Gloss kal2eng',
pipe: 'gloss-k2e',
corpora: ['gloss'],
gold: true,
},
},
}