Skip to content

Commit 350c493

Browse files
committed
telephone numbers
1 parent 2daba4b commit 350c493

File tree

3 files changed

+65
-1
lines changed

3 files changed

+65
-1
lines changed

src/fst/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ pkgsrcfst_DATA=url.lexc
2222

2323
if CAN_HFST_LEXC
2424
# Only defined when the CAN_HFST_LEXC conditional holds. Runs $(HFST_LEXC) over
# the lexc sources as a build-time check. NOTE(review): --Werror presumably
# turns compiler warnings into failures, and -o /dev/null discards the compiled
# result so the check leaves no output file behind -- confirm against hfst-lexc.
check-local:
25-
$(HFST_LEXC) -v url.lexc
25+
$(HFST_LEXC) --Werror -v url.lexc -o /dev/null
26+
$(HFST_LEXC) --Werror -v testroot.lexc stems/telephone.lexc -o /dev/null
2627
endif
2728

src/fst/stems/telephone.lexc

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
!! Lexica for recognising telephone numbers.
2+
!! Language-independent; if you need to compound or inflect telephone
3+
!! numbers, create a language-specific version.
4+
5+
6+
! Entry point for telephone numbers. Each entry consumes an optional prefix
! and hands over to the sub-lexicon that parses the rest of the number:
!   "+", "00" or "00 "  -> countrycodes (international notation)
!   no prefix           -> areacodes    (national notation)
!   "tel:"              -> tel-URL      (tel: URLs)
LEXICON telephones
7+
8+
+ countrycodes ; ! +358 050 ...
9+
%0%0 countrycodes ; ! 00358 050 ...
10+
! "% " is an escaped (literal) space and belongs to the entry string, so a
! further unescaped space is needed to separate it from the continuation class.
%0%0%  countrycodes ; ! 00 358 ...
11+
! NOTE(review): an unescaped 0 is the lexc epsilon, so this entry adds no
! characters; the leading zero of e.g. 050 is then matched by the digit
! pattern in areacodes. Write %0 here if a literal zero was intended.
0 areacodes ; ! 050 ...
12+
tel%: tel-URL ; ! tel:+358-50-55555555 in URL standard RFC 3966
13+
14+
15+
! Number part of a tel: URL (the "tel:" prefix has already been consumed):
! an optional leading "+" as used by global numbers such as +358-50-55555555,
! one digit, then any run of digits and hyphens. Every literal zero is written
! %0, because a bare 0 inside a regular expression is the epsilon symbol.
LEXICON tel-URL
16+
17+
< (%+) [%0|1|2|3|4|5|6|7|8|9] [%0|1|2|3|4|5|6|7|8|9|%-]* > telephoneURLTags ;
18+
19+
! Country code: one non-zero digit, up to two further optional digits, and an
! optional separator (space or hyphen). Continues to areacodes.
LEXICON countrycodes
20+
21+
< [1|2|3|4|5|6|7|8|9] ([%0|1|2|3|4|5|6|7|8|9]) ([%0|1|2|3|4|5|6|7|8|9]) ([% | %-]) > areacodes ; ! 1-999 (kind of)
22+
23+
! Area code in one of two shapes, each followed by an optional space or hyphen
! and continuing to localcodes:
!   - a literal "(0)" followed by a run of digits, or
!   - a bare run of digits.
! NOTE(review): ^2,4 and ^2,5 presumably bound the run to 2-4 and 2-5 digits
! respectively -- confirm against the HFST regular expression syntax.
LEXICON areacodes
24+
25+
< %( %0 %) [%0|1|2|3|4|5|6|7|8|9]^2,4 ([% | %-]) > localcodes ; ! (0)50
26+
< [%0|1|2|3|4|5|6|7|8|9]^2,5 ([% | %-]) > localcodes ; ! 050, 921
27+
28+
! Subscriber part of the number: starts and ends with a digit, with any mix of
! digits, spaces and hyphens in between (so at least two digits in total).
! Continues to telephoneNumTags.
LEXICON localcodes
29+
30+
< [%0|1|2|3|4|5|6|7|8|9] [%0|1|2|3|4|5|6|7|8|9|% |%-]* [%0|1|2|3|4|5|6|7|8|9] > telephoneNumTags ; ! e.g. 12 34 56 78, 123-45-678
31+
32+
! Adds the tags +Num+Arab+Use/Circ+TEL on the upper (analysis) side only; the
! lower side is 0 (empty). Continues to telephoneNumInflections, which is not
! defined among the lexicons of this file and has to come from the lexicon
! that uses it.
LEXICON telephoneNumTags
33+
34+
+Num+Arab+Use/Circ+TEL:0 telephoneNumInflections ;
35+
36+
! Adds the tags +URL+TEL on the upper (analysis) side only; the lower side is
! 0 (empty). Continues to telephoneURLInflections, which is not defined among
! the lexicons of this file and has to come from the lexicon that uses it.
LEXICON telephoneURLTags
37+
38+
+URL+TEL:0 telephoneURLInflections ;

src/fst/testroot.lexc

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
!! This lexicon is not part of the shared lexica but an example of what
2+
!! you need in the language-specific files for the shared parts to work;
3+
!! it is also tested automatically in case someone breaks the shared
4+
!! lexica.
5+
6+
! Tags that must be treated as single symbols: the telephone lexicons emit
! +Num, +Arab, +Use/Circ, +URL and +TEL, and the example inflection lexicon
! below emits +Gen and +Sg. Each tag is declared exactly once.
Multichar_Symbols
7+
8+
+TEL +URL +Gen +Sg +Num +Arab +Use/Circ
9+
10+
11+
! Start lexicon: consumes nothing (0 is the empty string) and continues
! straight to the telephones lexicon.
LEXICON Root
12+
13+
0 telephones ;
14+
15+
16+
17+
! Example endings for telephone numbers: either no ending at all, or the tags
! +Gen+Sg on the upper side realised as 's on the lower side. Both entries end
! the word (#).
LEXICON telephoneNumInflections
18+
!! For example, allow the 's possessive on telephone numbers like this:
19+
0 # ;
20+
+Gen+Sg:'s # ; !!= `@CODE@`
21+
22+
! tel: URLs take no endings in this example: the only entry is empty and ends
! the word (#).
LEXICON telephoneURLInflections
23+
24+
0 # ;
25+

0 commit comments

Comments
 (0)