Skip to content

Commit 296dc0d

Browse files
Grey0202 authored and tellarin committed
[PhoneNumber] Fix boundary issues and add phone number pre-check (microsoft#1803)
Some changes are not ideal, but there are perf gains and clearly wrong cases are filtered out.
1 parent 0ca0b22 commit 296dc0d

File tree

30 files changed, with 475 additions (+475) and 84 deletions (-84).

.NET/Microsoft.Recognizers.Definitions.Common/BaseGUID.cs

+2-2
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ namespace Microsoft.Recognizers.Definitions
2121

2222
public static class BaseGUID
2323
{
24-
public const string GUIDRegexElement = @"(([A-Fa-f0-9]{8}(-[A-Fa-f0-9]{4}){3}-[A-Fa-f0-9]{12})|([A-Fa-f0-9]{32}))";
25-
public static readonly string GUIDRegex = $@"(\b{GUIDRegexElement}\b|\{{{GUIDRegexElement}\}}|urn:uuid:{GUIDRegexElement}\b|%7[Bb]{GUIDRegexElement}%7[Dd]|[Xx]\'{GUIDRegexElement}\')";
24+
public const string GUIDRegexElement = @"(([a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12})|([a-f0-9]{32}))";
25+
public static readonly string GUIDRegex = $@"(\b{GUIDRegexElement}\b|\{{{GUIDRegexElement}\}}|urn:uuid:{GUIDRegexElement}\b|%7[b]{GUIDRegexElement}%7[d]|[x]\'{GUIDRegexElement}\')";
2626
}
2727
}

.NET/Microsoft.Recognizers.Definitions.Common/BasePhoneNumbers.cs

+22-1
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ public static class BasePhoneNumbers
2525
public const string WordBoundariesRegex = @"\b";
2626
public const string NonWordBoundariesRegex = @"\B";
2727
public const string EndWordBoundariesRegex = @"\b";
28+
public const string PreCheckPhoneNumberRegex = @"(\d{1,4}.){2,4}\s?\d{2,3}";
2829
public static readonly Func<string, string, string> GeneralPhoneNumberRegex = (WordBoundariesRegex, EndWordBoundariesRegex) => $@"({WordBoundariesRegex}(((\d[\s]?){{4,12}}))(-?[\d\s?]{{3}}\d)(?!-){EndWordBoundariesRegex})|(\(\d{{5}}\)\s?\d{{5,6}})|\+\d{{2}}\(\d\)\d{{10}}";
2930
public static readonly Func<string, string, string, string> BRPhoneNumberRegex = (WordBoundariesRegex, NonWordBoundariesRegex, EndWordBoundariesRegex) => $@"((\(\s?(\+\s?|00)55\s?\)\s?)|(((?<!\d)\+\s?|{WordBoundariesRegex}00)55\s?)|{WordBoundariesRegex})?((({NonWordBoundariesRegex}\(\s?))\d{{2,3}}(\s?\))|({WordBoundariesRegex}\d{{2,3}}))\s?\d{{4,5}}-?\d{{3,5}}(?!-){EndWordBoundariesRegex}";
3031
public static readonly Func<string, string, string, string> UKPhoneNumberRegex = (WordBoundariesRegex, NonWordBoundariesRegex, EndWordBoundariesRegex) => $@"((({WordBoundariesRegex}(00)|{NonWordBoundariesRegex}\+)\s?)?({WordBoundariesRegex}\d{{2}}\s?)?((\s?\(0\)[-\s]?|{WordBoundariesRegex}|(?<=(\b^#)\d{{2}}))\d{{2,5}}|\(0\d{{3,4}}\))[/-]?\s?(\d{{5,8}}|\d{{3,4}}[-\s]?\d{{3,4}})(?!-){EndWordBoundariesRegex})";
@@ -44,10 +45,15 @@ public static class BasePhoneNumbers
4445
@"\)\.",
4546
@"^0(0|11)(-)"
4647
};
47-
public const string PhoneNumberMaskRegex = @"([0-9A-E]{2}(\s[0-9A-E]{2}){7})";
48+
public const string PhoneNumberMaskRegex = @"([0-9a-e]{2}(\s[0-9a-e]{2}){7})";
4849
public const string CountryCodeRegex = @"^(\(\s?(\+\s?|00)\d{1,3}\s?\)|(\+\s?|00)\d{1,3})";
4950
public const string AreaCodeIndicatorRegex = @"\(";
5051
public const string FormatIndicatorRegex = @"(\s|-|/|\.)+";
52+
public static readonly IList<char> ColonMarkers = new List<char>
53+
{
54+
':'
55+
};
56+
public const string ColonPrefixCheckRegex = @"(([A-Za-z])\s*$)";
5157
public static readonly IList<char> SpecialBoundaryMarkers = new List<char>
5258
{
5359
'-',
@@ -62,5 +68,20 @@ public static class BasePhoneNumbers
6268
'#',
6369
'*'
6470
};
71+
public static readonly IList<char> ForbiddenPrefixMarkers = new List<char>
72+
{
73+
',',
74+
':',
75+
'%'
76+
};
77+
public static readonly IList<char> ForbiddenSuffixMarkers = new List<char>
78+
{
79+
'/',
80+
'+',
81+
'#',
82+
'*',
83+
':',
84+
'%'
85+
};
6586
}
6687
}

.NET/Microsoft.Recognizers.Definitions.Common/Chinese/PhoneNumbersDefinitions.cs

+6
Original file line number | Diff line number | Diff line change
@@ -25,5 +25,11 @@ public static class PhoneNumbersDefinitions
2525
public const string WordBoundariesRegex = @"(\b|(?<=[\u0800-\u9FFF]))";
2626
public const string NonWordBoundariesRegex = @"(\B|(?<=[\u0800-\u9FFF]))";
2727
public const string EndWordBoundariesRegex = @"(\b|(?=[\u0800-\u9FFF]))";
28+
public const string ColonPrefixCheckRegex = @"(([A-Za-z]|[\u4E00-\u9FA5])\s*$)";
29+
public static readonly IList<char> ForbiddenPrefixMarkers = new List<char>
30+
{
31+
':',
32+
'%'
33+
};
2834
}
2935
}

.NET/Microsoft.Recognizers.Text.Sequence/Chinese/Extractors/ChinesePhoneNumberExtractorConfiguration.cs

+4-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
using System.Text.RegularExpressions;
2-
using Microsoft.Recognizers.Definitions;
1+
using System.Collections.Generic;
2+
using System.Text.RegularExpressions;
33
using Microsoft.Recognizers.Definitions.Chinese;
44

55
namespace Microsoft.Recognizers.Text.Sequence.Chinese
@@ -12,6 +12,8 @@ public ChinesePhoneNumberExtractorConfiguration(SequenceOptions options)
1212
WordBoundariesRegex = PhoneNumbersDefinitions.WordBoundariesRegex;
1313
NonWordBoundariesRegex = PhoneNumbersDefinitions.NonWordBoundariesRegex;
1414
EndWordBoundariesRegex = PhoneNumbersDefinitions.EndWordBoundariesRegex;
15+
ColonPrefixCheckRegex = new Regex(PhoneNumbersDefinitions.ColonPrefixCheckRegex);
16+
ForbiddenPrefixMarkers = (List<char>)PhoneNumbersDefinitions.ForbiddenPrefixMarkers;
1517
}
1618
}
1719
}

.NET/Microsoft.Recognizers.Text.Sequence/Config/PhoneNumberConfiguration.cs

+10-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1-
using System.Text.RegularExpressions;
1+
using System.Collections.Generic;
2+
using System.Text.RegularExpressions;
23

34
namespace Microsoft.Recognizers.Text.Sequence
45
{
@@ -17,5 +18,13 @@ public PhoneNumberConfiguration(SequenceOptions options = SequenceOptions.None)
1718

1819
public string EndWordBoundariesRegex { get; set; }
1920

21+
public Regex ColonPrefixCheckRegex { get; set; }
22+
23+
public List<char> ColonMarkers { get; set; }
24+
25+
public List<char> ForbiddenPrefixMarkers { get; set; }
26+
27+
public List<char> ForbiddenSuffixMarkers { get; set; }
28+
2029
}
2130
}

.NET/Microsoft.Recognizers.Text.Sequence/English/Extractors/EnglishPhoneNumberExtractorConfiguration.cs

+6-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1-
using System.Text.RegularExpressions;
1+
using System.Collections.Generic;
2+
using System.Text.RegularExpressions;
23
using Microsoft.Recognizers.Definitions;
34

45
namespace Microsoft.Recognizers.Text.Sequence.English
@@ -11,6 +12,10 @@ public EnglishPhoneNumberExtractorConfiguration(SequenceOptions options)
1112
WordBoundariesRegex = BasePhoneNumbers.WordBoundariesRegex;
1213
NonWordBoundariesRegex = BasePhoneNumbers.NonWordBoundariesRegex;
1314
EndWordBoundariesRegex = BasePhoneNumbers.EndWordBoundariesRegex;
15+
ColonPrefixCheckRegex = new Regex(BasePhoneNumbers.ColonPrefixCheckRegex);
16+
ColonMarkers = (List<char>)BasePhoneNumbers.ColonMarkers;
17+
ForbiddenPrefixMarkers = (List<char>)BasePhoneNumbers.ForbiddenPrefixMarkers;
18+
ForbiddenSuffixMarkers = (List<char>)BasePhoneNumbers.ForbiddenSuffixMarkers;
1419
}
1520
}
1621
}

.NET/Microsoft.Recognizers.Text.Sequence/Extractors/BasePhoneNumberExtractor.cs

+57-3
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
using System.Collections.Immutable;
33
using System.Linq;
44
using System.Text.RegularExpressions;
5+
using System.Xml;
56
using Microsoft.Recognizers.Definitions;
67

78
namespace Microsoft.Recognizers.Text.Sequence
@@ -10,10 +11,14 @@ public class BasePhoneNumberExtractor : BaseSequenceExtractor
1011
{
1112
private static readonly Regex InternationDialingPrefixRegex = new Regex(BasePhoneNumbers.InternationDialingPrefixRegex);
1213

14+
private static readonly Regex PreCheckPhoneNumberRegex = new Regex(BasePhoneNumbers.PreCheckPhoneNumberRegex, RegexOptions.Compiled);
15+
1316
private PhoneNumberConfiguration config;
1417

1518
public BasePhoneNumberExtractor(PhoneNumberConfiguration config)
1619
{
20+
this.config = config;
21+
1722
var wordBoundariesRegex = config.WordBoundariesRegex;
1823
var nonWordBoundariesRegex = config.NonWordBoundariesRegex;
1924
var endWordBoundariesRegex = config.EndWordBoundariesRegex;
@@ -69,20 +74,39 @@ public BasePhoneNumberExtractor(PhoneNumberConfiguration config)
6974

7075
protected sealed override string ExtractType { get; } = Constants.SYS_PHONE_NUMBER;
7176

72-
private static List<char> BoundaryMarkers => BasePhoneNumbers.BoundaryMarkers.ToList();
73-
7477
private static List<char> SpecialBoundaryMarkers => BasePhoneNumbers.SpecialBoundaryMarkers.ToList();
7578

7679
public override List<ExtractResult> Extract(string text)
7780
{
81+
if (!PreCheckPhoneNumberRegex.IsMatch(text))
82+
{
83+
return new List<ExtractResult>();
84+
}
85+
7886
var ers = base.Extract(text);
7987

8088
foreach (var er in ers)
8189
{
90+
if (CountDigits(er.Text) < 7 && er.Data.ToString() != "ITPhoneNumber")
91+
{
92+
ers.Remove(er);
93+
continue;
94+
}
95+
96+
if (er.Start + er.Length < text.Length)
97+
{
98+
var ch = text[(int)(er.Start + er.Length)];
99+
if (BasePhoneNumbers.ForbiddenSuffixMarkers.Contains(ch))
100+
{
101+
ers.Remove(er);
102+
continue;
103+
}
104+
}
105+
82106
if (er.Start != 0)
83107
{
84108
var ch = text[(int)(er.Start - 1)];
85-
if (BoundaryMarkers.Contains(ch))
109+
if (BasePhoneNumbers.BoundaryMarkers.Contains(ch))
86110
{
87111
if (SpecialBoundaryMarkers.Contains(ch) &&
88112
CheckFormattedPhoneNumber(er.Text) &&
@@ -106,6 +130,22 @@ public override List<ExtractResult> Extract(string text)
106130
}
107131
}
108132

133+
// Handle cases like "-1234567" and "-1234+5678"
134+
ers.Remove(er);
135+
}
136+
137+
if (this.config.ForbiddenPrefixMarkers.Contains(ch))
138+
{
139+
// Handle "tel:123456".
140+
if (BasePhoneNumbers.ColonMarkers.Contains(ch))
141+
{
142+
var front = text.Substring(0, (int)(er.Start - 1));
143+
if (this.config.ColonPrefixCheckRegex.IsMatch(front))
144+
{
145+
continue;
146+
}
147+
}
148+
109149
ers.Remove(er);
110150
}
111151
}
@@ -134,5 +174,19 @@ private bool CheckFormattedPhoneNumber(string phoneNumberText)
134174
{
135175
return Regex.IsMatch(phoneNumberText, BasePhoneNumbers.FormatIndicatorRegex);
136176
}
177+
178+
private int CountDigits(string candidateString)
179+
{
180+
var count = 0;
181+
foreach (var t in candidateString)
182+
{
183+
if (char.IsNumber(t))
184+
{
185+
++count;
186+
}
187+
}
188+
189+
return count;
190+
}
137191
}
138192
}

.NET/Microsoft.Recognizers.Text.Sequence/Models/GUIDModel.cs

+4
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Linq;
4+
using Microsoft.Recognizers.Text.Utilities;
45

56
namespace Microsoft.Recognizers.Text.Sequence
67
{
@@ -17,6 +18,9 @@ public override List<ModelResult> Parse(string query)
1718
{
1819
var parsedSequences = new List<ParseResult>();
1920

21+
// Preprocess the query
22+
query = QueryProcessor.Preprocess(query);
23+
2024
try
2125
{
2226
var extractResults = Extractor.Extract(query);

.NET/Microsoft.Recognizers.Text.Sequence/Models/PhoneNumberModel.cs

+4
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Linq;
4+
using Microsoft.Recognizers.Text.Utilities;
45

56
namespace Microsoft.Recognizers.Text.Sequence
67
{
@@ -17,6 +18,9 @@ public override List<ModelResult> Parse(string query)
1718
{
1819
var parsedSequences = new List<ParseResult>();
1920

21+
// Preprocess the query
22+
query = QueryProcessor.Preprocess(query);
23+
2024
try
2125
{
2226
var extractResults = Extractor.Extract(query);

JavaScript/packages/recognizers-number/src/resources/chineseNumeric.ts

+8-8
Original file line number | Diff line number | Diff line change
@@ -21,14 +21,14 @@ export namespace ChineseNumeric {
2121
export const WordSeparatorToken = '';
2222
export const ZeroChar = '零';
2323
export const PairChar = '对';
24-
export const RoundNumberMap: ReadonlyMap<string, number> = new Map<string, number>([["k", 1000], ["m", 1000000], ["g", 1000000000], ["t", 1000000000000]]);
25-
export const RoundNumberMapChar: ReadonlyMap<string, number> = new Map<string, number>([["十", 10], ["百", 100], ["千", 1000], ["万", 10000], ["亿", 100000000], ["兆", 1000000000000], ["拾", 10], ["佰", 100], ["仟", 1000], ["萬", 10000], ["億", 100000000]]);
26-
export const ZeroToNineMap: ReadonlyMap<string, number> = new Map<string, number>([["0", 0], ["1", 1], ["2", 2], ["3", 3], ["4", 4], ["5", 5], ["6", 6], ["7", 7], ["8", 8], ["9", 9], ["零", 0], ["一", 1], ["二", 2], ["三", 3], ["四", 4], ["五", 5], ["六", 6], ["七", 7], ["八", 8], ["九", 9], ["〇", 0], ["壹", 1], ["贰", 2], ["貳", 2], ["叁", 3], ["肆", 4], ["伍", 5], ["陆", 6], ["陸", 6], ["柒", 7], ["捌", 8], ["玖", 9], ["0", 0], ["1", 1], ["2", 2], ["3", 3], ["4", 4], ["5", 5], ["6", 6], ["7", 7], ["8", 8], ["9", 9], ["半", 0.5], ["两", 2], ["兩", 2], ["俩", 2], ["倆", 2], ["仨", 3]]);
27-
export const FullToHalfMap: ReadonlyMap<string, string> = new Map<string, string>([["0", "0"], ["1", "1"], ["2", "2"], ["3", "3"], ["4", "4"], ["5", "5"], ["6", "6"], ["7", "7"], ["8", "8"], ["9", "9"], ["/", "/"], ["-", "-"], [",", "'"], ["G", "G"], ["M", "M"], ["T", "T"], ["K", "K"], ["k", "k"], [".", "."]]);
28-
export const TratoSimMap: ReadonlyMap<string, string> = new Map<string, string>([["佰", "百"], ["點", "点"], ["個", "个"], ["幾", "几"], ["對", "对"], ["雙", "双"]]);
29-
export const UnitMap: ReadonlyMap<string, string> = new Map<string, string>([["萬萬", "億"], ["億萬", "兆"], ["萬億", "兆"], ["万万", "亿"], ["万亿", "兆"], ["亿万", "兆"], [" ", ""], ["多", ""], ["余", ""], ["几", ""]]);
30-
export const RoundDirectList = ["亿", "兆", "億"];
31-
export const TenChars = ["十", "拾"];
24+
export const RoundNumberMap: ReadonlyMap<string, number> = new Map<string, number>([["k", 1000],["m", 1000000],["g", 1000000000],["t", 1000000000000]]);
25+
export const RoundNumberMapChar: ReadonlyMap<string, number> = new Map<string, number>([["十", 10],["百", 100],["千", 1000],["万", 10000],["亿", 100000000],["兆", 1000000000000],["拾", 10],["佰", 100],["仟", 1000],["萬", 10000],["億", 100000000]]);
26+
export const ZeroToNineMap: ReadonlyMap<string, number> = new Map<string, number>([["0", 0],["1", 1],["2", 2],["3", 3],["4", 4],["5", 5],["6", 6],["7", 7],["8", 8],["9", 9],["零", 0],["一", 1],["二", 2],["三", 3],["四", 4],["五", 5],["六", 6],["七", 7],["八", 8],["九", 9],["〇", 0],["壹", 1],["贰", 2],["貳", 2],["叁", 3],["肆", 4],["伍", 5],["陆", 6],["陸", 6],["柒", 7],["捌", 8],["玖", 9],["0", 0],["1", 1],["2", 2],["3", 3],["4", 4],["5", 5],["6", 6],["7", 7],["8", 8],["9", 9],["半", 0.5],["两", 2],["兩", 2],["俩", 2],["倆", 2],["仨", 3]]);
27+
export const FullToHalfMap: ReadonlyMap<string, string> = new Map<string, string>([["0", "0"],["1", "1"],["2", "2"],["3", "3"],["4", "4"],["5", "5"],["6", "6"],["7", "7"],["8", "8"],["9", "9"],["/", "/"],["-", "-"],[",", "'"],["G", "G"],["M", "M"],["T", "T"],["K", "K"],["k", "k"],[".", "."]]);
28+
export const TratoSimMap: ReadonlyMap<string, string> = new Map<string, string>([["佰", "百"],["點", "点"],["個", "个"],["幾", "几"],["對", "对"],["雙", "双"]]);
29+
export const UnitMap: ReadonlyMap<string, string> = new Map<string, string>([["萬萬", "億"],["億萬", "兆"],["萬億", "兆"],["万万", "亿"],["万亿", "兆"],["亿万", "兆"],[" ", ""],["多", ""],["余", ""],["几", ""]]);
30+
export const RoundDirectList = [ "亿","兆","億" ];
31+
export const TenChars = [ "十","拾" ];
3232
export const DigitalNumberRegex = `((?<=(\\d|\\b))${BaseNumbers.MultiplierLookupRegex}(?=\\b))`;
3333
export const ZeroToNineFullHalfRegex = `[\\d1234567890]`;
3434
export const DigitNumRegex = `${ZeroToNineFullHalfRegex}+`;

JavaScript/packages/recognizers-sequence/src/resources/baseGUID.ts

+2-2
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,6 @@
1010
// ------------------------------------------------------------------------------
1111

1212
export namespace BaseGUID {
13-
export const GUIDRegexElement = `(([A-Fa-f0-9]{8}(-[A-Fa-f0-9]{4}){3}-[A-Fa-f0-9]{12})|([A-Fa-f0-9]{32}))`;
14-
export const GUIDRegex = `(\\b${GUIDRegexElement}\\b|\\{${GUIDRegexElement}\\}|urn:uuid:${GUIDRegexElement}\\b|%7[Bb]${GUIDRegexElement}%7[Dd]|[Xx]\\'${GUIDRegexElement}\\')`;
13+
export const GUIDRegexElement = `(([a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12})|([a-f0-9]{32}))`;
14+
export const GUIDRegex = `(\\b${GUIDRegexElement}\\b|\\{${GUIDRegexElement}\\}|urn:uuid:${GUIDRegexElement}\\b|%7[b]${GUIDRegexElement}%7[d]|[x]\\'${GUIDRegexElement}\\')`;
1515
}

JavaScript/packages/recognizers-sequence/src/resources/basePhoneNumbers.ts

+6-1
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ export namespace BasePhoneNumbers {
1414
export const WordBoundariesRegex = `\\b`;
1515
export const NonWordBoundariesRegex = `\\B`;
1616
export const EndWordBoundariesRegex = `\\b`;
17+
export const PreCheckPhoneNumberRegex = `(\\d{1,4}.){2,4}\\s?\\d{2,3}`;
1718
export const GeneralPhoneNumberRegex = (WordBoundariesRegex: string, EndWordBoundariesRegex: string) => { return `(${WordBoundariesRegex}(((\\d[\\s]?){4,12}))(-?[\\d\\s?]{3}\\d)(?!-)${EndWordBoundariesRegex})|(\\(\\d{5}\\)\\s?\\d{5,6})|\\+\\d{2}\\(\\d\\)\\d{10}`; }
1819
export const BRPhoneNumberRegex = (WordBoundariesRegex: string, NonWordBoundariesRegex: string, EndWordBoundariesRegex: string) => { return `((\\(\\s?(\\+\\s?|00)55\\s?\\)\\s?)|(((?<!\\d)\\+\\s?|${WordBoundariesRegex}00)55\\s?)|${WordBoundariesRegex})?(((${NonWordBoundariesRegex}\\(\\s?))\\d{2,3}(\\s?\\))|(${WordBoundariesRegex}\\d{2,3}))\\s?\\d{4,5}-?\\d{3,5}(?!-)${EndWordBoundariesRegex}`; }
1920
export const UKPhoneNumberRegex = (WordBoundariesRegex: string, NonWordBoundariesRegex: string, EndWordBoundariesRegex: string) => { return `(((${WordBoundariesRegex}(00)|${NonWordBoundariesRegex}\\+)\\s?)?(${WordBoundariesRegex}\\d{2}\\s?)?((\\s?\\(0\\)[-\\s]?|${WordBoundariesRegex}|(?<=(\\b^#)\\d{2}))\\d{2,5}|\\(0\\d{3,4}\\))[/-]?\\s?(\\d{5,8}|\\d{3,4}[-\\s]?\\d{3,4})(?!-)${EndWordBoundariesRegex})`; }
@@ -27,10 +28,14 @@ export namespace BasePhoneNumbers {
2728
export const NoAreaCodeUSPhoneNumberRegex = `(?<!(-|-\\s|\\d|\\)|\\)\\s|\\.))[2-9]\\d{2}\\s?[-\\.]\\s?\\d{4}(?!(-\\s?\\d))\\b`;
2829
export const InternationDialingPrefixRegex = `0(0|11)$`;
2930
export const TypicalDeductionRegexList = [ "^\\d{3}-\\d{2}-\\d{4}$","^\\d{5}-\\d{4}$","\\)\\.","^0(0|11)(-)" ];
30-
export const PhoneNumberMaskRegex = `([0-9A-E]{2}(\\s[0-9A-E]{2}){7})`;
31+
export const PhoneNumberMaskRegex = `([0-9a-e]{2}(\\s[0-9a-e]{2}){7})`;
3132
export const CountryCodeRegex = `^(\\(\\s?(\\+\\s?|00)\\d{1,3}\\s?\\)|(\\+\\s?|00)\\d{1,3})`;
3233
export const AreaCodeIndicatorRegex = `\\(`;
3334
export const FormatIndicatorRegex = `(\\s|-|/|\\.)+`;
35+
export const ColonMarkers = [ ":" ];
36+
export const ColonPrefixCheckRegex = `(([A-Za-z])\\s*$)`;
3437
export const SpecialBoundaryMarkers = [ "-"," " ];
3538
export const BoundaryMarkers = [ "-",".","/","+","#","*" ];
39+
export const ForbiddenPrefixMarkers = [ ",",":","%" ];
40+
export const ForbiddenSuffixMarkers = [ "/","+","#","*",":","%" ];
3641
}

JavaScript/packages/recognizers-sequence/src/resources/chinesePhoneNumbers.ts

+2
Original file line number | Diff line number | Diff line change
@@ -14,4 +14,6 @@ export namespace ChinesePhoneNumbers {
1414
export const WordBoundariesRegex = `(\\b|(?<=[\\u0800-\\u9FFF]))`;
1515
export const NonWordBoundariesRegex = `(\\B|(?<=[\\u0800-\\u9FFF]))`;
1616
export const EndWordBoundariesRegex = `(\\b|(?=[\\u0800-\\u9FFF]))`;
17+
export const ColonPrefixCheckRegex = `(([A-Za-z]|[\\u4E00-\\u9FA5])\\s*$)`;
18+
export const ForbiddenPrefixMarkers = [ ":","%" ];
1719
}

JavaScript/packages/recognizers-sequence/src/sequence/chinese/extractors.ts

+4
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,14 @@ export class ChinesePhoneNumberExtractorConfiguration implements IPhoneNumberExt
1717
readonly WordBoundariesRegex: string;
1818
readonly NonWordBoundariesRegex: string;
1919
readonly EndWordBoundariesRegex: string;
20+
readonly ColonPrefixCheckRegex: string;
21+
readonly ForbiddenPrefixMarkers: string[];
2022

2123
constructor() {
2224
this.WordBoundariesRegex = ChinesePhoneNumbers.WordBoundariesRegex;
2325
this.NonWordBoundariesRegex = ChinesePhoneNumbers.NonWordBoundariesRegex;
2426
this.EndWordBoundariesRegex = ChinesePhoneNumbers.EndWordBoundariesRegex;
27+
this.ColonPrefixCheckRegex = ChinesePhoneNumbers.ColonPrefixCheckRegex;
28+
this.ForbiddenPrefixMarkers = ChinesePhoneNumbers.ForbiddenPrefixMarkers;
2529
}
2630
}

0 commit comments

Comments
 (0)