Skip to content

Commit 3e51bb9

Browse files
Cicero96 authored and tellarin committed
Add recognition for mention, hashtag, url, and email in sequence (microsoft#447)
* Add new recognizers * Temporarily hide tests from js
1 parent 74a2bc6 commit 3e51bb9

40 files changed

+1019
-5
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//------------------------------------------------------------------------------
2+
// <auto-generated>
3+
// This code was generated by a tool.
4+
// Changes to this file may cause incorrect behavior and will be lost if
5+
// the code is regenerated.
6+
//
7+
// Generation parameters:
8+
// - DataFilename: Patterns\Base-Email.yaml
9+
// - Language: NULL
10+
// - ClassName: BaseEmail
11+
// </auto-generated>
12+
//------------------------------------------------------------------------------
13+
namespace Microsoft.Recognizers.Definitions
14+
{
15+
using System;
16+
using System.Collections.Generic;
17+
18+
/// <summary>
/// Language-independent pattern definitions used for e-mail recognition.
/// </summary>
public static class BaseEmail
{
    /// <summary>
    /// Pattern for an e-mail address: a local part made of letters, digits,
    /// '-', '_' or '.', an '@' sign, a domain, a literal dot, and a trailing
    /// suffix of 2 to 6 letters or dots.
    /// </summary>
    public const string EmailRegex = @"(([-a-zA-Z0-9_\.]+)@([-a-zA-Z\d\.]+)\.([a-zA-Z\.]{2,6}))";
}
22+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<#@ template debug="true" hostspecific="true" language="C#" #>
2+
<#
// Generation parameters for the BaseEmail definitions class. They are assigned
// before the shared include at the bottom of this template is processed.
// NOTE(review): presumably CommonDefinitions.ttinclude reads these values to
// emit BaseEmail.cs - confirm against the include file.
3+
this.DataFilename = @"Patterns\Base-Email.yaml";
4+
// Language is left unset (null) for this template.
this.Language = null;
5+
this.ClassName = "BaseEmail";
6+
#>
7+
<#@ include file=".\CommonDefinitions.ttinclude"#>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//------------------------------------------------------------------------------
2+
// <auto-generated>
3+
// This code was generated by a tool.
4+
// Changes to this file may cause incorrect behavior and will be lost if
5+
// the code is regenerated.
6+
//
7+
// Generation parameters:
8+
// - DataFilename: Patterns\Base-Hashtag.yaml
9+
// - Language: NULL
10+
// - ClassName: BaseHashtag
11+
// </auto-generated>
12+
//------------------------------------------------------------------------------
13+
namespace Microsoft.Recognizers.Definitions
14+
{
15+
using System;
16+
using System.Collections.Generic;
17+
18+
/// <summary>
/// Language-independent pattern definitions used for hashtag recognition.
/// </summary>
public static class BaseHashtag
{
    /// <summary>
    /// Pattern for a hashtag: a '#' that sits at the start of the input or
    /// directly after whitespace, followed by one or more letters, digits or
    /// underscores.
    /// </summary>
    public const string HashtagRegex = @"((?<=\s|^)#([a-zA-Z0-9_]+))";
}
22+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<#@ template debug="true" hostspecific="true" language="C#" #>
2+
<#
// Generation parameters for the BaseHashtag definitions class. They are assigned
// before the shared include at the bottom of this template is processed.
// NOTE(review): presumably CommonDefinitions.ttinclude reads these values to
// emit the generated class - confirm against the include file.
3+
this.DataFilename = @"Patterns\Base-Hashtag.yaml";
4+
// Language is left unset (null) for this template.
this.Language = null;
5+
this.ClassName = "BaseHashtag";
6+
#>
7+
<#@ include file=".\CommonDefinitions.ttinclude"#>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//------------------------------------------------------------------------------
2+
// <auto-generated>
3+
// This code was generated by a tool.
4+
// Changes to this file may cause incorrect behavior and will be lost if
5+
// the code is regenerated.
6+
//
7+
// Generation parameters:
8+
// - DataFilename: Patterns\Base-Mention.yaml
9+
// - Language: NULL
10+
// - ClassName: BaseMention
11+
// </auto-generated>
12+
//------------------------------------------------------------------------------
13+
namespace Microsoft.Recognizers.Definitions
14+
{
15+
using System;
16+
using System.Collections.Generic;
17+
18+
/// <summary>
/// Language-independent pattern definitions used for mention recognition.
/// </summary>
public static class BaseMention
{
    /// <summary>
    /// Pattern for a mention: an '@' that sits at the start of the input or
    /// directly after whitespace, followed by one or more letters, digits or
    /// underscores.
    /// </summary>
    public const string MentionRegex = @"((?<=\s|^)@([a-zA-Z0-9_]+))";
}
22+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<#@ template debug="true" hostspecific="true" language="C#" #>
2+
<#
// Generation parameters for the BaseMention definitions class. They are assigned
// before the shared include at the bottom of this template is processed.
// NOTE(review): presumably CommonDefinitions.ttinclude reads these values to
// emit BaseMention.cs - confirm against the include file.
3+
this.DataFilename = @"Patterns\Base-Mention.yaml";
4+
// Language is left unset (null) for this template.
this.Language = null;
5+
this.ClassName = "BaseMention";
6+
#>
7+
<#@ include file=".\CommonDefinitions.ttinclude"#>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//------------------------------------------------------------------------------
2+
// <auto-generated>
3+
// This code was generated by a tool.
4+
// Changes to this file may cause incorrect behavior and will be lost if
5+
// the code is regenerated.
6+
//
7+
// Generation parameters:
8+
// - DataFilename: Patterns\Base-URL.yaml
9+
// - Language: NULL
10+
// - ClassName: BaseURL
11+
// </auto-generated>
12+
//------------------------------------------------------------------------------
13+
namespace Microsoft.Recognizers.Definitions
14+
{
15+
using System;
16+
using System.Collections.Generic;
17+
18+
/// <summary>
/// Language-independent pattern definitions used for URL recognition.
/// </summary>
public static class BaseURL
{
    /// <summary>
    /// Pattern for a URL that begins at the start of the input or directly after
    /// whitespace: an optional http/https/ftp scheme, an optional "www." prefix,
    /// a host of 2 to 256 characters, a dot, a 2 to 6 letter suffix ending on a
    /// word boundary, and any trailing path or query characters.
    /// </summary>
    public const string URLRegex = @"(?<=\s|^)((https?|ftp):\/\/)?(www\.)?[-a-zA-Z0-9:%._\+~#=]{2,256}\.[a-zA-Z]{2,6}\b([-a-zA-Z0-9:%_\+.~#?&//=]*)";
}
22+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<#@ template debug="true" hostspecific="true" language="C#" #>
2+
<#
// Generation parameters for the BaseURL definitions class. They are assigned
// before the shared include at the bottom of this template is processed.
// NOTE(review): presumably CommonDefinitions.ttinclude reads these values to
// emit BaseURL.cs - confirm against the include file.
3+
this.DataFilename = @"Patterns\Base-URL.yaml";
4+
// Language is left unset (null) for this template.
this.Language = null;
5+
this.ClassName = "BaseURL";
6+
#>
7+
<#@ include file=".\CommonDefinitions.ttinclude"#>

.NET/Microsoft.Recognizers.Definitions/Microsoft.Recognizers.Definitions.csproj

+36
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,26 @@
3131
<DesignTime>True</DesignTime>
3232
<AutoGen>True</AutoGen>
3333
</Compile>
34+
<Compile Update="BaseEmail.cs">
35+
<DesignTime>True</DesignTime>
36+
<AutoGen>True</AutoGen>
37+
<DependentUpon>BaseEmail.tt</DependentUpon>
38+
</Compile>
39+
<Compile Update="BaseHashtag.cs">
40+
<DesignTime>True</DesignTime>
41+
<AutoGen>True</AutoGen>
42+
<DependentUpon>BaseHashtag.tt</DependentUpon>
43+
</Compile>
3444
<Compile Update="BaseIp.cs">
3545
<DesignTime>True</DesignTime>
3646
<AutoGen>True</AutoGen>
3747
<DependentUpon>BaseIp.tt</DependentUpon>
3848
</Compile>
49+
<Compile Update="BaseMention.cs">
50+
<DesignTime>True</DesignTime>
51+
<AutoGen>True</AutoGen>
52+
<DependentUpon>BaseMention.tt</DependentUpon>
53+
</Compile>
3954
<Compile Update="BaseNumbers.cs">
4055
<DependentUpon>BaseNumbers.tt</DependentUpon>
4156
<DesignTime>True</DesignTime>
@@ -46,6 +61,11 @@
4661
<AutoGen>True</AutoGen>
4762
<DependentUpon>BasePhoneNumbers.tt</DependentUpon>
4863
</Compile>
64+
<Compile Update="BaseURL.cs">
65+
<DesignTime>True</DesignTime>
66+
<AutoGen>True</AutoGen>
67+
<DependentUpon>BaseURL.tt</DependentUpon>
68+
</Compile>
4969
<Compile Update="Chinese\DateTimeDefinitions.cs">
5070
<DependentUpon>DateTimeDefinitions.tt</DependentUpon>
5171
<DesignTime>True</DesignTime>
@@ -187,10 +207,22 @@
187207
<Generator>TextTemplatingFileGenerator</Generator>
188208
<LastGenOutput>BaseDateTime.cs</LastGenOutput>
189209
</None>
210+
<None Update="BaseEmail.tt">
211+
<Generator>TextTemplatingFileGenerator</Generator>
212+
<LastGenOutput>BaseEmail.cs</LastGenOutput>
213+
</None>
214+
<None Update="BaseHashtag.tt">
215+
<Generator>TextTemplatingFileGenerator</Generator>
216+
<LastGenOutput>BaseHashtag.cs</LastGenOutput>
217+
</None>
190218
<None Update="BaseIp.tt">
191219
<Generator>TextTemplatingFileGenerator</Generator>
192220
<LastGenOutput>BaseIp.cs</LastGenOutput>
193221
</None>
222+
<None Update="BaseMention.tt">
223+
<Generator>TextTemplatingFileGenerator</Generator>
224+
<LastGenOutput>BaseMention.cs</LastGenOutput>
225+
</None>
194226
<None Update="BaseNumbers.tt">
195227
<Generator>TextTemplatingFileGenerator</Generator>
196228
<LastGenOutput>BaseNumbers.cs</LastGenOutput>
@@ -199,6 +231,10 @@
199231
<Generator>TextTemplatingFileGenerator</Generator>
200232
<LastGenOutput>BasePhoneNumbers.cs</LastGenOutput>
201233
</None>
234+
<None Update="BaseURL.tt">
235+
<Generator>TextTemplatingFileGenerator</Generator>
236+
<LastGenOutput>BaseURL.cs</LastGenOutput>
237+
</None>
202238
<None Update="Chinese\DateTimeDefinitions.tt">
203239
<Generator>TextTemplatingFileGenerator</Generator>
204240
<LastGenOutput>DateTimeDefinitions.cs</LastGenOutput>

.NET/Microsoft.Recognizers.Text.DataDrivenTests/Sequence/TestSequence_English.cs

+28
Original file line numberDiff line numberDiff line change
@@ -39,5 +39,33 @@ public void IpAddressModel()
3939
{
4040
TestIpAddress();
4141
}
42+
43+
/// <summary>
/// Data-driven test bound to MentionModel-English.csv with sequential row
/// access; the work is delegated to <c>TestMention</c>.
/// </summary>
[TestMethod]
[DataSource("Microsoft.VisualStudio.TestTools.DataSource.CSV", "MentionModel-English.csv", "MentionModel-English#csv", DataAccessMethod.Sequential)]
public void MentionModel() => TestMention();
49+
50+
/// <summary>
/// Data-driven test bound to HashtagModel-English.csv with sequential row
/// access; the work is delegated to <c>TestHashtag</c>.
/// </summary>
[TestMethod]
[DataSource("Microsoft.VisualStudio.TestTools.DataSource.CSV", "HashtagModel-English.csv", "HashtagModel-English#csv", DataAccessMethod.Sequential)]
public void HashtagModel() => TestHashtag();
56+
57+
/// <summary>
/// Data-driven test bound to EmailModel-English.csv with sequential row
/// access; the work is delegated to <c>TestEmail</c>.
/// </summary>
[TestMethod]
[DataSource("Microsoft.VisualStudio.TestTools.DataSource.CSV", "EmailModel-English.csv", "EmailModel-English#csv", DataAccessMethod.Sequential)]
public void EmailModel() => TestEmail();
63+
64+
/// <summary>
/// Data-driven test bound to URLModel-English.csv with sequential row
/// access; the work is delegated to <c>TestURL</c>.
/// </summary>
[TestMethod]
[DataSource("Microsoft.VisualStudio.TestTools.DataSource.CSV", "URLModel-English.csv", "URLModel-English#csv", DataAccessMethod.Sequential)]
public void URLModel() => TestURL();
4270
}
4371
}

.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestBase.cs

+24
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,30 @@ public void TestPhoneNumber()
232232
ValidateResults();
233233
}
234234

235+
/// <summary>
/// Entry point for mention test cases: calls <c>TestPreValidation</c> and
/// then <c>ValidateResults</c>.
/// </summary>
public void TestMention()
{
    TestPreValidation();
    ValidateResults();
}
240+
241+
/// <summary>
/// Entry point for hashtag test cases: calls <c>TestPreValidation</c> and
/// then <c>ValidateResults</c>.
/// </summary>
public void TestHashtag()
{
    TestPreValidation();
    ValidateResults();
}
246+
247+
/// <summary>
/// Entry point for e-mail test cases: calls <c>TestPreValidation</c> and
/// then <c>ValidateResults</c>.
/// </summary>
public void TestEmail()
{
    TestPreValidation();
    ValidateResults();
}
252+
253+
/// <summary>
/// Entry point for URL test cases: calls <c>TestPreValidation</c> and
/// then <c>ValidateResults</c>.
/// </summary>
public void TestURL()
{
    TestPreValidation();
    ValidateResults();
}
258+
235259
public void TestChoice()
236260
{
237261
TestPreValidation();

.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs

+8
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ public enum Models
7171
DateTimeExtendedTypes,
7272
PhoneNumber,
7373
IpAddress,
74+
Mention,
75+
Hashtag,
76+
Email,
77+
URL,
7478
Boolean,
7579
}
7680

@@ -124,6 +128,10 @@ public static class TestContextExtensions
124128
{ Models.DateTimeExtendedTypes, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, DateTimeOptions.ExtendedTypes, refTime: test.GetReferenceDateTime(), fallbackToDefaultCulture: false) },
125129
{ Models.PhoneNumber, (test, culture) => SequenceRecognizer.RecognizePhoneNumber(test.Input, culture, fallbackToDefaultCulture: false) },
126130
{ Models.IpAddress, (test, culture) => SequenceRecognizer.RecognizeIpAddress(test.Input, culture, fallbackToDefaultCulture: false) },
131+
{ Models.Mention, (test, culture) => SequenceRecognizer.RecognizeMention(test.Input, culture, fallbackToDefaultCulture: false) },
132+
{ Models.Hashtag, (test, culture) => SequenceRecognizer.RecognizeHashtag(test.Input, culture, fallbackToDefaultCulture: false) },
133+
{ Models.Email, (test, culture) => SequenceRecognizer.RecognizeEmail(test.Input, culture, fallbackToDefaultCulture: false) },
134+
{ Models.URL, (test, culture) => SequenceRecognizer.RecognizeURL(test.Input, culture, fallbackToDefaultCulture: false) },
127135
{ Models.Boolean, (test, culture) => ChoiceRecognizer.RecognizeBoolean(test.Input, culture, fallbackToDefaultCulture: false) }
128136
};
129137

.NET/Microsoft.Recognizers.Text.Sequence/Constants.cs

+25-1
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,27 @@ public static class Constants
88

99
public const string SYS_IP = "builtin.ip";
1010

11+
public const string SYS_MENTION = "builtin.mention";
12+
13+
public const string SYS_HASHTAG = "builtin.hashtag";
14+
15+
public const string SYS_EMAIL = "builtin.email";
16+
17+
public const string SYS_URL = "builtin.url";
18+
1119
// Model type name
1220
public const string MODEL_PHONE_NUMBER = "phonenumber";
1321

1422
public const string MODEL_IP = "ip";
1523

24+
public const string MODEL_MENTION = "mention";
25+
26+
public const string MODEL_HASHTAG = "hashtag";
27+
28+
public const string MODEL_EMAIL = "email";
29+
30+
public const string MODEL_URL = "url";
31+
1632
public const string IP_REGEX_IPV4 = "ipv4";
1733

1834
public const string IP_REGEX_IPV6 = "ipv6";
@@ -27,6 +43,14 @@ public static class Constants
2743

2844
public const string PHONE_NUMBER_REGEX_GERMANY = "GermanyPhoneNumber";
2945

30-
public const string PHONE_NUMBER_REGEX_US= "USPhoneNumber";
46+
public const string PHONE_NUMBER_REGEX_US = "USPhoneNumber";
47+
48+
public const string MENTION_REGEX = "Mention";
49+
50+
public const string HASHTAG_REGEX = "Hashtag";
51+
52+
public const string EMAIL_REGEX = "Email";
53+
54+
public const string URL_REGEX = "Url";
3155
}
3256
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
namespace Microsoft.Recognizers.Text.Sequence.English
2+
{
3+
/// <summary>
/// English e-mail extractor. It declares no members of its own; everything
/// comes from <see cref="BaseEmailExtractor"/>.
/// </summary>
public class EmailExtractor : BaseEmailExtractor
{
}
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
namespace Microsoft.Recognizers.Text.Sequence.English
2+
{
3+
/// <summary>
/// English hashtag extractor. It declares no members of its own; everything
/// comes from <see cref="BaseHashtagExtractor"/>.
/// </summary>
public class HashtagExtractor : BaseHashtagExtractor
{
}
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
namespace Microsoft.Recognizers.Text.Sequence.English
2+
{
3+
/// <summary>
/// English mention extractor. It declares no members of its own; everything
/// comes from <see cref="BaseMentionExtractor"/>.
/// </summary>
public class MentionExtractor : BaseMentionExtractor
{
}
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
namespace Microsoft.Recognizers.Text.Sequence.English
2+
{
3+
/// <summary>
/// English URL extractor. It declares no members of its own; everything
/// comes from <see cref="BaseURLExtractor"/>.
/// </summary>
public class URLExtractor : BaseURLExtractor
{
}
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
namespace Microsoft.Recognizers.Text.Sequence.English
2+
{
3+
/// <summary>
/// English e-mail parser. It adds nothing beyond a parameterless constructor
/// on top of <see cref="BaseSequenceParser"/>.
/// </summary>
public class EmailParser : BaseSequenceParser
{
    /// <summary>
    /// Creates the parser; the constructor body is intentionally empty.
    /// </summary>
    public EmailParser()
        : base()
    {
    }
}
10+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
namespace Microsoft.Recognizers.Text.Sequence.English
2+
{
3+
/// <summary>
/// English hashtag parser. It adds nothing beyond a parameterless constructor
/// on top of <see cref="BaseSequenceParser"/>.
/// </summary>
public class HashtagParser : BaseSequenceParser
{
    /// <summary>
    /// Creates the parser; the constructor body is intentionally empty.
    /// </summary>
    public HashtagParser()
        : base()
    {
    }
}
10+
}

0 commit comments

Comments
 (0)