Skip to content

Commit c70ff6f

Browse files
authored
Maintenance update of auto-gen resources and minor changes (microsoft#2524)
* Removed unnecessary half-width digits in ZH and JA regexes; fixed merge issue in FR/ES/PT for time-of-day + time (additional cases for microsoft#2482); added NumberRange patterns to be localized in French YAML; re-generated resources. * Fixed incorrect ET timezone offset; added non-standard speed units; added bitcoin and its Unicode symbol as a currency unit; broke clustered units into separate entries; fixed incorrect French timezone spec file name; re-generated resources across platforms. * Added specs; added more verbosity to assert failure in Python DateTime test code; re-generated resources across platforms. * Disabled Unicode fraction case in JavaScript units.
1 parent ca59ab0 commit c70ff6f

File tree

95 files changed

+1746
-580
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

95 files changed

+1746
-580
lines changed

.NET/Microsoft.Recognizers.Definitions.Common/BaseCurrency.cs

+7-3
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,8 @@ public static class BaseCurrency
190190
{ @"UYU", @"CENTESIMO" },
191191
{ @"VEF", @"CENTIMO" },
192192
{ @"YER", @"FILS" },
193-
{ @"ZMW", @"NGWEE" }
193+
{ @"ZMW", @"NGWEE" },
194+
{ @"_XBT", @"MILLIBITCOIN|SATOSHI" }
194195
};
195196
public static readonly Dictionary<string, long> CurrencyFractionalRatios = new Dictionary<string, long>
196197
{
@@ -269,7 +270,9 @@ public static class BaseCurrency
269270
{ @"Ngwee", 100 },
270271
{ @"Kwartje", 4 },
271272
{ @"Dubbeltje", 10 },
272-
{ @"Stuiver", 20 }
273+
{ @"Stuiver", 20 },
274+
{ @"Millibitcoin", 1000 },
275+
{ @"Satoshi", 100000000 }
273276
};
274277
public static readonly Dictionary<string, long> NonStandardFractionalSubunits = new Dictionary<string, long>
275278
{
@@ -279,7 +282,8 @@ public static class BaseCurrency
279282
{ @"OMR", 1000 },
280283
{ @"YDD", 1000 },
281284
{ @"TND", 1000 },
282-
{ @"MRO", 5 }
285+
{ @"MRO", 5 },
286+
{ @"_XBT", 1000 }
283287
};
284288
}
285289
}

.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersDefinitions.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ public static class NumbersDefinitions
159159
'拾'
160160
};
161161
public static readonly string DigitalNumberRegex = $@"((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))";
162-
public const string ZeroToNineFullHalfRegex = @"[\d1234567890]";
162+
public const string ZeroToNineFullHalfRegex = @"[\d]";
163163
public static readonly string DigitNumRegex = $@"{ZeroToNineFullHalfRegex}+";
164164
public const string DozenRegex = @".*打$";
165165
public const string PercentageRegex = @"(?<=(((?<![十百千拾佰仟])[十百千拾佰仟])|([万亿兆萬億]))\s*分\s*之).+|.+(?=个\s*(((?<![十百千拾佰仟])[十百千拾佰仟])|([万亿兆萬億]))\s*分\s*点)|.*(?=[%%])";
@@ -201,8 +201,8 @@ public static class NumbersDefinitions
201201
public static readonly string DoubleWithMultiplierRegex = $@"({NegativeNumberTermsRegexNum}\s*)?{ZeroToNineFullHalfRegex}+[\..]{ZeroToNineFullHalfRegex}+\s*{BaseNumbers.NumberMultiplierRegex}";
202202
public static readonly string DoubleWithThousandsRegex = $@"{NegativeNumberTermsRegex}?(({ZeroToNineFullHalfRegex}+)|({ZeroToNineFullHalfRegex}{{1,3}}(,{ZeroToNineFullHalfRegex}{{3}})+))([\..]{ZeroToNineFullHalfRegex}+)?\s*[多几幾余]?[万亿萬億]{{1,2}}";
203203
public static readonly string DoubleAllFloatRegex = $@"(?<![百佰]\s*分\s*之\s*(({AllIntRegex}[点點]*)|{AllFloatRegex})*){AllFloatRegex}(?!{ZeroToNineIntegerRegex}*\s*[个個]\s*[百佰]\s*分\s*[点點])";
204-
public static readonly string DoubleExponentialNotationRegex = $@"(?<!{ZeroToNineFullHalfRegex}+[\..])({NegativeNumberTermsRegexNum}\s*)?{ZeroToNineFullHalfRegex}+([\..]{ZeroToNineFullHalfRegex}+)?e(([--++]*[1-9123456789]{ZeroToNineFullHalfRegex}*)|[00](?!{ZeroToNineFullHalfRegex}+))";
205-
public static readonly string DoubleScientificNotationRegex = $@"(?<!{ZeroToNineFullHalfRegex}+[\..])({NegativeNumberTermsRegexNum}\s*)?({ZeroToNineFullHalfRegex}+([\..]{ZeroToNineFullHalfRegex}+)?)\^([--++]*[1-9123456789]{ZeroToNineFullHalfRegex}*)";
204+
public static readonly string DoubleExponentialNotationRegex = $@"(?<!{ZeroToNineFullHalfRegex}+[\..])({NegativeNumberTermsRegexNum}\s*)?{ZeroToNineFullHalfRegex}+([\..]{ZeroToNineFullHalfRegex}+)?e(([--++]*[1-9]{ZeroToNineFullHalfRegex}*)|0(?!{ZeroToNineFullHalfRegex}+))";
205+
public static readonly string DoubleScientificNotationRegex = $@"(?<!{ZeroToNineFullHalfRegex}+[\..])({NegativeNumberTermsRegexNum}\s*)?({ZeroToNineFullHalfRegex}+([\..]{ZeroToNineFullHalfRegex}+)?)\^([--++]*[1-9]{ZeroToNineFullHalfRegex}*)";
206206
public static readonly string OrdinalRegex = $@"第{AllIntRegex}";
207207
public static readonly string OrdinalNumbersRegex = $@"第{ZeroToNineFullHalfRegex}+";
208208
public static readonly string AllFractionNumber = $@"{NegativeNumberTermsRegex}?(({ZeroToNineFullHalfRegex}+|{AllIntRegex})\s*又\s*)?{NegativeNumberTermsRegex}?({ZeroToNineFullHalfRegex}+|{AllIntRegex})\s*分\s*之\s*{NegativeNumberTermsRegex}?({ZeroToNineFullHalfRegex}+|{AllIntRegex})({PointRegexStr}{AllIntRegex}*)?";
@@ -227,7 +227,7 @@ public static class NumbersDefinitions
227227
public static readonly string FoldsPercentageRegex = $@"{ZeroToNineIntegerRegex}(\s*[点點]?\s*{ZeroToNineIntegerRegex})?\s*折";
228228
public static readonly string SimpleFoldsPercentageRegex = $@"{ZeroToNineFullHalfRegex}\s*成(\s*(半|{ZeroToNineFullHalfRegex}))?";
229229
public static readonly string SpecialsPercentageRegex = $@"({ZeroToNineIntegerRegex}|[十拾])\s*成(\s*(半|{ZeroToNineIntegerRegex}))?";
230-
public static readonly string NumbersSpecialsPercentageRegex = $@"({ZeroToNineFullHalfRegex}[\..]{ZeroToNineFullHalfRegex}|[11][00])\s*成";
230+
public static readonly string NumbersSpecialsPercentageRegex = $@"({ZeroToNineFullHalfRegex}[\..]{ZeroToNineFullHalfRegex}|10)\s*成";
231231
public static readonly string SimpleSpecialsPercentageRegex = $@"{ZeroToNineIntegerRegex}\s*[点點]\s*{ZeroToNineIntegerRegex}\s*成";
232232
public const string SpecialsFoldsPercentageRegex = @"半\s*成|(?<=打)[对對]\s*折|半\s*折";
233233
public const string SpeicalCharBeforeNumber = @"(有|是|为)";

.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersWithUnitDefinitions.cs

+4-2
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,8 @@ public static class NumbersWithUnitDefinitions
246246
{ @"Fen", @"分钱|分" },
247247
{ @"Jiao", @"毛钱|毛|角钱|角" },
248248
{ @"Finnish markka", @"芬兰马克" },
249-
{ @"Penni", @"盆尼" }
249+
{ @"Penni", @"盆尼" },
250+
{ @"Bitcoin", @"₿|btc|xbt|个比特币|比特币" }
250251
};
251252
public static readonly Dictionary<string, string> CurrencyNameToIsoCodeMap = new Dictionary<string, string>
252253
{
@@ -536,7 +537,8 @@ public static class NumbersWithUnitDefinitions
536537
{ @"Turkish lira", @"₺" },
537538
{ @"Euro", @"€" },
538539
{ @"Pound", @"£" },
539-
{ @"Costa Rican colón", @"₡" }
540+
{ @"Costa Rican colón", @"₡" },
541+
{ @"Bitcoin", @"₿|btc|xbt" }
540542
};
541543
public static readonly IList<string> CurrencyAmbiguousValues = new List<string>
542544
{

.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersWithUnitDefinitions.cs

+11-4
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,10 @@ public static class NumbersWithUnitDefinitions
420420
{ @"Fen", @"fen" },
421421
{ @"Jiao", @"jiao|mao" },
422422
{ @"Finnish markka", @"finse mark|finse markka|suomen markka|finnish markka|finsk mark|fim|markkaa|markka" },
423-
{ @"Penni", @"penniä|penni" }
423+
{ @"Penni", @"penniä|penni" },
424+
{ @"Bitcoin", @"bitcoin|bitcoins|btc|xbt|₿" },
425+
{ @"Millibitcoin", @"millibitcoin|millibitcoins|milibitcoin|milibitcoins" },
426+
{ @"Satoshi", @"satoshi|satoshis" }
424427
};
425428
public static readonly Dictionary<string, string> CurrencyNameToIsoCodeMap = new Dictionary<string, string>
426429
{
@@ -609,7 +612,8 @@ public static class NumbersWithUnitDefinitions
609612
{ @"British Virgin Islands dollar", @"_BD" },
610613
{ @"Ascension pound", @"_AP" },
611614
{ @"Alderney pound", @"_ALP" },
612-
{ @"Abkhazian apsar", @"_AA" }
615+
{ @"Abkhazian apsar", @"_AA" },
616+
{ @"Bitcoin", @"_XBT" }
613617
};
614618
public static readonly Dictionary<string, string> FractionalUnitNameToCodeMap = new Dictionary<string, string>
615619
{
@@ -688,7 +692,9 @@ public static class NumbersWithUnitDefinitions
688692
{ @"Ngwee", @"NGWEE" },
689693
{ @"Kwartje", @"KWARTJE" },
690694
{ @"Dubbeltje", @"DUBBELTJE" },
691-
{ @"Stuiver", @"STUIVER" }
695+
{ @"Stuiver", @"STUIVER" },
696+
{ @"Millibitcoin", @"MILLIBITCOIN" },
697+
{ @"Satoshi", @"SATOSHI" }
692698
};
693699
public const string CompoundUnitConnectorRegex = @"(?<spacer>en)";
694700
public static readonly Dictionary<string, string> CurrencyPrefixList = new Dictionary<string, string>
@@ -732,7 +738,8 @@ public static class NumbersWithUnitDefinitions
732738
{ @"Euro", @"€|eur" },
733739
{ @"Pound", @"£" },
734740
{ @"Costa Rican colón", @"₡" },
735-
{ @"Turkish lira", @"₺" }
741+
{ @"Turkish lira", @"₺" },
742+
{ @"Bitcoin", @"₿|btc|xbt" }
736743
};
737744
public static readonly IList<string> AmbiguousCurrencyUnitList = new List<string>
738745
{

.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs

+8-8
Original file line numberDiff line numberDiff line change
@@ -139,9 +139,9 @@ public static class DateTimeDefinitions
139139
public const string HourNumRegex = @"\b(?<hournum>zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\b";
140140
public const string MinuteNumRegex = @"(?<minnum>ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|thirty|forty|fifty|one|two|three|five|eight)";
141141
public const string DeltaMinuteNumRegex = @"(?<deltaminnum>ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|thirty|forty|fifty|one|two|three|five|eight)";
142-
public const string PmRegex = @"(?<pm>(((?:at|in|around|on|for)\s+(the\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\s+(the\s+)?night))";
143-
public const string PmRegexFull = @"(?<pm>((?:at|in|around|on|for)\s+(the\s+)?)?(afternoon|evening|(mid)?night|lunchtime))";
144-
public const string AmRegex = @"(?<am>((?:at|in|around|on|for)\s+(the\s+)?)?(morning))";
142+
public const string PmRegex = @"(?<pm>(((?:at|in|around|circa|on|for)\s+(the\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\s+(the\s+)?night))";
143+
public const string PmRegexFull = @"(?<pm>((?:at|in|around|circa|on|for)\s+(the\s+)?)?(afternoon|evening|(mid)?night|lunchtime))";
144+
public const string AmRegex = @"(?<am>((?:at|in|around|circa|on|for)\s+(the\s+)?)?(morning))";
145145
public const string LunchRegex = @"\blunchtime\b";
146146
public const string NightRegex = @"\b(mid)?night\b";
147147
public const string CommonDatePrefixRegex = @"^[\.]";
@@ -156,7 +156,7 @@ public static class DateTimeDefinitions
156156
public const string MidafternoonRegex = @"(?<midafternoon>mid\s*(-\s*)?afternoon)";
157157
public const string MiddayRegex = @"(?<midday>mid\s*(-\s*)?day|((12\s)?noon))";
158158
public static readonly string MidTimeRegex = $@"(?<mid>({MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex}))";
159-
public static readonly string AtRegex = $@"\b(?:(?:(?<=\bat\s+)(?:{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d)(\s*((?<iam>a)|(?<ipm>p)))?|{MidTimeRegex}))|{MidTimeRegex})\b";
159+
public static readonly string AtRegex = $@"\b(?:(?:(?<=\b(at|(at)?\s*around|circa)\s+)(?:{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d)(\s*((?<iam>a)|(?<ipm>p)))?|{MidTimeRegex}))|{MidTimeRegex})\b";
160160
public static readonly string IshRegex = $@"\b({BaseDateTime.HourRegex}(-|——)?ish|noon(ish)?)\b";
161161
public const string TimeUnitRegex = @"([^A-Za-z]{1,}|\b)(?<unit>h(ou)?rs?|h|min(ute)?s?|sec(ond)?s?)\b";
162162
public const string RestrictedTimeUnitRegex = @"(?<unit>hour|minute)\b";
@@ -198,9 +198,9 @@ public static class DateTimeDefinitions
198198
public const string DateTimeTimeOfDayRegex = @"\b(?<timeOfDay>morning|(?<pm>afternoon|night|evening))\b";
199199
public static readonly string DateTimeSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{DateTimeTimeOfDayRegex})\b|\btoni(ght|te))\b";
200200
public static readonly string TimeOfTodayAfterRegex = $@"^\s*(,\s*)?(in\s+)?{DateTimeSpecificTimeOfDayRegex}";
201-
public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(at|around|in|on))?\s*$";
201+
public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(at|around|circa|in|on))?\s*$";
202202
public static readonly string SimpleTimeOfTodayAfterRegex = $@"(?<!{NonTimeContextTokens}\s*)\b({HourNumRegex}|{BaseDateTime.HourRegex})\s*(,\s*)?(in\s+)?{DateTimeSpecificTimeOfDayRegex}\b";
203-
public static readonly string SimpleTimeOfTodayBeforeRegex = $@"\b{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(at|around))?\s*({HourNumRegex}|{BaseDateTime.HourRegex})\b";
203+
public static readonly string SimpleTimeOfTodayBeforeRegex = $@"\b{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(at|around|circa))?\s*({HourNumRegex}|{BaseDateTime.HourRegex})\b";
204204
public const string SpecificEndOfRegex = @"(the\s+)?end of(\s+the)?\s*$";
205205
public const string UnspecificEndOfRegex = @"\b(the\s+)?(eod|(end\s+of\s+day))\b";
206206
public const string UnspecificEndOfRangeRegex = @"\b(eoy)\b";
@@ -256,12 +256,12 @@ public static class DateTimeDefinitions
256256
public static readonly string RelativeTimeUnitRegex = $@"(?:(?:(?:{NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+({TimeUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))";
257257
public static readonly string RelativeDurationUnitRegex = $@"(?:(?:(?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+)({DurationUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))";
258258
public static readonly string ReferenceDatePeriodRegex = $@"\b{ReferencePrefixRegex}\s+(?<duration>week|month|year|decade|weekend)\b";
259-
public const string ConnectorRegex = @"^(-|,|for|t|around|@)$";
259+
public const string ConnectorRegex = @"^(-|,|for|t|around|circa|@)$";
260260
public const string FromToRegex = @"(\b(from).+(to|and|or)\b.+)";
261261
public const string SingleAmbiguousMonthRegex = @"^(the\s+)?(may|march)$";
262262
public const string SingleAmbiguousTermsRegex = @"^(the\s+)?(day|week|month|year)$";
263263
public const string UnspecificDatePeriodRegex = @"^(week|month|year)$";
264-
public const string PrepositionSuffixRegex = @"\b(on|in|at|around|from|to)$";
264+
public const string PrepositionSuffixRegex = @"\b(on|in|at|around|circa|from|to)$";
265265
public const string FlexibleDayRegex = @"(?<DayOfMonth>([A-Za-z]+\s)?[A-Za-z\d]+)";
266266
public static readonly string ForTheRegex = $@"\b((((?<=for\s+)the\s+{FlexibleDayRegex})|((?<=on\s+)(the\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?<end>\s*(,|\.(?!\d)|!|\?|$)))";
267267
public static readonly string WeekDayAndDayOfMonthRegex = $@"\b{WeekDayRegex}\s+(the\s+{FlexibleDayRegex})\b";

0 commit comments

Comments
 (0)