Skip to content

Commit d4029c3

Browse files
authored
[Choice] Add support for emoji skin tone modifiers across cultures (microsoft#2454)
1 parent 2af748d commit d4029c3

File tree

66 files changed: +1087 -115 lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

66 files changed: +1087 -115 lines changed

.NET/Microsoft.Recognizers.Definitions.Common/Arabic/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Ara";
2525
public const string TokenizerRegex = @"[^\u0621-\u064A0-9]";
26-
public const string TrueRegex = @"\b(صحيح|نعم|حسنا|موافق|متأكد|بالتأكيد|أتفق|اتفق|بالطبع|هيا)\b|(\uD83D\uDC4D|\uD83D\uDC4C|\u0001f44c)";
27-
public const string FalseRegex = @"\b(لست متاحا|لا أستطيع|ليس تماما|أختلف|نختلف|اوافق|لست أوافق|لا|ليس صحيح|ليس|ليست|غير موافق)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90|\u0001F44E|\u0001F590)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"\b(صحيح|نعم|حسنا|موافق|متأكد|بالتأكيد|أتفق|اتفق|بالطبع|هيا)\b|(\uD83D\uDC4D|\uD83D\uDC4C|\u0001f44c){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"\b(لست متاحا|لا أستطيع|ليس تماما|أختلف|نختلف|اوافق|لست أوافق|لا|ليس صحيح|ليس|ليست|غير موافق)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90|\u0001F44E|\u0001F590){SkinToneRegex}?";
2829
}
2930
}

.NET/Microsoft.Recognizers.Definitions.Common/Bulgarian/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,8 @@ public static class ChoiceDefinitions
2525
public const string TokenizerRegex = @"[^\w\d]";
2626
public const string ItIsNotRegex = @"не\s+е\s+";
2727
public const string IAmNotRegex = @"не\s+съм\s+";
28-
public const string TrueRegex = @"\b(в[яе]рно|определено|несъмнено|точно|добре|да|йеп|д|ok|ок|съглас((ен)|(на)))\b|(\uD83D\uDC4D|\uD83D\uDC4C)";
29-
public static readonly string FalseRegex = $@"\b({ItIsNotRegex}в[яе]рно|нев[яе]рно|{ItIsNotRegex}ок|{ItIsNotRegex}добре|{IAmNotRegex}съглас((ен)|(на))|не)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)";
28+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
29+
public static readonly string TrueRegex = $@"\b(в[яе]рно|определено|несъмнено|точно|добре|да|йеп|д|ok|ок|съглас((ен)|(на)))\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?";
30+
public static readonly string FalseRegex = $@"\b({ItIsNotRegex}в[яе]рно|нев[яе]рно|{ItIsNotRegex}ок|{ItIsNotRegex}добре|{IAmNotRegex}съглас((ен)|(на))|не)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?";
3031
}
3132
}

.NET/Microsoft.Recognizers.Definitions.Common/Chinese/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Chs";
2525
public const string TokenizerRegex = @"[^\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff66-\uff9f]";
26-
public const string TrueRegex = @"(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\uD83D\uDC4D|\uD83D\uDC4C)";
27-
public const string FalseRegex = @"(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?";
2829
}
2930
}

.NET/Microsoft.Recognizers.Definitions.Common/Dutch/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Nl";
2525
public const string TokenizerRegex = @"[^\w\d]";
26-
public const string TrueRegex = @"\b(ja|jawel|jazeker|natuurlijk|vanzelfsprekend|zeker|prima|jep|yes|yep|y|ok|oke|akkoord)\b|(\uD83D\uDC4D|\uD83D\uDC4C)";
27-
public const string FalseRegex = @"\b(nee|neen|nope|nein|nop|no|niet|nooit)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"\b(ja|jawel|jazeker|natuurlijk|vanzelfsprekend|zeker|prima|jep|yes|yep|y|ok|oke|akkoord)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"\b(nee|neen|nope|nein|nop|no|niet|nooit)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?";
2829
}
2930
}

.NET/Microsoft.Recognizers.Definitions.Common/English/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Eng";
2525
public const string TokenizerRegex = @"[^\w\d]";
26-
public const string TrueRegex = @"\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\b|(\uD83D\uDC4D|\uD83D\uDC4C|\u0001f44c)";
27-
public const string FalseRegex = @"\b(false|nope|nop|no|not\s+ok|disagree)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90|\u0001F44E|\u0001F590)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\b|(\uD83D\uDC4D|\uD83D\uDC4C|\u0001f44c){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"\b(false|nope|nop|no|not\s+ok|disagree)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90|\u0001F44E|\u0001F590){SkinToneRegex}?";
2829
}
2930
}

.NET/Microsoft.Recognizers.Definitions.Common/French/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Fr";
2525
public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]";
26-
public const string TrueRegex = @"\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\b|(\uD83D\uDC4D|\uD83D\uDC4C)";
27-
public const string FalseRegex = @"\b(faux|nan|non|pas\s+d'accord|pas\s+concorder|n'est\s+pas\s+(correct|ok)|pas)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"\b(faux|nan|non|pas\s+d'accord|pas\s+concorder|n'est\s+pas\s+(correct|ok)|pas)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?";
2829
}
2930
}

.NET/Microsoft.Recognizers.Definitions.Common/German/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Ger";
2525
public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]";
26-
public const string TrueRegex = @"\b(wahr|ja|jep|j|sicher|ok|einverstanden|mit\s+Sicherheit|sicherlich|jap|mache ich)\b|(\uD83D\uDC4D|\uD83D\uDC4C)";
27-
public const string FalseRegex = @"\b(falsch|nein|ne|nö|nicht\s+ok|nicht\s+einverstanden|n)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"\b(wahr|ja|jep|j|sicher|ok|einverstanden|mit\s+Sicherheit|sicherlich|jap|mache ich)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"\b(falsch|nein|ne|nö|nicht\s+ok|nicht\s+einverstanden|n)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?";
2829
}
2930
}

.NET/Microsoft.Recognizers.Definitions.Common/Hindi/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Hin";
2525
public const string TokenizerRegex = @"[^\w\d\u0900-\u097f]";
26-
public const string TrueRegex = @"\b(true|yes|yep|y|sure|ok|agree|बिलकुल|हाँ|हां|सहमत)\b|(\uD83D\uDC4D|\uD83D\uDC4C|\u0001f44c)";
27-
public const string FalseRegex = @"\b(false|nope|nop|no|not\s+ok|disagree)\b|((नहीं|ठीक\s+नहीं|असहमत)\b|(ना))|(\uD83D\uDC4E|\u270B|\uD83D\uDD90|\u0001F44E|\u0001F590)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"\b(true|yes|yep|y|sure|ok|agree|बिलकुल|हाँ|हां|सहमत)\b|(\uD83D\uDC4D|\uD83D\uDC4C|\u0001f44c){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"\b(false|nope|nop|no|not\s+ok|disagree)\b|((नहीं|ठीक\s+नहीं|असहमत)\b|(ना))|(\uD83D\uDC4E|\u270B|\uD83D\uDD90|\u0001F44E|\u0001F590){SkinToneRegex}?";
2829
}
2930
}

.NET/Microsoft.Recognizers.Definitions.Common/Italian/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Ita";
2525
public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]";
26-
public const string TrueRegex = @"\b(vero|s[iì]|certo|sicuro|ok|d'accordo|(?<!non\s+)va\s+bene)\b|(\uD83D\uDC4D|\uD83D\uDC4C)";
27-
public const string FalseRegex = @"\b(falso|no|per\s+(niente|nulla)|niente\s+affatto|non\s+va\s+bene|non\s+è\s+ok)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"\b(vero|s[iì]|certo|sicuro|ok|d'accordo|(?<!non\s+)va\s+bene)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"\b(falso|no|per\s+(niente|nulla)|niente\s+affatto|non\s+va\s+bene|non\s+è\s+ok)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?";
2829
}
2930
}

.NET/Microsoft.Recognizers.Definitions.Common/Japanese/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Jpn";
2525
public const string TokenizerRegex = @"[^\w\d\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf]";
26-
public const string TrueRegex = @"(はい(!)*|そうです|よい(です)*)|(\uD83D\uDC4D|\uD83D\uDC4C)";
27-
public const string FalseRegex = @"(いいえ|ではありません|ではない|じゃない|じゃありません)|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"(はい(!)*|そうです|よい(です)*)|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"(いいえ|ではありません|ではない|じゃない|じゃありません)|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?";
2829
}
2930
}

.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Por";
2525
public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]";
26-
public const string TrueRegex = @"\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\b|(\uD83D\uDC4D|\uD83D\uDC4C)";
27-
public const string FalseRegex = @"\b(falso|n[aã]o|incorreto|nada disso)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"\b(falso|n[aã]o|incorreto|nada disso)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?";
2829
}
2930
}

.NET/Microsoft.Recognizers.Definitions.Common/Spanish/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Spa";
2525
public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]";
26-
public const string TrueRegex = @"\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\b|(\uD83D\uDC4D|\uD83D\uDC4C)";
27-
public const string FalseRegex = @"\b(falso|no|nop|n|no)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"\b(falso|no|nop|n|no)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?";
2829
}
2930
}

.NET/Microsoft.Recognizers.Definitions.Common/Swedish/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Swe";
2525
public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]";
26-
public const string TrueRegex = @"\b(sant|ja|yes|y|j|ok|japp|jupp|jepp|absolut|säkert|instämmer|javisst|kör)\b|(\uD83D\uDC4D|\uD83D\uDC4C)";
27-
public const string FalseRegex = @"\b(falskt|nej|näpp|nope|misstycker|aldrig|n|nä|absolut\s+inte|glöm det|instämmer\s+inte|vill\s+inte|avstår)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"\b(sant|ja|yes|y|j|ok|japp|jupp|jepp|absolut|säkert|instämmer|javisst|kör)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"\b(falskt|nej|näpp|nope|misstycker|aldrig|n|nä|absolut\s+inte|glöm det|instämmer\s+inte|vill\s+inte|avstår)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?";
2829
}
2930
}

.NET/Microsoft.Recognizers.Definitions.Common/Turkish/ChoiceDefinitions.cs

+3-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ public static class ChoiceDefinitions
2323
{
2424
public const string LangMarker = @"Tr";
2525
public const string TokenizerRegex = @"[^\w\d\u00E0-\u00FC]";
26-
public const string TrueRegex = @"\b(do[gğ]ru|evet|d|e|tabii?|tamam|kat[ıi]l[ıi]yorum)\b|(\uD83D\uDC4D|\uD83D\uDC4C)";
27-
public const string FalseRegex = @"\b(yanl[ıi][sş]|hay[ıi]r|h|y|(do[gğ]ru|tamam)\s+de[gğ]il|kat[ıi]lm[ıi]yorum)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90)";
26+
public const string SkinToneRegex = @"(\uD83C\uDFFB|\uD83C\uDFFC|\uD83C\uDFFD|\uD83C\uDFFE|\uD83C\uDFFF)";
27+
public static readonly string TrueRegex = $@"\b(do[gğ]ru|evet|d|e|tabii?|tamam|kat[ıi]l[ıi]yorum)\b|(\uD83D\uDC4D|\uD83D\uDC4C){SkinToneRegex}?";
28+
public static readonly string FalseRegex = $@"\b(yanl[ıi][sş]|hay[ıi]r|h|y|(do[gğ]ru|tamam)\s+de[gğ]il|kat[ıi]lm[ıi]yorum)\b|(\uD83D\uDC4E|\u270B|\uD83D\uDD90){SkinToneRegex}?";
2829
}
2930
}

Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/ChineseChoice.java

+6-2
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,11 @@ public class ChineseChoice {
1717

1818
public static final String TokenizerRegex = "[^\\u3040-\\u30ff\\u3400-\\u4dbf\\u4e00-\\u9fff\\uf900-\\ufaff\\uff66-\\uff9f]";
1919

20-
public static final String TrueRegex = "(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
20+
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
2121

22-
public static final String FalseRegex = "(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
22+
public static final String TrueRegex = "(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
23+
.replace("{SkinToneRegex}", SkinToneRegex);
24+
25+
public static final String FalseRegex = "(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
26+
.replace("{SkinToneRegex}", SkinToneRegex);
2327
}

Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/EnglishChoice.java

+6-2
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,11 @@ public class EnglishChoice {
1717

1818
public static final String TokenizerRegex = "[^\\w\\d]";
1919

20-
public static final String TrueRegex = "\\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C|\\u0001f44c)";
20+
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
2121

22-
public static final String FalseRegex = "\\b(false|nope|nop|no|not\\s+ok|disagree)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90|\\u0001F44E|\\u0001F590)";
22+
public static final String TrueRegex = "\\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C|\\u0001f44c){SkinToneRegex}?"
23+
.replace("{SkinToneRegex}", SkinToneRegex);
24+
25+
public static final String FalseRegex = "\\b(false|nope|nop|no|not\\s+ok|disagree)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90|\\u0001F44E|\\u0001F590){SkinToneRegex}?"
26+
.replace("{SkinToneRegex}", SkinToneRegex);
2327
}

Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/FrenchChoice.java

+6-2
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,11 @@ public class FrenchChoice {
1717

1818
public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]";
1919

20-
public static final String TrueRegex = "\\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
20+
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
2121

22-
public static final String FalseRegex = "\\b(faux|nan|non|pas\\s+d'accord|pas\\s+concorder|n'est\\s+pas\\s+(correct|ok)|pas)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
22+
public static final String TrueRegex = "\\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
23+
.replace("{SkinToneRegex}", SkinToneRegex);
24+
25+
public static final String FalseRegex = "\\b(faux|nan|non|pas\\s+d'accord|pas\\s+concorder|n'est\\s+pas\\s+(correct|ok)|pas)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
26+
.replace("{SkinToneRegex}", SkinToneRegex);
2327
}

Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/PortugueseChoice.java

+6-2
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,11 @@ public class PortugueseChoice {
1717

1818
public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]";
1919

20-
public static final String TrueRegex = "\\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
20+
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
2121

22-
public static final String FalseRegex = "\\b(falso|n[aã]o|incorreto|nada disso)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
22+
public static final String TrueRegex = "\\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
23+
.replace("{SkinToneRegex}", SkinToneRegex);
24+
25+
public static final String FalseRegex = "\\b(falso|n[aã]o|incorreto|nada disso)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
26+
.replace("{SkinToneRegex}", SkinToneRegex);
2327
}

Java/libraries/recognizers-text-choice/src/main/java/com/microsoft/recognizers/text/choice/resources/SpanishChoice.java

+6-2
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,11 @@ public class SpanishChoice {
1717

1818
public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]";
1919

20-
public static final String TrueRegex = "\\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
20+
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";
2121

22-
public static final String FalseRegex = "\\b(falso|no|nop|n|no)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
22+
public static final String TrueRegex = "\\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
23+
.replace("{SkinToneRegex}", SkinToneRegex);
24+
25+
public static final String FalseRegex = "\\b(falso|no|nop|n|no)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
26+
.replace("{SkinToneRegex}", SkinToneRegex);
2327
}

JavaScript/packages/recognizers-choice/src/choice/chinese/boolean.ts

+2
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,11 @@ export class ChineseBooleanExtractorConfiguration implements IBooleanExtractorCo
66
readonly regexTrue: RegExp;
77
readonly regexFalse: RegExp;
88
readonly tokenRegex: RegExp;
9+
readonly emojiSkinToneRegex: RegExp;
910
readonly onlyTopMatch: boolean;
1011

1112
constructor(onlyTopMatch: boolean = true) {
13+
this.emojiSkinToneRegex = RegExpUtility.getSafeRegExp(ChineseChoice.SkinToneRegex);
1214
this.regexTrue = RegExpUtility.getSafeRegExp(ChineseChoice.TrueRegex);
1315
this.regexFalse = RegExpUtility.getSafeRegExp(ChineseChoice.FalseRegex);
1416
this.tokenRegex = RegExpUtility.getSafeRegExp(ChineseChoice.TokenizerRegex, 'is');

0 commit comments

Comments
 (0)