@@ -958,6 +958,10 @@ public void testHyphensQuoteAndBOM() {
958
958
"Let's shoot'em up" ,
959
959
"In Louis L'Amour's 1985 historical novel" ,
960
960
"Grace O'Malley's Castle." ,
961
+ "The Z-R relationship was Z = 408R9.20" ,
962
+ "I use SPSS28.0 to measure Lee’s指数 as 其IC50约为4.814μmol / L" ,
963
+ "Some people write BA2.12.1. Tests were DM899.00." ,
964
+ "@Insanomania They do... Their mentality doesn't :(\n " ,
961
965
962
966
};
963
967
@@ -982,13 +986,18 @@ public void testHyphensQuoteAndBOM() {
982
986
{ "Let" , "'s" , "shoot" , "'em" , "up" },
983
987
{ "In" , "Louis" , "L'Amour" , "'s" , "1985" , "historical" , "novel" },
984
988
{ "Grace" , "O'Malley" , "'s" , "Castle" , "." },
989
+ { "The" , "Z-R" , "relationship" , "was" , "Z" , "=" , "408R" , "9.20" },
990
+ { "I" , "use" , "SPSS" , "28.0" , "to" , "measure" , "Lee's指数" , "as" , "其IC50约为" , "4.814" , "μmol" , "/" , "L" }, // could use \p{Latin} more in patterns?
991
+ { "Some" , "people" , "write" , "BA" , "2.12.1" , "." ,"Tests" , "were" , "DM" , "899.00" , "." },
992
+ { "@Insanomania" , "They" , "do" , "..." , "Their" , "mentality" , "does" , "n't" , ":(" },
985
993
986
994
};
987
995
988
996
// Test: builds a CoreLabel tokenizer factory from a PTBTokenizer option string and
// passes it, together with the apostropheInputs / apostropheGold arrays, to the
// runOnTwoArrays and runAgainstOrig helpers.
// In this hunk the option string changes from
//   "normalizeCurrency=false,invertible,ptb3Escaping"
// to
//   "invertible,ptb3Escaping,normalizeCurrency=false,normalizeParentheses=false"
// i.e. the existing options are reordered and normalizeParentheses=false is added.
// NOTE(review): presumably normalizeParentheses=false is needed so that an expected
// token such as ":(" keeps its literal parenthesis -- confirm against the
// PTBTokenizer option documentation.
@ Test
989
997
public void testApostrophes () {
990
998
// Note that this is running with "latex" normalization of quotes!
991
- TokenizerFactory <CoreLabel > tokFactory = PTBTokenizer .coreLabelFactory ("normalizeCurrency=false,invertible,ptb3Escaping" );
999
+ TokenizerFactory <CoreLabel > tokFactory =
1000
+ PTBTokenizer .coreLabelFactory ("invertible,ptb3Escaping,normalizeCurrency=false,normalizeParentheses=false" );
992
1001
// Helper is defined outside this view; presumably tokenizes each input and
// compares the result with the matching gold token array -- TODO confirm.
runOnTwoArrays (tokFactory , apostropheInputs , apostropheGold );
993
1002
// Helper is defined outside this view; presumably checks that the invertible
// tokenization can reproduce the original input text -- TODO confirm.
runAgainstOrig (tokFactory , apostropheInputs );
994
1003
}
0 commit comments