1
1
package org .unicode .jsp ;
2
2
3
+ import com .google .common .base .Joiner ;
4
+ import com .google .common .collect .Multimap ;
5
+ import com .google .common .collect .TreeMultimap ;
3
6
import com .ibm .icu .dev .util .UnicodeMap ;
4
7
import com .ibm .icu .lang .CharSequences ;
5
8
import com .ibm .icu .lang .UCharacter ;
12
15
import com .ibm .icu .text .Transform ;
13
16
import com .ibm .icu .text .UTF16 ;
14
17
import com .ibm .icu .text .UnicodeSet ;
18
+ import com .ibm .icu .text .UnicodeSetIterator ;
19
+ import com .ibm .icu .util .LocaleData ;
15
20
import com .ibm .icu .util .ULocale ;
16
21
import com .ibm .icu .util .VersionInfo ;
17
22
import java .nio .charset .Charset ;
18
23
import java .util .ArrayList ;
19
24
import java .util .Arrays ;
25
+ import java .util .Collection ;
20
26
import java .util .List ;
21
27
import java .util .Locale ;
28
+ import java .util .Map .Entry ;
29
+ import java .util .Set ;
30
+ import java .util .TreeSet ;
22
31
import org .unicode .idna .Idna .IdnaType ;
23
32
import org .unicode .idna .Idna2003 ;
24
33
import org .unicode .idna .Idna2008 ;
28
37
import org .unicode .props .UnicodeProperty .BaseProperty ;
29
38
import org .unicode .props .UnicodeProperty .Factory ;
30
39
import org .unicode .props .UnicodeProperty .SimpleProperty ;
40
+ import org .unicode .text .utility .Utility ;
31
41
32
42
public class XPropertyFactory extends UnicodeProperty .Factory {
33
43
44
+ private static final Joiner JOIN_COMMAS = Joiner .on ("," );
45
+ private static final boolean DEBUG_MULTI = false ;
46
+
34
47
static final UnicodeSet ALL =
35
48
new UnicodeSet ("[[:^C:][:Cc:][:Cf:][:noncharactercodepoint:]]" ).freeze ();
36
49
@@ -96,6 +109,7 @@ public final Factory add2(UnicodeProperty sp) {
96
109
add (
97
110
new CodepointTransformProperty (
98
111
new Transform <Integer , String >() {
112
+ @ Override
99
113
public String transform (Integer source ) {
100
114
return Normalizer .normalize (source , Normalizer .NFC );
101
115
}
@@ -105,6 +119,7 @@ public String transform(Integer source) {
105
119
add (
106
120
new CodepointTransformProperty (
107
121
new Transform <Integer , String >() {
122
+ @ Override
108
123
public String transform (Integer source ) {
109
124
return Normalizer .normalize (source , Normalizer .NFD );
110
125
}
@@ -114,6 +129,7 @@ public String transform(Integer source) {
114
129
add (
115
130
new CodepointTransformProperty (
116
131
new Transform <Integer , String >() {
132
+ @ Override
117
133
public String transform (Integer source ) {
118
134
return Normalizer .normalize (source , Normalizer .NFKC );
119
135
}
@@ -123,6 +139,7 @@ public String transform(Integer source) {
123
139
add (
124
140
new CodepointTransformProperty (
125
141
new Transform <Integer , String >() {
142
+ @ Override
126
143
public String transform (Integer source ) {
127
144
return Normalizer .normalize (source , Normalizer .NFKD );
128
145
}
@@ -133,6 +150,7 @@ public String transform(Integer source) {
133
150
add (
134
151
new StringTransformProperty (
135
152
new StringTransform () {
153
+ @ Override
136
154
public String transform (String source ) {
137
155
return UCharacter .foldCase (source , true );
138
156
}
@@ -142,6 +160,7 @@ public String transform(String source) {
142
160
add (
143
161
new StringTransformProperty (
144
162
new StringTransform () {
163
+ @ Override
145
164
public String transform (String source ) {
146
165
return UCharacter .toLowerCase (ULocale .ROOT , source );
147
166
}
@@ -151,6 +170,7 @@ public String transform(String source) {
151
170
add (
152
171
new StringTransformProperty (
153
172
new StringTransform () {
173
+ @ Override
154
174
public String transform (String source ) {
155
175
return UCharacter .toUpperCase (ULocale .ROOT , source );
156
176
}
@@ -160,6 +180,7 @@ public String transform(String source) {
160
180
add (
161
181
new StringTransformProperty (
162
182
new StringTransform () {
183
+ @ Override
163
184
public String transform (String source ) {
164
185
return UCharacter .toTitleCase (ULocale .ROOT , source , null );
165
186
}
@@ -170,6 +191,7 @@ public String transform(String source) {
170
191
add (
171
192
new StringTransformProperty (
172
193
new StringTransform () {
194
+ @ Override
173
195
public String transform (String source ) {
174
196
StringBuilder b = new StringBuilder ();
175
197
for (int cp : CharSequences .codePoints (source )) {
@@ -184,6 +206,7 @@ public String transform(String source) {
184
206
add (
185
207
new StringTransformProperty (
186
208
new StringTransform () {
209
+ @ Override
187
210
public String transform (String source ) {
188
211
String result = NFM .nfm .get (source );
189
212
return result == null ? source : result ;
@@ -201,6 +224,7 @@ public String transform(String source) {
201
224
add (
202
225
new CodepointTransformProperty (
203
226
new Transform <Integer , String >() {
227
+ @ Override
204
228
public String transform (Integer source ) {
205
229
return UnicodeUtilities .getSubheader ().getSubheader (source );
206
230
}
@@ -239,6 +263,9 @@ public String transform(Integer source) {
239
263
.setMain ("bmp" , "bmp" , UnicodeProperty .BINARY , "6.0" ));
240
264
241
265
addCollationProperty ();
266
+ addExamplarProperty (LocaleData .ES_STANDARD , "exem" , "exemplar" );
267
+ addExamplarProperty (LocaleData .ES_AUXILIARY , "exema" , "exemplar_aux" );
268
+ addExamplarProperty (LocaleData .ES_PUNCTUATION , "exemp" , "exemplar_punct" );
242
269
243
270
// set up the special script property
244
271
UnicodeProperty scriptProp = base .getProperty ("sc" );
@@ -251,7 +278,8 @@ public String transform(Integer source) {
251
278
.setMain ("Script_Extensions" , "scx" , UnicodeProperty .ENUMERATED , "1.1" )
252
279
.addValueAliases (
253
280
ScriptTester .getScriptSpecialsAlternates (),
254
- AliasAddAction .IGNORE_IF_MISSING ));
281
+ AliasAddAction .IGNORE_IF_MISSING )
282
+ .setMultivalued (true ));
255
283
256
284
CachedProps cp = CachedProps .CACHED_PROPS ;
257
285
for (String prop : cp .getAvailable ()) {
@@ -289,6 +317,81 @@ public String transform(Integer source) {
289
317
.setMain ("RGI_Emoji" , "RGI_Emoji" , UnicodeProperty .BINARY , "13.0" ));
290
318
}
291
319
320
+ private void addExamplarProperty (
321
+ int exemplarType , String propertyAbbreviation , String propertyName ) {
322
+ Multimap <Integer , String > data = TreeMultimap .create ();
323
+ Set <String > localeSet = new TreeSet <>();
324
+
325
+ for (ULocale ulocale : ULocale .getAvailableLocales ()) {
326
+ if (!ulocale .getCountry ().isEmpty () || !ulocale .getVariant ().isEmpty ()) {
327
+ continue ;
328
+ // we want to skip cases where characters are in the parent locale, but there is no
329
+ // ULocale parentLocale = ulocale.getParent();
330
+ }
331
+ UnicodeSet exemplarSet = LocaleData .getExemplarSet (ulocale , 0 , exemplarType );
332
+ if (!ulocale .getScript ().isEmpty ()) {
333
+ // we can't find out the parent locale or defaultContent locale in ICU, so we hack
334
+ // it
335
+ String langLocale = ulocale .getLanguage ();
336
+ UnicodeSet langExemplarSet =
337
+ LocaleData .getExemplarSet (new ULocale (langLocale ), 0 , exemplarType );
338
+ if (langExemplarSet .equals (exemplarSet )) {
339
+ continue ;
340
+ }
341
+ }
342
+ String locale = ulocale .toLanguageTag ();
343
+ localeSet .add (locale );
344
+ for (UnicodeSetIterator it = new UnicodeSetIterator (exemplarSet ); it .nextRange (); ) {
345
+ if (it .codepoint == UnicodeSetIterator .IS_STRING ) {
346
+ // flatten
347
+ int cp = 0 ;
348
+ for (int i = 0 ; i < it .string .length (); i += Character .charCount (cp )) {
349
+ cp = it .string .codePointAt (i );
350
+ data .put (cp , locale );
351
+ }
352
+ } else {
353
+ for (int cp = it .codepoint ; cp <= it .codepointEnd ; ++cp ) {
354
+ data .put (cp , locale );
355
+ }
356
+ }
357
+ }
358
+ }
359
+
360
+ // convert to UnicodeMap
361
+ UnicodeMap <String > unicodeMap = new UnicodeMap <>();
362
+ for (Entry <Integer , Collection <String >> entry : data .asMap ().entrySet ()) {
363
+ String value = JOIN_COMMAS .join (entry .getValue ()).intern ();
364
+ unicodeMap .put (entry .getKey (), value );
365
+ }
366
+ if (DEBUG_MULTI ) {
367
+ System .out .println ("\n " + propertyName );
368
+ for (UnicodeMap .EntryRange <String > entry : unicodeMap .entryRanges ()) {
369
+ System .out .println (
370
+ Utility .hex (entry .codepoint )
371
+ + (entry .codepoint == entry .codepointEnd
372
+ ? ""
373
+ : "-" + Utility .hex (entry .codepointEnd ))
374
+ + " ;\t "
375
+ + entry .value );
376
+ }
377
+ }
378
+
379
+ // put locales into right format
380
+ String [] localeList = localeSet .toArray (new String [localeSet .size ()]);
381
+ String [][] locales = new String [][] {localeList , localeList }; // abbreviations are the same
382
+
383
+ add (
384
+ new UnicodeProperty .UnicodeMapProperty ()
385
+ .set (unicodeMap )
386
+ .setMain (
387
+ propertyName ,
388
+ propertyAbbreviation ,
389
+ UnicodeProperty .ENUMERATED ,
390
+ "1.1" )
391
+ .addValueAliases (locales , AliasAddAction .ADD_MAIN_ALIAS )
392
+ .setMultivalued (true ));
393
+ }
394
+
292
395
private void addCollationProperty () {
293
396
RuleBasedCollator c = UnicodeSetUtilities .RAW_COLLATOR ;
294
397
// (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
@@ -652,6 +755,7 @@ public StringTransformProperty(
652
755
setUniformUnassigned (hasUniformUnassigned );
653
756
}
654
757
758
+ @ Override
655
759
protected String _getValue (int codepoint ) {
656
760
return transform .transform (UTF16 .valueOf (codepoint ));
657
761
}
@@ -666,6 +770,7 @@ public CodepointTransformProperty(
666
770
setUniformUnassigned (hasUniformUnassigned );
667
771
}
668
772
773
+ @ Override
669
774
protected String _getValue (int codepoint ) {
670
775
return transform .transform (codepoint );
671
776
}
@@ -682,6 +787,7 @@ public static class EncodingProperty extends SimpleProperty {
682
787
encoder = new CharEncoder (charset , false , false );
683
788
}
684
789
790
+ @ Override
685
791
protected String _getValue (int codepoint ) {
686
792
int len = encoder .getValue (codepoint , temp , 0 );
687
793
if (len < 0 ) {
@@ -697,6 +803,7 @@ protected String _getValue(int codepoint) {
697
803
return result .toString ();
698
804
}
699
805
806
+ @ Override
700
807
public boolean isDefault (int codepoint ) {
701
808
int len = encoder .getValue (codepoint , temp , 0 );
702
809
return len < 0 ;
@@ -716,6 +823,7 @@ public static class EncodingPropertyBoolean extends SimpleProperty {
716
823
encoder = new CharEncoder (charset , true , true );
717
824
}
718
825
826
+ @ Override
719
827
protected String _getValue (int codepoint ) {
720
828
return (encoder .getValue (codepoint , null , 0 ) > 0 ) ? "Yes" : "No" ;
721
829
}
@@ -731,6 +839,7 @@ public XPropertyFactory.UnicodeSetProperty set(UnicodeSet set) {
731
839
return this ;
732
840
}
733
841
842
+ @ Override
734
843
protected UnicodeMap <String > _getUnicodeMap () {
735
844
UnicodeMap <String > result = new UnicodeMap <String >();
736
845
result .putAll (unicodeSet , "Yes" );
@@ -743,10 +852,12 @@ public XPropertyFactory.UnicodeSetProperty set(String string) {
743
852
return set (new UnicodeSet (string ).freeze ());
744
853
}
745
854
855
+ @ Override
746
856
protected String _getValue (int codepoint ) {
747
857
return YESNO_ARRAY [unicodeSet .contains (codepoint ) ? 0 : 1 ];
748
858
}
749
859
860
+ @ Override
750
861
protected List _getAvailableValues (List result ) {
751
862
return YESNO ;
752
863
}
0 commit comments