1
- use core:: iter:: FusedIterator ;
1
+ use core:: { iter:: FusedIterator , marker :: PhantomData } ;
2
2
3
3
use tinyvec:: ArrayVec ;
4
4
@@ -42,17 +42,27 @@ impl JamoKind {
42
42
}
43
43
}
44
44
45
- /// Iterator over a string's characters, with '\u{115F}' and '\u{1160}' inserted
46
- /// where needed to ensure all Korean syllable blocks are in standard form
47
- /// by [UAX29 rules](https://www.unicode.org/reports/tr29/#Standard_Korean_Syllables).
45
+ trait NormalizeKoreanSyllables {
46
+ fn insert_fillers (
47
+ next_c : Option < char > ,
48
+ prev_end_jamo_kind : Option < JamoKind > ,
49
+ next_start_jamo_kind : Option < JamoKind > ,
50
+ buf : & mut ArrayVec < [ Option < char > ; 3 ] > ,
51
+ ) -> Option < char > ;
52
+ }
53
+
54
+ // Used to abstract over UAX29 and KS X 1026-1 rules
48
55
#[ derive( Clone , Debug ) ]
49
- pub struct StandardKoreanSyllables < I > {
56
+ struct StandardizeKoreanSyllablesInner < I , N > {
50
57
prev_end_jamo_kind : Option < JamoKind > ,
51
58
buf : ArrayVec < [ Option < char > ; 3 ] > ,
52
59
inner : I ,
60
+ normalizer : PhantomData < N > ,
53
61
}
54
62
55
- impl < I : Iterator < Item = char > > Iterator for StandardKoreanSyllables < I > {
63
+ impl < I : Iterator < Item = char > , N : NormalizeKoreanSyllables > Iterator
64
+ for StandardizeKoreanSyllablesInner < I , N >
65
+ {
56
66
type Item = char ;
57
67
58
68
fn next ( & mut self ) -> Option < Self :: Item > {
@@ -65,7 +75,7 @@ impl<I: Iterator<Item = char>> Iterator for StandardKoreanSyllables<I> {
65
75
next_c. map_or ( ( None , None ) , JamoKind :: of) ;
66
76
self . prev_end_jamo_kind = next_end_jamo_kind;
67
77
68
- insert_fillers (
78
+ N :: insert_fillers (
69
79
next_c,
70
80
prev_end_jamo_kind,
71
81
next_start_jamo_kind,
@@ -87,50 +97,169 @@ impl<I: Iterator<Item = char>> Iterator for StandardKoreanSyllables<I> {
87
97
}
88
98
}
89
99
90
- impl < I : Iterator < Item = char > + FusedIterator > FusedIterator for StandardKoreanSyllables < I > { }
100
+ impl < I : Iterator < Item = char > + FusedIterator , N : NormalizeKoreanSyllables > FusedIterator
101
+ for StandardizeKoreanSyllablesInner < I , N >
102
+ {
103
+ }
91
104
92
- #[ inline]
93
- fn insert_fillers (
94
- next_c : Option < char > ,
95
- prev_end_jamo_kind : Option < JamoKind > ,
96
- next_start_jamo_kind : Option < JamoKind > ,
97
- buf : & mut ArrayVec < [ Option < char > ; 3 ] > ,
98
- ) -> Option < char > {
99
- match ( prev_end_jamo_kind, next_start_jamo_kind) {
100
- // Insert choseong filler before V not preceded by L or V
101
- ( None , Some ( JamoKind :: V ) ) | ( Some ( JamoKind :: T ) , Some ( JamoKind :: V ) ) => {
102
- buf. push ( next_c) ;
103
- Some ( '\u{115F}' )
104
- }
105
- // Insert choseong and jungseong fillers before T preceded non-jamo
106
- ( None , Some ( JamoKind :: T ) ) => {
107
- buf. push ( next_c) ;
108
- buf. push ( Some ( '\u{1160}' ) ) ;
109
- Some ( '\u{115F}' )
110
- }
111
- // Insert V filler between L and non-jamo
112
- ( Some ( JamoKind :: L ) , None ) => {
113
- buf. push ( next_c) ;
114
- Some ( '\u{1160}' )
105
+ impl < I , N > StandardizeKoreanSyllablesInner < I , N > {
106
+ #[ inline]
107
+ fn new ( iter : I ) -> Self {
108
+ Self {
109
+ prev_end_jamo_kind : None ,
110
+ buf : ArrayVec :: new ( ) ,
111
+ inner : iter,
112
+ normalizer : PhantomData ,
115
113
}
116
- // For L followed by T, insert V filler, L filler, then another V filler
117
- ( Some ( JamoKind :: L ) , Some ( JamoKind :: T ) ) => {
118
- buf. push ( next_c) ;
119
- buf. push ( Some ( '\u{1160}' ) ) ;
120
- buf. push ( Some ( '\u{115F}' ) ) ;
121
- Some ( '\u{1160}' )
114
+ }
115
+ }
116
+
117
+ // UAX 29 normalization
118
+
119
/// Marker type selecting the UAX29 filler-insertion rules.
#[derive(Clone, Debug)]
struct Uax29;
121
+
122
+ impl NormalizeKoreanSyllables for Uax29 {
123
+ #[ inline]
124
+ fn insert_fillers (
125
+ next_c : Option < char > ,
126
+ prev_end_jamo_kind : Option < JamoKind > ,
127
+ next_start_jamo_kind : Option < JamoKind > ,
128
+ buf : & mut ArrayVec < [ Option < char > ; 3 ] > ,
129
+ ) -> Option < char > {
130
+ match ( prev_end_jamo_kind, next_start_jamo_kind) {
131
+ // Insert choseong filler before V not preceded by L or V
132
+ ( None , Some ( JamoKind :: V ) ) | ( Some ( JamoKind :: T ) , Some ( JamoKind :: V ) ) => {
133
+ buf. push ( next_c) ;
134
+ Some ( '\u{115F}' )
135
+ }
136
+ // Insert choseong and jungseong fillers before T preceded non-jamo
137
+ ( None , Some ( JamoKind :: T ) ) => {
138
+ buf. push ( next_c) ;
139
+ buf. push ( Some ( '\u{1160}' ) ) ;
140
+ Some ( '\u{115F}' )
141
+ }
142
+ // Insert V filler between L and non-jamo
143
+ ( Some ( JamoKind :: L ) , None ) => {
144
+ buf. push ( next_c) ;
145
+ Some ( '\u{1160}' )
146
+ }
147
+ // For L followed by T, insert V filler, L filler, then another V filler
148
+ ( Some ( JamoKind :: L ) , Some ( JamoKind :: T ) ) => {
149
+ buf. push ( next_c) ;
150
+ buf. push ( Some ( '\u{1160}' ) ) ;
151
+ buf. push ( Some ( '\u{115F}' ) ) ;
152
+ Some ( '\u{1160}' )
153
+ }
154
+ _ => next_c,
122
155
}
123
- _ => next_c,
124
156
}
125
157
}
126
158
127
- impl < I > StandardKoreanSyllables < I > {
159
+ /// Iterator over a string's characters, with U+115F and U+1160 inserted
160
+ /// where needed to ensure all Korean syllable blocks are in standard form
161
+ /// by [UAX29 rules](https://www.unicode.org/reports/tr29/#Standard_Korean_Syllables).
162
+ #[ derive( Clone , Debug ) ]
163
+ pub struct StandardizeKoreanSyllables < I > ( StandardizeKoreanSyllablesInner < I , Uax29 > ) ;
164
+
165
+ impl < I > StandardizeKoreanSyllables < I > {
128
166
#[ inline]
129
167
pub ( crate ) fn new ( iter : I ) -> Self {
130
- Self {
131
- prev_end_jamo_kind : None ,
132
- buf : ArrayVec :: new ( ) ,
133
- inner : iter,
168
+ Self ( StandardizeKoreanSyllablesInner :: new ( iter) )
169
+ }
170
+ }
171
+
172
+ impl < I : Iterator < Item = char > > Iterator for StandardizeKoreanSyllables < I > {
173
+ type Item = char ;
174
+
175
+ fn next ( & mut self ) -> Option < Self :: Item > {
176
+ self . 0 . next ( )
177
+ }
178
+
179
+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
180
+ self . 0 . size_hint ( )
181
+ }
182
+ }
183
+
184
+ impl < I : Iterator < Item = char > + FusedIterator > FusedIterator for StandardizeKoreanSyllables < I > { }
185
+
186
+ // KS X 1026-1 normalization
187
+
188
/// Marker type selecting the KS X 1026-1 filler-insertion rules.
#[cfg(feature = "ks_x_1026-1")]
#[derive(Clone, Debug)]
struct KsX1026_1;
191
+
192
#[cfg(feature = "ks_x_1026-1")]
impl NormalizeKoreanSyllables for KsX1026_1 {
    /// KS X 1026-1 rule set: decides which fillers (if any) have to be
    /// emitted before `next_c`, based on the jamo kinds on both sides of the
    /// boundary.
    ///
    /// When no filler is needed, `next_c` is returned unchanged and `buf` is
    /// left untouched. Otherwise the first filler is returned and `next_c`
    /// plus any remaining fillers are queued in `buf`.
    #[inline]
    fn insert_fillers(
        next_c: Option<char>,
        prev_end_jamo_kind: Option<JamoKind>,
        next_start_jamo_kind: Option<JamoKind>,
        buf: &mut ArrayVec<[Option<char>; 3]>,
    ) -> Option<char> {
        const CHOSEONG_FILLER: char = '\u{115F}';
        const JUNGSEONG_FILLER: char = '\u{1160}';

        // Fillers that must precede `next_c`, listed in output order.
        let fillers: &[char] = match (prev_end_jamo_kind, next_start_jamo_kind) {
            // V preceded by V, T or non-jamo: choseong filler first.
            (None, Some(JamoKind::V))
            | (Some(JamoKind::V), Some(JamoKind::V))
            | (Some(JamoKind::T), Some(JamoKind::V)) => &[CHOSEONG_FILLER],
            // T preceded by T or non-jamo: choseong filler, then jungseong filler.
            (None, Some(JamoKind::T)) | (Some(JamoKind::T), Some(JamoKind::T)) => {
                &[CHOSEONG_FILLER, JUNGSEONG_FILLER]
            }
            // L followed by non-jamo or by another L: jungseong filler in between.
            (Some(JamoKind::L), None) | (Some(JamoKind::L), Some(JamoKind::L)) => {
                &[JUNGSEONG_FILLER]
            }
            // L followed by T: V filler, L filler, then another V filler.
            (Some(JamoKind::L), Some(JamoKind::T)) => {
                &[JUNGSEONG_FILLER, CHOSEONG_FILLER, JUNGSEONG_FILLER]
            }
            _ => &[],
        };

        match fillers.split_first() {
            // Nothing to insert: pass the character straight through.
            None => next_c,
            Some((&first, rest)) => {
                // `next_c` is queued first and the remaining fillers after it
                // in reverse output order (the buffer is presumably drained
                // from the back by the caller).
                buf.push(next_c);
                for &filler in rest.iter().rev() {
                    buf.push(Some(filler));
                }
                Some(first)
            }
        }
    }
}
231
+
232
/// Adapter over a string's characters that inserts U+115F (choseong filler)
/// and U+1160 (jungseong filler) wherever they are required, so that every
/// Korean syllable block is in standard form according to the
/// [KS X 1026-1](http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3422.pdf) rules.
#[cfg(feature = "ks_x_1026-1")]
#[cfg_attr(docsrs, doc(cfg(feature = "ks_x_1026-1")))]
#[derive(Clone, Debug)]
pub struct StandardizeKoreanSyllablesKsX1026_1<I>(StandardizeKoreanSyllablesInner<I, KsX1026_1>);
239
+
240
#[cfg(feature = "ks_x_1026-1")]
impl<I> StandardizeKoreanSyllablesKsX1026_1<I> {
    /// Wraps `iter` in the KS X 1026-1 flavour of the shared inner iterator.
    #[inline]
    pub(crate) fn new(iter: I) -> Self {
        let inner = StandardizeKoreanSyllablesInner::new(iter);
        Self(inner)
    }
}
247
+
248
#[cfg(feature = "ks_x_1026-1")]
impl<I> Iterator for StandardizeKoreanSyllablesKsX1026_1<I>
where
    I: Iterator<Item = char>,
{
    type Item = char;

    /// Forwards to the shared inner iterator.
    fn next(&mut self) -> Option<Self::Item> {
        let Self(inner) = self;
        inner.next()
    }

    /// Reports the inner iterator's size hint unchanged.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let Self(inner) = self;
        inner.size_hint()
    }
}
260
+
261
// Fused whenever the wrapped iterator is fused.
#[cfg(feature = "ks_x_1026-1")]
impl<I> FusedIterator for StandardizeKoreanSyllablesKsX1026_1<I> where
    I: Iterator<Item = char> + FusedIterator
{
}
0 commit comments