@@ -5,31 +5,16 @@ Copyright 2017-2018 Gandalf Software, Inc., Scott P. Jones
5
5
Licensed under MIT License, see LICENSE.md
6
6
=#
7
7
8
- _wide_lower_l (c) = ifelse (c > (V6_COMPAT ? 0xdf : 0xde ), c != 0xf7 , c == 0xb5 )
9
-
10
- @inline _wide_lower_ch (ch) =
11
- ch <= 0x7f ? _islower_a (ch) : (ch > 0xff ? _islower_u (ch) : _wide_lower_l (ch))
12
-
13
- @inline _isupper_ch (ch) =
14
- ch <= 0x7f ? _isupper_a (ch) : (ch > 0xff ? _isupper_u (ch) : _isupper_l (ch))
15
-
16
- _wide_lower_latin (ch) = (ch == 0xb5 ) | (ch == 0xff ) | (! V6_COMPAT && (ch == 0xdf ))
17
-
18
- _wide_out_upper (ch) =
19
- ifelse (ch == 0xb5 , 0x39c ,
20
- ifelse (ch == 0xff , 0x178 , ifelse (! V6_COMPAT && ch == 0xdf , 0x1e9e , ch% UInt16)))
21
-
22
-
23
8
# Return `str` with its first code unit upper-cased (ASCII-only encodings).
# The input itself is returned when it is empty or when `_islower_a` rejects
# the first code unit; otherwise a new string is built.
function uppercase_first(str::MaybeSub{S}) where {C<:ASCIICSE,S<:Str{C}}
    nunits = ncodeunits(str)
    nunits == 0 && return str
    @preserve str begin
        src = pointer(str)
        first_cu = get_codeunit(src)
        if !_islower_a(first_cu)
            return str
        end
        buf, dst = _allocate(UInt8, nunits)
        # Copy every code unit, then overwrite the first one in the copy.
        unsafe_copyto!(dst, src, nunits)
        set_codeunit!(dst, first_cu - 0x20)
        Str(C, buf)
    end
end
35
20
@@ -39,10 +24,10 @@ function lowercase_first(str::MaybeSub{S}) where {C<:ASCIICSE,S<:Str{C}}
39
24
pnt = pointer (str)
40
25
ch = get_codeunit (pnt)
41
26
_isupper_a (ch) || return str
42
- out = _allocate (len)
27
+ buf, out = _allocate (UInt8, len)
43
28
unsafe_copyto! (out, pnt, len)
44
29
set_codeunit! (out, ch + 0x20 )
45
- Str (C, out )
30
+ Str (C, buf )
46
31
end
47
32
end
48
33
@@ -119,7 +104,7 @@ function uppercase_first(str::MaybeSub{S}) where {C<:LatinCSE,S<:Str{C}}
119
104
_can_upper (ch) || return str
120
105
buf, out = _allocate (UInt8, len)
121
106
set_codeunit! (out, ch - 0x20 )
122
- len > 1 && unsafe_copyto! (out, pnt+ 1 , len- 1 )
107
+ len > 1 && unsafe_copyto! (out + 1 , pnt+ 1 , len- 1 )
123
108
Str (C, buf)
124
109
end
125
110
end
@@ -154,10 +139,10 @@ function lowercase_first(str::MaybeSub{S}) where {C<:Latin_CSEs,S<:Str{C}}
154
139
@preserve str begin
155
140
pnt = pointer (str)
156
141
ch = get_codeunit (pnt)
157
- _isupper (ch) || return str
142
+ _isupper_al (ch) || return str
158
143
buf, out = _allocate (UInt8, len)
159
144
set_codeunit! (out, ch + 0x20 )
160
- len > 1 && unsafe_copyto! (out, pnt+ 1 , len- 1 )
145
+ len > 1 && unsafe_copyto! (out+ 1 , pnt+ 1 , len- 1 )
161
146
Str (C, buf)
162
147
end
163
148
end
@@ -176,7 +161,7 @@ function _upper(::Type{C}, beg::Ptr{UInt8}, off, len) where {C<:_LatinCSE}
176
161
out += off
177
162
while out < fin
178
163
ch = get_codeunit (out)
179
- _can_upper (ch) && set_codeunit! (out, ch - 0x20 )
164
+ _islower (ch) && set_codeunit! (out, ch - 0x20 )
180
165
out += 1
181
166
end
182
167
Str (C, buf)
264
249
# result must have at least one character > 0xff, so if the only character(s)
265
250
# > 0xff became <= 0xff, then the result may need to be narrowed and returned as _LatinStr
266
251
267
- function _lower (:: Type{C} , beg, off, len) where {C<: _UCS2CSE }
252
+ function _lower (:: Type{C} , beg, off, len) where {C<: Union{ _UCS2CSE} }
268
253
CU = codeunit (C)
269
254
buf, out = _allocate (CU, len)
270
255
unsafe_copyto! (out, beg, len)
@@ -277,18 +262,20 @@ function _lower(::Type{C}, beg, off, len) where {C<:_UCS2CSE}
277
262
_isupper_a (ch) && set_codeunit! (out, ch += 0x20 )
278
263
elseif ch <= 0xff
279
264
_isupper_l (ch) && set_codeunit! (out, ch += 0x20 )
280
- elseif _isupper_u (ch)
281
- ch = _lowercase_u (ch)
282
- flg = ch <= 0xff
283
- set_codeunit! (out, ch)
265
+ elseif ch <= 0xffff
266
+ if _can_lower_bmp (ch)
267
+ ch = _lower_bmp (ch)
268
+ flg = ch <= 0xff
269
+ set_codeunit! (out, ch)
270
+ end
284
271
end
285
272
out += sizeof (CU)
286
273
end
287
274
if flg && is_latin (buf)
288
275
out = pointer (buf)
289
- buf = _allocate (len)
290
- _narrow! (pointer (buf ), out, out + len)
291
- Str (_LatinCSE, buf )
276
+ buf8 = _allocate (len)
277
+ _narrow! (pointer (buf8 ), out, out + len)
278
+ Str (_LatinCSE, buf8 )
292
279
else
293
280
Str (C, buf)
294
281
end
@@ -302,25 +289,74 @@ function _lower(::Type{C}, beg, off, len) where {C<:Union{UCS2CSE,UTF32_CSEs}}
302
289
out += off
303
290
while out < fin
304
291
ch = get_codeunit (out)
305
- if ch <= 0x7f
306
- _isupper_a (ch) && set_codeunit! (out, ch += 0x20 )
307
- elseif ch <= 0xff
308
- _isupper_l (ch) && set_codeunit! (out, ch += 0x20 )
309
- elseif _isupper_u (ch)
310
- set_codeunit! (out, _lowercase_u (ch))
292
+ if ch <= 0xff
293
+ _isupper_al (ch) && set_codeunit! (out, ch += 0x20 )
294
+ elseif ch <= 0xffff
295
+ _can_lower_bmp (ch) && set_codeunit! (out, _lower_bmp (ch) )
296
+ elseif ch <= 0x1ffff
297
+ _can_lower_slp (ch) && set_codeunit! (out, _lower_slp (ch))
311
298
end
312
299
out += sizeof (CU)
313
300
end
314
301
Str (C, buf)
315
302
end
316
303
304
# Return `str` with its first code unit lower-cased (`_UCS2CSE` encoding).
# The input itself is returned when it is empty or when the first code unit
# cannot be lower-cased. When the original first code unit is > 0xff, its
# lower-case form is <= 0xff, and `_check_mask_ul` accepts the remaining code
# units, the result is built in a UInt8 buffer and returned as `_LatinCSE`.
function lowercase_first(str::MaybeSub{S}) where {C<:_UCS2CSE,S<:Str{C}}
    nunits = ncodeunits(str)
    nunits == 0 && return str
    @preserve str begin
        src = pointer(str)
        first_cu = get_codeunit(src)
        # Choose the predicate by code-point range.
        lowerable =
            if first_cu <= 0xff
                _isupper_al(first_cu)
            elseif first_cu <= 0xffff
                _can_lower_bmp(first_cu)
            else
                first_cu <= 0x1ffff && _can_lower_slp(first_cu)
            end
        if !lowerable
            return str
        end
        lowered = _lower_ch(first_cu)
        # NOTE(review): `src` points at 16-bit code units, yet it is offset by 1
        # (and the end is `src + nunits - 1`) below, while loops elsewhere in
        # this file advance such pointers by `sizeof(CU)`. Confirm against the
        # definitions of `_check_mask_ul` and `_narrow!` that these offsets are
        # intended.
        if first_cu > 0xff && lowered <= 0xff &&
           _check_mask_ul(src + 1, nunits - 1, _latin_mask(UInt16))
            buf8, dst8 = _allocate(UInt8, nunits)
            if nunits > 1
                _narrow!(dst8 + 1, src + 1, src + nunits - 1)
            end
            set_codeunit!(dst8, lowered)
            Str(_LatinCSE, buf8)
        else
            buf, dst = _allocate(codeunit(C), nunits)
            # With a single code unit there is nothing to copy; it is written below.
            if nunits > 1
                unsafe_copyto!(dst, src, nunits)
            end
            set_codeunit!(dst, lowered)
            Str(C, buf)
        end
    end
end
326
+
327
# Return `str` with its first code unit replaced by `_title_ch` of that code
# unit (UCS2 / UTF-32 encodings). The input itself is returned when it is empty
# or when `_can_title_ch` rejects the first code unit.
function uppercase_first(str::MaybeSub{S}) where {C<:Union{UCS2_CSEs,UTF32_CSEs},S<:Str{C}}
    nunits = ncodeunits(str)
    nunits == 0 && return str
    @preserve str begin
        src = pointer(str)
        first_cu = get_codeunit(src)
        if !_can_title_ch(first_cu)
            return str
        end
        buf, dst = _allocate(codeunit(C), nunits)
        # With a single code unit there is nothing to copy; it is written below.
        if nunits > 1
            unsafe_copyto!(dst, src, nunits)
        end
        set_codeunit!(dst, _title_ch(first_cu))
        Str(C, buf)
    end
end
339
+
340
# Return `str` with its first code unit replaced by `_lower_ch` of that code
# unit (UCS2 / UTF-32 encodings). The input itself is returned when it is empty
# or when `_can_lower_ch` rejects the first code unit.
function lowercase_first(str::MaybeSub{S}) where {C<:Union{UCS2CSE,UTF32_CSEs},S<:Str{C}}
    nunits = ncodeunits(str)
    nunits == 0 && return str
    @preserve str begin
        src = pointer(str)
        first_cu = get_codeunit(src)
        if !_can_lower_ch(first_cu)
            return str
        end
        buf, dst = _allocate(codeunit(C), nunits)
        # With a single code unit there is nothing to copy; it is written below.
        if nunits > 1
            unsafe_copyto!(dst, src, nunits)
        end
        set_codeunit!(dst, _lower_ch(first_cu))
        Str(C, buf)
    end
end
352
+
317
353
function lowercase (str:: MaybeSub{S} ) where {C<: Union{UCS2_CSEs,UTF32_CSEs} ,S<: Str{C} }
318
354
@preserve str begin
319
355
CU = codeunit (C)
320
356
pnt = beg = pointer (str)
321
357
fin = beg + sizeof (str)
322
358
while pnt < fin
323
- _isupper_ch (get_codeunit (pnt)) && return _lower (C, beg, pnt- beg, ncodeunits (str))
359
+ _can_lower_ch (get_codeunit (pnt)) && return _lower (C, beg, pnt- beg, ncodeunits (str))
324
360
pnt += sizeof (CU)
325
361
end
326
362
end
@@ -337,16 +373,12 @@ function _upper(::Type{C}, beg, off, len) where {C<:Union{UCS2_CSEs,UTF32_CSEs}}
337
373
ch = get_codeunit (out)
338
374
if ch <= 0x7f
339
375
_islower_a (ch) && set_codeunit! (out, ch -= 0x20 )
340
- elseif ch > 0xff
341
- _islower_u (ch) && set_codeunit! (out, _uppercase_u (ch))
342
- elseif _can_upper (ch)
343
- set_codeunit! (out, ch -= 0x20 )
344
- elseif ch == 0xb5
345
- set_codeunit! (out, 0x39c )
346
- elseif ch == 0xff
347
- set_codeunit! (out, 0x178 )
348
- elseif ! V6_COMPAT && ch == 0xdf
349
- set_codeunit! (out, 0x1e9e )
376
+ elseif ch <= 0xff
377
+ set_codeunit! (out, _uppercase_l (ch))
378
+ elseif ch <= 0xffff
379
+ _can_upper_bmp (ch) && set_codeunit! (out, _upper_bmp (ch))
380
+ elseif ch <= 0x1ffff
381
+ _can_upper_slp (ch) && set_codeunit! (out, _upper_slp (ch))
350
382
end
351
383
out += sizeof (CU)
352
384
end
@@ -359,7 +391,7 @@ function uppercase(str::MaybeSub{S}) where {C<:Union{UCS2_CSEs,UTF32_CSEs},S<:St
359
391
pnt = beg = pointer (str)
360
392
fin = beg + sizeof (str)
361
393
while pnt < fin
362
- _wide_lower_ch (get_codeunit (pnt)) && return _upper (C, beg, pnt- beg, ncodeunits (str))
394
+ _can_upper_ch (get_codeunit (pnt)) && return _upper (C, beg, pnt- beg, ncodeunits (str))
363
395
pnt += sizeof (CU)
364
396
end
365
397
str
0 commit comments