@@ -7,6 +7,144 @@ Licensed under MIT License, see LICENSE.md
7
7
Based initially on julia/test/strings/util.jl
8
8
=#
9
9
10
"""
    _concat(T, a, b)

Copy the code units of `a` followed by the code units of `b` into a buffer
obtained from `_allocate(T, ncodeunits(a) + ncodeunits(b))`, and return that buffer.

`T` is the code unit type handed to `_allocate`; `a` and `b` must support
`ncodeunits` and `pointer`.
"""
function _concat(T, a, b)
    la = ncodeunits(a)
    lb = ncodeunits(b)
    # `_allocate` yields the buffer plus the destination to write into
    # NOTE(review): assumes `out` is a `Ptr{T}` to the start of `buf` - confirm in `_allocate`
    buf, out = _allocate(T, la + lb)
    @preserve a unsafe_copyto!(out, pointer(a), la)
    # `Ptr + Integer` advances by bytes, while `la` counts code units, so the
    # offset has to be scaled by the code unit size (a no-op when T is UInt8)
    @preserve b unsafe_copyto!(out + la * sizeof(T), pointer(b), lb)
    return buf
end
18
+
19
"""
    _string(T, a, b, rest)

Copy the code units of `a`, `b`, and then every element of `rest` (in iteration
order) into a single buffer obtained from `_allocate(T, total)`, where `total` is
the sum of `ncodeunits` over all of them. Return that buffer.

`T` is the code unit type handed to `_allocate`; every input must support
`ncodeunits` and `pointer`.
"""
function _string(T, a, b, rest)
    la = ncodeunits(a)
    lb = ncodeunits(b)
    # First pass: total number of code units needed
    total = la + lb
    @inbounds for str in rest
        total += ncodeunits(str)
    end
    # NOTE(review): assumes `out` is a `Ptr{T}` to the start of `buf` - confirm in `_allocate`
    buf, out = _allocate(T, total)
    # `Ptr + Integer` advances by bytes, while the lengths count code units, so
    # every advance of `out` is scaled by the code unit size (1 for UInt8)
    unitsize = sizeof(T)
    @preserve a unsafe_copyto!(out, pointer(a), la)
    out += la * unitsize
    @preserve b unsafe_copyto!(out, pointer(b), lb)
    out += lb * unitsize
    # Second pass: append the remaining strings one after another
    @inbounds for str in rest
        n = ncodeunits(str)
        @preserve str unsafe_copyto!(out, pointer(str), n)
        out += n * unitsize
    end
    return buf
end
38
+
39
"""
    _string(T, coll)

Copy the code units of every element of `coll` (in iteration order) into a
single buffer obtained from `_allocate(T, total)`, where `total` is the sum of
`ncodeunits` over the elements. Return that buffer.

`coll` is iterated twice (once to size the buffer, once to copy), and each
element must support `ncodeunits` and `pointer`.
"""
function _string(T, coll)
    # First pass: total number of code units needed
    total = 0
    @inbounds for str in coll
        total += ncodeunits(str)
    end
    # NOTE(review): assumes `out` is a `Ptr{T}` to the start of `buf` - confirm in `_allocate`
    buf, out = _allocate(T, total)
    # `Ptr + Integer` advances by bytes, while the lengths count code units, so
    # every advance of `out` is scaled by the code unit size (1 for UInt8)
    unitsize = sizeof(T)
    # Second pass: copy each element directly after the previous one
    @inbounds for str in coll
        n = ncodeunits(str)
        @preserve str unsafe_copyto!(out, pointer(str), n)
        out += n * unitsize
    end
    return buf
end
52
+
53
+ # Handle concatenation where all strings share the same CSE and all characters share the same character set
54
+ #=
55
+ """
56
+ WIP: this is rather tricky.
57
+ It really should handle any type of Chr / Str / CSE, not just the ones defined
58
+ in CharSetEncodings, ChrBase and StrBase
59
+ Ideally, it could also handle mixes with String and Char (or other AbstractString / AbstractChar
60
+ types).
61
+ It may need to do two or even three passes, one to determine the correct type to be output,
62
+ another to determine the output length, and finally another to copy the strings / characters into
63
+ the buffer.
64
+ The result type should be based on promotion rules, i.e. outputting UCS2Str if only ASCII, Latin, UCS2 characters and strings are in the list.
65
+ This is difficult to do in a way that will still be type stable.
66
+ """
67
+
68
+ function _string_chr(a::Union{<:Chr{CS,T}, <:Str{C}, SubString{<:Str{C}}}...
69
+ ) where {CS<:CharSet,T,C<:CSE{CS}}
70
+ len = 0
71
+ for v in a
72
+ if v isa Chr
73
+ len += 1
74
+ else
75
+ len += ncodeunits(v)
76
+ end
77
+ end
78
+ buf, out = _allocate(T, len)
79
+ for v in a
80
+ len = ncodeunits(str)
81
+ @preserve str unsafe_copyto!(out, pointer(str), len)
82
+ out += len
83
+ end
84
+ buf
85
+ end
86
+ =#
87
+
88
# A single Str (or SubString of a Str) is returned as-is, without copying
function string(c::MaybeSub{<:Str})
    return c
end

# The vararg methods below concatenate the code units of all arguments with
# `_string` and wrap the resulting buffer in a Str whose encoding can represent
# every argument accepted by the signature.
# NOTE(review): a call with only ASCII arguments looks like it matches both the
# Latin and the UTF-8 method - confirm that dispatch is not ambiguous there.

# ASCII and/or Latin arguments -> LatinCSE result, 8-bit code units
function string(c::MaybeSub{<:Str{<:Union{ASCIICSE,Latin_CSEs}}}...)
    return Str(LatinCSE, _string(UInt8, c))
end

# ASCII and/or UTF-8 arguments -> UTF8CSE result, 8-bit code units
function string(c::MaybeSub{<:Str{<:Union{ASCIICSE,UTF8CSE}}}...)
    return Str(UTF8CSE, _string(UInt8, c))
end

# UCS-2 arguments -> UCS2CSE result, 16-bit code units
function string(c::MaybeSub{<:Str{<:UCS2_CSEs}}...)
    return Str(UCS2CSE, _string(UInt16, c))
end

# UCS-2 and/or UTF-16 arguments -> UTF16CSE result, 16-bit code units
function string(c::MaybeSub{<:Str{<:Union{UCS2_CSEs,UTF16CSE}}}...)
    return Str(UTF16CSE, _string(UInt16, c))
end

# UTF-32 arguments -> UTF32CSE result, 32-bit code units
function string(c::MaybeSub{<:Str{<:UTF32_CSEs}}...)
    return Str(UTF32CSE, _string(UInt32, c))
end
94
+
95
+ #=
96
+ const MS_Str{C} = MaybeSub{<:Str{C}}
97
+ string(a::MS_Str{C}, b::MS_Str{C}) where {C<:CSE} = Str(C, _concat(codeunit(C), a, b))
98
+ string(a::MS_Str{C}, b::MS_Str{C}, c::MS_Str{C}...) where {C<:CSE} =
99
+ Str(C, _string(codeunit(C), a, b, c))
100
+
101
+ string(a::T, b::T) where {T<:MS_Str{ASCIICSE}} = string(ASCIICSE, _concat(UInt8, a, b))
102
+ string(a::T, b::T) where {T<:MS_Str{ASCIICSE}} = string(ASCIICSE, _concat(UInt8, a, b))
103
+ string(a::T, b::T) where {T<:MS_Str{ASCIICSE}} = string(ASCIICSE, _concat(UInt8, a, b))
104
+
105
+ const MS_AL = MS_Str{<:Union{ASCIICSE,Latin_CSEs}}
106
+ string(a::MS_AL, b::MS_AL) = Str(LatinCSE, _concat(UInt8, a, b))
107
+ string(a::MS_AL, b::MS_AL, c::MS_AL...) = Str(LatinCSE, _string(UInt8, a, b, c))
108
+
109
+ const MS_AU = MS_Str{<:Union{ASCIICSE,UTF8CSE}}
110
+ string(a::MS_AU, b::MS_AU) = Str(UTF8CSE, _concat(UInt8, a, b))
111
+ string(a::MS_AU, b::MS_AU, c::MS_AU...) = Str(UTF8CSE, _string(UInt8, a, b, c))
112
+
113
+ const MS_U2 = MS_Str{<:UCS2_CSEs}
114
+ string(a::MS_U2, b::MS_U2) = Str(UCS2CSE, _concat(UInt16, a, b))
115
+ string(a::MS_U2, b::MS_U2, c::MS_U2...) = Str(UCS2CSE, _string(UInt16, a, b, c))
116
+
117
+ const MS_UT = MS_Str{<:Union{UCS2_CSEs,UTF16CSE}}
118
+ string(a::MS_UT, b::MS_UT) = Str(UTF16CSE, _concat(UInt16, a, b))
119
+ string(a::MS_UT, b::MS_UT, c::MS_UT...) = Str(UTF16CSE, _string(UInt16, a, b, c))
120
+
121
+ const MS_U4 = MS_Str{<:UTF32_CSEs}
122
+ string(a::MS_U4, b::MS_U4) = Str(UTF32CSE, _concat(UInt32, a, b))
123
+ string(a::MS_U4, b::MS_U4, c::MS_U4...) = Str(UTF32CSE, _string(UInt32, a, b, c))
124
+ =#
125
+
126
+ #=
127
+ string(c::MaybeSub{<:Str{<:Union{ASCIICSE,Latin_CSEs}}}...) =
128
+ length(c) == 1 ? c[1] : Str(LatinCSE, _string(UInt8, c))
129
+
130
+ string(c::MaybeSub{<:Str{<:Union{ASCIICSE,UTF8CSE}}}...) =
131
+ length(c) == 1 ? c[1] : Str(UTF8CSE, _string(UInt8, c))
132
+
133
+ string(c::MaybeSub{<:Str{<:UCS2_CSEs}}...) =
134
+ length(c) == 1 ? c[1] : Str(UCS2CSE, _string(UInt16, c))
135
+
136
+ string(c::MaybeSub{<:Str{<:Union{UCS2_CSEs,UTF16CSE}}}...) =
137
+ length(c) == 1 ? c[1] : Str(UTF16CSE, _string(UInt16, c))
138
+
139
+ string(c::MaybeSub{<:Str{<:UTF32_CSEs}}...) =
140
+ length(c) == 1 ? c[1] : Str(UTF32CSE, _string(UInt32, c))
141
+ =#
142
# The vararg `string` methods producing LatinCSE, UTF8CSE, UCS2CSE, UTF16CSE and
# UTF32CSE results are defined once, earlier in this file (directly after the
# single-argument `string` method). A verbatim second copy here would only
# overwrite those identical methods, so it is intentionally not repeated.
147
+
10
148
# starts with and ends with predicates
11
149
12
150
starts_with (a:: MaybeSub{<:Str{C}} , b:: MaybeSub{<:Str{C}} ) where {C<: CSE } =
0 commit comments