21
21
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
22
// THE SOFTWARE.
23
23
//
24
- // Adapted from: https://github.com/durbrow/fix-double-utf8.swift
25
24
26
25
import Foundation
27
26
28
- var memoizer = [ String: UnicodeScalar] ( )
29
-
30
- func lookup( base: UnicodeScalar , combi: UnicodeScalar ) -> UnicodeScalar {
31
- let combined = " \( base) \( combi) "
32
-
33
- if let y = memoizer [ combined] {
34
- return y
35
- }
36
-
37
- for i in 0x80 ... 0xFF {
38
- let ch = UnicodeScalar ( i)
39
-
40
- if String ( ch) == combined {
41
- memoizer [ combined] = ch
42
- return ch
43
- }
44
- }
45
- let ch = UnicodeScalar ( 0xFFFD ) // Unicode replacement character �
46
-
47
- memoizer [ combined] = ch
48
- return ch
27
/// Repairs "double-encoded" UTF-8 in place: text whose UTF-8 bytes were
/// mistakenly decoded as ISO Latin-1, so each byte became its own character.
///
/// The string is re-encoded as ISO Latin-1 to recover the original byte
/// sequence, and those bytes are then decoded as UTF-8.
///
/// `name` is left untouched when it cannot be repaired, i.e. when it contains
/// a character outside ISO Latin-1 or when the recovered bytes are not
/// well-formed UTF-8.
///
/// - parameter name: The string to repair in place.
func fixDoubleUTF8(inout name: String) {
    // A lossless conversion yields nil for any character above U+00FF;
    // such a string was not produced by a Latin-1 mis-decode.
    if let rawBytes = name.dataUsingEncoding(NSISOLatin1StringEncoding, allowLossyConversion: false) {
        // Decoding yields nil when the bytes are not valid UTF-8; keep the
        // caller's string rather than crashing.
        if let repaired = NSString(data: rawBytes, encoding: NSUTF8StringEncoding) {
            name = repaired as String
        }
    }
}
50
32
51
- func fixDoubleUTF8( inout name: String ) {
52
- var isASCII = true
53
- var y = [ UInt8] ( )
54
-
55
- for ch in name. unicodeScalars {
56
- if ch. value < 0x80 {
57
- y. append ( UInt8 ( ch) )
58
- continue
59
- }
60
- isASCII = false
61
-
62
- if ch. value < 0x100 {
63
- y. append ( UInt8 ( ch) )
64
- continue
65
- }
66
- // might be a combining character that when combined with the
67
- // preceding character maps to a codepoint in the UTF8 range
68
- if y. count == 0 {
69
- return
70
- }
71
-
72
- let last = y. removeLast ( )
73
- let repl = lookup ( UnicodeScalar ( last) , ch)
74
-
75
- // the replacement needs to be in the UTF8 range
76
- if repl. value >= 0x100 {
77
- return
78
- }
79
-
80
- y. append ( UInt8 ( repl) )
81
- }
82
-
83
- if isASCII {
84
- return
85
- }
86
-
87
- y. append ( 0 ) // null terminator
88
-
89
- return y. withUnsafeBufferPointer {
90
- let cstr = UnsafePointer < CChar > ( $0. baseAddress) // typecase from uint8_t * to char *
91
- let rslt = String . fromCStringRepairingIllFormedUTF8 ( cstr) // -> (String, Bool)
92
- if let str = rslt. 0 {
93
- if !rslt. hadError {
94
- name = str
95
- }
96
- }
97
-
98
- return
99
- }
33
/// Produces "double-encoded" UTF-8 in place: encodes `str` as UTF-8 and then
/// reinterprets each resulting byte as one ISO Latin-1 character.
///
/// `str` is left untouched if either conversion step fails.
///
/// - parameter str: The string to double-encode in place.
func doubleEncodeUTF8(inout str: String) {
    if let utf8Bytes = str.dataUsingEncoding(NSUTF8StringEncoding) {
        // Each UTF-8 byte is read back as a single Latin-1 character.
        if let misdecoded = NSString(data: utf8Bytes, encoding: NSISOLatin1StringEncoding) {
            str = misdecoded as String
        }
    }
}
0 commit comments