|
15 | 15 | */
|
16 | 16 | package com.diffplug.selfie
|
17 | 17 |
|
/**
 * Escapes and unescapes strings one Unicode code point at a time. An escaped code point is written
 * as the escape code point followed by a replacement code point.
 *
 * If your escape policy is "'123", it means this:
 * ```
 * abc->abc
 * 123->'1'2'3
 * I won't->I won''t
 * ```
 *
 * Instances are created with [selfEscape] or [specifiedEscape].
 */
actual class PerCharacterEscaper
/**
 * @param escapeCodePoint the code point which introduces an escape sequence
 * @param escapedCodePoints the code points which must be escaped
 * @param escapedByCodePoints the code point written after the escape code point for the entry at
 *   the same index in [escapedCodePoints]
 */
private constructor(
    private val escapeCodePoint: Int,
    private val escapedCodePoints: IntArray,
    private val escapedByCodePoints: IntArray
) {
  // These four properties were declared as public instance members; they stay that way so that the
  // visible surface of the class is unchanged.
  val MIN_SUPPLEMENTARY_CODE_POINT = FIRST_SUPPLEMENTARY_CODE_POINT
  val MAX_CODE_POINT = 0X10FFFF
  val MIN_LOW_SURROGATE = '\uDC00'
  val MIN_HIGH_SURROGATE = '\uD800'

  /** Returns the leading (high) surrogate of the UTF-16 pair which encodes [codePoint]. */
  private fun highSurrogate(codePoint: Int): Char {
    val base = MIN_HIGH_SURROGATE.code - (MIN_SUPPLEMENTARY_CODE_POINT ushr 10)
    return ((codePoint ushr 10) + base).toChar()
  }

  /** Returns the trailing (low) surrogate of the UTF-16 pair which encodes [codePoint]. */
  private fun lowSurrogate(codePoint: Int): Char {
    return ((codePoint and 0x3ff) + MIN_LOW_SURROGATE.code).toChar()
  }

  /**
   * Converts [codePoint] into its UTF-16 representation: one char for the basic multilingual
   * plane, a surrogate pair otherwise.
   *
   * @throws IllegalArgumentException if [codePoint] is outside of the Unicode range
   */
  private fun toChars(codePoint: Int): CharArray =
      when {
        isBmpCodePoint(codePoint) -> charArrayOf(codePoint.toChar())
        isValidCodePoint(codePoint) ->
            charArrayOf(highSurrogate(codePoint), lowSurrogate(codePoint))
        else -> throw IllegalArgumentException("Not a valid Unicode code point: $codePoint")
      }

  /** True if [codePoint] fits into a single UTF-16 char. */
  private fun isBmpCodePoint(codePoint: Int): Boolean {
    return (codePoint ushr 16) == 0
  }

  /** True if [codePoint] lies in `0..MAX_CODE_POINT`. */
  private fun isValidCodePoint(codePoint: Int): Boolean {
    // The unsigned shift turns negative values into huge planes, so one comparison covers both
    // the lower and the upper bound.
    val plane = codePoint ushr 16
    return plane < ((MAX_CODE_POINT + 1) ushr 16)
  }

  /** Returns the offset of the first code point in [input] that must be escaped, or -1. */
  private fun firstOffsetNeedingEscape(input: String): Int {
    var offset = 0
    while (offset < input.length) {
      val codePoint = codePointAt(input, offset)
      if (escapedCodePoints.indexOf(codePoint) != -1) {
        return offset
      }
      offset += charCount(codePoint)
    }
    return -1
  }

  /**
   * Replaces every code point listed in the escape policy with the escape code point followed by
   * its replacement. Returns [input] itself when nothing has to be escaped.
   */
  actual fun escape(input: String): String {
    val firstEscape = firstOffsetNeedingEscape(input)
    if (firstEscape == -1) {
      return input
    }
    val length = input.length
    val remaining = length - firstEscape
    // Capacity is only a guess: the untouched prefix plus ~25% growth for the remainder.
    val builder = StringBuilder(firstEscape + 4 + remaining * 5 / 4)
    builder.append(input, 0, firstEscape)
    var offset = firstEscape
    while (offset < length) {
      val codePoint = codePointAt(input, offset)
      offset += charCount(codePoint)
      val idx = escapedCodePoints.indexOf(codePoint)
      if (idx == -1) {
        builder.append(toChars(codePoint))
      } else {
        builder.append(toChars(escapeCodePoint))
        builder.append(toChars(escapedByCodePoints[idx]))
      }
    }
    return builder.toString()
  }

  /** Returns the offset of the first escape code point in [input], or -1 if there is none. */
  private fun firstOffsetNeedingUnescape(input: String): Int {
    var offset = 0
    while (offset < input.length) {
      val codePoint = codePointAt(input, offset)
      if (codePoint == escapeCodePoint) {
        return offset
      }
      offset += charCount(codePoint)
    }
    return -1
  }

  /**
   * Reverses [escape]. A code point which follows the escape code point but is not part of the
   * policy is copied as-is. Returns [input] itself when it contains no escape code point.
   *
   * @throws IllegalArgumentException if [input] ends with the escape code point
   */
  actual fun unescape(input: String): String {
    val firstEscape = firstOffsetNeedingUnescape(input)
    if (firstEscape == -1) {
      return input
    }
    val length = input.length
    val remaining = length - firstEscape
    val builder = StringBuilder(firstEscape + 4 + remaining * 5 / 4)
    builder.append(input, 0, firstEscape)
    var offset = firstEscape
    while (offset < length) {
      var codePoint = codePointAt(input, offset)
      offset += charCount(codePoint)
      if (codePoint == escapeCodePoint) {
        if (offset >= length) {
          throw IllegalArgumentException(
              "Escape character '" +
                  toChars(escapeCodePoint).concatToString() +
                  "' can't be the last character in a string.")
        }
        val escapedBy = codePointAt(input, offset)
        // Advance by the width of what was actually consumed from the input; the code point it
        // maps back to may have a different UTF-16 width.
        offset += charCount(escapedBy)
        val idx = escapedByCodePoints.indexOf(escapedBy)
        codePoint = if (idx == -1) escapedBy else escapedCodePoints[idx]
      }
      builder.append(toChars(codePoint))
    }
    return builder.toString()
  }

  actual companion object {
    private const val FIRST_SUPPLEMENTARY_CODE_POINT = 0x010000

    /** Number of UTF-16 chars needed to encode [codePoint]. */
    private fun charCount(codePoint: Int): Int {
      return if (codePoint >= FIRST_SUPPLEMENTARY_CODE_POINT) 2 else 1
    }

    /** Reads the code point which starts at UTF-16 index [offset] of [value]. */
    private fun codePointAt(value: String, offset: Int): Int {
      // The parameter names are referenced from inside the JavaScript snippet, do not rename them.
      val codePoint = js("value.codePointAt(offset)")
      return codePoint
    }

    /** Splits [value] into its code points, consuming surrogate pairs as a single entry. */
    private fun codePoints(value: String): IntArray {
      val result = mutableListOf<Int>()
      var offset = 0
      while (offset < value.length) {
        val codePoint = codePointAt(value, offset)
        result.add(codePoint)
        // Step over both halves of a surrogate pair, otherwise the low surrogate would be
        // recorded as an extra code point.
        offset += charCount(codePoint)
      }
      return result.toIntArray()
    }

    /**
     * The first code point of [escapePolicy] is the escape code point, and every code point of
     * the policy is escaped by itself.
     *
     * If your escape policy is "'123", it means this:
     * ```
     * abc->abc
     * 123->'1'2'3
     * I won't->I won''t
     * ```
     *
     * @throws IllegalArgumentException if [escapePolicy] is empty
     */
    actual fun selfEscape(escapePolicy: String): PerCharacterEscaper {
      val escapedCodePoints = codePoints(escapePolicy)
      require(escapedCodePoints.isNotEmpty()) { "Escape policy must not be empty." }
      val escapeCodePoint = escapedCodePoints[0]
      return PerCharacterEscaper(escapeCodePoint, escapedCodePoints, escapedCodePoints)
    }

    /**
     * [escapePolicy] is read as pairs of code points: the code point to escape followed by the
     * code point it is escaped by. The first code point of the policy is the escape code point.
     *
     * If your escape policy is "'a1b2c3d", it means this:
     * ```
     * abc->abc
     * 123->'b'c'd
     * I won't->I won'at
     * ```
     *
     * @throws IllegalArgumentException if [escapePolicy] is empty or has an odd number of code
     *   points
     */
    actual fun specifiedEscape(escapePolicy: String): PerCharacterEscaper {
      val codePoints = codePoints(escapePolicy)
      require(codePoints.isNotEmpty()) { "Escape policy must not be empty." }
      require(codePoints.size % 2 == 0) {
        "Escape policy must have an even number of code points, but had ${codePoints.size}."
      }
      val escapeCodePoint = codePoints[0]
      val pairCount = codePoints.size / 2
      val escapedCodePoints = IntArray(pairCount) { codePoints[2 * it] }
      val escapedByCodePoints = IntArray(pairCount) { codePoints[2 * it + 1] }
      return PerCharacterEscaper(escapeCodePoint, escapedCodePoints, escapedByCodePoints)
    }
  }
}
|
0 commit comments