Skip to content

Commit 61fba2b

Browse files
authored
js: write PerCharacterEscaper.js (#9)
2 parents 91c1048 + 8219bd8 commit 61fba2b

File tree

3 files changed

+296
-105
lines changed

3 files changed

+296
-105
lines changed
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* Copyright (C) 2016-2023 DiffPlug
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.diffplug.selfie
17+
18+
import com.diffplug.selfie.PerCharacterEscaper.Companion.selfEscape
19+
import com.diffplug.selfie.PerCharacterEscaper.Companion.specifiedEscape
20+
import kotlin.test.Test
21+
import kotlin.test.assertEquals
22+
import kotlin.test.assertFails
23+
import kotlin.test.assertSame
24+
25+
/**
 * Exercises [PerCharacterEscaper] in both flavors (self-escaping and specified-escaping): the
 * identity fast path, ordinary escaping and unescaping, malformed input, and roundtripping.
 */
class PerCharacterEscaperTest {
  @Test
  fun performanceOptimizationSelf() {
    val escaper = selfEscape("`123")
    // if nothing gets changed, it should return the exact same value
    val abc = "abc"
    assertSame(abc, escaper.escape(abc))
    assertSame(abc, escaper.unescape(abc))

    // otherwise it should have the normal behavior
    assertEquals("`1", escaper.escape("1"))
    assertEquals("``", escaper.escape("`"))
    assertEquals("abc`1`2`3``def", escaper.escape("abc123`def"))

    // in both directions
    assertEquals("1", escaper.unescape("`1"))
    assertEquals("`", escaper.unescape("``"))
    assertEquals("abc123`def", escaper.unescape("abc`1`2`3``def"))
  }

  @Test
  fun performanceOptimizationSpecific() {
    val escaper = specifiedEscape("`a1b2c3d")
    // if nothing gets changed, it should return the exact same value
    val abc = "abc"
    assertSame(abc, escaper.escape(abc))
    assertSame(abc, escaper.unescape(abc))

    // otherwise it should have the normal behavior
    assertEquals("`b", escaper.escape("1"))
    assertEquals("`a", escaper.escape("`"))
    assertEquals("abc`b`c`d`adef", escaper.escape("abc123`def"))

    // in both directions: the output of escape must map back through the specified pairs
    assertEquals("1", escaper.unescape("`b"))
    assertEquals("`", escaper.unescape("`a"))
    assertEquals("abc123`def", escaper.unescape("abc`b`c`d`adef"))
    // escape sequences that are not part of the policy pass the following character through
    assertEquals("abc123`def", escaper.unescape("abc`1`2`3``def"))
  }

  @Test
  fun cornerCasesSelf() {
    val escaper = selfEscape("`123")
    // cornercase - escape character without follow-on will throw an error
    val exception = assertFails { escaper.unescape("`") }
    assertEquals("Escape character '`' can't be the last character in a string.", exception.message)
    // escape character followed by non-escape character is fine
    assertEquals("a", escaper.unescape("`a"))
  }

  @Test
  fun cornerCasesSpecific() {
    val escaper = specifiedEscape("`a1b2c3d")
    // cornercase - escape character without follow-on will throw an error
    val exception = assertFails { escaper.unescape("`") }
    assertEquals("Escape character '`' can't be the last character in a string.", exception.message)
    // escape character followed by non-escape character is fine
    assertEquals("e", escaper.unescape("`e"))
  }

  @Test
  fun roundtrip() {
    val escaper = selfEscape("`<>")
    // escaping and then unescaping must give back the original string
    val roundtrip = { str: String -> assertEquals(str, escaper.unescape(escaper.escape(str))) }
    roundtrip("")
    roundtrip("<local>~`/")
  }
}

selfie-lib/src/jsMain/kotlin/com/diffplug/selfie/PerCharacterEscaper.js.kt

Lines changed: 205 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,212 @@
1515
*/
1616
package com.diffplug.selfie
1717

18-
actual class PerCharacterEscaper {
19-
actual fun escape(input: String): String = TODO()
20-
actual fun unescape(input: String): String = TODO()
18+
/**
19+
* If your escape policy is "'123", it means this:
20+
* ```
21+
* abc->abc
22+
* 123->'1'2'3
23+
* I won't->I won''t
24+
* ```
25+
*/
26+
actual class PerCharacterEscaper
27+
/**
28+
* The first character in the string will be used as the escape character, and all characters will
29+
* be escaped.
30+
*/
31+
private constructor(
32+
private val escapeCodePoint: Int,
33+
private val escapedCodePoints: IntArray,
34+
private val escapedByCodePoints: IntArray
35+
) {
36+
// Numeric boundaries used by the UTF-16 helper functions of this class.
val MIN_SUPPLEMENTARY_CODE_POINT: Int = 0x10000
val MAX_CODE_POINT: Int = 0x10FFFF
val MIN_LOW_SURROGATE: Char = '\uDC00'
val MIN_HIGH_SURROGATE: Char = '\uD800'
40+
/**
 * Computes the leading (high) UTF-16 surrogate for [codePoint].
 *
 * No range check is performed here; the caller decides whether [codePoint] needs a surrogate pair.
 */
private fun highSurrogate(codePoint: Int): Char {
  // Folds the "subtract 0x10000, then add 0xD800" steps into a single precomputed bias.
  val bias = MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT ushr 10)
  val unit = (codePoint ushr 10) + bias.code
  return unit.toChar()
}
45+
/** Computes the trailing (low) UTF-16 surrogate from the lowest ten bits of [codePoint]. */
private fun lowSurrogate(codePoint: Int): Char {
  val lowTenBits = codePoint and 0x3ff
  return (lowTenBits + MIN_LOW_SURROGATE.code).toChar()
}
48+
/**
 * Stores the surrogate pair for [codePoint] into [dst]: the high surrogate at [index] and the low
 * surrogate at `index + 1`.
 */
private fun toSurrogates(codePoint: Int, dst: CharArray, index: Int) {
  val trailing = lowSurrogate(codePoint)
  val leading = highSurrogate(codePoint)
  // The higher slot is deliberately written first ("backwards") so the write is all-or-nothing.
  dst[index + 1] = trailing
  dst[index] = leading
}
53+
/**
 * Converts [codePoint] to its UTF-16 representation: one char for a BMP code point, a surrogate
 * pair for any other valid code point.
 *
 * @throws IllegalArgumentException if [codePoint] is not a valid Unicode code point
 */
private fun toChars(codePoint: Int): CharArray =
    when {
      isBmpCodePoint(codePoint) -> charArrayOf(codePoint.toChar())
      isValidCodePoint(codePoint) -> CharArray(2).also { pair -> toSurrogates(codePoint, pair, 0) }
      else -> throw IllegalArgumentException("Not a valid Unicode code point: $codePoint")
    }
64+
/** Returns true when [codePoint] fits in 16 bits, i.e. nothing is set above bit 15. */
private fun isBmpCodePoint(codePoint: Int): Boolean = (codePoint ushr 16) == 0
67+
/**
 * Returns true when [codePoint] lies in `0..MAX_CODE_POINT`.
 *
 * Comparing the unsigned upper bits ("plane") also rejects negative values, because the unsigned
 * shift turns them into large positive numbers.
 */
private fun isValidCodePoint(codePoint: Int): Boolean {
  val planeLimit = (MAX_CODE_POINT + 1) ushr 16
  return (codePoint ushr 16) < planeLimit
}
73+
/** Number of UTF-16 chars needed for [codePoint]: 2 at or above the supplementary range, else 1. */
private fun charCount(codePoint: Int): Int =
    if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) 1 else 2
76+
/**
 * Scans [input] code point by code point and returns the UTF-16 offset of the first one listed in
 * `escapedCodePoints`, or -1 if none is.
 */
private fun firstOffsetNeedingEscape(input: String): Int {
  var cursor = 0
  while (cursor < input.length) {
    val current = codePointAt(input, cursor)
    if (indexOf(escapedCodePoints, current) != -1) {
      return cursor
    }
    cursor += charCount(current)
  }
  return -1
}
92+
/**
 * Escapes [input] according to this escaper's policy.
 *
 * If no code point of [input] needs escaping, the very same [input] instance is returned.
 * Otherwise each code point listed in `escapedCodePoints` is replaced by the escape code point
 * followed by its counterpart in `escapedByCodePoints`.
 */
actual fun escape(input: String): String {
  val firstEscape = firstOffsetNeedingEscape(input)
  if (firstEscape == -1) {
    return input
  }
  val total = input.length
  val remaining = total - firstEscape
  // Capacity is only an estimate: the untouched prefix plus some headroom for the rest.
  val out = StringBuilder(firstEscape + 4 + remaining * 5 / 4)
  out.append(input, 0, firstEscape)
  var cursor = firstEscape
  while (cursor < total) {
    val current = codePointAt(input, cursor)
    cursor += charCount(current)
    val policyIndex = indexOf(escapedCodePoints, current)
    if (policyIndex != -1) {
      out.append(toChars(escapeCodePoint))
      out.append(toChars(escapedByCodePoints[policyIndex]))
    } else {
      out.append(toChars(current))
    }
  }
  return out.toString()
}
116+
/**
 * Scans [input] code point by code point and returns the UTF-16 offset of the first occurrence of
 * the escape code point, or -1 if it does not occur.
 */
private fun firstOffsetNeedingUnescape(input: String): Int {
  var cursor = 0
  while (cursor < input.length) {
    val current = codePointAt(input, cursor)
    if (current == escapeCodePoint) {
      return cursor
    }
    cursor += charCount(current)
  }
  return -1
}
130+
/**
 * Reverses [escape].
 *
 * If [input] does not contain the escape code point, the very same [input] instance is returned.
 * Otherwise every escape code point is dropped and the code point following it is mapped back
 * through the policy (`escapedByCodePoints` -> `escapedCodePoints`). A following code point that
 * is not part of the policy is passed through unchanged.
 *
 * @throws IllegalArgumentException if the escape code point is the last code point of [input]
 */
actual fun unescape(input: String): String {
  val noEscapes = firstOffsetNeedingUnescape(input)
  if (noEscapes == -1) {
    return input
  }
  val length = input.length
  val needsEscapes = length - noEscapes
  val builder = StringBuilder(noEscapes + 4 + needsEscapes * 5 / 4)
  builder.append(input, 0, noEscapes)
  var offset = noEscapes
  while (offset < length) {
    var codepoint = codePointAt(input, offset)
    offset += charCount(codepoint)
    if (codepoint == escapeCodePoint) {
      if (offset >= length) {
        // concatToString() without a range keeps both halves of a supplementary escape character
        throw IllegalArgumentException(
            "Escape character '" +
                toChars(escapeCodePoint).concatToString() +
                "' can't be the last character in a string.")
      }
      val following = codePointAt(input, offset)
      // Advance by the width of the code point that was actually read from the input. Using the
      // width of the mapped code point would mis-step whenever the two differ in UTF-16 length.
      offset += charCount(following)
      val idx = indexOf(escapedByCodePoints, following)
      codepoint = if (idx != -1) escapedCodePoints[idx] else following
    }
    // either an ordinary code point, or the resolved target of an escape sequence
    builder.append(toChars(codepoint))
  }
  return builder.toString()
}
21165

22166
actual companion object {
23-
actual fun selfEscape(escapePolicy: String): PerCharacterEscaper = TODO()
24-
actual fun specifiedEscape(escapePolicy: String): PerCharacterEscaper = TODO()
167+
/** Returns the index of the first element of [arr] equal to [target], or -1 if there is none. */
private fun indexOf(arr: IntArray, target: Int): Int = arr.indexOf(target)
175+
/**
 * Returns the code point of [value] at UTF-16 index [offset] by delegating to JavaScript's
 * `String.prototype.codePointAt`.
 */
private fun codePointAt(value: String, offset: Int): Int {
  // The parameter names must stay `value` and `offset`: the inline JavaScript refers to them.
  return js("value.codePointAt(offset)")
}
179+
/**
 * Splits [value] into its Unicode code points.
 *
 * @return one entry per code point, in order of appearance
 */
private fun codePoints(value: String): IntArray {
  val result = mutableListOf<Int>()
  var offset = 0
  while (offset < value.length) {
    val codepoint = codePointAt(value, offset)
    result.add(codepoint)
    // A code point at or above 0x10000 occupies two UTF-16 units. Stepping by one would re-read
    // its low surrogate as a bogus extra code point.
    offset += if (codepoint >= 0x10000) 2 else 1
  }
  return result.toIntArray()
}
190+
191+
/**
 * Creates an escaper in which every code point of [escapePolicy] is escaped by prefixing it with
 * the escape code point, which is the first code point of [escapePolicy].
 *
 * If your escape policy is "'123", it means this:
 * ```
 * abc->abc
 * 123->'1'2'3
 * I won't->I won''t
 * ```
 *
 * @throws IllegalArgumentException if [escapePolicy] is empty
 */
actual fun selfEscape(escapePolicy: String): PerCharacterEscaper {
  val escapedCodePoints = codePoints(escapePolicy)
  // Without this check an empty policy would read index 0 of an empty array.
  require(escapedCodePoints.isNotEmpty()) { "Escape policy must not be empty." }
  val escapeCodePoint = escapedCodePoints[0]
  // Each code point is "escaped by" itself, so the same array serves both roles.
  return PerCharacterEscaper(escapeCodePoint, escapedCodePoints, escapedCodePoints)
}
204+
205+
/**
 * Creates an escaper from a policy made of (escaped, escaped-by) code point pairs. The first code
 * point of [escapePolicy] is the escape code point.
 *
 * If your escape policy is "'a1b2c3d", it means this:
 * ```
 * abc->abc
 * 123->'b'c'd
 * I won't->I won'at
 * ```
 *
 * @throws IllegalArgumentException if [escapePolicy] is empty or has an odd number of code points
 */
actual fun specifiedEscape(escapePolicy: String): PerCharacterEscaper {
  val codePoints = codePoints(escapePolicy)
  // An empty policy would pass the even-length check and then read index 0 of an empty array.
  require(codePoints.isNotEmpty()) { "Escape policy must not be empty." }
  require(codePoints.size % 2 == 0) {
    "Escape policy must consist of (escaped, escaped-by) pairs, " +
        "but it has an odd number of code points: ${codePoints.size}"
  }
  val pairCount = codePoints.size / 2
  // even positions hold the code points to escape, odd positions their replacements
  val escapedCodePoints = IntArray(pairCount) { codePoints[2 * it] }
  val escapedByCodePoints = IntArray(pairCount) { codePoints[2 * it + 1] }
  return PerCharacterEscaper(codePoints[0], escapedCodePoints, escapedByCodePoints)
}
25225
}
26226
}

0 commit comments

Comments
 (0)