Skip to content
This repository was archived by the owner on Mar 8, 2020. It is now read-only.

Commit 4eb5b4c

Browse files
authored
Merge pull request #64 from bzz/fix-62
Handle quoting U+0000 properly
2 parents 6263505 + e7a074f commit 4eb5b4c

8 files changed

+2928
-19
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,5 @@
44
.sdk
55
.config
66
build
7+
node_modules
8+
.vscode

driver/normalizer/normalizer.go

+1-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ var Preprocessors = []Mapping{
3636
Map(
3737
Part("_", Obj{
3838
uast.KeyType: String("StringLiteral"),
39-
"value": AnyNode(nil),
39+
"value": Any(),
4040
"extra": Fields{
4141
{Name: "raw", Op: Var("raw")},
4242
{Name: "rawValue", Op: Any()},
@@ -360,7 +360,6 @@ func (op singleQuote) Check(st *State, n nodes.Node) (bool, error) {
360360
if !strings.HasPrefix(s, `'`) || !strings.HasSuffix(s, `'`) {
361361
return false, nil
362362
}
363-
s = s[1 : len(s)-1]
364363
s, err := unquoteSingle(s)
365364
if err != nil {
366365
return false, err

driver/normalizer/strconv.go

+65-5
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,44 @@
11
package normalizer
22

33
import (
4+
"fmt"
45
"strconv"
6+
"strings"
57
"unicode/utf8"
68
)
79

810
// Functions below are copied from strconv.Unquote and strconv.Quote.
911
// Original functions are unable to escape/unescape values containing
1012
// multiple characters since in Go single quotes represent a rune literal
13+
// https://github.com/golang/go/blob/65a54aef5bedbf8035a465d12ad54783fb81e957/src/strconv/quote.go#L360
1114

1215
// unquoteSingle is the same as strconv.Unquote, but uses ' as a quote.
1316
func unquoteSingle(s string) (string, error) {
17+
n := len(s)
18+
if n < 2 {
19+
return "", fmt.Errorf("%+q is not a quoted string", s)
20+
}
21+
quote := s[0]
22+
if quote != s[n-1] {
23+
return "", fmt.Errorf("%+q does not begin and end with a quote", s)
24+
}
25+
s = s[1 : len(s)-1]
26+
27+
if contains(s, '\n') {
28+
return "", fmt.Errorf("%+q contains EOL", s)
29+
}
30+
31+
// Is it trivial? Avoid allocation.
32+
if !contains(s, '\\') && !contains(s, quote) {
33+
r, size := utf8.DecodeRuneInString(s)
34+
if size == len(s) && (r != utf8.RuneError || size != 1) {
35+
return s, nil
36+
}
37+
}
38+
s = replaceEscapedMaybe(s, "\\0", "\x00") // treatment of special JS escape seq
39+
1440
var runeTmp [utf8.UTFMax]byte
15-
buf := make([]byte, 0, 3*len(s)/2)
41+
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
1642
for len(s) > 0 {
1743
c, multibyte, ss, err := strconv.UnquoteChar(s, '\'')
1844
if err != nil {
@@ -29,15 +55,49 @@ func unquoteSingle(s string) (string, error) {
2955
return string(buf), nil
3056
}
3157

58+
// contains reports whether the string contains the byte c.
59+
// contains reports whether the byte c occurs anywhere in s.
func contains(s string, c byte) bool {
	idx := strings.IndexByte(s, c)
	return idx != -1
}
62+
63+
// replaceEscapedMaybe returns a copy of s in which every occurrence of old that
// is not immediately followed by an ASCII digit is replaced by repl.
//
// It is not part of the stdlib; it exists to handle the JS escape sequence \0
// (NUL), which Go's strconv.UnquoteChar does not understand unless it is
// written as a full three-digit octal escape.
//
// Details that matter for correctness:
//   - The byte after a match is only inspected, never consumed, so adjacent
//     occurrences (e.g. `\0\0`) are all replaced and bytes that are not valid
//     UTF-8 pass through unchanged.
//   - When old starts with a backslash, a match preceded by an odd number of
//     backslashes is left untouched: its leading backslash is the second half
//     of an escaped backslash (`\\0` is a backslash followed by '0'), not the
//     start of an escape sequence.
//   - An empty old never matches; s is returned unchanged.
func replaceEscapedMaybe(s, old, repl string) string {
	// Fast path: nothing to replace, so avoid allocating a builder.
	if old == "" || !strings.Contains(s, old) {
		return s
	}
	startsWithBackslash := old[0] == '\\'

	var out strings.Builder
	out.Grow(len(s))
	for {
		pos := strings.Index(s, old)
		if pos < 0 {
			break
		}
		out.WriteString(s[:pos])
		rest := s[pos+len(old):]

		// Count the run of backslashes directly before the match to find out
		// whether the match's own leading backslash is already escaped.
		preceding := 0
		if startsWithBackslash {
			for i := pos - 1; i >= 0 && s[i] == '\\'; i-- {
				preceding++
			}
		}

		switch {
		case preceding%2 == 1:
			// Escaped backslash followed by the tail of old: keep verbatim.
			out.WriteString(old)
		case rest != "" && rest[0] >= '0' && rest[0] <= '9':
			// Followed by a digit: part of a longer numeric escape, keep it.
			out.WriteString(old)
		default:
			out.WriteString(repl)
		}
		s = rest
	}
	out.WriteString(s)
	return out.String()
}
91+
3292
const lowerhex = "0123456789abcdef"
3393

3494
// quoteSingle is the same as strconv.Quote, but uses ' as a quote.
95+
// quoteSingle(unquoteSingle(s)) may not result in exact same bytes as s,
96+
// because quoteSingle always uses the hex escape sequence format.
3597
func quoteSingle(s string) string {
36-
const (
37-
quote = '\''
38-
)
39-
98+
const quote = '\''
4099
buf := make([]byte, 0, 3*len(s)/2)
100+
41101
buf = append(buf, quote)
42102
for width := 0; len(s) > 0; s = s[width:] {
43103
r := rune(s[0])

driver/normalizer/strconv_test.go

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package normalizer
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
"github.com/stretchr/testify/require"
8+
)
9+
10+
var testCasesUnquote = []struct {
11+
quoted string
12+
unquoted string
13+
// In some cases unquoting and then re-quoting a quoted string does not produce a
14+
// string that is bitwise identical to the original, even though they denote the same bytes.
15+
// This can happen, e.g., if we switch between hex and octal encoding of a byte.
16+
// Test cases where this happens set canonicalQuoted to the string that is expected
17+
// to be decoded via Go's native rules to the byte sequence we want.
18+
canonicalQuoted string
19+
}{
20+
{`'a'`, "a", `'a'`},
21+
{`'\x00'`, "\u0000", `'\x00'`},
22+
{`'\0'`, "\u0000", "'\\x00'"},
23+
{`'\0something\0'`, "\u0000something\u0000", "'\\x00something\\x00'"},
24+
{`'\0something\0else'`, "\u0000something\u0000else", "'\\x00something\\x00else'"},
25+
{`'\u0000123\0s'`, "\u0000123\u0000s", "'\\x00123\\x00s'"},
26+
}
27+
28+
func TestUnquoteSingle(t *testing.T) {
29+
for _, test := range testCasesUnquote {
30+
t.Run("", func(t *testing.T) {
31+
s, err := unquoteSingle(test.quoted)
32+
require.NoError(t, err)
33+
require.Equal(t, test.unquoted, s)
34+
})
35+
}
36+
}
37+
38+
func TestUnquoteSingleAndQuoteBack(t *testing.T) {
39+
for _, test := range testCasesUnquote {
40+
t.Run("", func(t *testing.T) {
41+
u, err := unquoteSingle(test.quoted)
42+
require.NoError(t, err)
43+
44+
q := quoteSingle(u)
45+
assertEquals(t, test.canonicalQuoted, q)
46+
})
47+
}
48+
}
49+
50+
func assertEquals(t *testing.T, quoted, actual string) {
51+
if !assert.Equal(t, quoted, actual) {
52+
printDebug(t, quoted, actual)
53+
t.FailNow()
54+
}
55+
}
56+
57+
// printDebug logs the byte length of the expected (quoted) and actual strings,
// each followed by one log line per rune showing its hex value and its %#U
// representation.
func printDebug(t *testing.T, quoted, actual string) {
	logRunes := func(s string) {
		for _, r := range s {
			t.Logf("%x - %#U", r, r)
		}
	}

	t.Logf("\texpected: len=%d", len(quoted))
	logRunes(quoted)

	t.Logf("\n\tactual: len=%d", len(actual))
	logRunes(actual)
}
67+
68+
func BenchmarkReplacingNullEscape(b *testing.B) {
69+
for _, test := range testCasesUnquote {
70+
b.Run("", func(b *testing.B) {
71+
b.ReportAllocs()
72+
for n := 0; n < b.N; n++ {
73+
replaceEscapedMaybe(test.quoted, "\\0", "\x00")
74+
}
75+
})
76+
}
77+
}

fixtures/string-literal.js

+6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
11
a = "a"
22
u8 = "ё"
33
bc = "b\nc"
4+
5+
var escSlash = '\0SLASH'+Math.random()+'\0';
6+
var escOpen = '\0OPEN'+Math.random()+'\0';
7+
var escClose = '\0CLOSE'+Math.random()+'\0';
8+
var escComma = '\0COMMA'+Math.random()+'\0';
9+
var escPeriod = '\0PERIOD'+Math.random()+'\0';

0 commit comments

Comments
 (0)