Skip to content
This repository was archived by the owner on Mar 8, 2020. It is now read-only.

Handle quoting U+0000 properly #64

Merged
merged 20 commits into from
Mar 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
.sdk
.config
build
node_modules
.vscode
3 changes: 1 addition & 2 deletions driver/normalizer/normalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ var Preprocessors = []Mapping{
Map(
Part("_", Obj{
uast.KeyType: String("StringLiteral"),
"value": AnyNode(nil),
"value": Any(),
"extra": Fields{
{Name: "raw", Op: Var("raw")},
{Name: "rawValue", Op: Any()},
Expand Down Expand Up @@ -360,7 +360,6 @@ func (op singleQuote) Check(st *State, n nodes.Node) (bool, error) {
if !strings.HasPrefix(s, `'`) || !strings.HasSuffix(s, `'`) {
return false, nil
}
s = s[1 : len(s)-1]
s, err := unquoteSingle(s)
if err != nil {
return false, err
Expand Down
70 changes: 65 additions & 5 deletions driver/normalizer/strconv.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,44 @@
package normalizer

import (
"fmt"
"strconv"
"strings"
"unicode/utf8"
)

// The functions below are copied from strconv.Unquote and strconv.Quote.
// The original functions are unable to escape/unescape values containing
// multiple characters, since in Go single quotes represent a rune literal:
// https://github.com/golang/go/blob/65a54aef5bedbf8035a465d12ad54783fb81e957/src/strconv/quote.go#L360

// unquoteSingle is the same as strconv.Unquote, but uses ' as a quote.
func unquoteSingle(s string) (string, error) {
n := len(s)
if n < 2 {
return "", fmt.Errorf("%+q is not a quoted string", s)
}
quote := s[0]
if quote != s[n-1] {
return "", fmt.Errorf("%+q does not begin and end with a quote", s)
}
s = s[1 : len(s)-1]

if contains(s, '\n') {
return "", fmt.Errorf("%+q contains EOL", s)
}

// Is it trivial? Avoid allocation.
if !contains(s, '\\') && !contains(s, quote) {
r, size := utf8.DecodeRuneInString(s)
if size == len(s) && (r != utf8.RuneError || size != 1) {
return s, nil
}
}
s = replaceEscapedMaybe(s, "\\0", "\x00") // treatment of special JS escape seq

var runeTmp [utf8.UTFMax]byte
buf := make([]byte, 0, 3*len(s)/2)
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
for len(s) > 0 {
c, multibyte, ss, err := strconv.UnquoteChar(s, '\'')
if err != nil {
Expand All @@ -29,15 +55,49 @@ func unquoteSingle(s string) (string, error) {
return string(buf), nil
}

// contains reports whether the byte c occurs anywhere in s.
func contains(s string, c byte) bool {
	idx := strings.IndexByte(s, c)
	return idx != -1
}

// replaceEscapedMaybe returns a copy of s in which every occurrence of old that
// is not immediately followed by an ASCII digit ('0'-'9') is replaced by repl.
// Occurrences that are followed by a digit are kept verbatim, so the caller
// can interpret them later.
//
// It is not part of the stdlib; it handles the special case of the JS escape
// sequence (it is called with old = `\0` and repl = "\x00").
// A regexp replacement and a manual expansion were benchmarked against this
// index-based approach, which was found to be the fastest.
//
// The byte following a match is only inspected, never consumed. This way
// adjacent matches such as `\0\0` are each replaced, and bytes that are not
// valid UTF-8 are copied through unchanged.
//
// If old is empty, or does not occur in s, s is returned as is without
// allocating.
//
// NOTE(review): matching is purely textual. A match whose first character is
// itself escaped in the source language (e.g. `\\0`, an escaped backslash
// followed by the digit zero) is still treated as an occurrence of old.
func replaceEscapedMaybe(s, old, repl string) string {
	// An empty pattern would match at every position without making progress.
	if old == "" || !strings.Contains(s, old) {
		return s
	}

	var out strings.Builder
	out.Grow(len(s))
	for {
		pos := strings.Index(s, old)
		if pos < 0 {
			break
		}
		out.WriteString(s[:pos])
		s = s[pos+len(old):]

		// Peek at the next byte only: ASCII digits are single bytes and never
		// start a multi-byte UTF-8 sequence, so no rune decoding is needed.
		if s != "" && s[0] >= '0' && s[0] <= '9' {
			out.WriteString(old)
		} else {
			out.WriteString(repl)
		}
	}
	out.WriteString(s)
	return out.String()
}

// lowerhex holds the lowercase hexadecimal digits in ascending order, so that
// lowerhex[v] is the digit character for a value v in the range 0-15.
const lowerhex = "0123456789abcdef"

// quoteSingle is the same as strconv.Quote, but uses ' as a quote.
// quoteSingle(unquoteSingle(s)) may not result in exact same bytes as s,
// because quoteSingle always uses the hex escape sequence format.
func quoteSingle(s string) string {
const (
quote = '\''
)

const quote = '\''
buf := make([]byte, 0, 3*len(s)/2)

buf = append(buf, quote)
for width := 0; len(s) > 0; s = s[width:] {
r := rune(s[0])
Expand Down
77 changes: 77 additions & 0 deletions driver/normalizer/strconv_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package normalizer

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// testCasesUnquote lists single-quoted inputs together with the value they are
// expected to unquote to and the form expected after quoting that value back.
// It is shared by the tests and the benchmark in this file.
var testCasesUnquote = []struct {
quoted string
unquoted string
// In some cases unquoting and then re-quoting a quoted string does not produce a
// string that is bitwise identical to the original, even though they denote the same bytes.
// This can happen, e.g., if we switch between hex and octal encoding of a byte.
// Test cases where this happens set canonicalQuoted to the string that is expected
// to be decoded via Go's native rules to the byte sequence we want.
canonicalQuoted string
}{
{`'a'`, "a", `'a'`},
{`'\x00'`, "\u0000", `'\x00'`},
{`'\0'`, "\u0000", "'\\x00'"},
{`'\0something\0'`, "\u0000something\u0000", "'\\x00something\\x00'"},
{`'\0something\0else'`, "\u0000something\u0000else", "'\\x00something\\x00else'"},
{`'\u0000123\0s'`, "\u0000123\u0000s", "'\\x00123\\x00s'"},
}

// TestUnquoteSingle checks that unquoteSingle decodes every quoted input in
// testCasesUnquote to its expected unquoted value without returning an error.
func TestUnquoteSingle(t *testing.T) {
	for i := range testCasesUnquote {
		tc := testCasesUnquote[i]
		t.Run("", func(t *testing.T) {
			got, err := unquoteSingle(tc.quoted)
			require.NoError(t, err)
			require.Equal(t, tc.unquoted, got)
		})
	}
}

// TestUnquoteSingleAndQuoteBack unquotes every input in testCasesUnquote and
// checks that quoting the result back yields the expected canonical form.
func TestUnquoteSingleAndQuoteBack(t *testing.T) {
	for i := range testCasesUnquote {
		tc := testCasesUnquote[i]
		t.Run("", func(t *testing.T) {
			unquoted, err := unquoteSingle(tc.quoted)
			require.NoError(t, err)

			requoted := quoteSingle(unquoted)
			assertEquals(t, tc.canonicalQuoted, requoted)
		})
	}
}

// assertEquals compares the expected string quoted with actual. On mismatch it
// logs a rune-by-rune dump of both strings and stops the test immediately.
func assertEquals(t *testing.T, quoted, actual string) {
	if assert.Equal(t, quoted, actual) {
		return
	}
	printDebug(t, quoted, actual)
	t.FailNow()
}

// printDebug logs the byte length of the expected and the actual string, each
// followed by one log line per rune showing its code point in hex and in
// %#U form.
func printDebug(t *testing.T, quoted, actual string) {
	dumpRunes := func(s string) {
		for _, r := range s {
			t.Logf("%x - %#U", r, r)
		}
	}

	t.Logf("\texpected: len=%d", len(quoted))
	dumpRunes(quoted)
	t.Logf("\n\tactual: len=%d", len(actual))
	dumpRunes(actual)
}

// BenchmarkReplacingNullEscape measures replaceEscapedMaybe, including its
// allocations, on the quoted form of every entry in testCasesUnquote.
func BenchmarkReplacingNullEscape(b *testing.B) {
	for i := range testCasesUnquote {
		tc := testCasesUnquote[i]
		b.Run("", func(b *testing.B) {
			b.ReportAllocs()
			for iter := 0; iter < b.N; iter++ {
				replaceEscapedMaybe(tc.quoted, "\\0", "\x00")
			}
		})
	}
}
6 changes: 6 additions & 0 deletions fixtures/string-literal.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
a = "a"
u8 = "ё"
bc = "b\nc"

var escSlash = '\0SLASH'+Math.random()+'\0';
var escOpen = '\0OPEN'+Math.random()+'\0';
var escClose = '\0CLOSE'+Math.random()+'\0';
var escComma = '\0COMMA'+Math.random()+'\0';
var escPeriod = '\0PERIOD'+Math.random()+'\0';
Loading