Skip to content
This repository was archived by the owner on Mar 8, 2020. It is now read-only.

Handle quoting U+0000 properly #64

Merged
merged 20 commits into from
Mar 13, 2019
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
.sdk
.config
build
node_modules
.vscode
3 changes: 1 addition & 2 deletions driver/normalizer/normalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ var Preprocessors = []Mapping{
Map(
Part("_", Obj{
uast.KeyType: String("StringLiteral"),
"value": AnyNode(nil),
"value": Any(),
"extra": Fields{
{Name: "raw", Op: Var("raw")},
{Name: "rawValue", Op: Any()},
Expand Down Expand Up @@ -360,7 +360,6 @@ func (op singleQuote) Check(st *State, n nodes.Node) (bool, error) {
if !strings.HasPrefix(s, `'`) || !strings.HasSuffix(s, `'`) {
return false, nil
}
s = s[1 : len(s)-1]
s, err := unquoteSingle(s)
if err != nil {
return false, err
Expand Down
70 changes: 65 additions & 5 deletions driver/normalizer/strconv.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,44 @@
package normalizer

import (
"fmt"
"strconv"
"strings"
"unicode/utf8"
)

// The functions below are adapted from strconv.Unquote and strconv.Quote.
// The originals cannot escape/unescape single-quoted values that contain
// more than one character, because in Go single quotes denote a rune literal:
// https://github.com/golang/go/blob/65a54aef5bedbf8035a465d12ad54783fb81e957/src/strconv/quote.go#L360

// unquoteSingle is the same as strconv.Unquote, but uses ' as a quote.
func unquoteSingle(s string) (string, error) {
n := len(s)
if n < 2 {
return "", fmt.Errorf("%+q is not a quoted string", s)
}
quote := s[0]
if quote != s[n-1] {
return "", fmt.Errorf("%+q does not begin and end with a quote", s)
}
s = s[1 : len(s)-1]

if contains(s, '\n') {
return "", fmt.Errorf("%+q contains EOL", s)
}

// Is it trivial? Avoid allocation.
if !contains(s, '\\') && !contains(s, quote) {
r, size := utf8.DecodeRuneInString(s)
if size == len(s) && (r != utf8.RuneError || size != 1) {
return s, nil
}
}
s = replaceEscapedMaybe(s, "\\0", "\x00") // treatment of special JS escape seq

var runeTmp [utf8.UTFMax]byte
buf := make([]byte, 0, 3*len(s)/2)
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
for len(s) > 0 {
c, multibyte, ss, err := strconv.UnquoteChar(s, '\'')
if err != nil {
Expand All @@ -29,15 +55,49 @@ func unquoteSingle(s string) (string, error) {
return string(buf), nil
}

// contains reports whether the byte c occurs anywhere in s.
// The comparison is byte-wise, so c may also be a single byte of a
// multi-byte UTF-8 sequence.
func contains(s string, c byte) bool {
	idx := strings.IndexByte(s, c)
	return idx != -1
}

// replaceEscapedMaybe returns a copy of s in which every occurrence of old
// that is not immediately followed by an ASCII digit is replaced by repl.
// Occurrences followed by a digit are kept verbatim, so that longer numeric
// escapes are left for the regular unquoting step.
//
// It is not part of the stdlib; it handles the special case of the JS "\0"
// escape sequence. Details:
//   - The byte after a match is only inspected, never consumed, so adjacent
//     occurrences (e.g. `\0\0`) are all replaced.
//   - When old starts with a backslash, a match preceded by an odd number of
//     backslashes is skipped: its leading backslash is itself escaped
//     (`\\0` is a literal backslash followed by '0').
//   - Bytes outside matches are copied unchanged, including invalid UTF-8.
//   - An empty old returns s unchanged.
func replaceEscapedMaybe(s, old, repl string) string {
	if old == "" || !strings.Contains(s, old) {
		return s // nothing to do; avoid allocating
	}
	// Only a pattern that begins with a backslash can be neutralised by a
	// preceding backslash.
	escapable := old[0] == '\\'

	var out strings.Builder
	out.Grow(len(s))
	copied := 0 // s[:copied] has already been written to out
	from := 0   // offset in s where the next search starts
	for from < len(s) {
		rel := strings.Index(s[from:], old)
		if rel < 0 {
			break
		}
		pos := from + rel

		if escapable {
			// Count the backslashes directly in front of the match.
			slashes := 0
			for i := pos - 1; i >= 0 && s[i] == '\\'; i-- {
				slashes++
			}
			if slashes%2 == 1 {
				// The match's backslash is the second half of `\\`.
				from = pos + 1
				continue
			}
		}

		end := pos + len(old)
		out.WriteString(s[copied:pos])
		// Digits are single-byte in UTF-8, so a byte comparison is enough.
		if end < len(s) && s[end] >= '0' && s[end] <= '9' {
			out.WriteString(old)
		} else {
			out.WriteString(repl)
		}
		copied, from = end, end
	}
	out.WriteString(s[copied:])
	return out.String()
}

const lowerhex = "0123456789abcdef"

// quoteSingle is the same as strconv.Quote, but uses ' as a quote.
// quoteSingle(unquoteSingle(s)) may not result in exact same bytes as s,
// because quoteSingle always uses the hex escape sequence format.
func quoteSingle(s string) string {
const (
quote = '\''
)

const quote = '\''
buf := make([]byte, 0, 3*len(s)/2)

buf = append(buf, quote)
for width := 0; len(s) > 0; s = s[width:] {
r := rune(s[0])
Expand Down
78 changes: 78 additions & 0 deletions driver/normalizer/strconv_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package normalizer

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// testCasesUnquote lists single-quoted inputs together with the string they
// are expected to unquote to. It is shared by the unquote tests and by the
// replaceEscapedMaybe benchmark.
var testCasesUnquote = []struct {
quoted string
unquoted string
// If this is non-empty, quoting the unquoted string back does not
// reproduce the original input byte-for-byte.
// This happens when the information about the original escape sequence
// (octal, hex) is lost. quoteSingle always emits the hex format, so that
// form is used as the canonical one.
canonicalQuoted string
}{
{`'a'`, "a", ""},
{`'\x00'`, "\u0000", ""},
{`'\0'`, "\u0000", "'\\x00'"},
{`'\0something\0'`, "\u0000something\u0000", "'\\x00something\\x00'"},
{`'\0something\0else'`, "\u0000something\u0000else", "'\\x00something\\x00else'"},
{`'\u0000123\0s'`, "\u0000123\u0000s", "'\\x00123\\x00s'"},
}

// TestUnquoteSingle checks that unquoteSingle decodes every quoted input in
// testCasesUnquote into its expected unquoted string.
func TestUnquoteSingle(t *testing.T) {
	for _, test := range testCasesUnquote {
		// Name each subtest after its input so a failure identifies the case.
		t.Run(test.quoted, func(t *testing.T) {
			s, err := unquoteSingle(test.quoted)
			require.NoError(t, err)
			require.Equal(t, test.unquoted, s)
		})
	}
}

// TestUnquoteSingleAndQuoteBack unquotes every input in testCasesUnquote and
// quotes the result back with quoteSingle. The round trip must reproduce the
// original input, or canonicalQuoted when the case defines one.
func TestUnquoteSingleAndQuoteBack(t *testing.T) {
	for _, test := range testCasesUnquote {
		// Name each subtest after its input so a failure identifies the case.
		t.Run(test.quoted, func(t *testing.T) {
			u, err := unquoteSingle(test.quoted)
			require.NoError(t, err)

			// The original escape form may be lost; fall back to the
			// canonical form when the case provides one.
			expected := test.quoted
			if test.canonicalQuoted != "" {
				expected = test.canonicalQuoted
			}
			assertEquals(t, expected, quoteSingle(u))
		})
	}
}

// assertEquals compares quoted (the expected value) with actual. On a
// mismatch it logs a rune-by-rune dump of both strings via printDebug and
// stops the test immediately.
func assertEquals(t *testing.T, quoted, actual string) {
	if assert.Equal(t, quoted, actual) {
		return
	}
	printDebug(t, quoted, actual)
	t.FailNow()
}

// printDebug logs the byte length of the expected (quoted) and actual
// strings, each followed by one log line per rune showing its code point.
func printDebug(t *testing.T, quoted, actual string) {
	// logRunes writes one log entry per rune of s.
	logRunes := func(s string) {
		for _, r := range s {
			t.Logf("%x - %#U", r, r)
		}
	}

	t.Logf("\texpected: len=%d", len(quoted))
	logRunes(quoted)
	t.Logf("\n\tactual: len=%d", len(actual))
	logRunes(actual)
}

// BenchmarkReplacingNullEscape measures replaceEscapedMaybe rewriting the
// "\0" escape on each quoted input from testCasesUnquote, running b.N
// iterations per input and reporting allocations.
func BenchmarkReplacingNullEscape(b *testing.B) {
	b.ReportAllocs()
	for i := range testCasesUnquote {
		input := testCasesUnquote[i].quoted
		for iter := 0; iter < b.N; iter++ {
			replaceEscapedMaybe(input, "\\0", "\x00")
		}
	}
}
6 changes: 6 additions & 0 deletions fixtures/string-literal.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
a = "a"
u8 = "ё"
bc = "b\nc"

var escSlash = '\0SLASH'+Math.random()+'\0';
var escOpen = '\0OPEN'+Math.random()+'\0';
var escClose = '\0CLOSE'+Math.random()+'\0';
var escComma = '\0COMMA'+Math.random()+'\0';
var escPeriod = '\0PERIOD'+Math.random()+'\0';
Loading