Skip to content
This repository was archived by the owner on Mar 8, 2020. It is now read-only.

Commit a769379

Browse files
committed
sem: fix processing \0 escaped sequence in string literal
Signed-off-by: Alexander Bezzubov <[email protected]>
1 parent 9799217 commit a769379

File tree

2 files changed

+59
-7
lines changed

2 files changed

+59
-7
lines changed

driver/normalizer/strconv.go

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package normalizer
22

33
import (
4+
"fmt"
45
"strconv"
56
"unicode/utf8"
67
)
@@ -18,7 +19,7 @@ func unquoteSingle(s string) (string, error) {
1819
}
1920
quote := s[0]
2021
if quote != s[n-1] {
21-
return "", strconv.ErrSyntax
22+
return "", fmt.Errorf("string does not begin and end with a quote")
2223
}
2324
s = s[1 : len(s)-1]
2425

@@ -33,6 +34,7 @@ func unquoteSingle(s string) (string, error) {
3334
return s, nil
3435
}
3536
}
37+
s = replaceEscapedMaybe(s, '0', '\x00')
3638

3739
var runeTmp [utf8.UTFMax]byte
3840
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
@@ -62,6 +64,42 @@ func contains(s string, c byte) bool {
6264
return false
6365
}
6466

67+
// replaceEscapedMaybe returns a copy of s in which every two-rune sequence
// `\old` (a backslash followed by old) is replaced by new, unless the
// sequence is immediately followed by an ASCII decimal digit ('0'-'9').
// A sequence at the very end of s is replaced as well.
//
// If nothing is replaced, s itself is returned and no allocation is made.
//
// NOTE: a backslash that is itself escaped is not treated specially, so in
// `\\0` the second backslash still starts a match.
func replaceEscapedMaybe(s string, old, new rune) string {
	var runeTmp [utf8.UTFMax]byte
	n := utf8.EncodeRune(runeTmp[:], new)

	lastCp := 0     // offset in s just past the most recent replaced sequence
	var buf []byte  // allocated lazily on the first replacement
	for i, w := 0, 0; i < len(s); i += w {
		r1, w1 := utf8.DecodeRuneInString(s[i:])
		w = w1
		if r1 != '\\' {
			continue
		}
		r2, w2 := utf8.DecodeRuneInString(s[i+w1:])
		if r2 != old {
			continue
		}
		// At end of input DecodeRuneInString yields utf8.RuneError, which is
		// not a digit, so a trailing `\old` is replaced too.
		r3, _ := utf8.DecodeRuneInString(s[i+w1+w2:])
		if '0' <= r3 && r3 <= '9' {
			// A digit follows (e.g. `\01`): leave the sequence untouched.
			continue
		}

		w += w2 // consume old together with the backslash
		if buf == nil {
			buf = make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
		}
		buf = append(buf, s[lastCp:i]...)
		buf = append(buf, runeTmp[:n]...)
		lastCp = i + w
	}
	if buf == nil {
		return s
	}

	// Copy whatever remains after the last replaced sequence.
	buf = append(buf, s[lastCp:]...)
	return string(buf)
}
102+
65103
const lowerhex = "0123456789abcdef"
66104

67105
// quoteSingle is the same as strconv.Quote, but uses ' as a quote.

driver/normalizer/strconv_test.go

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package normalizer
22

33
import (
4+
"regexp"
45
"testing"
56

67
"github.com/stretchr/testify/assert"
@@ -40,12 +41,25 @@ func TestUnquoteSingleAndQuoteBack(t *testing.T) {
4041
}
4142
}
4243

43-
func TestUnquoteSingle_StringAndQuoteBack(t *testing.T) {
44-
const o = "'rand'"
44+
func BenchmarkReplacingNullEscape_Iterative(b *testing.B) {
45+
b.ReportAllocs()
46+
s := testCasesUnquote[3].in
47+
for n := 0; n < b.N; n++ {
48+
replaceEscapedMaybe(s, '0', '\x00')
49+
}
50+
}
51+
52+
func BenchmarkReplacingNullEscape_Regexp(b *testing.B) {
53+
b.ReportAllocs()
54+
s := testCasesUnquote[3].in
55+
for n := 0; n < b.N; n++ {
56+
replaceEscapedMaybeRegexp(s)
57+
}
58+
}
4559

46-
s, err := unquoteSingle(o)
47-
require.NoError(t, err)
48-
q := quoteSingle(s)
60+
var re = regexp.MustCompile(`\\0([^0-9]|$)`)
4961

50-
require.Equal(t, o, q)
62+
// replaceEscapedMaybeRegexp is very simple, but slower alternative to normalizer.replaceEscapedMaybe
63+
func replaceEscapedMaybeRegexp(s string) string {
64+
return re.ReplaceAllString(s, "\x00$1")
5165
}

0 commit comments

Comments
 (0)