Skip to content

Commit ac9c3ea

Browse files
authored
Optimize quote removal (#62)
1 parent edaae10 commit ac9c3ea

File tree

3 files changed

+22
-63
lines changed

3 files changed

+22
-63
lines changed

sqllexer.go

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ type Token struct {
4343
Value string
4444
isTableIndicator bool // true if the token is a table indicator
4545
hasDigits bool
46-
quotes []int // private - only used by trimQuotes
46+
hasQuotes bool // private - only used by trimQuotes
4747
lastValueToken LastValueToken // private - internal state
4848
}
4949

@@ -88,9 +88,9 @@ type Lexer struct {
8888
start int // the start position of the current token
8989
config *LexerConfig
9090
token *Token
91-
hasDigits bool // true if the token has digits
92-
quotes []int // Indexes of quotes in the token
93-
isTableIndicator bool // true if the token is a table indicator
91+
hasQuotes bool // true if the token contains any quotes
92+
hasDigits bool // true if the token has digits
93+
isTableIndicator bool // true if the token is a table indicator
9494
}
9595

9696
func New(input string, opts ...lexerOption) *Lexer {
@@ -414,26 +414,23 @@ func (s *Lexer) scanDoubleQuotedIdentifier(delimiter rune) *Token {
414414
}
415415

416416
s.start = s.cursor
417-
offset := s.start // offset is used to calculate the indexes of quotes in the token value
418-
s.quotes = append(s.quotes, s.cursor-offset) // store the opening quote position
419-
ch := s.next() // consume the opening quote
417+
s.hasQuotes = true
418+
ch := s.next() // consume the opening quote
420419
for {
421420
// encountered the closing quote
422421
// BUT if it's followed by .", then we should keep going
423422
// e.g. postgres "foo"."bar"
424423
// e.g. sqlserver [foo].[bar]
425424
if ch == closingDelimiter {
426-
s.quotes = append(s.quotes, s.cursor-offset)
427425
specialCase := []rune{closingDelimiter, '.', delimiter}
428426
if s.matchAt([]rune(specialCase)) {
429-
s.quotes = append(s.quotes, s.cursor+2-offset)
430427
ch = s.nextBy(3) // consume the "."
431428
continue
432429
}
433430
break
434431
}
435432
if isEOF(ch) {
436-
s.quotes = nil // if we hit EOF, we clear the quotes
433+
s.hasQuotes = false // if we hit EOF before the closing quote, clear the quotes flag
437434
return s.emit(ERROR)
438435
}
439436
s.hasDigits = s.hasDigits || isDigit(ch)
@@ -629,16 +626,10 @@ func (s *Lexer) emit(t TokenType) *Token {
629626
}
630627

631628
tok.hasDigits = s.hasDigits
632-
633-
if len(s.quotes) > 0 {
634-
tok.quotes = s.quotes
635-
} else {
636-
tok.quotes = nil
637-
}
629+
tok.hasQuotes = s.hasQuotes
638630

639631
// Reset lexer state
640632
s.start = s.cursor
641-
s.quotes = nil
642633
s.isTableIndicator = false
643634
s.hasDigits = false
644635

sqllexer_test.go

Lines changed: 7 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,35 +1151,29 @@ func TestLexerIdentifierWithQuotes(t *testing.T) {
11511151
tests := []struct {
11521152
input string
11531153
expectedTokens []TokenSpec
1154-
expectedQuotes [][]int
1154+
expectedQuotes bool
11551155
lexerOpts []lexerOption
11561156
}{
11571157
{
11581158
input: `"abc"`,
11591159
expectedTokens: []TokenSpec{
11601160
{QUOTED_IDENT, `"abc"`},
11611161
},
1162-
expectedQuotes: [][]int{
1163-
{0, 4},
1164-
},
1162+
expectedQuotes: true,
11651163
},
11661164
{
11671165
input: `"abc"."def"`,
11681166
expectedTokens: []TokenSpec{
11691167
{QUOTED_IDENT, `"abc"."def"`},
11701168
},
1171-
expectedQuotes: [][]int{
1172-
{0, 4, 6, 10},
1173-
},
1169+
expectedQuotes: true,
11741170
},
11751171
{
11761172
input: `"fóo"."bar"`,
11771173
expectedTokens: []TokenSpec{
11781174
{QUOTED_IDENT, `"fóo"."bar"`},
11791175
},
1180-
expectedQuotes: [][]int{
1181-
{0, 5, 7, 11},
1182-
},
1176+
expectedQuotes: true,
11831177
},
11841178
{
11851179
input: `SELECT "fóo"."`,
@@ -1188,7 +1182,7 @@ func TestLexerIdentifierWithQuotes(t *testing.T) {
11881182
{SPACE, " "},
11891183
{ERROR, `"fóo"."`},
11901184
},
1191-
expectedQuotes: [][]int{},
1185+
expectedQuotes: false,
11921186
},
11931187
}
11941188

@@ -1219,23 +1213,8 @@ func TestLexerIdentifierWithQuotes(t *testing.T) {
12191213
t.Errorf("token[%d] got value %q, want %q", i, got.Value, want.Value)
12201214
}
12211215

1222-
if i < len(tt.expectedQuotes) {
1223-
quotes := tt.expectedQuotes[i]
1224-
if quotes == nil {
1225-
if got.quotes != nil {
1226-
t.Errorf("token[%d] got quotes, want nil", i)
1227-
}
1228-
} else {
1229-
if len(got.quotes) != len(quotes) {
1230-
t.Errorf("token[%d] got %d quotes, want %d", i, len(got.quotes), len(quotes))
1231-
} else {
1232-
for j, quote := range quotes {
1233-
if got.quotes[j] != quote {
1234-
t.Errorf("token[%d] got quote[%d] %d, want %d", i, j, got.quotes[j], quote)
1235-
}
1236-
}
1237-
}
1238-
}
1216+
if got.hasQuotes != tt.expectedQuotes {
1217+
t.Errorf("token[%d] got %v quotes, want %v", i, got.hasQuotes, tt.expectedQuotes)
12391218
}
12401219

12411220
i++

sqllexer_utils.go

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -244,27 +244,16 @@ func replaceDigits(token *Token, placeholder string) string {
244244

245245
func trimQuotes(token *Token) string {
246246
var trimmedToken strings.Builder
247-
trimmedToken.Grow(len(token.Value) - len(token.quotes))
247+
trimmedToken.Grow(len(token.Value))
248248

249-
start := 0
250-
251-
// loop over token.quotes indexes, write start:token.quotes[i] to builder
252-
// write start:token.End to builder
253-
for i := 0; i < len(token.quotes); i++ {
254-
if token.quotes[i] > len(token.Value) {
255-
break
256-
}
257-
if token.quotes[i]-start >= 1 {
258-
trimmedToken.WriteString(token.Value[start:token.quotes[i]])
249+
for _, r := range token.Value {
250+
if isDoubleQuote(r) || r == '[' || r == ']' || r == '`' {
251+
// quote and bracket delimiters are dropped: nothing is written for them
252+
} else {
253+
trimmedToken.WriteRune(r)
259254
}
260-
start = token.quotes[i] + 1
261-
}
262-
263-
// write start:token.End to builder
264-
if start < len(token.Value) {
265-
trimmedToken.WriteString(token.Value[start:len(token.Value)])
266255
}
267-
token.quotes = nil
256+
token.hasQuotes = false
268257
return trimmedToken.String()
269258
}
270259

0 commit comments

Comments
 (0)