Skip to content

Commit 402376f

Browse files
authored
Merge branch 'master' into fmt
2 parents 8852f25 + d4b0fe7 commit 402376f

File tree

7 files changed

+386
-85
lines changed

7 files changed

+386
-85
lines changed

src/m3ninx/index/regexp.go

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
package index
2222

2323
import (
24-
"context"
2524
"fmt"
2625
re "regexp"
2726
"regexp/syntax"
@@ -37,7 +36,6 @@ var (
3736
// dotStarCompiledRegex is a CompiledRegex that matches any input.
3837
// NB: It can be accessed through DotStartCompiledRegex().
3938
dotStarCompiledRegex CompiledRegex
40-
cacheContext = context.Background()
4139
)
4240

4341
func init() {
@@ -146,18 +144,15 @@ func CompileRegex(r []byte) (CompiledRegex, error) {
146144
// Issue (a): Vellum does not allow regexps which use the characters '^' or '$'.
147145
// To address this issue, we strip these characters from appropriate locations in the parsed syntax.Regexp
148146
// for Vellum's RE.
149-
vellumRe, err := ensureRegexpUnanchored(reAst)
147+
vellumRe, err := EnsureRegexpUnanchored(reAst)
150148
if err != nil {
151149
return CompiledRegex{}, fmt.Errorf("unable to create FST re: %v", err)
152150
}
153151

154152
// Issue (b): Vellum treats every regular expression as anchored, whereas the map-backed segment does not.
155153
// To address this issue, we ensure that every incoming regular expression is modified to be anchored
156154
// when querying the map-backed segment, and isn't anchored when querying Vellum's RE.
157-
simpleRe, err := ensureRegexpAnchored(vellumRe)
158-
if err != nil {
159-
return CompiledRegex{}, fmt.Errorf("unable to create map re: %v", err)
160-
}
155+
simpleRe := EnsureRegexpAnchored(vellumRe)
161156

162157
simpleRE, err := re.Compile(simpleRe.String())
163158
if err != nil {
@@ -191,10 +186,10 @@ func parseRegexp(re string) (*syntax.Regexp, error) {
191186
return syntax.Parse(re, syntax.Perl)
192187
}
193188

194-
// ensureRegexpAnchored adds '^' and '$' characters to appropriate locations in the parsed syntax.Regexp,
195-
// to ensure every input regular expression is converted to it's equivalent anchored regular expression.
189+
// EnsureRegexpAnchored adds '^' and '$' characters to appropriate locations in the parsed syntax.Regexp,
190+
// to ensure every input regular expression is converted to its equivalent anchored regular expression.
196191
// NB: assumes input regexp AST is un-anchored.
197-
func ensureRegexpAnchored(unanchoredRegexp *syntax.Regexp) (*syntax.Regexp, error) {
192+
func EnsureRegexpAnchored(unanchoredRegexp *syntax.Regexp) *syntax.Regexp {
198193
ast := &syntax.Regexp{
199194
Op: syntax.OpConcat,
200195
Flags: syntax.Perl,
@@ -210,13 +205,13 @@ func ensureRegexpAnchored(unanchoredRegexp *syntax.Regexp) (*syntax.Regexp, erro
210205
},
211206
},
212207
}
213-
return simplify(ast.Simplify()), nil
208+
return simplify(ast.Simplify())
214209
}
215210

216-
// ensureRegexpUnanchored strips '^' and '$' characters from appropriate locations in the parsed syntax.Regexp,
217-
// to ensure every input regular expression is converted to it's equivalent un-anchored regular expression
211+
// EnsureRegexpUnanchored strips '^' and '$' characters from appropriate locations in the parsed syntax.Regexp,
212+
// to ensure every input regular expression is converted to its equivalent un-anchored regular expression
218213
// assuming the entire input is matched.
219-
func ensureRegexpUnanchored(parsed *syntax.Regexp) (*syntax.Regexp, error) {
214+
func EnsureRegexpUnanchored(parsed *syntax.Regexp) (*syntax.Regexp, error) {
220215
r, _, err := ensureRegexpUnanchoredHelper(parsed, true, true)
221216
if err != nil {
222217
return nil, err

src/m3ninx/index/regexp_prop_test.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,9 @@ func TestRegexpCompilationProperty(t *testing.T) {
7777
func compileRegexp(x string, t *testing.T) *regexp.Regexp {
7878
ast, err := parseRegexp(x)
7979
require.NoError(t, err)
80-
astp, err := ensureRegexpUnanchored(ast)
81-
require.NoError(t, err)
82-
ast2p, err := ensureRegexpAnchored(astp)
80+
astp, err := EnsureRegexpUnanchored(ast)
8381
require.NoError(t, err)
82+
ast2p := EnsureRegexpAnchored(astp)
8483
re, err := regexp.Compile(ast2p.String())
8584
require.NoError(t, err)
8685
return re

src/m3ninx/index/regexp_test.go

Lines changed: 45 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -57,162 +57,162 @@ func TestEnsureRegexpUnachoredee(t *testing.T) {
5757

5858
func TestEnsureRegexpUnachored(t *testing.T) {
5959
testCases := []testCase{
60-
testCase{
60+
{
6161
name: "naked ^",
6262
input: "^",
6363
expectedOutput: "emp{}",
6464
},
65-
testCase{
65+
{
6666
name: "naked $",
6767
input: "$",
6868
expectedOutput: "emp{}",
6969
},
70-
testCase{
70+
{
7171
name: "empty string ^$",
7272
input: "^$",
7373
expectedOutput: "cat{}",
7474
},
75-
testCase{
75+
{
7676
name: "invalid naked concat ^$",
7777
input: "$^",
7878
expectedOutput: "cat{eot{}bot{}}",
7979
},
80-
testCase{
80+
{
8181
name: "simple case of ^",
8282
input: "^abc",
8383
expectedOutput: "str{abc}",
8484
},
85-
testCase{
85+
{
8686
name: "simple case of $",
8787
input: "abc$",
8888
expectedOutput: "str{abc}",
8989
},
90-
testCase{
90+
{
9191
name: "simple case of both ^ & $",
9292
input: "^abc$",
9393
expectedOutput: "str{abc}",
9494
},
95-
testCase{
95+
{
9696
name: "weird case of internal ^",
9797
input: "^a^bc$",
9898
expectedOutput: "cat{lit{a}bot{}str{bc}}",
9999
},
100-
testCase{
100+
{
101101
name: "weird case of internal $",
102102
input: "^a$bc$",
103103
expectedOutput: "cat{lit{a}eot{}str{bc}}",
104104
},
105-
testCase{
105+
{
106106
name: "alternate of sub expressions with only legal ^ and $",
107107
input: "(?:^abc$)|(?:^xyz$)",
108108
expectedOutput: "alt{str{abc}str{xyz}}",
109109
},
110-
testCase{
110+
{
111111
name: "concat of sub expressions with only legal ^ and $",
112112
input: "(^abc$)(?:^xyz$)",
113113
expectedOutput: "cat{cap{cat{str{abc}eot{}}}bot{}str{xyz}}",
114114
},
115-
testCase{
115+
{
116116
name: "alternate of sub expressions with illegal ^ and $",
117117
input: "(?:^a$bc$)|(?:^xyz$)",
118118
expectedOutput: "alt{cat{lit{a}eot{}str{bc}}str{xyz}}",
119119
},
120-
testCase{
120+
{
121121
name: "concat of sub expressions with illegal ^ and $",
122122
input: "(?:^a$bc$)(?:^xyz$)",
123123
expectedOutput: "cat{lit{a}eot{}str{bc}eot{}bot{}str{xyz}}",
124124
},
125-
testCase{
125+
{
126126
name: "question mark case both boundaries success",
127127
input: "(?:^abc$)?",
128128
expectedOutput: "que{str{abc}}",
129129
},
130-
testCase{
130+
{
131131
name: "question mark case only ^",
132132
input: "(?:^abc)?",
133133
expectedOutput: "que{str{abc}}",
134134
},
135-
testCase{
135+
{
136136
name: "question mark case only $",
137137
input: "(?:abc$)?",
138138
expectedOutput: "que{str{abc}}",
139139
},
140-
testCase{
140+
{
141141
name: "question concat case $",
142142
input: "abc$?",
143143
expectedOutput: "str{abc}",
144144
},
145-
testCase{
145+
{
146146
name: "star mark case both boundaries success",
147147
input: "(?:^abc$)*",
148148
expectedOutput: "cat{que{str{abc}}star{cat{bot{}str{abc}eot{}}}}",
149149
},
150-
testCase{
150+
{
151151
name: "star mark case only ^",
152152
input: "(?:^abc)*",
153153
expectedOutput: "cat{que{str{abc}}star{cat{bot{}str{abc}}}}",
154154
},
155-
testCase{
155+
{
156156
name: "star mark case only $",
157157
input: "(?:abc$)*",
158158
expectedOutput: "cat{que{str{abc}}star{cat{str{abc}eot{}}}}",
159159
},
160-
testCase{
160+
{
161161
name: "star concat case $",
162162
input: "abc$*",
163163
expectedOutput: "cat{str{abc}star{eot{}}}",
164164
},
165-
testCase{
165+
{
166166
name: "star concat case ^",
167167
input: "^*abc",
168168
expectedOutput: "cat{star{bot{}}str{abc}}",
169169
},
170-
testCase{
170+
{
171171
name: "plus mark case both boundaries success",
172172
input: "(?:^abc$)+",
173173
expectedOutput: "cat{str{abc}star{cat{bot{}str{abc}eot{}}}}",
174174
},
175-
testCase{
175+
{
176176
name: "plus mark case with capturing group",
177177
input: "(^abc$)+",
178178
expectedOutput: "cat{cap{str{abc}}star{cap{cat{bot{}str{abc}eot{}}}}}",
179179
},
180-
testCase{
180+
{
181181
name: "plus mark case only ^",
182182
input: "(?:^abc)+",
183183
expectedOutput: "cat{str{abc}star{cat{bot{}str{abc}}}}",
184184
},
185-
testCase{
185+
{
186186
name: "plus mark case only $",
187187
input: "(?:abc$)+",
188188
expectedOutput: "cat{str{abc}star{cat{str{abc}eot{}}}}",
189189
},
190-
testCase{
190+
{
191191
name: "plus concat case $",
192192
input: "abc$+",
193193
expectedOutput: "cat{str{abc}star{eot{}}}",
194194
},
195-
testCase{
195+
{
196196
name: "plus concat case ^",
197197
input: "^+abc",
198198
expectedOutput: "cat{star{bot{}}str{abc}}",
199199
},
200-
testCase{
200+
{
201201
name: "repeat case both boundaries success",
202202
input: "(?:^abc$){3,4}",
203203
expectedOutput: "cat{str{abc}rep{2,3 cat{bot{}str{abc}eot{}}}}",
204204
},
205-
testCase{
205+
{
206206
name: "repeat case unbounded max",
207207
input: "(?:^abc$){3,}",
208208
expectedOutput: "cat{str{abc}rep{2,-1 cat{bot{}str{abc}eot{}}}}",
209209
},
210-
testCase{
210+
{
211211
name: "repeat case unbounded max with 1 min",
212212
input: "(?:^abc$){1,2}",
213213
expectedOutput: "cat{str{abc}rep{0,1 cat{bot{}str{abc}eot{}}}}",
214214
},
215-
testCase{
215+
{
216216
name: "repeat case unbounded max with 0 min",
217217
input: "(?:^abc$){0,2}",
218218
expectedOutput: "rep{0,2 cat{bot{}str{abc}eot{}}}",
@@ -222,7 +222,7 @@ func TestEnsureRegexpUnachored(t *testing.T) {
222222
t.Run(tc.name, func(t *testing.T) {
223223
re, err := parseRegexp(tc.input)
224224
require.NoError(t, err)
225-
parsed, err := ensureRegexpUnanchored(re)
225+
parsed, err := EnsureRegexpUnanchored(re)
226226
require.NoError(t, err)
227227
assert.Equal(t, tc.expectedOutput, dumpRegexp(parsed))
228228
})
@@ -231,57 +231,57 @@ func TestEnsureRegexpUnachored(t *testing.T) {
231231

232232
func TestEnsureRegexpAnchored(t *testing.T) {
233233
testCases := []testCase{
234-
testCase{
234+
{
235235
name: "naked ^",
236236
input: "(?:)",
237237
expectedOutput: "cat{bot{}eot{\\z}}",
238238
},
239-
testCase{
239+
{
240240
name: "invalid naked concat ^$",
241241
input: "$^",
242242
expectedOutput: "cat{bot{}eot{}bot{}eot{\\z}}",
243243
},
244-
testCase{
244+
{
245245
name: "simple case of literal",
246246
input: "abc",
247247
expectedOutput: "cat{bot{}str{abc}eot{\\z}}",
248248
},
249-
testCase{
249+
{
250250
name: "weird case of internal ^",
251251
input: "a^bc",
252252
expectedOutput: "cat{bot{}lit{a}bot{}str{bc}eot{\\z}}",
253253
},
254-
testCase{
254+
{
255255
name: "weird case of internal $",
256256
input: "a$bc",
257257
expectedOutput: "cat{bot{}lit{a}eot{}str{bc}eot{\\z}}",
258258
},
259-
testCase{
259+
{
260260
name: "alternate of sub expressions with only legal ^ and $",
261261
input: "abc|xyz",
262262
expectedOutput: "cat{bot{}alt{str{abc}str{xyz}}eot{\\z}}",
263263
},
264-
testCase{
264+
{
265265
name: "concat of sub expressions with only legal ^ and $",
266266
input: "(?:abc)(?:xyz)",
267267
expectedOutput: "cat{bot{}str{abcxyz}eot{\\z}}",
268268
},
269-
testCase{
269+
{
270270
name: "question mark case both boundaries success",
271271
input: "(?:abc)?",
272272
expectedOutput: "cat{bot{}que{str{abc}}eot{\\z}}",
273273
},
274-
testCase{
274+
{
275275
name: "star mark case both boundaries success",
276276
input: "(?:abc)*",
277277
expectedOutput: "cat{bot{}star{str{abc}}eot{\\z}}",
278278
},
279-
testCase{
279+
{
280280
name: "plus mark case both boundaries success",
281281
input: "(?:abc)+",
282282
expectedOutput: "cat{bot{}plus{str{abc}}eot{\\z}}",
283283
},
284-
testCase{
284+
{
285285
name: "repeat case both boundaries success",
286286
input: "(?:abc){3,4}",
287287
expectedOutput: "cat{bot{}str{abc}str{abc}str{abc}que{str{abc}}eot{\\z}}",
@@ -291,8 +291,7 @@ func TestEnsureRegexpAnchored(t *testing.T) {
291291
t.Run(tc.name, func(t *testing.T) {
292292
re, err := parseRegexp(tc.input)
293293
require.NoError(t, err)
294-
parsed, err := ensureRegexpAnchored(re)
295-
require.NoError(t, err)
294+
parsed := EnsureRegexpAnchored(re)
296295
assert.Equal(t, tc.expectedOutput, dumpRegexp(parsed))
297296
})
298297
}

0 commit comments

Comments
 (0)