@@ -161,34 +161,22 @@ def __tregex_compile(self, pattern):
161
161
def __compile_cpython_sre(self):
    """Return the fallback ``_cpython_sre`` pattern, compiling it on first use.

    The compiled object is stored on the instance so later calls reuse it
    instead of compiling again.
    """
    compiled = self.__compiled_sre_pattern
    if not compiled:
        # Imported lazily: only needed when the fallback engine is used.
        import _cpython_sre
        compiled = _cpython_sre.compile(
            self.pattern,
            self.flags,
            self.code,
            self.num_groups,
            self.groupindex,
            self.indexgroup,
        )
        self.__compiled_sre_pattern = compiled
    return compiled
166
166
167
167
168
def _decode_pattern(self, string, flags=0):
    """Prepare a pattern before it is handed to the TRegex engine.

    A pattern that is not a ``str`` is returned unchanged.  For a ``str``
    pattern, backslash-escaped quotes are unescaped, the result is run
    through ``tregex_preprocess_for_verbose`` when ``flags`` contains
    ``SRE_FLAG_VERBOSE``, and finally through ``tregex_preprocess_default``.
    """
    if not isinstance(string, str):
        return string

    from sre_compile import SRE_FLAG_VERBOSE

    # TODO: fix this in the regex engine
    unescaped = string.replace(r'\"', '"')
    unescaped = unescaped.replace(r"\'", "'")

    # TODO: that's not nearly complete but should be sufficient for now
    if flags & SRE_FLAG_VERBOSE:
        unescaped = tregex_preprocess_for_verbose(unescaped)
    return tregex_preprocess_default(unescaped)
192
180
193
181
194
182
def __repr__ (self ):
@@ -210,7 +198,6 @@ def __repr__(self):
210
198
211
199
def _search (self , pattern , string , pos , endpos ):
212
200
pattern = self .__tregex_compile (pattern )
213
- string = self ._decode_string (string )
214
201
if endpos == - 1 or endpos >= len (string ):
215
202
result = tregex_call_safe (pattern .exec , string , pos )
216
203
else :
@@ -227,29 +214,33 @@ def search(self, string, pos=0, endpos=None):
227
214
return self .__compile_cpython_sre ().search (string , pos , default (endpos , maxsize ()))
228
215
229
216
def match(self, string, pos=0, endpos=None):
    """Match the pattern at the beginning of ``string``.

    For ``str`` patterns the TRegex engine is tried first, using an anchored
    copy of the pattern; if that raises ``RuntimeError``, or the pattern is
    not a ``str``, the ``_cpython_sre`` fallback is used.

    :param string: the text to match against
    :param pos: index at which matching starts
    :param endpos: index at which matching stops; ``None`` means no limit
    :return: whatever the engine that was used returns for the match
    """
    pattern = self.pattern
    if isinstance(pattern, str):
        from sre_compile import SRE_FLAG_VERBOSE
        # In verbose mode a trailing "# comment" would swallow the closing
        # parenthesis; a newline ends the comment and is otherwise ignored.
        closer = "\n)" if self.flags & SRE_FLAG_VERBOSE else ")"
        # Anchor the pattern as a whole: a bare "^" prefix would bind only to
        # the first alternative ("^a|b" matches "b" anywhere in the string).
        anchored = "^(?:" + pattern + closer
        try:
            return self._search(anchored, string, pos, default(endpos, -1))
        except RuntimeError:
            # Deliberate best-effort: fall through to the fallback engine.
            pass
    return self.__compile_cpython_sre().match(string, pos, default(endpos, maxsize()))
237
227
238
228
def fullmatch(self, string, pos=0, endpos=None):
    """Match the pattern against the whole of ``string``.

    For ``str`` patterns the TRegex engine is tried first, using a copy of
    the pattern anchored at both ends; if that raises ``RuntimeError``, or
    the pattern is not a ``str``, the ``_cpython_sre`` fallback is used.

    :param string: the text to match against
    :param pos: index at which matching starts
    :param endpos: index at which matching stops; ``None`` means no limit
    :return: whatever the engine that was used returns for the match
    """
    pattern = self.pattern
    if isinstance(pattern, str):
        from sre_compile import SRE_FLAG_VERBOSE
        # In verbose mode a trailing "# comment" would swallow the closing
        # parenthesis; a newline ends the comment and is otherwise ignored.
        closer = "\n)" if self.flags & SRE_FLAG_VERBOSE else ")"
        # Anchor the pattern as a whole.  Bare "^"/"$" would bind only to the
        # first/last alternative, and a pattern ending in an escaped "\$"
        # must still receive a real end anchor.
        anchored = "^(?:" + pattern + closer + "$"
        try:
            return self._search(anchored, string, pos, default(endpos, -1))
        except RuntimeError:
            # Deliberate best-effort: fall through to the fallback engine.
            pass
    return self.__compile_cpython_sre().fullmatch(string, pos, default(endpos, maxsize()))
248
240
249
241
def findall (self , string , pos = 0 , endpos = - 1 ):
250
242
try :
251
243
pattern = self .__tregex_compile (self .pattern )
252
- string = self ._decode_string (string )
253
244
if endpos > len (string ):
254
245
endpos = len (string )
255
246
elif endpos < 0 :
@@ -281,9 +272,9 @@ def group(match_result, group_nr, string):
281
272
return string [group_start :group_end ]
282
273
283
274
n = len (repl )
284
- result = self . _emit ( "" )
275
+ result = ""
285
276
start = 0
286
- backslash = self . _emit ( '\\ ' )
277
+ backslash = '\\ '
287
278
pos = repl .find (backslash , start )
288
279
while pos != - 1 and start < n :
289
280
if pos + 1 < n :
@@ -292,15 +283,15 @@ def group(match_result, group_nr, string):
292
283
group_str = group (match_result , group_nr , string )
293
284
if group_str is None :
294
285
raise ValueError ("invalid group reference %s at position %s" % (group_nr , pos ))
295
- result += repl [start :pos ] + self . _emit ( group_str )
286
+ result += repl [start :pos ] + group_str
296
287
start = pos + 2
297
288
elif repl [pos + 1 ] == 'g' :
298
289
group_ref , group_ref_end , digits_only = self .__extract_groupname (repl , pos + 2 )
299
290
if group_ref :
300
291
group_str = group (match_result , int (group_ref ) if digits_only else pattern .groups [group_ref ], string )
301
292
if group_str is None :
302
293
raise ValueError ("invalid group reference %s at position %s" % (group_ref , pos ))
303
- result += repl [start :pos ] + self . _emit ( group_str )
294
+ result += repl [start :pos ] + group_str
304
295
start = group_ref_end + 1
305
296
elif repl [pos + 1 ] == backslash :
306
297
result += repl [start :pos ] + backslash
@@ -331,40 +322,33 @@ def sub(self, repl, string, count=0):
331
322
n = 0
332
323
try :
333
324
pattern = self .__tregex_compile (self .pattern )
334
- decoded_string = self ._decode_string (string )
335
325
result = []
336
326
pos = 0
337
327
is_string_rep = isinstance (repl , str ) or isinstance (repl , bytes ) or isinstance (repl , bytearray )
338
328
if is_string_rep :
339
329
repl = _process_escape_sequences (repl )
340
330
progress = True
341
- while (count == 0 or n < count ) and pos <= len (decoded_string ) and progress :
342
- match_result = tregex_call_safe (pattern .exec , decoded_string , pos )
331
+ while (count == 0 or n < count ) and pos <= len (string ) and progress :
332
+ match_result = tregex_call_safe (pattern .exec , string , pos )
343
333
if not match_result .isMatch :
344
334
break
345
335
n += 1
346
336
start = match_result .start [0 ]
347
337
end = match_result .end [0 ]
348
- result .append (self . _emit ( decoded_string [pos :start ]) )
338
+ result .append (string [pos :start ])
349
339
if is_string_rep :
350
- result .append (self .__replace_groups (repl , decoded_string , match_result , pattern ))
340
+ result .append (self .__replace_groups (repl , string , match_result , pattern ))
351
341
else :
352
342
_srematch = SRE_Match (self , pos , - 1 , match_result )
353
343
_repl = repl (_srematch )
354
344
result .append (_repl )
355
345
pos = end
356
346
progress = (start != end )
357
- result .append (self . _emit ( decoded_string [pos :]) )
358
- return self . _emit ( "" ) .join (result )
347
+ result .append (string [pos :])
348
+ return "" .join (result )
359
349
except BaseException :
360
350
return self .__compile_cpython_sre ().sub (repl , string , count )
361
351
362
- def _emit (self , str_like_obj ):
363
- assert isinstance (str_like_obj , str ) or isinstance (str_like_obj , bytes )
364
- if self .__was_bytes != isinstance (str_like_obj , bytes ):
365
- return str_like_obj .encode ()
366
- return str_like_obj
367
-
368
352
369
353
compile = SRE_Pattern
370
354
0 commit comments