@@ -30,7 +30,6 @@ use datafusion_expr::{
30
30
} ;
31
31
use itertools:: izip;
32
32
use regex:: Regex ;
33
- use std:: collections:: hash_map:: Entry ;
34
33
use std:: collections:: HashMap ;
35
34
use std:: sync:: { Arc , OnceLock } ;
36
35
@@ -310,12 +309,13 @@ where
310
309
Some ( regex) => regex,
311
310
} ;
312
311
313
- let pattern = compile_regex ( regex, flags_scalar) ?;
312
+ let pattern = get_pattern ( regex, flags_scalar) ?;
313
+ let re = compile_regex ( pattern) ?;
314
314
315
315
Ok ( Arc :: new ( Int64Array :: from_iter_values (
316
316
values
317
317
. iter ( )
318
- . map ( |value| count_matches ( value, & pattern , start_scalar) )
318
+ . map ( |value| count_matches ( value, & re , start_scalar) )
319
319
. collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
320
320
) ) )
321
321
}
@@ -356,15 +356,15 @@ where
356
356
Some ( regex) => regex,
357
357
} ;
358
358
359
- let pattern = compile_regex ( regex, flags_scalar) ?;
360
-
359
+ let pattern = get_pattern ( regex, flags_scalar) ?;
360
+ let re = compile_regex ( pattern ) ? ;
361
361
let start_array = start_array. unwrap ( ) ;
362
362
363
363
Ok ( Arc :: new ( Int64Array :: from_iter_values (
364
364
values
365
365
. iter ( )
366
366
. zip ( start_array. iter ( ) )
367
- . map ( |( value, start) | count_matches ( value, & pattern , start) )
367
+ . map ( |( value, start) | count_matches ( value, & re , start) )
368
368
. collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
369
369
) ) )
370
370
}
@@ -549,34 +549,37 @@ where
549
549
}
550
550
}
551
551
552
- fn compile_and_cache_regex (
553
- regex : & str ,
554
- flags : Option < & str > ,
555
- regex_cache : & mut HashMap < String , Regex > ,
556
- ) -> Result < Regex , ArrowError > {
557
- match regex_cache. entry ( regex. to_string ( ) ) {
558
- Entry :: Vacant ( entry) => {
559
- let compiled = compile_regex ( regex, flags) ?;
560
- entry. insert ( compiled. clone ( ) ) ;
561
- Ok ( compiled)
562
- }
563
- Entry :: Occupied ( entry) => Ok ( entry. get ( ) . to_owned ( ) ) ,
552
+ fn compile_and_cache_regex < ' a > (
553
+ regex : & ' a str ,
554
+ flags : Option < & ' a str > ,
555
+ regex_cache : & ' a mut HashMap < String , Regex > ,
556
+ ) -> Result < & ' a Regex , ArrowError > {
557
+ let pattern = get_pattern ( regex, flags) ?;
558
+
559
+ if regex_cache. contains_key ( & pattern) {
560
+ return Ok ( regex_cache. get ( & pattern) . unwrap ( ) ) ;
564
561
}
562
+
563
+ let re = compile_regex ( pattern. clone ( ) ) ?;
564
+ regex_cache. insert ( pattern. clone ( ) , re) ;
565
+ Ok ( regex_cache. get ( & pattern) . unwrap ( ) )
565
566
}
566
567
567
- fn compile_regex ( regex : & str , flags : Option < & str > ) -> Result < Regex , ArrowError > {
568
- let pattern = match flags {
569
- None | Some ( "" ) => regex. to_string ( ) ,
568
+ fn get_pattern ( regex : & str , flags : Option < & str > ) -> Result < String , ArrowError > {
569
+ match flags {
570
+ None | Some ( "" ) => Ok ( regex. to_string ( ) ) ,
570
571
Some ( flags) => {
571
572
if flags. contains ( "g" ) {
572
573
return Err ( ArrowError :: ComputeError (
573
574
"regexp_count() does not support global flag" . to_string ( ) ,
574
575
) ) ;
575
576
}
576
- format ! ( "(?{}){}" , flags, regex)
577
+ Ok ( format ! ( "(?{}){}" , flags, regex) )
577
578
}
578
- } ;
579
+ }
580
+ }
579
581
582
+ fn compile_regex ( pattern : String ) -> Result < Regex , ArrowError > {
580
583
Regex :: new ( & pattern) . map_err ( |_| {
581
584
ArrowError :: ComputeError ( format ! (
582
585
"Regular expression did not compile: {}" ,
0 commit comments