File tree Expand file tree Collapse file tree 2 files changed +67
-11
lines changed Expand file tree Collapse file tree 2 files changed +67
-11
lines changed Original file line number Diff line number Diff line change @@ -137,9 +137,9 @@ fun guess_and_convert_to_pdl ( (ArrayRef | Value | ColumnLike) $x,
137
137
:$strings_as_factors =false, :$test_count =1000, :$na =[qw( BAD NA) ]) {
138
138
return $x if ( $x -> $_DOES(' PDL' ) );
139
139
140
- my $ is_na = sub {
141
- length ( $_ [0] ) == 0 or List::AllUtils::any { $_ [0] eq $_ } @$na ;
142
- };
140
+ # see utils/benchmarks/ is_na.pl for why grep is used here
141
+ my @na = ( @$na , ' ' ) ;
142
+ my $is_na = sub { scalar ( grep { $_ [0] eq $_ } @na ) };
143
143
144
144
my $like_number ;
145
145
if ( !ref $x ) {
@@ -153,21 +153,20 @@ fun guess_and_convert_to_pdl ( (ArrayRef | Value | ColumnLike) $x,
153
153
@$x [ 0 .. List::AllUtils::min( $test_count - 1, $# $x ) ];
154
154
}
155
155
156
+ my $piddle ;
156
157
if ($like_number ) {
157
- my @data = map { &$is_na ($_ ) ? ' nan' : $_ } @$x ;
158
- my $piddle = pdl( \@data );
159
- $piddle -> inplace-> setnantobad;
160
- return $piddle ;
158
+ local $SIG {__WARN__ } = sub {};
159
+ $piddle = pdl($x );
161
160
}
162
161
else {
163
- my $piddle =
162
+ $piddle =
164
163
$strings_as_factors
165
164
? PDL::Factor-> new($x )
166
165
: PDL::SV-> new($x );
167
- my $is_bad = pdl( [ map { &$is_na ($_ ) } @$x ] );
168
- $piddle = $piddle -> setbadif($is_bad );
169
- return $piddle ;
170
166
}
167
+ my $isbad = pdl( [ map { &$is_na ($_ ) } @$x ] );
168
+ $piddle = $piddle -> setbadif($isbad );
169
+ return $piddle ;
171
170
}
172
171
173
172
1;
Original file line number Diff line number Diff line change
1
+ # !/usr/bin/env perl
2
+
3
+ # A "is_na" function is used in Data::Frame::Util's
4
+ # guess_and_convert_to_pdl() function.
5
+ #
6
+ # The result indicates that for a small length of @na, grep could be the
7
+ # most practical way for performance. It looks like that the overhead from
8
+ # closure and Perl operators is obvious in this case.
9
+
10
+ use 5.016;
11
+ use warnings;
12
+
13
+ use List::Util qw( any) ;
14
+ use Benchmark qw( :all) ;
15
+
16
+ my @na = ( qw( NA BAD) , ' ' );
17
+
18
+ # array of string with 100k data
19
+ my @s = (qw( foo bar baz quux) ) x 25000 ;
20
+
21
+ sub is_na_any {
22
+ any { $_ [0] eq $_ } @na ;
23
+ }
24
+
25
+ sub is_na_grep {
26
+ scalar (grep { $_ [0] eq $_ } @na );
27
+ }
28
+
29
+ my $re = qr / ^(?:NA|BAD|)$ / ;
30
+
31
+ sub is_na_regex {
32
+ $_ [0] =~ $re ;
33
+ }
34
+
35
+ sub is_na_regex2 {
36
+ # this is for comparison with above is_na_regex()
37
+ $_ [0] =~ / ^(?:NA|BAD|)$ / ;
38
+ }
39
+
40
+ cmpthese(
41
+ 100,
42
+ {
43
+ ' is_na_any' => sub {
44
+ my @x = map { is_na_any($_ ) } @s ;
45
+ },
46
+ ' is_na_grep' => sub {
47
+ my @x = map { is_na_grep($_ ) } @s ;
48
+ },
49
+ ' is_na_regex' => sub {
50
+ my @x = map { is_na_regex($_ ) } @s ;
51
+ },
52
+ ' is_na_regex2' => sub {
53
+ my @x = map { is_na_regex2($_ ) } @s ;
54
+ },
55
+ },
56
+ );
57
+
You can’t perform that action at this time.
0 commit comments