Skip to content

Commit 6bf00ce

Browse files
committed
improve read_csv()'s dtype parameter
1 parent 287a820 commit 6bf00ce

File tree

4 files changed

+49
-37
lines changed

4 files changed

+49
-37
lines changed

lib/Data/Frame/Examples.pm

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,11 @@ my %data_setup = (
4040
return $df;
4141
}
4242
},
43-
economics => { params => { col_types => { date => 'PDL::DateTime' } } },
44-
economics_long =>
45-
{ params => { col_types => { date => 'PDL::DateTime' } } },
46-
mpg => {},
47-
mtcars => {},
48-
txhousing => {},
43+
economics => { params => { dtype => { date => 'datetime' } } },
44+
economics_long => { params => { dtype => { date => 'datetime' } } },
45+
mpg => {},
46+
mtcars => {},
47+
txhousing => {},
4948
);
5049
my @data_names = sort keys %data_setup;
5150

lib/Data/Frame/IO/CSV.pm

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,18 @@ use Package::Stash;
1818
use Ref::Util qw(is_plain_arrayref is_plain_hashref);
1919
use Scalar::Util qw(openhandle looks_like_number);
2020
use Type::Params;
21-
use Types::Standard qw(Any ArrayRef CodeRef HashRef Maybe Str);
21+
use Types::Standard qw(Any ArrayRef CodeRef Enum HashRef Map Maybe Str);
2222
use Types::PDL qw(Piddle);
2323
use Text::CSV;
2424

2525
use Data::Frame::Util qw(guess_and_convert_to_pdl);
26+
use Data::Frame::Types qw(DataType);
2627

2728
=method from_csv
2829
2930
from_csv($file, :$header=true, :$sep=',', :$quote='"',
3031
:$na=[qw(NA BAD)], :$col_names=undef, :$row_names=undef,
31-
HashRef :$col_types={},
32+
Map[Str, DataType] :$dtype={},
3233
:$strings_as_factors=false)
3334
3435
Create a data frame object from a CSV file. For example,
@@ -40,17 +41,17 @@ Some of the parameters are explained below,
4041
=for :list
4142
* C<$file> can be a file name string, a Path::Tiny object, or an opened file
4243
handle.
43-
* C<$col_types> is a hashref associating column names to their types. Types
44+
* C<$dtype> is a hashref associating column names to their types. Types
4445
can be the PDL type names like C<"long">, C<"double">, or one of C<"string">,
4546
C<"factor">, C<"logical">, C<"datetime"> for PDL's derived classes. If a
46-
column is not specified in C<$col_types>, its type would be automatically
47+
column is not specified in C<$dtype>, its type would be automatically
4748
decided.
4849
4950
=cut
5051

5152
classmethod from_csv ($file, :$header=true, :$sep=",", :$quote='"',
5253
:$na=[qw(NA BAD)], :$col_names=undef, :$row_names=undef,
53-
HashRef :$col_types={},
54+
Map[Str, DataType] :$dtype={},
5455
:$strings_as_factors=false
5556
) {
5657
state $check = Type::Params::compile(
@@ -134,32 +135,32 @@ classmethod from_csv ($file, :$header=true, :$sep=",", :$quote='"',
134135
}
135136
}
136137

137-
my $pdl_types = $class->_pdl_types;
138+
state $additional_type_to_piddle = {
139+
datetime => sub { PDL::DateTime->new_from_datetime($_[0]) },
140+
factor => sub { PDL::Factor->new($_[0]) },
141+
logical => sub { PDL::Logical->new($_[0]) },
142+
};
138143
my $package_pdl_core = Package::Stash->new('PDL::Core');
139144
my $to_piddle = sub {
140145
my ($name) = @_;
141146
my $x = $columns{$name};
142147

143-
if (my $col_type = $col_types->{$name}) {
144-
if (elem($col_type, $pdl_types)) {
145-
my $f = $package_pdl_core->get_symbol("&$col_type");
146-
return $f->($x) if $f;
147-
}
148-
if ($col_type =~ /^PDL::(?:Factor|SV|DateTime)$/) {
149-
if ($col_type eq 'PDL::DateTime') {
150-
return $col_type->new_from_datetime($x);
151-
} else {
152-
return $col_type->new($x);
153-
}
148+
if ( my $type = $dtype->{$name} ) {
149+
my $f_new = $additional_type_to_piddle->{$type}
150+
// $package_pdl_core->get_symbol("&$type");
151+
if ($f_new) {
152+
return $f_new->($x);
154153
}
155-
156-
die "Invalid column type '$col_type'";
157-
} else {
154+
else {
155+
die "Invalid data type '$type'";
156+
}
157+
}
158+
else {
158159
return guess_and_convert_to_pdl(
159160
$x,
160161
na => $na,
161162
strings_as_factors => $strings_as_factors
162-
);
163+
);
163164
}
164165
};
165166

@@ -222,12 +223,6 @@ method to_csv ($file, :$sep=',', :$quote='"', :$na='NA',
222223
}
223224
}
224225

225-
classmethod _pdl_types () {
226-
state $types = [ map { PDL::Types::typefld( $_, 'ppforcetype' ); }
227-
PDL::Types::typesrtkeys() ];
228-
return $types;
229-
}
230-
231226
1;
232227

233228
__END__

lib/Data/Frame/Types.pm

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use warnings;
88
use Type::Library -base, -declare => qw(
99
DataFrame
1010
Indexer
11+
DataType
1112
Column ColumnLike
1213
IndexerFromLabels IndexerFromIndices
1314
);
@@ -23,6 +24,13 @@ declare Indexer, as ConsumerOf ["Data::Frame::Indexer::Role"];
2324
declare ColumnLike, as ConsumerOf['PDL'], where { $_->ndims <= 1 };
2425
declare Column, as ColumnLike;
2526

27+
declare DataType, as Enum [
28+
qw(
29+
ushort long indx longlong float double
30+
string factor logical datetime
31+
)
32+
];
33+
2634
declare_coercion "IndexerFromLabels", to_type Indexer, from Any, via {
2735
require Data::Frame::Indexer;
2836
Data::Frame::Indexer::indexer_s($_);
@@ -45,9 +53,16 @@ Types:
4553
=for :list
4654
* DataFrame
4755
* Indexer
48-
* ColumnLike: This is basically piddle of 0D and 1D.
49-
* Column: Now it's same as ColumnLike, but will likely evolve into a
50-
dedicated type in future.
56+
* ColumnLike
57+
This is basically a 0D or 1D piddle.
58+
* Column
59+
Currently the same as ColumnLike, but will likely evolve into a dedicated type
60+
in the future.
61+
* DataType
62+
One of the PDL types
63+
C<"ushort">, C<"long">, C<"indx">, C<"longlong">, C<"float">, C<"double">,
64+
or C<"string"> (PDL::SV), C<"factor"> (PDL::Factor),
65+
C<"logical"> (PDL::Logical), C<"datetime"> (PDL::DateTime).
5166
5267
Coercions:
5368
=for :list

t/25-types.t

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ use Test2::V0;
66

77
use Data::Frame::Types qw(:all);
88

9-
isa_ok(DataFrame, ['Type::Tiny'], 'DataFrame type');
9+
isa_ok( DataFrame, ['Type::Tiny'], 'DataFrame type' );
10+
11+
isa_ok( DataType, ['Type::Tiny'], 'DataType type' );
12+
# Type::Tiny's validate() returns an error message on failure and undef on
# success, so wrapping it in ok() passes only when validation FAILS. Use
# check(), which returns a boolean, and test a real member of the enum.
ok( DataType->check('datetime'), 'DataType accepts "datetime"' );
ok( !DataType->check( { date => 'datetime' } ),
    'DataType rejects a hashref (it is an Enum of type names)' );
1013

1114
done_testing;

0 commit comments

Comments
 (0)