Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

the value zero is not stored correctly in certain phenotype uploads #5329

Merged
merged 21 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
616a858
also record 0 values in the csv format file parsing.
lukasmueller Feb 15, 2025
16da8d9
check if value is defined as 0 is also a false value, but we want to …
lukasmueller Feb 15, 2025
9cf343a
check for definedness in missing report.
lukasmueller Feb 15, 2025
ec562fb
add use strict; to Plugin Plain.pm.
lukasmueller Feb 15, 2025
3a1697a
Pheno Upload: handle blank values without removing data
dwaring87 Feb 17, 2025
494bcc8
Add backend script for finding phenotype observations of 0 to reupload
dwaring87 Feb 17, 2025
9aa0891
process store only when cvterm_row is defined.
lukasmueller Feb 17, 2025
3965dcf
remove fieldbook image file references.
lukasmueller Feb 17, 2025
4b793b1
output more diagnostic info.
lukasmueller Feb 17, 2025
d7570aa
bring back some tests that were commented out previously.
lukasmueller Feb 17, 2025
e27ff45
delete metadata associated images before deleting md_metadata.
lukasmueller Feb 17, 2025
3736abe
print error if available.
lukasmueller Feb 17, 2025
e64aa62
fix a linting issue (return undef)
lukasmueller Feb 17, 2025
e84bf4d
do not delete nd_experiment entries when deleting phenotypes.
lukasmueller Feb 18, 2025
a6851f6
StorePhenotypes: fix spacing
dwaring87 Feb 18, 2025
c86164f
Project: restore deletion of nd_experiment entries when deleting phen…
dwaring87 Feb 18, 2025
7583091
Store Phenotypes: fix query looking up phenotype and experiment ids f…
dwaring87 Feb 18, 2025
4803f51
StorePhenotypes: use temp table for finding phenotypes to delete
dwaring87 Feb 18, 2025
2aba632
Delete ND Experiments: check for no ids to delete
dwaring87 Feb 18, 2025
d7f1564
StorePhenotypes: fix typo in table name
dwaring87 Feb 18, 2025
7773686
StorePhenotypes: use temp table for saved nd_experiment_ids when dele…
dwaring87 Feb 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion bin/delete_nd_experiment_entries.pl
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,13 @@ =head1 AUTHOR
close($fh);

eval {
if ( scalar(@nd_experiment_ids) > 0 ) {
my $nd_experiment_ids_string = join ",", @nd_experiment_ids;
my $q = "DELETE FROM nd_experiment WHERE nd_experiment_id IN ($nd_experiment_ids_string)";
my $h = $dbh->prepare($q);
$h->execute();
print STDERR "DELETED ".scalar(@nd_experiment_ids)." Nd Experiment Entries\n";
}
print STDERR "DELETED ".scalar(@nd_experiment_ids)." Nd Experiment Entries\n";
};

if ($@) {
Expand Down
120 changes: 120 additions & 0 deletions bin/extract_pheno_zeros.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/perl

=head1 NAME

extract_pheno_zeros.pl - find all 0 phenotype values in archived uploads and generate a CSV file to re-upload

=head1 DESCRIPTION

extract_pheno_zeros.pl -H [database host] -D [database name] -U [database user] -P [database pass] -s [start date YYYY-MM-DD] -o [output csv file]

Options:

-H the database host
-D the database name
-U username
-P password
-s start date YYYY-MM-DD (default = 2024-06-11)
-o output .csv file

=head1 AUTHOR

David Waring <[email protected]>

=cut

use strict;
use warnings;
use DBI;
use Try::Tiny;
use Getopt::Long;
use Data::Dumper;
use CXGN::File::Parse;

# Parse the command-line options: database connection settings (-H, -D, -U, -P),
# the start date (-s, with -d accepted as an alias) and the output file (-o).
my ( $dbhost, $dbname, $username, $password, $date, $output );
my $options_ok = GetOptions(
    'H=s'   => \$dbhost,
    'D=s'   => \$dbname,
    'U=s'   => \$username,
    'P=s'   => \$password,
    's|d=s' => \$date,
    'o=s'   => \$output,
);

# GetOptions returns false (after printing its own warning) when it sees an
# unknown or malformed option. Stop here rather than silently continuing with
# defaults the user did not ask for (e.g. the default start date).
if ( !$options_ok ) {
    print STDERR "ERROR: Invalid command line options\n";
    exit 1;
}
if ( !$dbhost || !$dbname || !$username || !$password ) {
    print STDERR "ERROR: Missing either -H dbhost -D dbname -U username or -P password\n";
    exit 1;
}
if ( !$output ) {
    print STDERR "ERROR: Missing output .csv file\n";
    exit 1;
}

# Fall back to the default start date when none (or an empty one) was given
if ( !$date ) {
    $date = "2024-06-11";
}

# Connect to the database. AutoCommit is off, so the file lookup below is
# committed explicitly once it has been executed.
print STDERR "Connecting to database...\n";
my $dsn = 'dbi:Pg:database='.$dbname.";host=".$dbhost.";port=5432";
my $dbh = DBI->connect($dsn, $username, $password, { RaiseError => 1, AutoCommit=>0 });

# Get all phenotype files from the metadata since the start date
my $q = "SELECT dirname || '/' || basename
FROM metadata.md_files
LEFT JOIN metadata.md_metadata ON (md_files.metadata_id = md_metadata.metadata_id)
WHERE md_files.filetype = 'spreadsheet phenotype file' AND md_metadata.create_date > ?
ORDER BY create_date ASC;";
my $h = $dbh->prepare($q);
$h->execute($date);
$dbh->commit();

# Zero observations found so far: $data{observation unit name}{trait} = '0'
my %data;
# Traits with at least one zero observation (these become the output columns)
my %traits;

# Check each file for 0s
while ( my ($file) = $h->fetchrow_array() ) {

    # The path is built by SQL concatenation, which yields NULL if a part is NULL
    next if !defined($file);
    print STDERR "==> Checking File: $file\n";

    # Parse inside an eval so that one file that makes the parser throw does
    # not abort the scan of every remaining file.
    my $parsed;
    my $parse_ok = eval {
        my $parser = CXGN::File::Parse->new(
            file => $file,
            required_columns => [ 'observationunit_name' ],
            column_aliases => {
                'observationunit_name' => [ 'plot_name', 'subplot_name', 'plant_name', 'observationUnitName', 'plotName', 'subplotName', 'plantName' ]
            }
        );
        $parsed = $parser->parse();
        1;
    };
    if ( !$parse_ok || ref($parsed) ne 'HASH' ) {
        my $error = $parse_ok ? 'parser did not return a result' : ( "$@" || 'unknown error' );
        $error =~ s/\s+$//;
        print STDERR "WARNING: Skipping file $file: $error\n";
        next;
    }

    # Report problems the parser recorded, but still use whatever it parsed
    if ( ref($parsed->{errors}) eq 'ARRAY' ) {
        foreach my $parse_error ( @{ $parsed->{errors} } ) {
            print STDERR "WARNING: $parse_error\n";
        }
    }

    my $parsed_data   = ref($parsed->{data}) eq 'ARRAY' ? $parsed->{data} : [];
    my $trait_columns = ref($parsed->{optional_columns}) eq 'ARRAY' ? $parsed->{optional_columns} : [];

    foreach my $row (@$parsed_data) {
        my $ou = $row->{'observationunit_name'};

        # A zero cannot be attributed to anything without an observation unit name
        next if !defined($ou) || $ou eq '';

        foreach my $trait (@$trait_columns) {
            my $value = $row->{$trait};

            # Only the exact string '0' is of interest: it is the one non-empty
            # value that Perl treats as false in a plain truth test.
            if ( defined($value) && $value eq '0' ) {
                print STDERR "$ou | $trait = 0\n";
                $traits{$trait} = 1;
                $data{$ou}{$trait} = '0';
            }
        }
    }
}

# All rows have been read; release the statement and close the connection
# explicitly instead of leaving it to be torn down at program exit.
$h->finish();
$dbh->disconnect();

# Return a single value formatted as a CSV field: values containing a comma,
# double quote, CR or LF are wrapped in double quotes with embedded quotes
# doubled; all other values are returned unchanged. Undef becomes ''.
sub _csv_field {
    my ($field) = @_;
    $field = '' if !defined($field);
    if ( $field =~ /[",\r\n]/ ) {
        $field =~ s/"/""/g;
        $field = '"' . $field . '"';
    }
    return $field;
}

# Generate output CSV data
# Trait columns are sorted once so the header and every row share one
# deterministic column order.
my @trait_names = sort keys %traits;
my @output;
push @output, join(',', map { _csv_field($_) } ( 'observationunit_name', @trait_names ));
foreach my $ou (sort keys %data) {
    my @line = ($ou);
    foreach my $trait (@trait_names) {
        my $value = $data{$ou}{$trait};

        # Leave the cell blank when this unit had no zero for this trait
        push @line, defined($value) ? $value : '';
    }
    push @output, join(',', map { _csv_field($_) } @line);
}


# Write CSV to file
open my $fh, '>', $output or die "Cannot open output file: $!";
foreach my $csv_line (@output) {
    print $fh "$csv_line\n";
}
# Buffered write errors (e.g. a full disk) only surface when the handle is closed
close $fh or die "Cannot close output file: $!";
5 changes: 3 additions & 2 deletions lib/CXGN/File/Parse.pm
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ be treated as management factors / treatments.
use Moose;
use Try::Tiny;
use Module::Pluggable require => 1;
use Data::Dumper;

# Path to the file that is being parsed
has 'file' => (
Expand Down Expand Up @@ -391,7 +392,7 @@ sub parse {
foreach my $d (@$data) {
foreach my $c ( @{$parsed->{required_columns}} ) {
my $v = $d->{$c};
if ( !$v || $v eq '' ) {
if ( !defined($v) || $v eq '' ) {
my $r = $d->{_row};
push @{$parsed->{errors}}, "Required column $c does not have a value in row $r";
}
Expand Down Expand Up @@ -463,7 +464,7 @@ sub clean_value {
my $column_arrays = $self->column_arrays();

# trim whitespace
if ( $value && $value ne '' ) {
if ( defined($value) && $value ne '' ) {
$value =~ s/^\s+|\s+$//g;
}

Expand Down
5 changes: 4 additions & 1 deletion lib/CXGN/File/Parse/Plugin/Plain.pm
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package CXGN::File::Parse::Plugin::Plain;

use strict;

use Data::Dumper;
use CXGN::File::Parse;
use Text::CSV;

Expand Down Expand Up @@ -87,7 +90,7 @@ sub parse {
$v = $super->clean_value($v, $h);
$row_info{$h} = $v;

if ( $v && $v ne '' ) {
if ( defined($v) && $v ne '' ) {
if ( ref($v) eq 'ARRAY' ) {
if ( scalar(@$v) > 0 ) {
foreach (@$v) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ sub parse {
$cell_val = $worksheet->get_cell(0,$col)->value();
$cell_val =~ s/^\s+|\s+$//g;
}
if ($cell_val) {
if ($cell_val || $cell_val == 0) {
$header_column_info{$cell_val} = $col;
$traits_seen{$cell_val} = 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ sub parse {
if ($trait_name) {
$traits_seen{$trait_name} = 1;
my $value_string = '';
if ($columns[$col_num]){
if ($columns[$col_num] || $columns[$col_num] == 0){
$value_string = $columns[$col_num];
}
#print STDERR $value_string."\n";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@ sub parse {
my $observationunit_name = $row->{'observationunit_name'};

for my $trait_name (@$trait_columns) {
my $value_string = $row->{$trait_name} || '';
my $value_string = defined($row->{$trait_name}) ? $row->{$trait_name} : '';
my $timestamp = '';
my $trait_value = '';
my $trait_value = undef;
if ($timestamp_included){
($trait_value, $timestamp) = split /,/, $value_string;
} else {
Expand Down
Loading