Skip to content

Commit 74020ed

Browse files
committed
updated some distance cutoff defaults. added pair filtering by 3D distance. added average distance measure for preprocessed data
1 parent 67a72a6 commit 74020ed

File tree

9 files changed

+112
-41
lines changed

9 files changed

+112
-41
lines changed
8.27 MB
Binary file not shown.

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Usage
77
-----
88

99
Program: HotSpot3D - 3D mutation proximity analysis program.
10-
Version: V0.5.5
10+
Version: V0.6.0
1111
Author: Beifang Niu, John Wallis, Adam D Scott, & Sohini Sengupta
1212

1313
Usage: hotspot3d <command> [options]

bin/hotspot3d

+22-19
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
use strict;
99
use warnings;
1010

11-
our $VERSION = 'V0.5.5';
11+
our $VERSION = 'V0.6.0';
1212

1313
use Carp;
1414
use FileHandle;
@@ -32,34 +32,36 @@ use TGI::Mutpro::Preprocess::Anno;
3232
use TGI::Mutpro::Preprocess::Cosmic;
3333
use TGI::Mutpro::Preprocess::Prior;
3434
use TGI::Mutpro::Preprocess::Homolog;
35+
#use TGI::Mutpro::Preprocess::AllPreprocess;
3536

3637
my $subCmd = shift;
3738
## Add module option here
38-
my %cmds = map{ ($_, 1) } qw( search post visual cluster sigclus summary drugport uppro calpro calroi statis anno trans homo cosmic prior help );
39+
my %cmds = map{ ($_, 1) } qw( search post visual cluster sigclus summary drugport uppro calpro calroi statis anno trans homo cosmic prior prep help );
3940
unless (defined $subCmd) { die help_text(); };
4041
unless (exists $cmds{$subCmd}) {
4142
warn ' Please give valid sub command ! ', "\n";
4243
die help_text();
4344
}
4445
SWITCH:{
4546
## Add module action here
46-
$subCmd eq 'search' && do { TGI::Mutpro::Main::Proximity->new(); last SWITCH; };
47-
$subCmd eq 'post' && do { TGI::Mutpro::Main::Post->new(); last SWITCH; };
48-
$subCmd eq 'visual' && do { TGI::Mutpro::Main::Visual->new(); last SWITCH; };
49-
$subCmd eq 'cluster' && do { TGI::Mutpro::Main::Cluster->new(); last SWITCH; };
50-
$subCmd eq 'sigclus' && do { TGI::Mutpro::Main::Significance->new(); last SWITCH; };
51-
$subCmd eq 'summary' && do { TGI::Mutpro::Main::Summary->new(); last SWITCH; };
52-
$subCmd eq 'drugport' && do { TGI::Mutpro::Preprocess::Drugport->new(); last SWITCH; };
53-
$subCmd eq 'uppro' && do { TGI::Mutpro::Preprocess::Uppro->new(); last SWITCH; };
54-
$subCmd eq 'calpro' && do { TGI::Mutpro::Preprocess::Calpro->new(); last SWITCH; };
55-
$subCmd eq 'calroi' && do { TGI::Mutpro::Preprocess::Calroi->new(); last SWITCH; };
56-
$subCmd eq 'statis' && do { TGI::Mutpro::Preprocess::Statis->new(); last SWITCH; };
57-
$subCmd eq 'anno' && do { TGI::Mutpro::Preprocess::Anno->new(); last SWITCH; };
58-
$subCmd eq 'trans' && do { TGI::Mutpro::Preprocess::Trans->new(); last SWITCH; };
59-
$subCmd eq 'homo' && do { TGI::Mutpro::Preprocess::Homolog->new(); last SWITCH; };
60-
$subCmd eq 'cosmic' && do { TGI::Mutpro::Preprocess::Cosmic->new(); last SWITCH; };
61-
$subCmd eq 'prior' && do { TGI::Mutpro::Preprocess::Prior->new(); last SWITCH; };
62-
$subCmd eq 'help' && do { die help_text(); last SWITCH; };
47+
$subCmd eq 'search' && do { TGI::Mutpro::Main::Proximity->new(); last SWITCH; };
48+
$subCmd eq 'post' && do { TGI::Mutpro::Main::Post->new(); last SWITCH; };
49+
$subCmd eq 'visual' && do { TGI::Mutpro::Main::Visual->new(); last SWITCH; };
50+
$subCmd eq 'cluster' && do { TGI::Mutpro::Main::Cluster->new(); last SWITCH; };
51+
$subCmd eq 'sigclus' && do { TGI::Mutpro::Main::Significance->new(); last SWITCH; };
52+
$subCmd eq 'summary' && do { TGI::Mutpro::Main::Summary->new(); last SWITCH; };
53+
$subCmd eq 'drugport' && do { TGI::Mutpro::Preprocess::Drugport->new(); last SWITCH; };
54+
$subCmd eq 'uppro' && do { TGI::Mutpro::Preprocess::Uppro->new(); last SWITCH; };
55+
$subCmd eq 'calpro' && do { TGI::Mutpro::Preprocess::Calpro->new(); last SWITCH; };
56+
$subCmd eq 'calroi' && do { TGI::Mutpro::Preprocess::Calroi->new(); last SWITCH; };
57+
$subCmd eq 'statis' && do { TGI::Mutpro::Preprocess::Statis->new(); last SWITCH; };
58+
$subCmd eq 'anno' && do { TGI::Mutpro::Preprocess::Anno->new(); last SWITCH; };
59+
$subCmd eq 'trans' && do { TGI::Mutpro::Preprocess::Trans->new(); last SWITCH; };
60+
$subCmd eq 'homo' && do { TGI::Mutpro::Preprocess::Homolog->new(); last SWITCH; };
61+
$subCmd eq 'cosmic' && do { TGI::Mutpro::Preprocess::Cosmic->new(); last SWITCH; };
62+
$subCmd eq 'prior' && do { TGI::Mutpro::Preprocess::Prior->new(); last SWITCH; };
63+
# $subCmd eq 'prep' && do { TGI::Mutpro::Preprocess::AllPreprocess->new(); last SWITCH; };
64+
$subCmd eq 'help' && do { die help_text(); last SWITCH; };
6365
}
6466
sub help_text {
6567
## Add module help here
@@ -71,6 +73,7 @@ Version: $VERSION
7173
Usage: hotspot3d <command> [options]
7274
7375
Preprocessing
76+
prep -- Preprocessing steps 1-7
7477
7578
drugport -- 0) Parse drugport database (OPTIONAL)
7679
uppro -- 1) Update proximity files

dist.ini

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = HotSpot3D
22
author = Beifang Niu, John Wallis, Adam D Scott, & Sohini Sengupta from McDonnell Genome Institute of Washington University at St. Louis
3-
version = 0.5.5
3+
version = 0.6.0
44
license = Perl_5
55
copyright_holder = McDonnell Genome Institute at Washington University
66
copyright_year = 2013

lib/TGI/Mutpro/Main/Cluster.pm

+55-8
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ use Data::Dumper;
2727
my $WEIGHT = "weight";
2828
my $RECURRENCE = "recurrence";
2929
my $UNIQUE = "unique";
30+
my $PVALUEDEFAULT = 0.05;
31+
my $DISTANCEDEFAULT = 10;
32+
my $MAXDISTANCE = 100;
3033

3134
sub new {
3235
my $class = shift;
@@ -35,7 +38,8 @@ sub new {
3538
$this->{'collapsed_file'} = '3D_Proximity.pairwise.singleprotein.collapsed';
3639
$this->{'drug_clean_file'} = undef;
3740
$this->{'output_prefix'} = undef;
38-
$this->{'p_value_cutoff'} = 0.05;
41+
$this->{'p_value_cutoff'} = undef;
42+
$this->{'3d_distance_cutoff'} = undef;
3943
$this->{'linear_cutoff'} = 0;
4044
$this->{'max_radius'} = 10;
4145
$this->{'vertex_type'} = $RECURRENCE;
@@ -58,6 +62,7 @@ sub process {
5862
'collapsed-file=s' => \$this->{'collapsed_file'},
5963
'drug-clean-file=s' => \$this->{'drug_clean_file'},
6064
'p-value-cutoff=f' => \$this->{'p_value_cutoff'},
65+
'3d-distance-cutoff=f' => \$this->{'3d_distance_cutoff'},
6166
'linear-cutoff=f' => \$this->{'linear_cutoff'},
6267
'max-radius=f' => \$this->{'max_radius'},
6368
'vertex-type=s' => \$this->{'vertex_type'},
@@ -69,6 +74,19 @@ sub process {
6974
);
7075
if ( $help ) { print STDERR help_text(); exit 0; }
7176
unless( $options ) { die $this->help_text(); }
77+
if ( not defined $this->{'p_value_cutoff'} ) {
78+
if ( not defined $this->{'3d_distance_cutoff'} ) {
79+
warn "HotSpot3D::Cluster warning: no pair distance limit given, setting to default p-value cutoff = 0.05\n";
80+
$this->{'p_value_cutoff'} = $PVALUEDEFAULT;
81+
$this->{'3d_distance_cutoff'} = $MAXDISTANCE;
82+
} else {
83+
$this->{'p_value_cutoff'} = 1;
84+
}
85+
} else {
86+
if ( not defined $this->{'3d_distance_cutoff'} ) {
87+
$this->{'3d_distance_cutoff'} = $MAXDISTANCE;
88+
}
89+
}
7290
if ( ( not defined $this->{'collapsed_file'} ) and ( not defined $this->{'drug_clean_file'} ) ) {
7391
warn 'You must provide a collapsed pairs file or drug pairs file! ', "\n";
7492
die $this->help_text();
@@ -184,8 +202,8 @@ sub process {
184202
$second = $gene2.":".$m2;
185203
push @mutations , $second; #@mus2;
186204
my ( $dist , $pval ) = split ":" , $master{$first}{$second};
187-
$this->AHC( $pval , $this->{'p_value_cutoff'} , \%clusterings , \@mutations );
188-
if ( $pval < $this->{'p_value_cutoff'} ) {
205+
$this->AHC( $pval , $dist , \%clusterings , \@mutations );
206+
if ( $pval < $this->{'p_value_cutoff'} or $dist < $this->{'3d_distance_cutoff'} ) {
189207
$distance_matrix{$first}{$second} = $dist;
190208
$distance_matrix{$second}{$first} = $dist;
191209
}
@@ -274,7 +292,7 @@ sub process {
274292
} #foreach transcript representation of mutations
275293
my @mutations = @gm1;
276294
push @mutations , @gm2;
277-
$this->AHC( $pval , $this->{'p_value_cutoff'} , \%clusterings , \@mutations );
295+
$this->AHC( $pval , $dist , \%clusterings , \@mutations );
278296
} $fh->getlines;
279297
$fh->close();
280298
} #if using collapsed pairs file
@@ -399,7 +417,18 @@ sub process {
399417
}
400418
}
401419
push @outFilename , $this->{'linear_cutoff'};
402-
push @outFilename , $this->{'p_value_cutoff'};
420+
if ( $this->{'3d_distance_cutoff'} != $MAXDISTANCE ) {
421+
if ( $this->{'p_value_cutoff'} != 1 ) {
422+
push @outFilename , $this->{'p_value_cutoff'};
423+
push @outFilename , $this->{'3d_distance_cutoff'};
424+
} else {
425+
push @outFilename , $this->{'3d_distance_cutoff'};
426+
}
427+
} else {
428+
if ( $this->{'p_value_cutoff'} != 1 ) {
429+
push @outFilename , $this->{'p_value_cutoff'};
430+
}
431+
}
403432
push @outFilename , $this->{'max_radius'};
404433
}
405434
push @outFilename , "clusters";
@@ -541,8 +570,8 @@ sub centroid{
541570

542571
## CLUSTERING FUNCTION - AGGLOMERATIVE HIERARCHICAL CLUSTERING (AHC)
543572
sub AHC {
544-
my ( $this, $pval , $pthreshold , $clusterings , $mutations ) = @_;
545-
if ( $pval < $pthreshold ) { #meets desired significance
573+
my ( $this, $pval , $dist , $clusterings , $mutations ) = @_;
574+
if ( $pval < $this->{'p_value_cutoff'} or $dist < $this->{'3d_distance_cutoff'} ) { #meets desired significance
546575
my ( @temp, @found, @combine );
547576
my ( @uniq, $c );
548577
foreach $c ( keys %{$clusterings} ) { #each cluster
@@ -661,6 +690,23 @@ sub getTranscriptInfo {
661690
return ( $reportedTranscript , $altTranscript , $chromosome , $start , $stop );
662691
}
663692

693+
sub checkPair {
694+
my ( $this , $dist , $pval ) = @_;
695+
if ( $this->{'3d_distance_cutoff'} == $MAXDISTANCE ) {
696+
if ( $pval < $this->{'p_value_cutoff'} ) {
697+
return 1;
698+
}
699+
} elsif ( $this->{'p_value_cutoff'} == 1 ) {
700+
if ( $dist < $this->{'3d_distance_cutoff'} ) {
701+
return 1;
702+
}
703+
} else {
704+
if ( $dist < $this->{'3d_distance_cutoff'} and $pval < $this->{'p_value_cutoff'} ) {
705+
return 1;
706+
}
707+
}
708+
return 0;
709+
}
664710

665711

666712
sub help_text{
@@ -678,7 +724,8 @@ Usage: hotspot3d cluster [options]
678724
679725
OPTIONAL
680726
--output-prefix Output prefix, default: 3D_Proximity
681-
--p-value-cutoff P_value cutoff (<), default: 0.05
727+
--p-value-cutoff P_value cutoff (<), default: 0.05 (if 3d-distance-cutoff also not set)
728+
--3d-distance-cutoff 3D distance cutoff (<), default: 100 (if p-value-cutoff also not set)
682729
--linear-cutoff Linear distance cutoff (> peptides), default: 20
683730
--max-radius Maximum cluster radius (max network geodesic from centroid, <= Angstroms), default: 10
684731
--vertex-type Graph vertex type (recurrence, unique, or weight), default: recurrence

lib/TGI/Mutpro/Main/Proximity.pm

+24-6
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ use FileHandle;
1818

1919
use TGI::Mutpro::Preprocess::AminoAcid;
2020

21+
my $PVALUEDEFAULT = 0.05;
22+
my $DISTANCEDEFAULT = 10;
23+
my $MAXDISTANCE = 100;
24+
2125
sub new {
2226
my $class = shift;
2327
my $this = {};
@@ -27,9 +31,9 @@ sub new {
2731
$this->{'data_dir'} = undef;
2832
$this->{'drugport_file'} = undef;
2933
$this->{'output_prefix'} = '3D_Proximity';
30-
$this->{'pvalue_cutoff'} = 0.05;
31-
$this->{'3d_distance_cutoff'} = 10;
32-
$this->{'linear_cutoff'} = 20;
34+
$this->{'pvalue_cutoff'} = undef;
35+
$this->{'3d_distance_cutoff'} = undef;
36+
$this->{'linear_cutoff'} = 0;
3337
$this->{'stat'} = undef;
3438
$this->{'acceptable_types'} = undef;
3539
$this->{'amino_acid_header'} = "amino_acid_change";
@@ -59,6 +63,19 @@ sub process {
5963
);
6064
if ( $help ) { print STDERR help_text(); exit 0; }
6165
unless( $options ) { die $this->help_text(); }
66+
if ( not defined $this->{'p_value_cutoff'} ) {
67+
if ( not defined $this->{'3d_distance_cutoff'} ) {
68+
warn "HotSpot3D::Cluster warning: no pair distance limit given, setting to default p-value cutoff = 0.05\n";
69+
$this->{'p_value_cutoff'} = $PVALUEDEFAULT;
70+
$this->{'3d_distance_cutoff'} = $MAXDISTANCE;
71+
} else {
72+
$this->{'p_value_cutoff'} = 1;
73+
}
74+
} else {
75+
if ( not defined $this->{'3d_distance_cutoff'} ) {
76+
$this->{'3d_distance_cutoff'} = $MAXDISTANCE;
77+
}
78+
}
6279
unless( $this->{'data_dir'} ) { warn 'You must provide a output directory ! ', "\n"; die help_text(); }
6380
unless( -d $this->{'data_dir'} ) { warn 'You must provide a valid data directory ! ', "\n"; die help_text(); }
6481
unless( $this->{'maf'} and (-e $this->{'maf'}) ) { warn 'You must provide a MAF format file ! ', "\n"; die $this->help_text(); }
@@ -299,9 +316,10 @@ sub proximitySearching {
299316
$uid2, $chain2, $pdbcor2, $offset2, $residue2, $domain2, $cosmic2,
300317
$proximityinfor ) = @ta;
301318
if ( $drugportref ) {
302-
if ( $AA->filterWater( $residue1 ) and $AA->filterWater( $residue2 ) ) { next; }
319+
unless ( $AA->filterWater( $residue1 ) and $AA->filterWater( $residue2 ) ) { next; }
303320
} else {
304-
if ( $AA->checkAA( $residue1 ) and $AA->checkAA( $residue2 ) ) { next; }
321+
unless ( $AA->filterNonAA( $residue1 ) and $AA->filterNonAA( $residue2 ) ) {
322+
print "bad AA pair: ".$residue1." - ".$residue2."\n"; next; }
305323
}
306324
my $uniprotcor1 = $pdbcor1 + $offset1;
307325
my $uniprotcor2 = $pdbcor2 + $offset2;
@@ -524,7 +542,7 @@ Usage: hotspot3d search [options]
524542
--missense-only missense mutation only, default: no
525543
--p-value-cutoff p_value cutoff(<=), default: 0.05
526544
--3d-distance-cutoff 3D distance cutoff (<=), default: 10
527-
--linear-cutoff Linear distance cutoff (>= peptides), default: 20
545+
--linear-cutoff Linear distance cutoff (>= peptides), default: 0
528546
--transcript-id-header MAF file column header for transcript id's, default: transcript_name
529547
--amino-acid-header MAF file column header for amino acid changes, default: amino_acid_change
530548

lib/TGI/Mutpro/Preprocess/AminoAcid.pm

+1-1
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ sub minDistance {
155155

156156
sub filterWater {
157157
my ( $this , $residue ) = @_;
158-
if ( $residue eq "HOH" ) {
158+
if ( $residue ne "HOH" ) {
159159
return 1;
160160
}
161161
return 0;

lib/TGI/Mutpro/Preprocess/Calpro.pm

+4-3
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ sub process {
101101
unless(defined $this->{'uniprot_id'}) { warn 'You must provide a Uniprot ID !', "\n"; die $this->help_text(); }
102102
unless( $this->{'output_dir'} and (-e $this->{'output_dir'} ) ) { warn 'You must provide a output directory ! ', "\n"; die $this->help_text(); }
103103
unless( $this->{'pdb_file_dir'} and (-e $this->{'pdb_file_dir'}) ) { warn 'You must provide a PDB file directory ! ', "\n"; die $this->help_text(); }
104-
if ( $this->{'distance_measure'} eq $MINDISTANCE or $this->{'distance_measure'} eq $AVGDISTANCE ) {
104+
if ( $this->{'distance_measure'} ne $MINDISTANCE and $this->{'distance_measure'} ne $AVGDISTANCE ) {
105105
warn "HotSpot3D::Calpro warning: measure not recognized, resetting to default = averageDistance\n";
106106
$this->{'distance_measure'} = $AVGDISTANCE;
107107
}
@@ -323,9 +323,9 @@ sub writeProximityFile {
323323
# is not close to the amino acid at '$residuePosition'
324324
# of peptide chain '$uniprotChain'
325325
$aaObjRef = $$peptideRef{$chain}->getAminoAcidObject($position);
326-
if ( $this->{'distance_measure'} == $MINDISTANCE ) {
326+
if ( $this->{'distance_measure'} eq $MINDISTANCE ) {
327327
$distanceBetweenResidues = $$aaObjRef->minDistance($uniprotAminoAcidRef);
328-
} elsif ( $this->{'distance_measure'} == $AVGDISTANCE ) {
328+
} elsif ( $this->{'distance_measure'} eq $AVGDISTANCE ) {
329329
$distanceBetweenResidues = $$aaObjRef->averageDistance($uniprotAminoAcidRef);
330330
} else {
331331
$distanceBetweenResidues = $$aaObjRef->averageDistance($uniprotAminoAcidRef);
@@ -456,6 +456,7 @@ sub checkOffsets {
456456
$aminoAcidA = TGI::Mutpro::Preprocess::PdbStructure::convertAA( $aminoAcidA );
457457
$aminoAcidB = TGI::Mutpro::Preprocess::PdbStructure::convertAA( $aminoAcidB );
458458
next if ( !defined $aminoAcidA || !defined $aminoAcidB );
459+
#next unless ( TGI::Mutpro::Preprocess::AminoAcid::checkAA( $aminoAcidA ) and TGI::Mutpro::Preprocess::AminoAcid::checkAA( $aminoAcidB )
459460
if ( defined $pdbUniprotPosition{$pdbId}{$uniprotA}{$positionA+$offsetA} && $pdbUniprotPosition{$pdbId}{$uniprotA}{$positionA+$offsetA} ne $aminoAcidA ) {
460461
print $coorfh "Inconsistent amino acids for $uniprotA position $positionA+$offsetA in $pdbId: '$pdbUniprotPosition{$pdbId}{$uniprotA}{$positionA+$offsetA}' and $aminoAcidA \n";
461462
}

lib/TGI/Mutpro/Preprocess/Peptide.pm

+4-2
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,9 @@ sub addAminoAcid {
7777
# ref to AminoAcid object
7878
my $self = shift;
7979
my ($position, $aaRef) = @_;
80-
${$self->{AA}}{$position} = $aaRef;
80+
if ( $aaRef->filterNonAA( $aaRef->name() ) ) {
81+
${$self->{AA}}{$position} = $aaRef;
82+
}
8183
}
8284

8385
sub getAminoAcidObject {
@@ -106,5 +108,5 @@ sub aminoAcidPositionNumbers {
106108
return \@positions;
107109
}
108110

109-
return 1;
111+
1;
110112

0 commit comments

Comments
 (0)