@@ -27,6 +27,9 @@ use Data::Dumper;
27
27
my $WEIGHT = " weight" ;
28
28
my $RECURRENCE = " recurrence" ;
29
29
my $UNIQUE = " unique" ;
30
+ my $PVALUEDEFAULT = 0.05;
31
+ my $DISTANCEDEFAULT = 10;
32
+ my $MAXDISTANCE = 100;
30
33
31
34
sub new {
32
35
my $class = shift ;
@@ -35,7 +38,8 @@ sub new {
35
38
$this -> {' collapsed_file' } = ' 3D_Proximity.pairwise.singleprotein.collapsed' ;
36
39
$this -> {' drug_clean_file' } = undef ;
37
40
$this -> {' output_prefix' } = undef ;
38
- $this -> {' p_value_cutoff' } = 0.05;
41
+ $this -> {' p_value_cutoff' } = undef ;
42
+ $this -> {' 3d_distance_cutoff' } = undef ;
39
43
$this -> {' linear_cutoff' } = 0;
40
44
$this -> {' max_radius' } = 10;
41
45
$this -> {' vertex_type' } = $RECURRENCE ;
@@ -58,6 +62,7 @@ sub process {
58
62
' collapsed-file=s' => \$this -> {' collapsed_file' },
59
63
' drug-clean-file=s' => \$this -> {' drug_clean_file' },
60
64
' p-value-cutoff=f' => \$this -> {' p_value_cutoff' },
65
+ ' 3d-distance-cutoff=f' => \$this -> {' 3d_distance_cutoff' },
61
66
' linear-cutoff=f' => \$this -> {' linear_cutoff' },
62
67
' max-radius=f' => \$this -> {' max_radius' },
63
68
' vertex-type=s' => \$this -> {' vertex_type' },
@@ -69,6 +74,19 @@ sub process {
69
74
);
70
75
if ( $help ) { print STDERR help_text(); exit 0; }
71
76
unless ( $options ) { die $this -> help_text(); }
77
+ if ( not defined $this -> {' p_value_cutoff' } ) {
78
+ if ( not defined $this -> {' 3d_distance_cutoff' } ) {
79
+ warn " HotSpot3D::Cluster warning: no pair distance limit given, setting to default p-value cutoff = 0.05\n " ;
80
+ $this -> {' p_value_cutoff' } = $PVALUEDEFAULT ;
81
+ $this -> {' 3d_distance_cutoff' } = $MAXDISTANCE ;
82
+ } else {
83
+ $this -> {' p_value_cutoff' } = 1;
84
+ }
85
+ } else {
86
+ if ( not defined $this -> {' 3d_distance_cutoff' } ) {
87
+ $this -> {' 3d_distance_cutoff' } = $MAXDISTANCE ;
88
+ }
89
+ }
72
90
if ( ( not defined $this -> {' collapsed_file' } ) and ( not defined $this -> {' drug_clean_file' } ) ) {
73
91
warn ' You must provide a collapsed pairs file or drug pairs file! ' , " \n " ;
74
92
die $this -> help_text();
@@ -184,8 +202,8 @@ sub process {
184
202
$second = $gene2 ." :" .$m2 ;
185
203
push @mutations , $second ; # @mus2;
186
204
my ( $dist , $pval ) = split " :" , $master {$first }{$second };
187
- $this -> AHC( $pval , $this -> { ' p_value_cutoff ' } , \%clusterings , \@mutations );
188
- if ( $pval < $this -> {' p_value_cutoff' } ) {
205
+ $this -> AHC( $pval , $dist , \%clusterings , \@mutations );
206
+ if ( $pval < $this -> {' p_value_cutoff' } or $dist < $this -> { ' 3d_distance_cutoff ' } ) {
189
207
$distance_matrix {$first }{$second } = $dist ;
190
208
$distance_matrix {$second }{$first } = $dist ;
191
209
}
@@ -274,7 +292,7 @@ sub process {
274
292
} # foreach transcript representation of mutations
275
293
my @mutations = @gm1 ;
276
294
push @mutations , @gm2 ;
277
- $this -> AHC( $pval , $this -> { ' p_value_cutoff ' } , \%clusterings , \@mutations );
295
+ $this -> AHC( $pval , $dist , \%clusterings , \@mutations );
278
296
} $fh -> getlines;
279
297
$fh -> close ();
280
298
} # if using collapsed pairs file
@@ -399,7 +417,18 @@ sub process {
399
417
}
400
418
}
401
419
push @outFilename , $this -> {' linear_cutoff' };
402
- push @outFilename , $this -> {' p_value_cutoff' };
420
+ if ( $this -> {' 3d_distance_cutoff' } != $MAXDISTANCE ) {
421
+ if ( $this -> {' p_value_cutoff' } != 1 ) {
422
+ push @outFilename , $this -> {' p_value_cutoff' };
423
+ push @outFilename , $this -> {' 3d_distance_cutoff' };
424
+ } else {
425
+ push @outFilename , $this -> {' 3d_distance_cutoff' };
426
+ }
427
+ } else {
428
+ if ( $this -> {' p_value_cutoff' } != 1 ) {
429
+ push @outFilename , $this -> {' p_value_cutoff' };
430
+ }
431
+ }
403
432
push @outFilename , $this -> {' max_radius' };
404
433
}
405
434
push @outFilename , " clusters" ;
@@ -541,8 +570,8 @@ sub centroid{
541
570
542
571
# # CLUSTERING FUNCTION - AGGLOMERATIVE HIERARCHICAL CLUSTERING (AHC)
543
572
sub AHC {
544
- my ( $this , $pval , $pthreshold , $clusterings , $mutations ) = @_ ;
545
- if ( $pval < $pthreshold ) { # meets desired significance
573
+ my ( $this , $pval , $dist , $clusterings , $mutations ) = @_ ;
574
+ if ( $pval < $this -> { ' p_value_cutoff ' } or $dist < $this -> { ' 3d_distance_cutoff ' } ) { # meets desired significance
546
575
my ( @temp , @found , @combine );
547
576
my ( @uniq , $c );
548
577
foreach $c ( keys %{$clusterings } ) { # each cluster
@@ -661,6 +690,23 @@ sub getTranscriptInfo {
661
690
return ( $reportedTranscript , $altTranscript , $chromosome , $start , $stop );
662
691
}
663
692
693
## PAIR FILTER - decide whether a mutation pair passes the configured cutoffs
# Arguments:
#   $this - object holding the p-value and 3D distance cutoffs
#   $dist - 3D distance of the pair
#   $pval - p-value of the pair
# Returns 1 when the pair passes, 0 otherwise.
# Which test is applied depends on the stored cutoffs:
#   * distance cutoff equal to $MAXDISTANCE -> only the p-value is tested
#   * otherwise, p-value cutoff equal to 1  -> only the distance is tested
#   * otherwise                             -> both must pass (strict <)
sub checkPair {
    my ( $this, $dist, $pval ) = @_;
    # NOTE(review): hash keys are kept exactly as spelled in this file.
    my $distanceCutoff = $this->{' 3d_distance_cutoff'};

    # Distance cutoff left at the maximum: significance alone decides.
    if ( $distanceCutoff == $MAXDISTANCE ) {
        return ( $pval < $this->{' p_value_cutoff'} ) ? 1 : 0;
    }

    my $pvalueCutoff = $this->{' p_value_cutoff'};

    # P-value cutoff of 1: distance alone decides.
    if ( $pvalueCutoff == 1 ) {
        return ( $dist < $distanceCutoff ) ? 1 : 0;
    }

    # Both cutoffs are in effect: the pair has to satisfy each of them.
    my $passes = ( $dist < $distanceCutoff and $pval < $pvalueCutoff );
    return $passes ? 1 : 0;
}
664
710
665
711
666
712
sub help_text{
@@ -678,7 +724,8 @@ Usage: hotspot3d cluster [options]
678
724
679
725
OPTIONAL
680
726
--output-prefix Output prefix, default: 3D_Proximity
681
- --p-value-cutoff P_value cutoff (<), default: 0.05
727
+ --p-value-cutoff P_value cutoff (<), default: 0.05 (if 3d-distance-cutoff also not set)
728
+ --3d-distance-cutoff 3D distance cutoff (<), default: 100 (if p-value-cutoff also not set)
682
729
--linear-cutoff Linear distance cutoff (> peptides), default: 20
683
730
--max-radius Maximum cluster radius (max network geodesic from centroid, <= Angstroms), default: 10
684
731
--vertex-type Graph vertex type (recurrence, unique, or weight), default: recurrence
0 commit comments