Skip to content

Commit d23084d

Browse files
committed
Merge branches 'rescore' and 'densityRec' of https://github.com/ding-lab/hotspot3d
2 parents a65f29a + 6bf3a23 commit d23084d

File tree

7 files changed

+335
-86
lines changed

7 files changed

+335
-86
lines changed
Binary file not shown.

bin/hotspot3d

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22
#----------------------------------
33
# $Authors: Beifang Niu & Adam D Scott
44
# $Date: 2013-08-08 13:22:08 -0500 (Thu Aug 8 13:22:08 CDT 2013) $
5-
# $Revision: 1.8.0.2 $
5+
# $Revision: 1.8.0 $
66
# $URL: $
77
#----------------------------------
88
use strict;
99
use warnings;
1010

11-
our $VERSION = 'V1.8.0.2';
11+
our $VERSION = 'V1.8.0';
1212

1313
use Carp;
1414
use FileHandle;

dist.ini

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = HotSpot3D
22
author = Beifang Niu, John Wallis, Adam D Scott, Sohini Sengupta, Amila Weerasinghe, & Matthew H Bailey from McDonnell Genome Institute of Washington University at St. Louis
3-
version = 1.8.0.2
3+
version = 1.8.0
44
license = Perl_5
55
copyright_holder = McDonnell Genome Institute at Washington University
66
copyright_year = 2017

lib/TGI/Mutpro/Main/Cluster.pm

+80-19
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,12 @@ sub new {
110110
$this->{'MinPts'} = undef;
111111
$this->{'number_of_runs'} = undef;
112112
$this->{'probability_cut_off'} = undef;
113+
114+
$this->{'JSON_status'} = undef;
115+
$this->{'mutations_json_hash'} = undef;
116+
$this->{'distance_matrix_json_hash'} = undef;
117+
$this->{'siteVertexMap_json_hash'} = undef;
118+
113119
bless $this, $class;
114120
$this->process();
115121
return $this;
@@ -252,6 +258,10 @@ sub setOptions {
252258
'minPts=f' => \$this->{'MinPts'},
253259
'number-of-runs=f' => \$this->{'number_of_runs'},
254260
'probability-cut-off=f' => \$this->{'probability_cut_off'},
261+
'use-JSON' => \$this->{'JSON_status'},
262+
'mutations-hash-json-file=s' => \$this->{'mutations_json_hash'},
263+
'distance-matrix-json-file=s' => \$this->{'distance_matrix_json_hash'},
264+
'siteVertexMap-json-file=s' => \$this->{'siteVertexMap_json_hash'},
255265

256266
'help' => \$help,
257267
);
@@ -348,10 +358,22 @@ sub setOptions {
348358
} else {
349359
warn "HotSpot3D::Cluster::setOptions warning: no pairwise-file included (cannot produce mutation-mutation clusters)!\n";
350360
}
361+
if ( defined $this->{'JSON_status'} ) {
362+
$this->{'JSON_status'} = 1;
363+
warn "HotSpot3D::Cluster::setOptions warning: use-JSON flag used (will not look for pairwise data or maf file)\n";
364+
if ( not defined $this->{'mutations_json_hash'} or not defined $this->{'distance_matrix_json_hash'} or not defined $this->{'siteVertexMap_json_hash'} ) {
365+
die "HotSpot3D::Cluster::setOptions Error: use-JSON flag is used, but json file locations are not provided!\n";
366+
}
367+
elsif ( not -e $this->{'mutations_json_hash'} or not -e $this->{'mutations_json_hash'} or not -e $this->{'siteVertexMap_json_hash'} ) {
368+
die "HotSpot3D::Cluster::setOptions Error: use-JSON flag is used, but the provided JSON files do not exist!\n";
369+
}
370+
}
371+
else { $this->{'JSON_status'} = 0; }
372+
351373
if ( not defined $this->pairwiseFile() and
352374
not defined $this->sitePairsFile() and
353375
not defined $this->musitePairsFile() and
354-
not defined $this->drugsCleanFile() ) {
376+
not defined $this->drugsCleanFile() and not $this->{'JSON_status'} ) {
355377
warn "HotSpot3D::Cluster::setOptions error: no pair file provided. Need at least one of *.pairwise, *.clean, *.sites, *.musites.\n";
356378
die $this->help_text();
357379
}
@@ -527,8 +549,8 @@ sub launchClustering {
527549

528550
sub vertexFilter {
529551
#$this->vertexFilter( $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix );
530-
my ( $this , $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix ) = @_;
531-
if ( $this->{'vertex_type'} eq $SITE ) {
552+
my ( $this , $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix, $siteVertexMap ) = @_;
553+
if ( $this->{'vertex_type'} eq $SITE or $this->{'clustering'} eq $DENSITY ) {
532554
print STDOUT "Filtering vertices\n";
533555
#TODO if using a different .maf from search step, then some mutations can be missed
534556
my $vertexMap = {}; #a hash to map isSameProteinPosition vertices (and others to their selves)-- map=f()
@@ -539,13 +561,15 @@ sub vertexFilter {
539561
next if not exists $temp_mutations->{$mutationKey1};
540562
foreach my $mutationKey2 ( @mKeys ) {
541563
next if not exists $temp_mutations->{$mutationKey2};
542-
if ( $mutationKey1 eq $mutationKey2 ) {
564+
if ( $mutationKey1 eq $mutationKey2 ) { # this if condition is important to capture mk2=mk1 cases to $siteVertexMap hash
543565
$vertexMap->{$mutationKey2} = $mutationKey1;
566+
$siteVertexMap->{$mutationKey1}->{$mutationKey2} = $temp_mutations->{$mutationKey2};
544567
# print "ACSW::VertexFilter::Equal $mutationKey2 \=\=\> $mutationKey1\n";
545568
next;
546569
}
547570
elsif ( $this->isSameProteinPosition( $temp_mutations , $mutationKey1 , $mutationKey2 ) ) { #if same site
548571
$vertexMap->{$mutationKey2} = $mutationKey1;
572+
$siteVertexMap->{$mutationKey1}->{$mutationKey2} = $temp_mutations->{$mutationKey2};
549573
print "ACSW::VertexFilter::SameSite $mutationKey2 \=\=\> $mutationKey1\n";
550574
delete $temp_mutations->{$mutationKey2};
551575
}
@@ -555,8 +579,14 @@ sub vertexFilter {
555579

556580
#generate representative annotations
557581
foreach my $mutationKey ( keys %{$temp_mutations} ) {
558-
my ( $ra , $pk ) = $this->getARepresentativeAnnotation( $temp_mutations , $mutationKey );
559-
$mutations->{$mutationKey}->{$ra}->{$pk} = 1;
582+
my ( $ra , $pk, $highestRecurrence, $totalRecurrence ) = $this->getARepresentativeAnnotation( $temp_mutations , $mutationKey, $siteVertexMap );
583+
if ( $this->{'vertex_type'} eq $SITE ) {
584+
$mutations->{$mutationKey}->{$ra}->{$pk} = 1; # weight = 1 for SITE
585+
}
586+
else {
587+
$mutations->{$mutationKey}->{$ra}->{$pk} = $totalRecurrence; # weight = total recurrence/weight for RECURRENCE/WEIGHT
588+
}
589+
560590
}
561591
print "ACSW::VertexFilter::mutations representative annotation done\n";
562592

@@ -580,30 +610,56 @@ sub vertexFilter {
580610
}
581611
}
582612
}
613+
# print "vertex_map\n";
614+
# print Dumper $vertexMap;
583615
} else {
584616
%{$mutations} = %{$temp_mutations};
585617
%{$distance_matrix} = %{$temp_distance_matrix};
586618
}
587619
$temp_mutations = undef;
588620
$temp_distance_matrix = undef;
589-
# print "distance_matrix\n";
590-
# print Dumper $distance_matrix;
621+
591622
return;
592623
}
593624

594-
sub getARepresentativeAnnotation {
595-
my ( $this , $mutations , $mutationKey ) = @_;
596-
my $ra = ".:.";
597-
my $pk = ".:p.";
598-
foreach my $refAlt ( keys %{$mutations->{$mutationKey}} ) {
599-
$ra = $refAlt;
600-
foreach my $proteinKey ( keys %{$mutations->{$mutationKey}->{$refAlt}} ) {
601-
$pk = $proteinKey;
602-
last;
625+
sub getARepresentativeAnnotation { # choose a representative out of all the mutations detected as same protein position
626+
my ( $this , $mutations , $mutationKey, $siteVertexMap ) = @_;
627+
# my $ra = ".:.";
628+
# my $pk = ".:p.";
629+
my ($ra, $pk, $highestRecurrence ) = undef;
630+
my $totalRecurrence = 0;
631+
632+
foreach my $mk ( keys %{$siteVertexMap->{$mutationKey}} ) {
633+
foreach my $refAlt ( keys %{$siteVertexMap->{$mutationKey}->{$mk}} ) {
634+
my $lastPK; # to store one proteinKey from one ref:alt
635+
foreach my $proteinKey ( keys %{$siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}} ) {
636+
$lastPK = $proteinKey;
637+
if ( $mk eq $mutationKey ) { # to make sure our selected refAlt and pKey are members of the mutationKey used in downstream
638+
if ( not defined $highestRecurrence ) { # assigning to the first entry
639+
$ra = $refAlt;
640+
$pk = $proteinKey;
641+
$highestRecurrence = $siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}->{$proteinKey};
642+
}
643+
elsif ( $highestRecurrence < $siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}->{$proteinKey} ) { # this recurrence is larger than everything seen so far
644+
$ra = $refAlt;
645+
$pk = $proteinKey;
646+
$highestRecurrence = $siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}->{$proteinKey};
647+
}
648+
}
649+
}
650+
$totalRecurrence += $siteVertexMap->{$mutationKey}->{$mk}->{$refAlt}->{$lastPK}; # add the recurrence of one proteinKey per ref:alt
603651
}
604-
last;
605652
}
606-
return ( $ra , $pk );
653+
654+
# foreach my $refAlt ( keys %{$mutations->{$mutationKey}} ) {
655+
# $ra = $refAlt;
656+
# foreach my $proteinKey ( keys %{$mutations->{$mutationKey}->{$refAlt}} ) {
657+
# $pk = $proteinKey;
658+
# last;
659+
# }
660+
# last;
661+
# }
662+
return ( $ra , $pk, $highestRecurrence, $totalRecurrence );
607663
}
608664

609665
sub checkPartners {
@@ -1921,6 +1977,11 @@ Usage: hotspot3d cluster [options]
19211977
--max-processes Set if using parallel type local (CAUTION: make sure you know your max CPU processes)
19221978
--gene-list-file Choose mutations from the genes given in this list
19231979
--structure-list-file Choose mutations from the structures given in this list
1980+
--use-JSON Use pre-encoded mutations and distance-matrix hashes in json format, default (no flag): do not use json
1981+
--mutations-hash-json-file JSON encoded mutations hash file produced by a previous cluster run
1982+
--distance-matrix-json-file JSON encoded distance-matrix hash file produced by a previous cluster run
1983+
1984+
19241985
19251986
--help this message
19261987

lib/TGI/Mutpro/Main/ColorScore.R

+36-1
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,49 @@
11
#!/usr/bin/env Rscript
2+
3+
# Authors : Amila Weerasinghe
4+
# Generates a reachability plot colored by weight (*.Horiz.weights.pdf)
5+
26
args = commandArgs(trailingOnly=TRUE)
37
d <- read.table(args[1], header=FALSE, sep = "\t") # data table with variant,RD,genomic_annotations,weight
48

59
segData = read.table(args[2], sep = "\t") # data table with start,stop,epsilon',clusterID
610

11+
#################################################################
12+
### process the segData so that cluster labels do not overlap ###
13+
#################################################################
14+
15+
# make a column to show process status
16+
segData$Processed = 0
17+
18+
# display singleton clusters at RD=0.1
19+
segData[segData$V1==segData$V2,"V3"] = 0.1
20+
segData[segData$V1==segData$V2,"Processed"] = 1
21+
22+
segData$textOffset = 0
23+
24+
# now start from the first un-processed row and see if there are nearby levels
25+
unprocessed = segData[segData$Processed==0,]
26+
27+
while ( length(unprocessed$V1) != 0 ) {
28+
# take the first un-processed one and the nearby stuff
29+
tab = segData[segData$V3<unprocessed$V3[1]+0.5 & segData$V3>=unprocessed$V3[1] & segData$V2==unprocessed$V2[1],]
30+
offset = 0.1
31+
# go through each row in tab and add offset
32+
for (i in c(1:length(tab$V1))) {
33+
segData[segData$V3<unprocessed$V3[1]+0.5 & segData$V3>=unprocessed$V3[1] & segData$V2==unprocessed$V2[1],][i,"textOffset"] = offset
34+
offset = offset + 0.1
35+
}
36+
segData[segData$V3<unprocessed$V3[1]+0.5 & segData$V3>=unprocessed$V3[1] & segData$V2==unprocessed$V2[1],]$Processed = 1
37+
unprocessed = segData[segData$Processed==0,] # update the unprocessed table
38+
}
39+
#################################################################
40+
741
y0<-segData[[3]]
842
x0<-segData[[1]]+1
943
y1<-segData[[3]]
1044
x1<-segData[[2]]+1
1145
Cluster<-segData[[5]]
46+
labelOffset <- segData$textOffset
1247

1348
names(d)[1] <- "variant"
1449
names(d)[2] <- "RD"
@@ -37,7 +72,7 @@ for (i in values) {
3772
p <- p + geom_segment(x=x0[i], y=y0[i], xend=x1[i], yend=y1[i])
3873
}
3974

40-
p <- p + annotate("text", x = x1+1, y = y0, label = Cluster, size = 2)
75+
p <- p + annotate("text", x = x1+labelOffset, y = y0, label = Cluster, size = 2)
4176

4277
p <- p + ggtitle(paste("Reachability Plot with weights: Epsilon=",args[4],"MinPts=",args[5]))
4378

0 commit comments

Comments
 (0)