@@ -110,6 +110,12 @@ sub new {
110
110
$this -> {' MinPts' } = undef ;
111
111
$this -> {' number_of_runs' } = undef ;
112
112
$this -> {' probability_cut_off' } = undef ;
113
+
114
+ $this -> {' JSON_status' } = undef ;
115
+ $this -> {' mutations_json_hash' } = undef ;
116
+ $this -> {' distance_matrix_json_hash' } = undef ;
117
+ $this -> {' siteVertexMap_json_hash' } = undef ;
118
+
113
119
bless $this , $class ;
114
120
$this -> process();
115
121
return $this ;
@@ -252,6 +258,10 @@ sub setOptions {
252
258
' minPts=f' => \$this -> {' MinPts' },
253
259
' number-of-runs=f' => \$this -> {' number_of_runs' },
254
260
' probability-cut-off=f' => \$this -> {' probability_cut_off' },
261
+ ' use-JSON' => \$this -> {' JSON_status' },
262
+ ' mutations-hash-json-file=s' => \$this -> {' mutations_json_hash' },
263
+ ' distance-matrix-json-file=s' => \$this -> {' distance_matrix_json_hash' },
264
+ ' siteVertexMap-json-file=s' => \$this -> {' siteVertexMap_json_hash' },
255
265
256
266
' help' => \$help ,
257
267
);
@@ -348,10 +358,22 @@ sub setOptions {
348
358
} else {
349
359
warn " HotSpot3D::Cluster::setOptions warning: no pairwise-file included (cannot produce mutation-mutation clusters)!\n " ;
350
360
}
361
# --use-JSON handling.
# The ' JSON_status' slot is initialised to undef in the constructor, so a
# defined value here is taken to mean the flag was supplied. It is normalised
# to 1 (flag given) or 0 (flag absent) so later code can test it as a boolean.
if ( defined $this->{' JSON_status'} ) {
    $this->{' JSON_status'} = 1;
    warn " HotSpot3D::Cluster::setOptions warning: use-JSON flag used (will not look for pairwise data or maf file)\n ";

    # The three JSON inputs that must accompany the flag.
    my @jsonFileSlots = (
        ' mutations_json_hash',
        ' distance_matrix_json_hash',
        ' siteVertexMap_json_hash',
    );

    # Every location has to be given ...
    if ( grep { not defined $this->{$_} } @jsonFileSlots ) {
        die " HotSpot3D::Cluster::setOptions Error: use-JSON flag is used, but json file locations are not provided!\n ";
    }
    # ... and every file has to exist. The distance-matrix file is checked
    # here too; the earlier version tested the mutations file twice and never
    # looked at the distance-matrix file.
    elsif ( grep { not -e $this->{$_} } @jsonFileSlots ) {
        die " HotSpot3D::Cluster::setOptions Error: use-JSON flag is used, but the provided JSON files do not exist!\n ";
    }
}
else {
    $this->{' JSON_status'} = 0;
}
372
+
351
373
if ( not defined $this -> pairwiseFile() and
352
374
not defined $this -> sitePairsFile() and
353
375
not defined $this -> musitePairsFile() and
354
- not defined $this -> drugsCleanFile() ) {
376
+ not defined $this -> drugsCleanFile() and not $this -> { ' JSON_status ' } ) {
355
377
warn " HotSpot3D::Cluster::setOptions error: no pair file provided. Need at least one of *.pairwise, *.clean, *.sites, *.musites.\n " ;
356
378
die $this -> help_text();
357
379
}
@@ -527,8 +549,8 @@ sub launchClustering {
527
549
528
550
sub vertexFilter {
529
551
# $this->vertexFilter( $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix );
530
- my ( $this , $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix ) = @_ ;
531
- if ( $this -> {' vertex_type' } eq $SITE ) {
552
+ my ( $this , $temp_mutations , $temp_distance_matrix , $mutations , $distance_matrix , $siteVertexMap ) = @_ ;
553
+ if ( $this -> {' vertex_type' } eq $SITE or $this -> { ' clustering ' } eq $DENSITY ) {
532
554
print STDOUT " Filtering vertices\n " ;
533
555
# TODO if using a different .maf from search step, then some mutations can be missed
534
556
my $vertexMap = {}; # a hash to map isSameProteinPosition vertices (and others to their selves)-- map=f()
@@ -539,13 +561,15 @@ sub vertexFilter {
539
561
next if not exists $temp_mutations -> {$mutationKey1 };
540
562
foreach my $mutationKey2 ( @mKeys ) {
541
563
next if not exists $temp_mutations -> {$mutationKey2 };
542
- if ( $mutationKey1 eq $mutationKey2 ) {
564
+ if ( $mutationKey1 eq $mutationKey2 ) { # this if condition is important to capture mk2=mk1 cases to $siteVertexMap hash
543
565
$vertexMap -> {$mutationKey2 } = $mutationKey1 ;
566
+ $siteVertexMap -> {$mutationKey1 }-> {$mutationKey2 } = $temp_mutations -> {$mutationKey2 };
544
567
# print "ACSW::VertexFilter::Equal $mutationKey2 \=\=\> $mutationKey1\n";
545
568
next ;
546
569
}
547
570
elsif ( $this -> isSameProteinPosition( $temp_mutations , $mutationKey1 , $mutationKey2 ) ) { # if same site
548
571
$vertexMap -> {$mutationKey2 } = $mutationKey1 ;
572
+ $siteVertexMap -> {$mutationKey1 }-> {$mutationKey2 } = $temp_mutations -> {$mutationKey2 };
549
573
print " ACSW::VertexFilter::SameSite $mutationKey2 \=\=\> $mutationKey1 \n " ;
550
574
delete $temp_mutations -> {$mutationKey2 };
551
575
}
@@ -555,8 +579,14 @@ sub vertexFilter {
555
579
556
580
# generate representative annotations
557
581
foreach my $mutationKey ( keys %{$temp_mutations } ) {
558
- my ( $ra , $pk ) = $this -> getARepresentativeAnnotation( $temp_mutations , $mutationKey );
559
- $mutations -> {$mutationKey }-> {$ra }-> {$pk } = 1;
582
+ my ( $ra , $pk , $highestRecurrence , $totalRecurrence ) = $this -> getARepresentativeAnnotation( $temp_mutations , $mutationKey , $siteVertexMap );
583
+ if ( $this -> {' vertex_type' } eq $SITE ) {
584
+ $mutations -> {$mutationKey }-> {$ra }-> {$pk } = 1; # weight = 1 for SITE
585
+ }
586
+ else {
587
+ $mutations -> {$mutationKey }-> {$ra }-> {$pk } = $totalRecurrence ; # weight = total recurrence/weight for RECURRENCE/WEIGHT
588
+ }
589
+
560
590
}
561
591
print " ACSW::VertexFilter::mutations representative annotation done\n " ;
562
592
@@ -580,30 +610,56 @@ sub vertexFilter {
580
610
}
581
611
}
582
612
}
613
+ # print "vertex_map\n";
614
+ # print Dumper $vertexMap;
583
615
} else {
584
616
%{$mutations } = %{$temp_mutations };
585
617
%{$distance_matrix } = %{$temp_distance_matrix };
586
618
}
587
619
$temp_mutations = undef ;
588
620
$temp_distance_matrix = undef ;
589
- # print "distance_matrix\n";
590
- # print Dumper $distance_matrix;
621
+
591
622
return ;
592
623
}
593
624
594
# Choose a representative annotation for a vertex out of all the mutations
# that were collapsed onto it as the same protein position, and add up the
# recurrence/weight carried by the whole group.
#
# Arguments:
#   $this          - the object; not used by this method
#   $mutations     - not used; kept so existing call sites keep working
#   $mutationKey   - key of the vertex whose group is summarised
#   $siteVertexMap - hash ref laid out as
#                    {vertex key}{member mutation key}{ref:alt}{protein key} = value
#                    where the value is compared and summed numerically
#
# Returns the list ( $ra, $pk, $highestRecurrence, $totalRecurrence ):
#   $ra, $pk           - ref:alt and protein key with the largest value among
#                        the entries stored under $mutationKey itself
#   $highestRecurrence - that largest value
#   $totalRecurrence   - sum over every member and every ref:alt of the value
#                        of ONE protein key of that ref:alt
# The first three are undef when $mutationKey has no entry of its own;
# $totalRecurrence is then whatever the other members contribute (0 if none).
#
# Keys are visited in sorted order so that ties, and the protein key sampled
# for the total, do not depend on Perl's per-process hash ordering.
sub getARepresentativeAnnotation {
    my ( $this, $mutations, $mutationKey, $siteVertexMap ) = @_;
    my ( $ra, $pk, $highestRecurrence );
    my $totalRecurrence = 0;

    # Read the group without creating an entry for an unknown vertex key.
    my $members = $siteVertexMap->{$mutationKey} || {};

    foreach my $mk ( sort keys %{$members} ) {
        my $annotations = $members->{$mk};
        foreach my $refAlt ( sort keys %{$annotations} ) {
            my $recurrenceOf = $annotations->{$refAlt};
            my @proteinKeys  = sort keys %{$recurrenceOf};
            next if not @proteinKeys;    # nothing recorded for this ref:alt

            # Only annotations that belong to $mutationKey itself may become
            # the representative, because that key is what downstream code uses.
            if ( $mk eq $mutationKey ) {
                foreach my $proteinKey ( @proteinKeys ) {
                    my $recurrence = $recurrenceOf->{$proteinKey};
                    if ( not defined $highestRecurrence
                        or $highestRecurrence < $recurrence ) {
                        ( $ra, $pk, $highestRecurrence ) = ( $refAlt, $proteinKey, $recurrence );
                    }
                }
            }

            # Count each ref:alt once, using the value of a single protein key.
            $totalRecurrence += $recurrenceOf->{ $proteinKeys[-1] };
        }
    }

    return ( $ra, $pk, $highestRecurrence, $totalRecurrence );
}
608
664
609
665
sub checkPartners {
@@ -1921,6 +1977,11 @@ Usage: hotspot3d cluster [options]
1921
1977
--max-processes Set if using parallel type local (CAUTION: make sure you know your max CPU processes)
1922
1978
--gene-list-file Choose mutations from the genes given in this list
1923
1979
--structure-list-file Choose mutations from the structures given in this list
1980
+ --use-JSON Use pre-encoded mutations and distance-matrix hashes in json format, default (no flag): do not use json
1981
+ --mutations-hash-json-file JSON encoded mutations hash file produced by a previous cluster run
1982
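+ --distance-matrix-json-file JSON encoded distance-matrix hash file produced by a previous cluster run
+ --siteVertexMap-json-file JSON encoded siteVertexMap hash file (required, together with the two files above, when --use-JSON is set)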
1983
+
1984
+
1924
1985
1925
1986
--help this message
1926
1987
0 commit comments