Skip to content

Commit 9515732

Browse files
authored
Merge pull request #14 from ikmb/devel
Various code cleanups
2 parents 3ddbbf3 + e0745ae commit 9515732

38 files changed

+914
-914
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ ENV PYTHONPATH /opt/hisat-genotype/hisatgenotype_modules:$PYTHONPATH
1010
RUN apt-get -y update && apt-get -y install make wget git g++ ruby-full ruby-dev
1111

1212
RUN cd /opt && git clone --recurse-submodules https://github.com/DaehwanKimLab/hisat-genotype \
13-
&& cd hisat-genotype/hisat2 && make -j2
13+
&& cd hisat-genotype/hisat2 && make -j2
1414

1515
RUN cd /opt && mkdir hlascan && cd hlascan && wget https://github.com/SyntekabioTools/HLAscan/releases/download/v2.1.4/hla_scan_r_v2.1.4 && mv hla_scan_r_v2.1.4 hla_scan && chmod +x hla_scan
1616

assets/chr6/hg38.chr6.fna.fai

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
chr6 170805979 78 80 81
1+
chr6 170805979 78 80 81

assets/targets/genes.bed

Lines changed: 47 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,47 @@
1-
chr6 29942554 29942626 HLA-A.ENST00000376809.1 100 +
2-
chr6 29942757 29943026 HLA-A.ENST00000376809.2 100 +
3-
chr6 29943268 29943543 HLA-A.ENST00000376809.3 100 +
4-
chr6 29944122 29944397 HLA-A.ENST00000376809.4 100 +
5-
chr6 29944500 29944616 HLA-A.ENST00000376809.5 100 +
6-
chr6 29945059 29945091 HLA-A.ENST00000376809.6 100 +
7-
chr6 29945234 29945281 HLA-A.ENST00000376809.7 100 +
8-
chr6 29945451 29945455 HLA-A.ENST00000376809.8 100 +
9-
chr6 31357086 31357158 HLA-B.ENST00000412585.1 100 -
10-
chr6 31356688 31356957 HLA-B.ENST00000412585.2 100 -
11-
chr6 31356167 31356442 HLA-B.ENST00000412585.3 100 -
12-
chr6 31355317 31355592 HLA-B.ENST00000412585.4 100 -
13-
chr6 31355107 31355223 HLA-B.ENST00000412585.5 100 -
14-
chr6 31354633 31354665 HLA-B.ENST00000412585.6 100 -
15-
chr6 31354483 31354526 HLA-B.ENST00000412585.7 100 -
16-
chr6 31271999 31272071 HLA-C.ENST00000376228.1 100 -
17-
chr6 31271599 31271868 HLA-C.ENST00000376228.2 100 -
18-
chr6 31271073 31271348 HLA-C.ENST00000376228.3 100 -
19-
chr6 31270210 31270485 HLA-C.ENST00000376228.4 100 -
20-
chr6 31269966 31270085 HLA-C.ENST00000376228.5 100 -
21-
chr6 31269493 31269525 HLA-C.ENST00000376228.6 100 -
22-
chr6 31269338 31269385 HLA-C.ENST00000376228.7 100 -
23-
chr6 31269169 31269173 HLA-C.ENST00000376228.8 100 -
24-
chr6 32589643 32589742 HLA-DRB1.ENST00000360004.1 100 -
25-
chr6 32584109 32584378 HLA-DRB1.ENST00000360004.2 100 -
26-
chr6 32581557 32581838 HLA-DRB1.ENST00000360004.3 100 -
27-
chr6 32580746 32580856 HLA-DRB1.ENST00000360004.4 100 -
28-
chr6 32580247 32580270 HLA-DRB1.ENST00000360004.5 100 -
29-
chr6 32579091 32579104 HLA-DRB1.ENST00000360004.6 100 -
30-
chr6 32637459 32637540 HLA-DQA1.ENST00000343139.1 100 +
31-
chr6 32641310 32641558 HLA-DQA1.ENST00000343139.2 100 +
32-
chr6 32641972 32642253 HLA-DQA1.ENST00000343139.3 100 +
33-
chr6 32642610 32642764 HLA-DQA1.ENST00000343139.4 100 +
34-
chr6 32666499 32666607 HLA-DQB1.ENST00000434651.1 100 -
35-
chr6 32664798 32665067 HLA-DQB1.ENST00000434651.2 100 -
36-
chr6 32661967 32662248 HLA-DQB1.ENST00000434651.3 100 -
37-
chr6 32661347 32661457 HLA-DQB1.ENST00000434651.4 100 -
38-
chr6 32660236 32660249 HLA-DQB1.ENST00000434651.5 100 -
39-
chr6 33073471 33073570 HLA-DPA1.ENST00000419277.2 100 -
40-
chr6 33069641 33069886 HLA-DPA1.ENST00000419277.3 100 -
41-
chr6 33069019 33069300 HLA-DPA1.ENST00000419277.4 100 -
42-
chr6 33068650 33068804 HLA-DPA1.ENST00000419277.5 100 -
43-
chr6 33076042 33076141 HLA-DPB1.ENST00000418931.1 100 +
44-
chr6 33080672 33080935 HLA-DPB1.ENST00000418931.2 100 +
45-
chr6 33084950 33085231 HLA-DPB1.ENST00000418931.3 100 +
46-
chr6 33085779 33085889 HLA-DPB1.ENST00000418931.4 100 +
47-
chr6 33086219 33086238 HLA-DPB1.ENST00000418931.5 100 +
1+
chr6 29942554 29942626 HLA-A.ENST00000376809.1 100 +
2+
chr6 29942757 29943026 HLA-A.ENST00000376809.2 100 +
3+
chr6 29943268 29943543 HLA-A.ENST00000376809.3 100 +
4+
chr6 29944122 29944397 HLA-A.ENST00000376809.4 100 +
5+
chr6 29944500 29944616 HLA-A.ENST00000376809.5 100 +
6+
chr6 29945059 29945091 HLA-A.ENST00000376809.6 100 +
7+
chr6 29945234 29945281 HLA-A.ENST00000376809.7 100 +
8+
chr6 29945451 29945455 HLA-A.ENST00000376809.8 100 +
9+
chr6 31357086 31357158 HLA-B.ENST00000412585.1 100 -
10+
chr6 31356688 31356957 HLA-B.ENST00000412585.2 100 -
11+
chr6 31356167 31356442 HLA-B.ENST00000412585.3 100 -
12+
chr6 31355317 31355592 HLA-B.ENST00000412585.4 100 -
13+
chr6 31355107 31355223 HLA-B.ENST00000412585.5 100 -
14+
chr6 31354633 31354665 HLA-B.ENST00000412585.6 100 -
15+
chr6 31354483 31354526 HLA-B.ENST00000412585.7 100 -
16+
chr6 31271999 31272071 HLA-C.ENST00000376228.1 100 -
17+
chr6 31271599 31271868 HLA-C.ENST00000376228.2 100 -
18+
chr6 31271073 31271348 HLA-C.ENST00000376228.3 100 -
19+
chr6 31270210 31270485 HLA-C.ENST00000376228.4 100 -
20+
chr6 31269966 31270085 HLA-C.ENST00000376228.5 100 -
21+
chr6 31269493 31269525 HLA-C.ENST00000376228.6 100 -
22+
chr6 31269338 31269385 HLA-C.ENST00000376228.7 100 -
23+
chr6 31269169 31269173 HLA-C.ENST00000376228.8 100 -
24+
chr6 32589643 32589742 HLA-DRB1.ENST00000360004.1 100 -
25+
chr6 32584109 32584378 HLA-DRB1.ENST00000360004.2 100 -
26+
chr6 32581557 32581838 HLA-DRB1.ENST00000360004.3 100 -
27+
chr6 32580746 32580856 HLA-DRB1.ENST00000360004.4 100 -
28+
chr6 32580247 32580270 HLA-DRB1.ENST00000360004.5 100 -
29+
chr6 32579091 32579104 HLA-DRB1.ENST00000360004.6 100 -
30+
chr6 32637459 32637540 HLA-DQA1.ENST00000343139.1 100 +
31+
chr6 32641310 32641558 HLA-DQA1.ENST00000343139.2 100 +
32+
chr6 32641972 32642253 HLA-DQA1.ENST00000343139.3 100 +
33+
chr6 32642610 32642764 HLA-DQA1.ENST00000343139.4 100 +
34+
chr6 32666499 32666607 HLA-DQB1.ENST00000434651.1 100 -
35+
chr6 32664798 32665067 HLA-DQB1.ENST00000434651.2 100 -
36+
chr6 32661967 32662248 HLA-DQB1.ENST00000434651.3 100 -
37+
chr6 32661347 32661457 HLA-DQB1.ENST00000434651.4 100 -
38+
chr6 32660236 32660249 HLA-DQB1.ENST00000434651.5 100 -
39+
chr6 33073471 33073570 HLA-DPA1.ENST00000419277.2 100 -
40+
chr6 33069641 33069886 HLA-DPA1.ENST00000419277.3 100 -
41+
chr6 33069019 33069300 HLA-DPA1.ENST00000419277.4 100 -
42+
chr6 33068650 33068804 HLA-DPA1.ENST00000419277.5 100 -
43+
chr6 33076042 33076141 HLA-DPB1.ENST00000418931.1 100 +
44+
chr6 33080672 33080935 HLA-DPB1.ENST00000418931.2 100 +
45+
chr6 33084950 33085231 HLA-DPB1.ENST00000418931.3 100 +
46+
chr6 33085779 33085889 HLA-DPB1.ENST00000418931.4 100 +
47+
chr6 33086219 33086238 HLA-DPB1.ENST00000418931.5 100 +

bin/ensembl_panel2bed.pl

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@
1010
[--help]
1111
1212
Input data
13-
[--list filename]
14-
The list of gene names to get BED coordinates for
13+
[--list filename]
14+
The list of gene names to get BED coordinates for
1515
16-
[--assembly name]
17-
Name of the genome assembly to use (GRCH37, hg19, GRCh38)
16+
[--assembly name]
17+
Name of the genome assembly to use (GRCh37, hg19, GRCh38)
1818
1919
Output:
2020
[--output_file filename]
@@ -45,14 +45,14 @@
4545
my $prefix = "chr";
4646

4747
if ($assembly eq "GRCh37" or $assembly eq "hg19") {
48-
$options = 3337;
48+
$options = 3337;
4949
} elsif ($assembly eq "GRCh38") {
50-
# do nothing
50+
# do nothing
5151
} else {
52-
exit 1, "Unknown assembly version provided should be one of: hg19, GRCh37 or GRCh38 (default).\n";
52+
exit 1, "Unknown assembly version provided should be one of: hg19, GRCh37 or GRCh38 (default).\n";
5353
}
5454
if ($assembly eq "GRCh37") {
55-
$prefix = "";
55+
$prefix = "";
5656
}
5757

5858
my $registry = 'Bio::EnsEMBL::Registry';
@@ -69,43 +69,43 @@
6969

7070
foreach $line (<$fh>) {
7171

72-
chomp($line);
72+
chomp($line);
7373

74-
# Some genes may have different canonical names in different assemblies
75-
my @genes = (split ",", $line);
76-
my $skip = 0;
74+
# Some genes may have different canonical names in different assemblies
75+
my @genes = (split ",", $line);
76+
my $skip = 0;
7777

78-
foreach my $gene_name (@genes) {
78+
foreach my $gene_name (@genes) {
7979

80-
printf STDERR "Searching for $gene_name ...\n";
80+
printf STDERR "Searching for $gene_name ...\n";
8181

82-
next if ($skip == 1);
82+
next if ($skip == 1);
8383

84-
# Theoretically, one HGNC can map to multiple Genes
85-
my $gene = $gene_adaptor->fetch_by_display_label($gene_name);
84+
# Theoretically, one HGNC can map to multiple Genes
85+
my $gene = $gene_adaptor->fetch_by_display_label($gene_name);
8686

87-
next if (!$gene);
87+
next if (!$gene);
8888

89-
# We have a found a match, don't need to check the alternative names, if any
90-
$skip = 1;
89+
# We have found a match, so there is no need to check the alternative names, if any
90+
$skip = 1;
9191

92-
my $transcript = $gene->canonical_transcript;
92+
my $transcript = $gene->canonical_transcript;
9393

94-
my @exons = @{ $transcript->get_all_translateable_Exons() } ;
95-
foreach my $exon (@exons) {
96-
next if (!$exon->is_coding($transcript) ) ;
97-
my $ref_start = $exon->coding_region_start($transcript);
98-
my $ref_end = $exon->coding_region_end($transcript);
99-
if ($ref_start > $ref_end) {
100-
($ref_start,$ref_end) = ($ref_end,$ref_start);
101-
}
102-
my $strand = $exon->strand == 1 ? "+" : "-" ;
103-
printf $prefix . $gene->seq_region_name . "\t" . $ref_start . "\t" . $ref_end . "\t" . $line . "." . $transcript->stable_id . "." . $exon->rank($transcript) . "\t" . 100 . "\t" . $strand . "\n";
104-
}
105-
}
94+
my @exons = @{ $transcript->get_all_translateable_Exons() } ;
95+
foreach my $exon (@exons) {
96+
next if (!$exon->is_coding($transcript) ) ;
97+
my $ref_start = $exon->coding_region_start($transcript);
98+
my $ref_end = $exon->coding_region_end($transcript);
99+
if ($ref_start > $ref_end) {
100+
($ref_start,$ref_end) = ($ref_end,$ref_start);
101+
}
102+
my $strand = $exon->strand == 1 ? "+" : "-" ;
103+
printf $prefix . $gene->seq_region_name . "\t" . $ref_start . "\t" . $ref_end . "\t" . $line . "." . $transcript->stable_id . "." . $exon->rank($transcript) . "\t" . 100 . "\t" . $strand . "\n";
104+
}
105+
}
106106

107-
die "Gene not found " . $line . "\n" if ($skip == 0);
108-
107+
die "Gene not found " . $line . "\n" if ($skip == 0);
108+
109109
}
110110
close ($fh);
111111

0 commit comments

Comments
 (0)