add Ensembl mm10

stevekm · stevekm · commit 6165ad311ddc · 2017-12-22T13:56:50.000-05:00
diff --git a/Makefile b/Makefile
@@ -5,7 +5,7 @@
 none:
 
 # make all sets of annotations
-all: gencode-hg19 ensembl-hg19 gencode-hg38 ensembl-hg38
+all: gencode-hg19 ensembl-hg19 gencode-hg38 ensembl-hg38 ensembl-mm10
 
 gencode-hg19: gencode.v19.annotation.genes.bed
 
@@ -15,6 +15,8 @@ ensembl-hg19: Homo_sapiens.GRCh37.82.chr.bed
 
 ensembl-hg38: Homo_sapiens.GRCh38.91.chr.bed
 
+ensembl-mm10: Mus_musculus.GRCm38.91.chr.bed  # convenience alias: builds the Ensembl mm10 gene annotations .bed
+
 
 
 
@@ -72,6 +74,26 @@ Homo_sapiens.GRCh38.91.chr.bed: Homo_sapiens.GRCh38.91.chr.gtf
 	gtf2bed < Homo_sapiens.GRCh38.91.chr.gtf > Homo_sapiens.GRCh38.91.chr.bed
 
 
+
+
+# ~~~~~ ENSEMBL mm10 ~~~~~ #
+# generate the Ensembl mm10 annotations .bed file
+Mus_musculus.GRCm38.91.chr.gtf.gz:  # fetch to a temp name and rename on success, so a failed transfer leaves no partial target
+	wget -O $@.tmp ftp://ftp.ensembl.org/pub/release-91/gtf/mus_musculus/$@ && mv -f $@.tmp $@
+
+# drop comment lines, keep only 'gene' entries, prefix every line with 'chr' and rename 'chrMT' to 'chrM'
+# gzip -t aborts on a corrupt/truncated archive; otherwise the pipeline's exit status (sed's) would hide a zcat failure
+# output is written to $@.tmp and renamed only on success, so a failed run never leaves a partial target behind
+Mus_musculus.GRCm38.91.chr.gtf: Mus_musculus.GRCm38.91.chr.gtf.gz
+	gzip -t $< && zcat $< | grep -Ev '^#' | grep -w 'gene' | sed -e 's/^/chr/' -e 's/^chrMT/chrM/' > $@.tmp && mv -f $@.tmp $@
+
+# convert to .bed; write to $@.tmp and rename on success so a missing or failing gtf2bed cannot leave an empty target
+Mus_musculus.GRCm38.91.chr.bed: Mus_musculus.GRCm38.91.chr.gtf
+	gtf2bed < $< > $@.tmp && mv -f $@.tmp $@
+
+
+
+
 # ~~~~~ CLEAN UP ~~~~~ #
 .INTERMEDIATE: gencode.v19.annotation.gtf.gz \
 	Homo_sapiens.GRCh37.82.gtf.gz \
@@ -82,6 +104,9 @@ Homo_sapiens.GRCh38.91.chr.bed: Homo_sapiens.GRCh38.91.chr.gtf
 	Homo_sapiens.GRCh38.91.chr.gtf \
 	Homo_sapiens.GRCh38.91.chr.gtf.gz \
 	Homo_sapiens.GRCh37.82.chr.gtf \
-	Homo_sapiens.GRCh37.82.chr.gtf.gz
+	Homo_sapiens.GRCh37.82.chr.gtf.gz \
+	Mus_musculus.GRCm38.91.chr.gtf.gz \
+	Mus_musculus.GRCm38.91.chr.gtf
+	
 	
 	
diff --git a/README.md b/README.md
@@ -12,7 +12,7 @@ cd reference-annotations
 
 Generate the desired annotation files from the available entries:
 
-- `all`, `gencode-hg19`, `gencode-hg38`, `ensembl-hg19`, `ensembl-hg38`
+- `all`, `gencode-hg19`, `gencode-hg38`, `ensembl-hg19`, `ensembl-hg38`, `ensembl-mm10`
 
 ```
 make all
@@ -40,6 +40,8 @@ The following files are created:
 
 - `ensembl-hg38`: `Homo_sapiens.GRCh38.91.chr.bed`; Ensembl hg38 gene annotations & genomic regions
 
+- `ensembl-mm10`: `Mus_musculus.GRCm38.91.chr.bed`; Ensembl mm10 gene annotations & genomic regions
+
 # Notes
 
 Intermediate files are removed by default. If you want to keep them, then comment out the `.INTERMEDIATE` section in the `Makefile`.