Skip to content

Commit

Permalink
Merge pull request BD2KGenomics#284 from BD2KGenomics/issues/283-add-…
Browse files Browse the repository at this point in the history
…bamQC

Add Treehouse bamQC (resolves BD2KGenomics#283)
  • Loading branch information
jvivian authored Oct 18, 2018
2 parents d0ba497 + a5b6a69 commit c11f15b
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 0 deletions.
33 changes: 33 additions & 0 deletions bamQC/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
FROM python:2.7

RUN apt-get update && apt-get install -y --no-install-recommends \
r-base \
zlib1g-dev \
time

RUN pip install RSeQC==2.6.4

RUN wget -qO- https://github.com/lomereiter/sambamba/releases/download/v0.6.7/sambamba_v0.6.7_linux.tar.bz2 \
| tar xj -C /usr/local/bin

RUN wget -qO- https://github.com/GregoryFaust/samblaster/releases/download/v.0.1.24/samblaster-v.0.1.24.tar.gz \
| tar xz -C /tmp \
&& cd /tmp/samblaster-v.0.1.24/ && make && mv samblaster /usr/local/bin && rm -rf /tmp/samblaster-v-0.1.24

WORKDIR /ref
RUN wget -qO- https://downloads.sourceforge.net/project/rseqc/BED/Human_Homo_sapiens/hg38_GENCODE_v23_basic.bed.gz \
| gunzip -c > /ref/hg38_GENCODE_v23_basic.bed

#WORKDIR /app
#ADD ./requirements.txt /app/requirements.txt
#RUN pip install --no-cache-dir -r /app/requirements.txt

RUN R -e 'install.packages(c("rjson"), repos="http://cran.us.r-project.org")'

RUN mkdir /app
ADD . /app

RUN mkdir /data
WORKDIR /data

ENTRYPOINT ["/bin/bash", "/app/run.sh"]
22 changes: 22 additions & 0 deletions bamQC/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
build_tool = runtime-container.DONE
git_commit ?= $(shell git log --pretty=oneline -n 1 -- ../bamQC | cut -f1 -d " ")
name = quay.io/ucsc_cgl/bamqc
tag = 1.0--${git_commit}

build: ${build_tool}

${build_tool}: Dockerfile
docker build -t ${name}:${tag} .
docker tag -f ${name}:${tag} ${name}:latest
touch ${build_tool}

push: build
# Requires ~/.dockercfg
docker push ${name}:${tag}
docker push ${name}:latest

test: build
python test.py

clean:
-rm ${build_tool}
43 changes: 43 additions & 0 deletions bamQC/parseReadDist.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/Rscript

# f="~/Documents/Dropbox/ucsc/projects/rnaQC/all_qc/all_raw_qc_info/Q10_YBL_384.md.readDist.txt"

options(stringsAsFactors=FALSE)

f <- commandArgs(TRUE)[1]
print(paste0("analyzing ", f))


if ( ! file.info(f)$size==0){

distVals=read.table(f, skip=4, sep="", nrows=10, header=T)
exonicGroups=c("CDS_Exons", "5'UTR_Exons", "3'UTR_Exons")
exonicTagCount= sum(subset(distVals, Group %in% exonicGroups)$Tag_count)

totalValsRaw=scan(f, what="list", sep="\n", comment.char="", nlines=3)
readCountTimes2=as.numeric(gsub("[^0-9]*", "", totalValsRaw[grep("Total Reads", totalValsRaw)]))
tagCount=as.numeric(gsub("[^0-9]*", "", totalValsRaw[grep("Total Tags", totalValsRaw)]))
readsPerTag= round(readCountTimes2 /tagCount, 2)

estExonicReadsTimes2= exonicTagCount*readsPerTag

# values are divided by two because read_distribution.py counts the ends of a read separately
readCount= readCountTimes2/2
estExonicReads= estExonicReadsTimes2/2

result=data.frame(input=basename(f),
uniqMappedNonDupeReadCount=readCount,
estExonicUniqMappedNonDupeReadCount=estExonicReads)

if(estExonicReads>10E6) {
result$qc="PASS"
} else {
result$qc="FAIL"
}
write.table(result, file=paste0(dirname(f), "/bam_umend_qc.tsv"), quote=FALSE, sep="\t", row.names=FALSE)

library(rjson)
sink(paste0(dirname(f), "/bam_umend_qc.json"))
cat(toJSON(result))
sink()
}
31 changes: 31 additions & 0 deletions bamQC/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash
UUID=$(cat /proc/sys/kernel/random/uuid)

#!/usr/bin/env bash
set -e

# Fix ownership of output files
finish() {
# Fix ownership of output files
user_id=$(stat -c '%u:%g' /data)
chown -R ${user_id} /data
}
trap finish EXIT

echo "Sorting by name..."
sambamba sort -t 4 --sort-by-name --out /data/$UUID.sortedByName.bam $1

echo "Marking duplicates..."
sambamba view -h /data/$UUID.sortedByName.bam | samblaster \
| sambamba view --sam-input --format bam /dev/stdin > /data/$UUID.sortedByName.md.bam
rm /data/$UUID.sortedByName.bam

echo "Sorting by coordinate..."
sambamba sort --show-progress -t 4 --out=/data/$UUID.sortedByCoord.md.bam /data/$UUID.sortedByName.md.bam
rm /data/$UUID.sortedByName.md.bam

echo "Counting reads..."
read_distribution.py -i /data/$UUID.sortedByCoord.md.bam -r /ref/hg38_GENCODE_v23_basic.bed > $2/readDist.txt
Rscript --vanilla /app/parseReadDist.R $2/readDist.txt
mv /data/$UUID.sortedByCoord.md.bam $2/sortedByCoord.md.bam
mv /data/$UUID.sortedByCoord.md.bam.bai $2/sortedByCoord.md.bam.bai
20 changes: 20 additions & 0 deletions bamQC/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env python2.7
import subprocess
import tempfile
import unittest


class TestBamQC(unittest.TestCase):

def test_docker_call(self):
out, err = check_docker_output(tool='quay.io/ucsc_cgl/bamqc')
self.assertTrue('sambamba-sort' in out)

def check_docker_output(tool):
command = 'docker run ' + tool
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
output = process.communicate()
return output

if __name__ == '__main__':
unittest.main()

0 comments on commit c11f15b

Please sign in to comment.