Skip to content

Commit

Permalink
Merge pull request #46 from BD2KGenomics/gzip-fastqs
Browse files Browse the repository at this point in the history
Properly concatenate gzipped fastq files
  • Loading branch information
wshands authored Jan 13, 2017
2 parents 17745aa + 900096c commit dcc39a7
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 4 deletions.
2 changes: 1 addition & 1 deletion docker/rnaseq-cgl-pipeline.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ dct:creator:
requirements:
- class: DockerRequirement
dockerPull: "quay.io/ucsc_cgl/rnaseq-cgl-pipeline:3.0.2-2"
dockerPull: "quay.io/ucsc_cgl/rnaseq-cgl-pipeline:3.0.2-3"
hints:
- class: ResourceRequirement
Expand Down
22 changes: 19 additions & 3 deletions docker/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import sys
import textwrap
from uuid import uuid4
import gzip
from bd2k.util.exceptions import require
from toil.lib.bioio import addLoggingOptions, setLoggingFromOptions

Expand Down Expand Up @@ -84,6 +85,14 @@ def catFiles(outputFile, inputFiles):
outfile.write(line)
return outputFile

def gzipCatFiles(outputFile, inputFiles):
    """Concatenate gzip-compressed files into one gzip-compressed file.

    Every input is decompressed and its contents are re-compressed into
    ``outputFile`` through a single gzip writer, in the order the inputs are
    given. No separator is inserted between inputs.

    :param outputFile: path of the gzip file to create; opened in write mode,
        so an existing file at that path is replaced
    :param inputFiles: iterable of paths to gzip-compressed input files
    :return: ``outputFile``, exactly as passed in
    """
    # Imported locally so this function does not depend on the module's
    # top-level import list.
    import shutil

    with gzip.open(outputFile, 'wb') as outfile:
        for fname in inputFiles:
            with gzip.open(fname, 'rb') as infile:
                # Copy in fixed-size chunks rather than line by line: the
                # bytes written are the same, but far fewer read/write calls
                # pass through the (de)compressor for large FASTQ inputs.
                shutil.copyfileobj(infile, outfile)
    return outputFile


def fileURL(sample):
    """Return ``sample`` prefixed with the ``file://`` scheme.

    The path is used verbatim: it is neither normalised nor escaped.

    :param sample: path string to turn into a file URL
    :return: the string ``'file://'`` followed by ``sample``
    """
    scheme = 'file://'
    return scheme + sample
Expand Down Expand Up @@ -116,8 +125,12 @@ def formatPair(name):
assert len(sample_pairs) % 2 == 0
outputName = os.path.join(work_mount, os.path.basename(sample_pairs[0]))
outputFiles = formatPair(outputName)
catFiles(outputFiles[0], sample_pairs[::2])
catFiles(outputFiles[1], sample_pairs[1::2])
if not outputFiles[0].endswith('.gz'):
catFiles(outputFiles[0], sample_pairs[::2])
catFiles(outputFiles[1], sample_pairs[1::2])
else:
gzipCatFiles(outputFiles[0], sample_pairs[::2])
gzipCatFiles(outputFiles[1], sample_pairs[1::2])
return fileURL(outputFiles[0]) + ',' + fileURL(outputFiles[1])

def formatSingles(sample_singles, work_mount):
Expand All @@ -129,7 +142,10 @@ def formatSingle(single):
return baseName + ending
sample_singles = sample_singles.split(',')
output = formatSingle(os.path.join(work_mount, os.path.basename(sample_singles[0])))
catFiles(output, sample_singles)
if not output.endswith('.gz'):
catFiles(output, sample_singles)
else:
gzipCatFiles(output, sample_singles)
return fileURL(output)

def generate_config(star_path, rsem_path, kallisto_path, output_dir, disable_cutadapt, save_bam,
Expand Down

0 comments on commit dcc39a7

Please sign in to comment.