Skip to content

Commit

Permalink
Merge pull request #9 from zhaoc1/master
Browse files: browse the repository at this point in the history
python3
  • Loading branch information
ctanes authored Jan 17, 2018
2 parents 25f04fe + 6c63e87 commit 0dc3a83
Show file tree
Hide file tree
Showing 9 changed files with 25 additions and 21 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ DNAbc
Identify DNA barcodes in high-throughput sequencing data files and write
demultiplexed data in a variety of formats.

Built on Python 3.5.
6 changes: 3 additions & 3 deletions dnabclib/assigner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@


class BarcodeAssigner(object):
def __init__(self, samples, mismatches=1, revcomp=True):
def __init__(self, samples, mismatches=0, revcomp=True):
self.samples = samples
if mismatches not in [0, 1, 2]:
if mismatches not in [0]:
raise ValueError(
"Only 0, 1, or 2 mismatches allowed (got %s)" % mismatches)
"Only 0 mismatches allowed (got %s)" % mismatches)
self.mismatches = mismatches
self.revcomp = revcomp
# Sample names assumed to be unique after validating input data
Expand Down
6 changes: 3 additions & 3 deletions dnabclib/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ def main(argv=None):
samples = list(Sample.load(args.barcode_file))

writer_cls = writers[config["output_format"]]
if os.path.exists(args.output_dir):
p.error("Output directory already exists")
os.mkdir(args.output_dir)
if not os.path.exists(args.output_dir):
#p.error("Output directory already exists")
os.mkdir(args.output_dir)
writer = writer_cls(args.output_dir)

if args.index_reads is None:
Expand Down
6 changes: 3 additions & 3 deletions dnabclib/seqfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def demultiplex(self, assigner, writer):
idxs = (FastqRead(x) for x in parse_fastq(self.index_file))
fwds = (FastqRead(x) for x in parse_fastq(self.forward_file))
revs = (FastqRead(x) for x in parse_fastq(self.reverse_file))
for idx, fwd, rev in itertools.izip(idxs, fwds, revs):
for idx, fwd, rev in zip(idxs, fwds, revs):
sample = assigner.assign(idx.seq)
writer.write((fwd, rev), sample)
return assigner.read_counts
Expand All @@ -34,7 +34,7 @@ def __init__(self, fwd, rev):
def demultiplex(self, assigner, writer):
fwds = (FastqRead(x) for x in parse_fastq(self.forward_file))
revs = (FastqRead(x) for x in parse_fastq(self.reverse_file))
for fwd, rev in itertools.izip(fwds, revs):
for fwd, rev in zip(fwds, revs):
barcode_seq = self._parse_barcode(fwd.desc)
sample = assigner.assign(barcode_seq)
writer.write((fwd, rev), sample)
Expand Down Expand Up @@ -67,7 +67,7 @@ def _grouper(iterable, n):
"Collect data into fixed-length chunks or blocks"
# grouper('ABCDEFG', 3) --> ABC DEF
args = [iter(iterable)] * n
return itertools.izip(*args)
return zip(*args)


def parse_fastq(f):
Expand Down
4 changes: 2 additions & 2 deletions dnabclib/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ def _get_sample_fp(self, sample):


def _get_sample_paired_fp(self, sample):
fn1 = "PCMP_%s_R1%s" % (sample.name, self.ext)
fn2 = "PCMP_%s_R2%s" % (sample.name, self.ext)
fn1 = "%s_R1%s" % (sample.name, self.ext)
fn2 = "%s_R2%s" % (sample.name, self.ext)
return (
os.path.join(self.output_dir, fn1),
os.path.join(self.output_dir, fn2))
Expand Down
2 changes: 1 addition & 1 deletion test/test_assigner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from collections import namedtuple
from cStringIO import StringIO
from io import StringIO
import os
import shutil
import tempfile
Expand Down
6 changes: 3 additions & 3 deletions test/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ class SampleNameTests(unittest.TestCase):
def test_get_sample_names_main(self):
barcode_file = tempfile.NamedTemporaryFile()
barcode_file.write(
"SampleA\tAAGGAAGG\n"
"SampleB\tACGTACGT\n")
b"SampleA\tAAGGAAGG\n"
b"SampleB\tACGTACGT\n")
barcode_file.seek(0)

output_file = tempfile.NamedTemporaryFile()
Expand All @@ -99,7 +99,7 @@ def test_get_sample_names_main(self):
output_file.seek(0)
observed_sample_names = output_file.read()

self.assertEqual(observed_sample_names, "SampleA\nSampleB\n")
self.assertEqual(observed_sample_names, b"SampleA\nSampleB\n")

if __name__ == "__main__":
unittest.main()
3 changes: 1 addition & 2 deletions test/test_seqfile.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import collections
from cStringIO import StringIO
from io import StringIO
import os.path
import shutil
import tempfile
Expand All @@ -21,7 +21,6 @@ def write(self, x, sample):
else:
self.written[sample.name].append(x)


MockSample = collections.namedtuple("MockSample", "name barcode")


Expand Down
12 changes: 8 additions & 4 deletions test/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def test_write(self):
w.close()

fp = w._get_output_fp(s1)
obs_output = open(fp).read()
with open(fp) as f:
obs_output = f.read()
self.assertEqual(obs_output, ">Read0\nACCTTGG\n")

self.assertFalse(os.path.exists(w._get_output_fp(s2)))
Expand All @@ -49,7 +50,8 @@ def test_write(self):
w.close()

fp = w._get_output_fp(s1)
obs_output = open(fp).read()
with open(fp) as f:
obs_output = f.read()

self.assertEqual(obs_output, "@Read0\nACCTTGG\n+\n#######\n")

Expand Down Expand Up @@ -79,10 +81,12 @@ def test_write(self):

fp1, fp2 = w._get_output_fp(s1)

obs1 = open(fp1).read()
with open(fp1) as f:
obs1 = f.read()
self.assertEqual(obs1, "@Read0\nACCTTGG\n+\n#######\n")

obs2 = open(fp2).read()
with open(fp2) as f:
obs2 = f.read()
self.assertEqual(obs2, "@Read1\nGCTAGCT\n+\n;342dfA\n")

self.assertFalse(any(
Expand Down

0 comments on commit 0dc3a83

Please sign in to comment.