From 1da4419c5c438aa79ee2c9f7821e191be8d22518 Mon Sep 17 00:00:00 2001 From: Zhao Date: Wed, 3 May 2017 16:53:40 -0400 Subject: [PATCH 1/3] update readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 50a55a6..6e3413b 100644 --- a/README.md +++ b/README.md @@ -4,3 +4,4 @@ DNAbc Identify DNA barcodes in high-throughput sequencing data files and write demultiplexed data in a variety of formats. +Build on python3.5 From 613cc98f83b9d5a72b99b3339cb363758c2435cc Mon Sep 17 00:00:00 2001 From: Zhao Date: Wed, 3 May 2017 16:54:21 -0400 Subject: [PATCH 2/3] update dnabclib for python3 --- dnabclib/assigner.py | 6 +++--- dnabclib/main.py | 6 +++--- dnabclib/seqfile.py | 6 +++--- dnabclib/writer.py | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/dnabclib/assigner.py b/dnabclib/assigner.py index ea5f9ed..557c076 100644 --- a/dnabclib/assigner.py +++ b/dnabclib/assigner.py @@ -2,11 +2,11 @@ class BarcodeAssigner(object): - def __init__(self, samples, mismatches=1, revcomp=True): + def __init__(self, samples, mismatches=0, revcomp=True): self.samples = samples - if mismatches not in [0, 1, 2]: + if mismatches not in [0]: raise ValueError( - "Only 0, 1, or 2 mismatches allowed (got %s)" % mismatches) + "Only 0 mismatches allowed (got %s)" % mismatches) self.mismatches = mismatches self.revcomp = revcomp # Sample names assumed to be unique after validating input data diff --git a/dnabclib/main.py b/dnabclib/main.py index 73a9165..bb5997f 100644 --- a/dnabclib/main.py +++ b/dnabclib/main.py @@ -89,9 +89,9 @@ def main(argv=None): samples = list(Sample.load(args.barcode_file)) writer_cls = writers[config["output_format"]] - if os.path.exists(args.output_dir): - p.error("Output directory already exists") - os.mkdir(args.output_dir) + if not os.path.exists(args.output_dir): + #p.error("Output directory already exists") + os.mkdir(args.output_dir) writer = writer_cls(args.output_dir) if args.index_reads is None: diff --git a/dnabclib/seqfile.py b/dnabclib/seqfile.py index 5280fa4..4b3b10b 100644 --- a/dnabclib/seqfile.py +++ b/dnabclib/seqfile.py @@ -15,7 +15,7 @@ def demultiplex(self, assigner, writer): idxs = (FastqRead(x) for x in parse_fastq(self.index_file)) fwds = (FastqRead(x) for x in parse_fastq(self.forward_file)) revs = (FastqRead(x) for x in parse_fastq(self.reverse_file)) - for idx, fwd, rev in itertools.izip(idxs, fwds, revs): + for idx, fwd, rev in zip(idxs, fwds, revs): sample = assigner.assign(idx.seq) writer.write((fwd, rev), sample) return assigner.read_counts @@ -34,7 +34,7 @@ def __init__(self, fwd, rev): def demultiplex(self, assigner, writer): fwds = (FastqRead(x) for x in parse_fastq(self.forward_file)) revs = (FastqRead(x) for x in parse_fastq(self.reverse_file)) - for fwd, rev in itertools.izip(fwds, revs): + for fwd, rev in zip(fwds, revs): barcode_seq = self._parse_barcode(fwd.desc) sample = assigner.assign(barcode_seq) writer.write((fwd, rev), sample) @@ -67,7 +67,7 @@ def _grouper(iterable, n): "Collect data into fixed-length chunks or blocks" # grouper('ABCDEFG', 3) --> ABC DEF args = [iter(iterable)] * n - return itertools.izip(*args) + return zip(*args) def parse_fastq(f): diff --git a/dnabclib/writer.py b/dnabclib/writer.py index b661a33..48fae3f 100644 --- a/dnabclib/writer.py +++ b/dnabclib/writer.py @@ -7,8 +7,8 @@ def _get_sample_fp(self, sample): def _get_sample_paired_fp(self, sample): - fn1 = "PCMP_%s_R1%s" % (sample.name, self.ext) - fn2 = "PCMP_%s_R2%s" % (sample.name, self.ext) + fn1 = "%s_R1%s" % (sample.name, self.ext) + fn2 = "%s_R2%s" % (sample.name, self.ext) return ( os.path.join(self.output_dir, fn1), os.path.join(self.output_dir, fn2)) From 6c63e878a472bc07de9ad2a98697e3ea4c2fcd9a Mon Sep 17 00:00:00 2001 From: Zhao Date: Wed, 3 May 2017 16:55:52 -0400 Subject: [PATCH 3/3] update test scripts for python3 --- test/test_assigner.py | 2 +- test/test_main.py | 6 +++--- test/test_seqfile.py | 3 +-- test/test_writer.py | 12 ++++++++---- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/test/test_assigner.py b/test/test_assigner.py index 95983cd..a79de7d 100644 --- a/test/test_assigner.py +++ b/test/test_assigner.py @@ -1,5 +1,5 @@ from collections import namedtuple -from cStringIO import StringIO +from io import StringIO import os import shutil import tempfile diff --git a/test/test_main.py b/test/test_main.py index 959896d..9cf92a1 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -85,8 +85,8 @@ class SampleNameTests(unittest.TestCase): def test_get_sample_names_main(self): barcode_file = tempfile.NamedTemporaryFile() barcode_file.write( - "SampleA\tAAGGAAGG\n" - "SampleB\tACGTACGT\n") + b"SampleA\tAAGGAAGG\n" + b"SampleB\tACGTACGT\n") barcode_file.seek(0) output_file = tempfile.NamedTemporaryFile() @@ -99,7 +99,7 @@ def test_get_sample_names_main(self): output_file.seek(0) observed_sample_names = output_file.read() - self.assertEqual(observed_sample_names, "SampleA\nSampleB\n") + self.assertEqual(observed_sample_names, b"SampleA\nSampleB\n") if __name__ == "__main__": unittest.main() diff --git a/test/test_seqfile.py b/test/test_seqfile.py index 99fc825..205c06d 100644 --- a/test/test_seqfile.py +++ b/test/test_seqfile.py @@ -1,5 +1,5 @@ import collections -from cStringIO import StringIO +from io import StringIO import os.path import shutil import tempfile @@ -21,7 +21,6 @@ def write(self, x, sample): else: self.written[sample.name].append(x) - MockSample = collections.namedtuple("MockSample", "name barcode") diff --git a/test/test_writer.py b/test/test_writer.py index 29fe989..9cfc91c 100644 --- a/test/test_writer.py +++ b/test/test_writer.py @@ -25,7 +25,8 @@ def test_write(self): w.close() fp = w._get_output_fp(s1) - obs_output = open(fp).read() + with open(fp) as f: + obs_output = f.read() self.assertEqual(obs_output, ">Read0\nACCTTGG\n") self.assertFalse(os.path.exists(w._get_output_fp(s2))) @@ -49,7 +50,8 @@ def test_write(self): w.close() fp = w._get_output_fp(s1) - obs_output = open(fp).read() + with open(fp) as f: + obs_output = f.read() self.assertEqual(obs_output, "@Read0\nACCTTGG\n+\n#######\n") @@ -79,10 +81,12 @@ def test_write(self): fp1, fp2 = w._get_output_fp(s1) - obs1 = open(fp1).read() + with open(fp1) as f: + obs1 = f.read() self.assertEqual(obs1, "@Read0\nACCTTGG\n+\n#######\n") - obs2 = open(fp2).read() + with open(fp2) as f: + obs2 = f.read() self.assertEqual(obs2, "@Read1\nGCTAGCT\n+\n;342dfA\n") self.assertFalse(any(