From 49cef09d6c371b97d5035c3827f3d7b4fd713d23 Mon Sep 17 00:00:00 2001 From: Adetunji Date: Fri, 10 Jan 2020 16:42:19 -0600 Subject: [PATCH] converted from python2 to python3 --- README.md | 2 +- bin/ROSE_bamToGFF.py | 36 ++++++------ bin/ROSE_callSuper.R | 4 +- bin/ROSE_geneMapper.py | 35 ++++++------ bin/ROSE_main.py | 40 +++++++------- lib/ROSE_utils.py | 121 +++++++++++++++++++++-------------------- 6 files changed, 118 insertions(+), 120 deletions(-) diff --git a/README.md b/README.md index 9146715..e05b5f9 100755 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ CLONED using SOURCETREE from: https://bitbucket.org/young_computation/rose/src/m * samtools * R version > 3.4 * bedtools > 2 - * python2 + * python3 3) USAGE diff --git a/bin/ROSE_bamToGFF.py b/bin/ROSE_bamToGFF.py index 9a176ad..1330f1f 100755 --- a/bin/ROSE_bamToGFF.py +++ b/bin/ROSE_bamToGFF.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 #bamToGFF.py #script to grab reads from a bam that align to a .gff file @@ -12,7 +12,7 @@ import os -from string import join,upper,maketrans +import string @@ -41,16 +41,16 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False else: MMR = 1 - print('using a MMR value of %s' % (MMR)) + print(('using a MMR value of %s' % (MMR))) - senseTrans = maketrans('-+.','+-+') + #senseTrans = maketrans('-+.','+-+') #deprecated if ROSE_utils.checkChrStatus(bamFile) == 1: - print "has chr" + print("has chr") hasChrFlag = 1 #sys.exit(); else: - print "does not have chr" + print("does not have chr") hasChrFlag = 0 #sys.exit() @@ -67,10 +67,10 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False for line in gff: line = line[0:9] if ticker%100 == 0: - print ticker + print(ticker) ticker+=1 if not hasChrFlag: - line[0] = re.sub(r"chr",r"",line[0]) + line[0] = re.sub(r"chr",r"",line[0]) gffLocus = ROSE_utils.Locus(line[0],int(line[3]),int(line[4]),line[6],line[1]) #print line[0] #sys.exit() @@ -86,11 +86,11 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False locus = ROSE_utils.Locus(locus.chr(),locus.start()-extension,locus.end(),locus.sense(),locus.ID()) extendedReads.append(locus) if gffLocus.sense() == '+' or gffLocus.sense == '.': - senseReads = filter(lambda x:x.sense() == '+' or x.sense() == '.',extendedReads) - antiReads = filter(lambda x:x.sense() == '-',extendedReads) + senseReads = [x for x in extendedReads if x.sense() == '+' or x.sense() == '.'] + antiReads = [x for x in extendedReads if x.sense() == '-'] else: - senseReads = filter(lambda x:x.sense() == '-' or x.sense() == '.',extendedReads) - antiReads = filter(lambda x:x.sense() == '+',extendedReads) + senseReads = [x for x in extendedReads if x.sense() == '-' or x.sense() == '.'] + antiReads = [x for x in extendedReads if x.sense() == '+'] senseHash = defaultdict(int) antiHash = defaultdict(int) @@ -107,12 +107,12 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False antiHash[x]+=1 #now apply flooring and filtering for coordinates - keys = ROSE_utils.uniquify(senseHash.keys()+antiHash.keys()) + keys = ROSE_utils.uniquify(list(senseHash.keys())+list(antiHash.keys())) if floor > 0: - keys = filter(lambda x: (senseHash[x]+antiHash[x]) > floor,keys) + keys = [x for x in keys if (senseHash[x]+antiHash[x]) > floor] #coordinate filtering - keys = filter(lambda x: gffLocus.start() < x < gffLocus.end(),keys) + keys = [x for x in keys if gffLocus.start() < x < gffLocus.end()] #setting up the output table @@ -131,7 +131,7 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False while n 0: if line[3] == '-': @@ -558,27 +558,26 @@ def checkChrStatus(bamFile): stats.stdout.close() chrPattern = re.compile('chr') for line in statLines: - #print line - sline = line.split("\t") - #print sline[2] - if re.search(chrPattern, sline[2]): - return 1 - else: - return 0 - + #print line + line = line.decode("utf-8") + sline = line.split("\t") + #print sline[2] + if re.search(chrPattern, sline[2]): + return 1 + else: + return 0 + def convertBitwiseFlag(flag): - if int(flag) & 16: - return "-"; - else: - return "+"; + if int(flag) & 16: + return "-" + else: + return "+" class Bam: '''A class for a sorted and indexed bam file that allows easy analysis of reads''' def __init__(self,bamFile): self._bam = bamFile - - def getTotalReads(self,readType = 'mapped'): command = 'samtools flagstat %s' % (self._bam) stats = subprocess.Popen(command,stdin = subprocess.PIPE,stderr = subprocess.PIPE,stdout = subprocess.PIPE,shell = True) @@ -586,6 +585,7 @@ def getTotalReads(self,readType = 'mapped'): stats.stdout.close() if readType == 'mapped': for line in statLines: + line = line.decode("utf-8") if line.count('mapped (') == 1: return int(line.split(' ')[0]) @@ -593,10 +593,10 @@ def getTotalReads(self,readType = 'mapped'): return int(statLines[0].split(' ')[0]) def convertBitwiseFlag(self,flag): - if flag & 16: - return "-"; - else: - return "+"; + if flag & 16: + return "-" + else: + return "+" def getRawReads(self,locus,sense,unique = False,includeJxnReads = False,printCommand = False): ''' @@ -610,10 +610,11 @@ def getRawReads(self,locus,sense,unique = False,includeJxnReads = False,printCom print(command) getReads = subprocess.Popen(command,stdin = subprocess.PIPE,stderr = subprocess.PIPE,stdout = subprocess.PIPE,shell = True) reads = getReads.communicate() - reads = reads[0].split('\n')[:-1] + reads = reads[0].decode("utf-8") + reads = reads.split('\n')[:-1] reads = [read.split('\t') for read in reads] if includeJxnReads == False: - reads = filter(lambda x: x[5].count('N') < 1,reads) + reads = [x for x in reads if x[5].count('N') < 1] #convertDict = {'16':'-','0':'+','64':'+','65':'+','80':'-','81':'-','129':'+','145':'-'} convertDict = {'16':'-','0':'+','64':'+','65':'+','80':'-','81':'-','129':'+','145':'-','256':'+','272':'-','99':'+','147':'-'} @@ -632,7 +633,7 @@ def getRawReads(self,locus,sense,unique = False,includeJxnReads = False,printCom strand = locus.sense() for read in reads: #readStrand = read[1].translate(convert)[0] - #print read[1], read[0] + #print read[1], read[0] #readStrand = convertDict[read[1]] readStrand = convertBitwiseFlag(read[1]) @@ -657,7 +658,7 @@ def readsToLoci(self,reads,IDtag = 'sequence,seqID,none'): return #convert = string.maketrans('160','--+') #convertDict = {'16':'-','0':'+','64':'+','65':'+','80':'-','81':'-','129':'+','145':'-'} - #convertDict = {'16':'-','0':'+','64':'+','65':'+','80':'-','81':'-','129':'+','145':'-','256':'+','272':'-'} + #convertDict = {'16':'-','0':'+','64':'+','65':'+','80':'-','81':'-','129':'+','145':'-','256':'+','272':'-'} #BJA added 256 and 272, which correspond to 0 and 16 for multi-mapped reads respectively: #http://onetipperday.blogspot.com/2012/04/understand-flag-code-of-sam-format.html @@ -682,7 +683,7 @@ def readsToLoci(self,reads,IDtag = 'sequence,seqID,none'): #then it filters out the '' and converts them to integers #only works for reads that span one junction - [first,gap,second] = [int(x) for x in filter(lambda x: len(x) > 0, re.findall(numPattern,read[5]))][0:3] + [first,gap,second] = [int(x) for x in [x for x in re.findall(numPattern,read[5]) if len(x) > 0]][0:3] if IDtag == 'sequence': loci.append(Locus(chrom,start,start+first,strand,ID[0:first])) loci.append(Locus(chrom,start+first+gap,start+first+gap+second,strand,ID[first:])) @@ -771,7 +772,7 @@ def order(x, NoneIsLast = True, decreasing = False): omitNone = True n = len(x) - ix = range(n) + ix = list(range(n)) if None not in x: ix.sort(reverse = decreasing, key = lambda j : x[j]) else: @@ -783,7 +784,7 @@ def key(i, x = x): return not(elem is None), elem else: return elem is None, elem - ix = range(n) + ix = list(range(n)) ix.sort(key=key, reverse=decreasing) if omitNone: