From 49cef09d6c371b97d5035c3827f3d7b4fd713d23 Mon Sep 17 00:00:00 2001
From: Adetunji <madetunj@splprhpc05.cm.cluster>
Date: Fri, 10 Jan 2020 16:42:19 -0600
Subject: [PATCH] converted from python2 to python3

---
 README.md              |   2 +-
 bin/ROSE_bamToGFF.py   |  36 ++++++------
 bin/ROSE_callSuper.R   |   4 +-
 bin/ROSE_geneMapper.py |  35 ++++++------
 bin/ROSE_main.py       |  40 +++++++-------
 lib/ROSE_utils.py      | 121 +++++++++++++++++++++--------------------
 6 files changed, 118 insertions(+), 120 deletions(-)

diff --git a/README.md b/README.md
index 9146715..e05b5f9 100755
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ CLONED using SOURCETREE from: https://bitbucket.org/young_computation/rose/src/m
 	* samtools
 	* R version > 3.4
 	* bedtools > 2
-	* python2
+	* python3
 
 3) USAGE
 
diff --git a/bin/ROSE_bamToGFF.py b/bin/ROSE_bamToGFF.py
index 9a176ad..1330f1f 100755
--- a/bin/ROSE_bamToGFF.py
+++ b/bin/ROSE_bamToGFF.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 #bamToGFF.py
 
 #script to grab reads from a bam that align to a .gff file
@@ -12,7 +12,7 @@
 
 import os
 
-from string import join,upper,maketrans
+import string
 
 
 
@@ -41,16 +41,16 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False
     else:
         MMR = 1
 
-    print('using a MMR value of %s' % (MMR))
+    print(('using a MMR value of %s' % (MMR)))
     
-    senseTrans = maketrans('-+.','+-+')
+    #senseTrans = maketrans('-+.','+-+') #deprecated
 
     if ROSE_utils.checkChrStatus(bamFile) == 1:
-      print "has chr"
+      print("has chr")
       hasChrFlag = 1
       #sys.exit();
     else:
-      print "does not have chr"
+      print("does not have chr")
       hasChrFlag = 0
       #sys.exit()
       
@@ -67,10 +67,10 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False
     for line in gff:
         line = line[0:9]
         if ticker%100 == 0:
-            print ticker
+            print(ticker)
         ticker+=1
         if not hasChrFlag:
-	  line[0] = re.sub(r"chr",r"",line[0])
+            line[0] = re.sub(r"chr",r"",line[0])
         gffLocus = ROSE_utils.Locus(line[0],int(line[3]),int(line[4]),line[6],line[1])
         #print line[0]
         #sys.exit()
@@ -86,11 +86,11 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False
                 locus = ROSE_utils.Locus(locus.chr(),locus.start()-extension,locus.end(),locus.sense(),locus.ID())
             extendedReads.append(locus)
         if gffLocus.sense() == '+' or gffLocus.sense == '.':
-            senseReads = filter(lambda x:x.sense() == '+' or x.sense() == '.',extendedReads)
-            antiReads = filter(lambda x:x.sense() == '-',extendedReads)
+            senseReads = [x for x in extendedReads if x.sense() == '+' or x.sense() == '.']
+            antiReads = [x for x in extendedReads if x.sense() == '-']
         else:
-            senseReads = filter(lambda x:x.sense() == '-' or x.sense() == '.',extendedReads)
-            antiReads = filter(lambda x:x.sense() == '+',extendedReads)
+            senseReads = [x for x in extendedReads if x.sense() == '-' or x.sense() == '.']
+            antiReads = [x for x in extendedReads if x.sense() == '+']
 
         senseHash = defaultdict(int)
         antiHash = defaultdict(int)
@@ -107,12 +107,12 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False
                     antiHash[x]+=1
 
         #now apply flooring and filtering for coordinates
-        keys = ROSE_utils.uniquify(senseHash.keys()+antiHash.keys())
+        keys = ROSE_utils.uniquify(list(senseHash.keys())+list(antiHash.keys()))
         if floor > 0:
 
-            keys = filter(lambda x: (senseHash[x]+antiHash[x]) > floor,keys)
+            keys = [x for x in keys if (senseHash[x]+antiHash[x]) > floor]
         #coordinate filtering
-        keys = filter(lambda x: gffLocus.start() < x < gffLocus.end(),keys)
+        keys = [x for x in keys if gffLocus.start() < x < gffLocus.end()]
 
 
         #setting up the output table
@@ -131,7 +131,7 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False
 
             while n <nBins:
                 n+=1
-                binKeys = filter(lambda x: i < x < i+binSize,keys)
+                binKeys = [x for x in keys if i < x < i+binSize]
                 binDen = float(sum([senseHash[x]+antiHash[x] for x in binKeys]))/binSize
                 clusterLine+=[round(binDen/MMR,4)]
                 i = i+binSize
@@ -139,7 +139,7 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False
             i = gffLocus.end()
             while n < nBins:
                 n+=1
-                binKeys = filter(lambda x: i-binSize < x < i,keys)
+                binKeys = [x for x in keys if i-binSize < x < i]
                 binDen = float(sum([senseHash[x]+antiHash[x] for x in binKeys]))/binSize
                 clusterLine+=[round(binDen/MMR,4)]
                 i = i-binSize
@@ -192,7 +192,7 @@ def main():
         bamFile = options.bam
         fullPath = os.path.abspath(bamFile)
         bamName = fullPath.split('/')[-1].split('.')[0]
-        pathFolder = join(fullPath.split('/')[0:-1],'/')
+        pathFolder = '/'.join(fullPath.split('/')[0:-1])
         fileList = os.listdir(pathFolder)
         hasBai = False
         for fileName in fileList:
diff --git a/bin/ROSE_callSuper.R b/bin/ROSE_callSuper.R
index c0f1ced..540fb40 100755
--- a/bin/ROSE_callSuper.R
+++ b/bin/ROSE_callSuper.R
@@ -141,8 +141,8 @@ if(wceName == 'NONE'){
 }else{
 	plot(length(rankBy_vector):1,rankBy_vector[signalOrder], col='red',xlab=paste(rankBy_factor,'_enhancers'),ylab=paste(rankBy_factor,' Signal','- ',wceName),pch=19,cex=2)
 }
-abline(h=cutoff_options$absolute,color='grey',lty=2)
-abline(v=length(rankBy_vector)-length(superEnhancerRows),color='grey',lty=2)
+abline(h=cutoff_options$absolute,col='grey',lty=2)
+abline(v=length(rankBy_vector)-length(superEnhancerRows),col='grey',lty=2)
 lines(length(rankBy_vector):1,rankBy_vector[signalOrder],lwd=4, col='red')
 text(0,0.8*max(rankBy_vector),paste(' Cutoff used: ',cutoff_options$absolute,'\n','Super-Enhancers identified: ',length(superEnhancerRows)),pos=4)
 
diff --git a/bin/ROSE_geneMapper.py b/bin/ROSE_geneMapper.py
index 6558039..38411be 100755
--- a/bin/ROSE_geneMapper.py
+++ b/bin/ROSE_geneMapper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 #130428
 
 #ROSE_geneMapper.py
@@ -10,14 +10,11 @@
 
 import sys
 
-
-
 import ROSE_utils
 
-
 import os
 
-from string import upper,join
+import string
 
 from collections import defaultdict
 
@@ -33,9 +30,9 @@ def mapEnhancerToGene(annotFile,enhancerFile,transcribedFile='',uniqueGenes=True
     '''
     maps genes to enhancers. if uniqueGenes, reduces to gene name only. Otherwise, gives for each refseq
     '''
-    print "Herp"
+    print("Herp")
     startDict = ROSE_utils.makeStartDict(annotFile)
-    print "Derp"
+    print("Derp")
     enhancerTable = ROSE_utils.parseTable(enhancerFile,'\t')
 
 
@@ -46,7 +43,7 @@ def mapEnhancerToGene(annotFile,enhancerFile,transcribedFile='',uniqueGenes=True
         transcribedTable = ROSE_utils.parseTable(transcribedFile,'\t')
         transcribedGenes = [line[1] for line in transcribedTable]
     else:
-        transcribedGenes = startDict.keys()
+        transcribedGenes = list(startDict.keys())
 
     print('MAKING TRANSCRIPT COLLECTION')
     transcribedCollection = ROSE_utils.makeTranscriptCollection(annotFile,0,0,500,transcribedGenes)
@@ -132,24 +129,24 @@ def mapEnhancerToGene(annotFile,enhancerFile,transcribedFile='',uniqueGenes=True
             #print distList.index(min(distList))
             #print min(distList)
             #print len(distList)
-	    #print len(allEnhancerGenes[distList.index(min(distList))])
-	    #print line
-	    #print len(startDict[allEnhancerGenes[distList.index(min(distList))]])
+            #print len(allEnhancerGenes[distList.index(min(distList))])
+            #print line
+            #print len(startDict[allEnhancerGenes[distList.index(min(distList))]])
             closestGene = startDict[allEnhancerGenes[distList.index(min(distList))]]['name']
 
         #NOW WRITE THE ROW FOR THE ENHANCER TABLE
         newEnhancerLine = line[0:6]
         if byRefseq:
-            newEnhancerLine.append(join(ROSE_utils.uniquify([x for x in overlappingGenes]),','))
-            newEnhancerLine.append(join(ROSE_utils.uniquify([x for x in proximalGenes]),','))
+            newEnhancerLine.append(','.join(ROSE_utils.uniquify([x for x in overlappingGenes])))
+            newEnhancerLine.append(','.join(ROSE_utils.uniquify([x for x in proximalGenes])))
             #print newEnhancerLine
             #print len(allEnhancerGenes)
             #print distList
             closestGene = allEnhancerGenes[distList.index(min(distList))]
             newEnhancerLine.append(closestGene)
         else:
-            newEnhancerLine.append(join(ROSE_utils.uniquify([startDict[x]['name'] for x in overlappingGenes]),','))
-            newEnhancerLine.append(join(ROSE_utils.uniquify([startDict[x]['name'] for x in proximalGenes]),','))
+            newEnhancerLine.append(','.join(ROSE_utils.uniquify([startDict[x]['name'] for x in overlappingGenes])))
+            newEnhancerLine.append(','.join(ROSE_utils.uniquify([startDict[x]['name'] for x in proximalGenes])))
             closestGene = startDict[allEnhancerGenes[distList.index(min(distList))]]['name']
             newEnhancerLine.append(closestGene)
 
@@ -187,7 +184,7 @@ def mapEnhancerToGene(annotFile,enhancerFile,transcribedFile='',uniqueGenes=True
         proxEnhancers = geneDict['proximal'][refID] + geneDict['overlapping'][refID]
         
     
-        newLine = [geneName,refID,join(proxEnhancers,',')]
+        newLine = [geneName,refID,','.join(proxEnhancers)]
         geneToEnhancerTable.append(newLine)
 
     #re-sort enhancerToGeneTable
@@ -245,12 +242,12 @@ def main():
     if options.out:
         outFolder = ROSE_utils.formatFolder(options.out,True)
     else:
-        outFolder = join(enhancerFile.split('/')[0:-1],'/') + '/'
+        outFolder = '/'.join(enhancerFile.split('/')[0:-1]) + '/'
 
 
     #GETTING THE GENOME
     genome = options.genome
-    print('USING %s AS THE GENOME' % genome)
+    print(('USING %s AS THE GENOME' % genome))
 
 
     #GETTING THE CORRECT ANNOT FILE
@@ -263,7 +260,7 @@ def main():
         'MM10':'%s/annotation/mm10_refseq.ucsc' % (cwd),
         }
 
-    annotFile = genomeDict[upper(genome)]
+    annotFile = genomeDict[genome.upper()]
 
     #GETTING THE TRANSCRIBED LIST
     if options.geneList:
diff --git a/bin/ROSE_main.py b/bin/ROSE_main.py
index b0f706b..7d95ae2 100755
--- a/bin/ROSE_main.py
+++ b/bin/ROSE_main.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 
 #mapEnhancerFromFactor.py
 '''
@@ -19,7 +19,7 @@
 
 import os
 
-from string import upper,join
+import string
 
 from collections import defaultdict
 
@@ -46,7 +46,7 @@ def regionStitching(inputGFF,stitchWindow,tssWindow,annotFile,removeTSS=True):
         #this loop makes a locus centered around +/- tssWindow of transcribed genes
         #then adds it to the list tssLoci
         tssLoci = []
-        for geneID in startDict.keys():
+        for geneID in list(startDict.keys()):
             tssLoci.append(ROSE_utils.makeTSSLocus(geneID,startDict,tssWindow,tssWindow))
 
 
@@ -67,7 +67,7 @@ def regionStitching(inputGFF,stitchWindow,tssWindow,annotFile,removeTSS=True):
                 boundCollection.remove(locus)
                 debugOutput.append([locus.__str__(),locus.ID(),'CONTAINED'])
                 removeTicker+=1
-        print('REMOVED %s LOCI BECAUSE THEY WERE CONTAINED BY A TSS' % (removeTicker))
+        print(('REMOVED %s LOCI BECAUSE THEY WERE CONTAINED BY A TSS' % (removeTicker)))
 
     #boundCollection is now all enriched region loci that don't overlap an active TSS
     stitchedCollection = boundCollection.stitchCollection(stitchWindow,'both')
@@ -77,7 +77,7 @@ def regionStitching(inputGFF,stitchWindow,tssWindow,annotFile,removeTSS=True):
         #with the original loci that were there
         fixedLoci = []
         tssLoci = []
-        for geneID in startDict.keys():
+        for geneID in list(startDict.keys()):
             tssLoci.append(ROSE_utils.makeTSSLocus(geneID,startDict,50,50))
 
 
@@ -101,8 +101,8 @@ def regionStitching(inputGFF,stitchWindow,tssWindow,annotFile,removeTSS=True):
             else:
                 fixedLoci.append(stitchedLocus)
 
-        print('REMOVED %s STITCHED LOCI BECAUSE THEY OVERLAPPED MULTIPLE TSSs' % (removeTicker))
-        print('ADDED BACK %s ORIGINAL LOCI' % (originalTicker))
+        print(('REMOVED %s STITCHED LOCI BECAUSE THEY OVERLAPPED MULTIPLE TSSs' % (removeTicker)))
+        print(('ADDED BACK %s ORIGINAL LOCI' % (originalTicker)))
         fixedCollection = ROSE_utils.LocusCollection(fixedLoci,50)
         return fixedCollection,debugOutput
     else:
@@ -163,16 +163,16 @@ def mapCollection(stitchedCollection,referenceCollection,bamFileList,mappedFolde
         
         bamFileName = bamFile.split('/')[-1]
 
-        print('GETTING MAPPING DATA FOR  %s' % bamFile)
+        print(('GETTING MAPPING DATA FOR  %s' % bamFile))
         #assumes standard convention for naming enriched region gffs
         
         #opening up the mapped GFF
-        print('OPENING %s%s_%s_MAPPED.gff' % (mappedFolder,refName,bamFileName))
+        print(('OPENING %s%s_%s_MAPPED.gff' % (mappedFolder,refName,bamFileName)))
 
         mappedGFF =ROSE_utils.parseTable('%s%s_%s_MAPPED.gff' % (mappedFolder,refName,bamFileName),'\t')        
 
         signalDict = defaultdict(float)
-        print('MAKING SIGNAL DICT FOR %s' % (bamFile))
+        print(('MAKING SIGNAL DICT FOR %s' % (bamFile)))
         mappedLoci = []
         for line in mappedGFF[1:]:
 
@@ -268,13 +268,13 @@ def main():
         ROSE_utils.bedToGFF(options.input,inputGFFFile)
     elif options.input.split('.')[-1] =='gff':
         #COPY THE INPUT GFF TO THE GFF FOLDER
-	inputGFFFile = options.input
+        inputGFFFile = options.input
         os.system('cp %s %s' % (inputGFFFile,gffFolder))        
 
     else:
         print('WARNING: INPUT FILE DOES NOT END IN .gff or .bed. ASSUMING .gff FILE FORMAT')
         #COPY THE INPUT GFF TO THE GFF FOLDER
-	inputGFFFile = options.input
+        inputGFFFile = options.input
         os.system('cp %s %s' % (inputGFFFile,gffFolder))        
 
 
@@ -301,13 +301,13 @@ def main():
         removeTSS = False
 
     #GETTING THE BOUND REGION FILE USED TO DEFINE ENHANCERS
-    print('USING %s AS THE INPUT GFF' % (inputGFFFile))
+    print(('USING %s AS THE INPUT GFF' % (inputGFFFile)))
     inputName = inputGFFFile.split('/')[-1].split('.')[0]
 
 
     #GETTING THE GENOME
     genome = options.genome
-    print('USING %s AS THE GENOME' % genome)
+    print(('USING %s AS THE GENOME' % genome))
 
 
     #GETTING THE CORRECT ANNOT FILE
@@ -320,7 +320,7 @@ def main():
         'MM10':'%s/annotation/mm10_refseq.ucsc' % (cwd),
         }
 
-    annotFile = genomeDict[upper(genome)]
+    annotFile = genomeDict[genome.upper()]
 
     #MAKING THE START DICT
     print('MAKING START DICT')
@@ -354,11 +354,11 @@ def main():
     #WRITING DEBUG OUTPUT TO DISK
         
     if debug:
-        print('WRITING DEBUG OUTPUT TO DISK AS %s' % (debugOutFile))
+        print(('WRITING DEBUG OUTPUT TO DISK AS %s' % (debugOutFile)))
         ROSE_utils.unParseTable(debugOutput,debugOutFile,'\t')
 
     #WRITE THE GFF TO DISK
-    print('WRITING STITCHED GFF TO DISK AS %s' % (stitchedGFFFile))
+    print(('WRITING STITCHED GFF TO DISK AS %s' % (stitchedGFFFile)))
     ROSE_utils.unParseTable(stitchedGFF,stitchedGFFFile,'\t')
 
 
@@ -366,7 +366,7 @@ def main():
     #SETTING UP THE OVERALL OUTPUT FILE
     outputFile1 = outFolder + stitchedGFFName + '_ENHANCER_REGION_MAP.txt'
 
-    print('OUTPUT WILL BE WRITTEN TO  %s' % (outputFile1))
+    print(('OUTPUT WILL BE WRITTEN TO  %s' % (outputFile1)))
     
     #MAPPING TO THE NON STITCHED (ORIGINAL GFF)
     #MAPPING TO THE STITCHED GFF
@@ -413,7 +413,7 @@ def main():
         '''
         outputDone = True
         if ticker%6 == 0:
-            print(ticker*5)
+            print((ticker*5))
         ticker +=1
         #CHANGE THIS PARAMETER TO ALLOW MORE TIME TO MAP
         if ticker == 144:
@@ -442,7 +442,7 @@ def main():
         if outputDone == True:
             break
         time.sleep(300)
-    print('MAPPING TOOK %s MINUTES' % (ticker*5))
+    print(('MAPPING TOOK %s MINUTES' % (ticker*5)))
 
     print('BAM MAPPING COMPLETED NOW MAPPING DATA TO REGIONS')
     #CALCULATE DENSITY BY REGION
diff --git a/lib/ROSE_utils.py b/lib/ROSE_utils.py
index f43f6be..0f146b2 100755
--- a/lib/ROSE_utils.py
+++ b/lib/ROSE_utils.py
@@ -29,7 +29,7 @@ def unParseTable(table, output, sep):
     else:
         for line in table:
             line = [str(x) for x in line]
-            line = join(line,sep)
+            line = sep.join(line)
 
             fh_out.write(line)
             fh_out.write('\n')
@@ -107,7 +107,7 @@ def formatFolder(folderName,create=False):
         foo = os.listdir(folderName)
         return folderName
     except OSError:
-        print('folder %s does not exist' % (folderName))
+        print(('folder %s does not exist' % (folderName)))
         if create:
             os.system('mkdir %s' % (folderName))
             return folderName
@@ -132,13 +132,13 @@ def makeStartDict(annotFile,geneList = []):
         geneList = parseTable(geneList,'\t')
         geneList = [line[0] for line in geneList]
             
-    if upper(annotFile).count('REFSEQ') == 1:
+    if annotFile.upper().count('REFSEQ') == 1:
         refseqTable,refseqDict = importRefseq(annotFile)
         if len(geneList) == 0:
-            geneList = refseqDict.keys()
+            geneList = list(refseqDict.keys())
         startDict = {}
         for gene in geneList:
-            if refseqDict.has_key(gene) == False:
+            if gene not in refseqDict:
                 continue
             startDict[gene]={}
             startDict[gene]['sense'] = refseqTable[refseqDict[gene][0]][3]
@@ -165,7 +165,7 @@ def getTSSs(geneList,refseqTable,refseqDict):
             TSS.append(line[4])
         if line[3] == '-':
             TSS.append(line[5])
-    TSS = map(int,TSS)
+    TSS = list(map(int,TSS))
     
     return TSS
 
@@ -176,7 +176,7 @@ def getTSSs(geneList,refseqTable,refseqDict):
 def refseqFromKey(refseqKeyList,refseqDict,refseqTable):
     typeRefseq = []
     for name in refseqKeyList:
-        if refseqDict.has_key(name):
+        if name in refseqDict:
             typeRefseq.append(refseqTable[refseqDict[name][0]])
     return typeRefseq
 
@@ -195,7 +195,7 @@ def importRefseq(refseqFile, returnMultiples = False):
     refseqDict = {}
     ticker = 1
     for line in refseqTable[1:]:
-        if refseqDict.has_key(line[1]):
+        if line[1] in refseqDict:
             refseqDict[line[1]].append(ticker)
         else:
             refseqDict[line[1]] = [ticker]
@@ -226,14 +226,14 @@ class Locus:
     __senseDict = {'+':'+', '-':'-', '.':'.'}
     # chr = chromosome name (string)
     # sense = '+' or '-' (or '.' for an ambidexterous locus)
-    # start,end = ints of the start and end coords of the locus;
+    # start,end = ints of the start and end coords of the locus
     #      end coord is the coord of the last nucleotide.
     def __init__(self,chr,start,end,sense,ID=''):
         coords = [int(start),int(end)]
         coords.sort()
         # this method for assigning chromosome should help avoid storage of
         # redundant strings.
-        if not(self.__chrDict.has_key(chr)): self.__chrDict[chr] = chr
+        if not(chr in self.__chrDict): self.__chrDict[chr] = chr
         self._chr = self.__chrDict[chr]
         self._sense = self.__senseDict[sense]
         self._start = int(coords[0])
@@ -299,20 +299,20 @@ def __init__(self,loci,windowSize):
         for lcs in loci: self.__addLocus(lcs)
 
     def __addLocus(self,lcs):
-        if not(self.__loci.has_key(lcs)):
+        if not(lcs in self.__loci):
             self.__loci[lcs] = None
             if lcs.sense()=='.': chrKeyList = [lcs.chr()+'+', lcs.chr()+'-']
             else: chrKeyList = [lcs.chr()+lcs.sense()]
             for chrKey in chrKeyList:
-                if not(self.__chrToCoordToLoci.has_key(chrKey)): self.__chrToCoordToLoci[chrKey] = dict()
+                if not(chrKey in self.__chrToCoordToLoci): self.__chrToCoordToLoci[chrKey] = dict()
                 for n in self.__getKeyRange(lcs):
-                    if not(self.__chrToCoordToLoci[chrKey].has_key(n)): self.__chrToCoordToLoci[chrKey][n] = []
+                    if not(n in self.__chrToCoordToLoci[chrKey]): self.__chrToCoordToLoci[chrKey][n] = []
                     self.__chrToCoordToLoci[chrKey][n].append(lcs)
 
     def __getKeyRange(self,locus):
-        start = locus.start() / self.__winSize
-        end = locus.end() / self.__winSize + 1 ## add 1 because of the range
-        return range(start,end)
+        start = locus.start() // self.__winSize
+        end = locus.end() // self.__winSize + 1 ## add 1 because of the range
+        return range(start, end)
 
     def __len__(self): return len(self.__loci)
         
@@ -320,9 +320,9 @@ def append(self,new): self.__addLocus(new)
     def extend(self,newList):
         for lcs in newList: self.__addLocus(lcs)
     def hasLocus(self,locus):
-        return self.__loci.has_key(locus)
+        return locus in self.__loci
     def remove(self,old):
-        if not(self.__loci.has_key(old)): raise ValueError("requested locus isn't in collection")
+        if not(old in self.__loci): raise ValueError("requested locus isn't in collection")
         del self.__loci[old]
         if old.sense()=='.': senseList = ['+','-']
         else: senseList = [old.sense()]
@@ -331,13 +331,13 @@ def remove(self,old):
                 self.__chrToCoordToLoci[old.chr()+sense][k].remove(old)
 
     def getWindowSize(self): return self.__winSize
-    def getLoci(self): return self.__loci.keys()
+    def getLoci(self): return list(self.__loci.keys())
     def getChrList(self):
         # i need to remove the strand info from the chromosome keys and make
         # them non-redundant.
         tempKeys = dict()
-        for k in self.__chrToCoordToLoci.keys(): tempKeys[k[:-1]] = None
-        return tempKeys.keys()
+        for k in list(self.__chrToCoordToLoci.keys()): tempKeys[k[:-1]] = None
+        return list(tempKeys.keys())
             
     def __subsetHelper(self,locus,sense):
         sense = sense.lower()
@@ -351,12 +351,12 @@ def __subsetHelper(self,locus,sense):
         else: raise ValueError("sense value was inappropriate: '"+sense+"'.")
         for s in filter(lamb, senses):
             chrKey = locus.chr()+s
-            if self.__chrToCoordToLoci.has_key(chrKey):
+            if chrKey in self.__chrToCoordToLoci:
                 for n in self.__getKeyRange(locus):
-                    if self.__chrToCoordToLoci[chrKey].has_key(n):
+                    if n in self.__chrToCoordToLoci[chrKey]:
                         for lcs in self.__chrToCoordToLoci[chrKey][n]:
                             matches[lcs] = None
-        return matches.keys()
+        return list(matches.keys())
         
     # sense can be 'sense' (default), 'antisense', or 'both'
     # returns all members of the collection that overlap the locus
@@ -365,12 +365,12 @@ def getOverlap(self,locus,sense='sense'):
         ### now, get rid of the ones that don't really overlap
         realMatches = dict()
         if sense=='sense' or sense=='both':
-            for i in filter(lambda lcs: lcs.overlaps(locus), matches):
+            for i in [lcs for lcs in matches if lcs.overlaps(locus)]:
                 realMatches[i] = None
         if sense=='antisense' or sense=='both':
-            for i in filter(lambda lcs: lcs.overlapsAntisense(locus), matches):
+            for i in [lcs for lcs in matches if lcs.overlapsAntisense(locus)]:
                 realMatches[i] = None 
-        return realMatches.keys()
+        return list(realMatches.keys())
 
     # sense can be 'sense' (default), 'antisense', or 'both'
     # returns all members of the collection that are contained by the locus
@@ -379,12 +379,12 @@ def getContained(self,locus,sense='sense'):
         ### now, get rid of the ones that don't really overlap
         realMatches = dict()
         if sense=='sense' or sense=='both':
-            for i in filter(lambda lcs: locus.contains(lcs), matches):
+            for i in [lcs for lcs in matches if locus.contains(lcs)]:
                 realMatches[i] = None
         if sense=='antisense' or sense=='both':
-            for i in filter(lambda lcs: locus.containsAntisense(lcs), matches):
+            for i in [lcs for lcs in matches if locus.containsAntisense(lcs)]:
                 realMatches[i] = None
-        return realMatches.keys()
+        return list(realMatches.keys())
 
     # sense can be 'sense' (default), 'antisense', or 'both'
     # returns all members of the collection that contain the locus
@@ -393,12 +393,12 @@ def getContainers(self,locus,sense='sense'):
         ### now, get rid of the ones that don't really overlap
         realMatches = dict()
         if sense=='sense' or sense=='both':
-            for i in filter(lambda lcs: lcs.contains(locus), matches):
+            for i in [lcs for lcs in matches if lcs.contains(locus)]:
                 realMatches[i] = None
         if sense=='antisense' or sense=='both':
-            for i in filter(lambda lcs: lcs.containsAntisense(locus), matches):
+            for i in [lcs for lcs in matches if lcs.containsAntisense(locus)]:
                 realMatches[i] = None
-        return realMatches.keys()
+        return list(realMatches.keys())
 
     def stitchCollection(self,stitchWindow=1,sense='both'):
 
@@ -495,12 +495,12 @@ def makeTranscriptCollection(annotFile,upSearch,downSearch,window = 500,geneList
     takes in a refseqfile
     '''
 
-    if upper(annotFile).count('REFSEQ') == 1:
+    if annotFile.upper().count('REFSEQ') == 1:
         refseqTable,refseqDict = importRefseq(annotFile)
         locusList = []
         ticker = 0
         if len(geneList) == 0:
-            geneList =refseqDict.keys()
+            geneList =list(refseqDict.keys())
         for line in refseqTable[1:]:
             if geneList.count(line[1]) > 0:
                 if line[3] == '-':
@@ -558,27 +558,26 @@ def checkChrStatus(bamFile):
     stats.stdout.close()
     chrPattern = re.compile('chr')
     for line in statLines:
-      #print line
-      sline = line.split("\t")
-      #print sline[2]
-      if re.search(chrPattern, sline[2]):
-	return 1
-      else:
-	return 0
-	    
+        #print line
+        line = line.decode("utf-8")
+        sline = line.split("\t")
+        #print sline[2]
+        if re.search(chrPattern, sline[2]):
+            return 1
+        else:
+            return 0
+
 def convertBitwiseFlag(flag):
-   if int(flag) & 16:
-	return "-";
-   else:
-	return "+";
+    if int(flag) & 16:
+        return "-"
+    else:
+        return "+"
            
 class Bam:
     '''A class for a sorted and indexed bam file that allows easy analysis of reads'''
     def __init__(self,bamFile):
         self._bam = bamFile
-        
 
-	  
     def getTotalReads(self,readType = 'mapped'):
         command = 'samtools flagstat %s' % (self._bam)
         stats = subprocess.Popen(command,stdin = subprocess.PIPE,stderr = subprocess.PIPE,stdout = subprocess.PIPE,shell = True)
@@ -586,6 +585,7 @@ def getTotalReads(self,readType = 'mapped'):
         stats.stdout.close()
         if readType == 'mapped':
             for line in statLines:
+                line = line.decode("utf-8")
                 if line.count('mapped (') == 1:
                     
                     return int(line.split(' ')[0])
@@ -593,10 +593,10 @@ def getTotalReads(self,readType = 'mapped'):
             return int(statLines[0].split(' ')[0])
     
     def convertBitwiseFlag(self,flag):
-      if flag & 16:
-	return "-";
-      else:
-	return "+";
+        if flag & 16:
+            return "-"
+        else:
+            return "+"
 
     def getRawReads(self,locus,sense,unique = False,includeJxnReads = False,printCommand = False):
         '''
@@ -610,10 +610,11 @@ def getRawReads(self,locus,sense,unique = False,includeJxnReads = False,printCom
             print(command)
         getReads = subprocess.Popen(command,stdin = subprocess.PIPE,stderr = subprocess.PIPE,stdout = subprocess.PIPE,shell = True)
         reads = getReads.communicate()
-        reads = reads[0].split('\n')[:-1]
+        reads = reads[0].decode("utf-8")
+        reads = reads.split('\n')[:-1]
         reads = [read.split('\t') for read in reads]
         if includeJxnReads == False:
-            reads = filter(lambda x: x[5].count('N') < 1,reads)
+            reads = [x for x in reads if x[5].count('N') < 1]
 
         #convertDict = {'16':'-','0':'+','64':'+','65':'+','80':'-','81':'-','129':'+','145':'-'}
         convertDict = {'16':'-','0':'+','64':'+','65':'+','80':'-','81':'-','129':'+','145':'-','256':'+','272':'-','99':'+','147':'-'}
@@ -632,7 +633,7 @@ def getRawReads(self,locus,sense,unique = False,includeJxnReads = False,printCom
             strand = locus.sense()
         for read in reads:
             #readStrand = read[1].translate(convert)[0]
-	    #print read[1], read[0]
+            #print read[1], read[0]
             #readStrand = convertDict[read[1]]
             readStrand = convertBitwiseFlag(read[1])
 
@@ -657,7 +658,7 @@ def readsToLoci(self,reads,IDtag = 'sequence,seqID,none'):
             return
         #convert = string.maketrans('160','--+')
         #convertDict = {'16':'-','0':'+','64':'+','65':'+','80':'-','81':'-','129':'+','145':'-'}
-	#convertDict = {'16':'-','0':'+','64':'+','65':'+','80':'-','81':'-','129':'+','145':'-','256':'+','272':'-'}
+        #convertDict = {'16':'-','0':'+','64':'+','65':'+','80':'-','81':'-','129':'+','145':'-','256':'+','272':'-'}
         
         #BJA added 256 and 272, which correspond to 0 and 16 for multi-mapped reads respectively:
         #http://onetipperday.blogspot.com/2012/04/understand-flag-code-of-sam-format.html
@@ -682,7 +683,7 @@ def readsToLoci(self,reads,IDtag = 'sequence,seqID,none'):
                 #then it filters out the '' and converts them to integers
                 #only works for reads that span one junction
                 
-                [first,gap,second] = [int(x) for x in filter(lambda x: len(x) > 0, re.findall(numPattern,read[5]))][0:3]
+                [first,gap,second] = [int(x) for x in re.findall(numPattern,read[5]) if len(x) > 0][0:3]
                 if IDtag == 'sequence':
                     loci.append(Locus(chrom,start,start+first,strand,ID[0:first]))
                     loci.append(Locus(chrom,start+first+gap,start+first+gap+second,strand,ID[first:]))
@@ -771,7 +772,7 @@ def order(x, NoneIsLast = True, decreasing = False):
         omitNone = True
         
     n  = len(x)
-    ix = range(n)
+    ix = list(range(n))
     if None not in x:
         ix.sort(reverse = decreasing, key = lambda j : x[j])
     else:
@@ -783,7 +784,7 @@ def key(i, x = x):
                 return not(elem is None), elem
             else:
                 return elem is None, elem
-        ix = range(n)
+        ix = list(range(n))
         ix.sort(key=key, reverse=decreasing)
             
     if omitNone: