Skip to content

Commit 2e7608c

Browse files
committed
Merge branch 'master' of https://github.com/mnshgl0110/hometools
2 parents d74d2cd + 935a84e commit 2e7608c

File tree

1 file changed

+59
-6
lines changed

1 file changed

+59
-6
lines changed

hometools/hometools.py

+59-6
Original file line numberDiff line numberDiff line change
@@ -1773,6 +1773,46 @@ def pbamrc(args):
17731773
# END
17741774

17751775

1776+
def bamrc2af(args):
    """
    Reads the output of pbamrc and a corresponding VCF file and writes the allele frequencies of the
    ref and alt alleles to the output file.
    Currently, working for SNPs only (records where both REF and ALT are a single A/C/G/T base).

    Input (attributes of args; only the `.name` of each is used and the files are re-opened by path):
        args.bamrc: read-count file
        args.vcf:   VCF file, plain text or gzip-compressed (as decided by isgzip)
        args.out:   output file; when None, 'bamrc_af.txt' is used
    Output: one tab-separated line per read-count position that is present in the VCF:
        chromosome, position, ref, alt, ref allele frequency, alt allele frequency
    Frequencies are count/depth rounded to two decimals. Positions that are absent from the VCF or
    that have zero read depth are skipped with a warning.
    """
    from gzip import open as gzopen
    logger = mylogger("bamrc2af")
    rcfin = args.bamrc.name
    vcffin = args.vcf.name
    outfin = 'bamrc_af.txt' if args.out is None else args.out.name

    logger.info('Reading VCF')
    bases = 'ACGT'
    posdict = dict()
    opener = gzopen if isgzip(vcffin) else open
    # 'rt' yields str lines for both gzip and plain files, so the same parsing works for either
    with opener(vcffin, 'rt') as vcf:
        for line in vcf:
            if line.startswith('#'): continue
            line = line.strip().split()
            if not line: continue
            ref, alt = line[3].upper(), line[4].upper()
            # Length check is required: a bare `in 'ACGT'` is a substring test and would accept 'AC' or 'CG'
            if len(ref) != 1 or len(alt) != 1: continue
            if ref not in bases or alt not in bases: continue
            posdict[(line[0], line[1])] = ref, alt

    logger.info('Reading bamrc')
    # Get AF from bam readcount
    # NOTE(review): assumes column 3 (0-based) is the read depth and columns 4-7 are the A, C, G, T counts - confirm against pbamrc output
    basedict = dict(zip(bases, range(4, 8)))
    with open(rcfin, 'r') as rc, open(outfin, 'w') as out:
        for line in rc:
            line = line.strip().split()
            if not line: continue
            try:
                ref, alt = posdict[(line[0], line[1])]
            except KeyError:
                logger.warning(f'Position {line[0]}:{line[1]} not found in VCF. Skipping it.')
                # Without this, ref/alt would be undefined or left over from the previous position
                continue
            depth = int(line[3])
            if depth == 0:
                # Allele frequency is undefined when no read covers the position
                logger.warning(f'Position {line[0]}:{line[1]} has zero read depth. Skipping it.')
                continue
            refaf = round(int(line[basedict[ref]])/depth, 2)
            altaf = round(int(line[basedict[alt]])/depth, 2)
            out.write(f'{line[0]}\t{line[1]}\t{ref}\t{alt}\t{refaf}\t{altaf}\n')
    logger.info('Finished')
    # END
1814+
1815+
17761816
def run_ppileup(locs, out, bam, pars):
17771817
from subprocess import Popen, PIPE
17781818
with open(out, 'w') as fout:
@@ -2569,22 +2609,29 @@ def main(cmd):
25692609
# <editor-fold desc="BAM Commands">
25702610
parser_bamcov = subparsers.add_parser("bamcov", help="BAM: Get mean read-depth for chromosomes from a BAM file", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25712611
parser_pbamrc = subparsers.add_parser("pbamrc", help="BAM: Run bam-readcount in a parallel manner by dividing the input bed file.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2612+
parser_bamrc2af = subparsers.add_parser("bamrc2af", help="BAM: Reads the output of pbamrc and a corresponding VCF file and saves the allele frequencies of the ref/alt alleles.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25722613
parser_splitbam = subparsers.add_parser("splitbam", help="BAM: Split a BAM files based on TAG value. BAM file must be sorted using the TAG.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25732614
parser_mapbp = subparsers.add_parser("mapbp", help="BAM: For a given reference coordinate get the corresponding base and position in the reads/segments mapping the reference position", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25742615
parser_bam2coords = subparsers.add_parser("bam2coords", help="BAM: Convert BAM/SAM file to alignment coords", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25752616
parser_ppileup = subparsers.add_parser("ppileup", help="BAM: Currently it is slower than just running mpileup on 1 CPU. Might be possible to optimize later. Run samtools mpileup in parallel when pileup is required for specific positions by dividing the input bed file.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25762617
# </editor-fold>
25772618

2619+
# <editor-fold desc="syri CLI">
2620+
parser_runsyri = subparsers.add_parser("runsyri", help=hyellow("syri: Parser to align and run syri on two genomes"),
2621+
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2622+
parser_syriidx = subparsers.add_parser("syriidx", help=hyellow(
2623+
"syri: Generates index for syri.out. Filters non-SR annotations, then bgzip, then tabix index"),
2624+
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2625+
parser_syri2bed = subparsers.add_parser("syri2bed", help=hyellow("syri: Converts syri output to bedpe format"),
2626+
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2627+
# </editor-fold>
25782628

2579-
## syri
2580-
parser_runsyri = subparsers.add_parser("runsyri", help=hyellow("syri: Parser to align and run syri on two genomes"), formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2581-
parser_syriidx = subparsers.add_parser("syriidx", help=hyellow("syri: Generates index for syri.out. Filters non-SR annotations, then bgzip, then tabix index"), formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2582-
parser_syri2bed = subparsers.add_parser("syri2bed", help=hyellow("syri: Converts syri output to bedpe format"), formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2583-
2584-
## Plotting
2629+
# <editor-fold desc="Plotting">
25852630
parser_plthist = subparsers.add_parser("plthist", help="Plot: Takes frequency output (like from uniq -c) and generates a histogram plot", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25862631
parser_plotal = subparsers.add_parser("plotal", help="Plot: Visualise pairwise-whole genome alignments between multiple genomes", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25872632
parser_plotbar = subparsers.add_parser("pltbar", help="Plot: Generate barplot. Input: a two column file with first column as features and second column as values", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2633+
# </editor-fold>
2634+
25882635

25892636
## Assembly graphs
25902637
parser_asmreads = subparsers.add_parser("asmreads", help=hyellow("GFA: For a given genomic region, get reads that constitute the corresponding assembly graph"), formatter_class=argparse.ArgumentDefaultsHelpFormatter)
@@ -2607,6 +2654,12 @@ def main(cmd):
26072654
parser.print_help()
26082655
sys.exit()
26092656

2657+
# bamrc2af
2658+
parser_bamrc2af.set_defaults(func=bamrc2af)
2659+
parser_bamrc2af.add_argument("bamrc", help="BAM readcount file generated using bamrc", type=argparse.FileType('r'))
2660+
parser_bamrc2af.add_argument("vcf", help="VCF file", type=argparse.FileType('r'))
2661+
parser_bamrc2af.add_argument("out", help="Output file", type=argparse.FileType('w'))
2662+
26102663
# xls2csv
26112664
parser_xls2tsv.set_defaults(func=xls2csv)
26122665
parser_xls2tsv.add_argument("xls", help="Input excel file", type=argparse.FileType('r'))

0 commit comments

Comments
 (0)