Skip to content

Commit 29f6379

Browse files
committed
Added bamrc2af function code
1 parent feb6e69 commit 29f6379

File tree

1 file changed

+50
-4
lines changed

1 file changed

+50
-4
lines changed

hometools/hometools.py

+50-4
Original file line numberDiff line numberDiff line change
@@ -1774,6 +1774,46 @@ def pbamrc(args):
17741774

17751775

17761776
def bamrc2af(args):
1777+
"""
1778+
Reads the output of pbamrc and a corresponding VCF file and returns the allele frequencies of the alt alleles.
1779+
Currently, working for only SNPs
1780+
"""
1781+
from gzip import open as gzopen
1782+
logger = mylogger("bamrc2af")
1783+
rcfin = args.bamrc.name
1784+
vcffin = args.vcf.name
1785+
outfin = 'bamrc_af.txt' if args.out is None else args.out.name
1786+
1787+
## Get alt SNP lists
1788+
rcfin = "C:\\Users\\ra98jam\\potato_hap_example\\dm_all_sample_snp_Otava_genotype.bamrc"
1789+
vcffin = "C:\\Users\\ra98jam\\potato_hap_example\\dm_all_sample_chr2.merged.vcf.gz"
1790+
outfin = "C:\\Users\\ra98jam\\potato_hap_example\\tmp.txt"
1791+
1792+
posdict = dict()
1793+
op = gzopen if isgzip(vcffin) else open
1794+
with op(vcffin, 'r') as vcf:
1795+
for line in vcf:
1796+
# break
1797+
if line[0] == 35: continue
1798+
line = line.decode()
1799+
line = line.strip().split()
1800+
if line[4].upper() not in 'ACGT' : continue
1801+
posdict[tuple(line[:2])] = line[3], line[4]
1802+
1803+
# Get AF from bam readcount
1804+
basedict = dict(zip('ACGT', range(4, 8)))
1805+
with open(rcfin, 'r') as rc, open(outfin, 'w') as out:
1806+
for line in rc:
1807+
line = line.strip().split()
1808+
try:
1809+
ref, alt = posdict[(line[0], line[1])]
1810+
except KeyError:
1811+
logger.warning(f'Position {line[0]}:{line[1]} not found in VCF. Skipping it.')
1812+
refi = basedict[ref]
1813+
alti = basedict[alt]
1814+
out.write(f'{line[0]}\t{line[1]}\t{ref}\t{alt}\t{round(int(line[refi])/int(line[3]) , 2)}\t{round(int(line[alti])/int(line[3]), 2)}\n')
1815+
1816+
17771817

17781818
# END
17791819

@@ -2575,17 +2615,23 @@ def main(cmd):
25752615
# <editor-fold desc="BAM Commands">
25762616
parser_bamcov = subparsers.add_parser("bamcov", help="BAM: Get mean read-depth for chromosomes from a BAM file", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25772617
parser_pbamrc = subparsers.add_parser("pbamrc", help="BAM: Run bam-readcount in a parallel manner by dividing the input bed file.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2618+
parser_bamrc2af = subparsers.add_parser("bamrc2af", help="BAM: Reads the output of pbamrc and a corresponding VCF file and saves the allele frequencies of the ref/alt alleles.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25782619
parser_splitbam = subparsers.add_parser("splitbam", help="BAM: Split a BAM files based on TAG value. BAM file must be sorted using the TAG.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25792620
parser_mapbp = subparsers.add_parser("mapbp", help="BAM: For a given reference coordinate get the corresponding base and position in the reads/segments mapping the reference position", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25802621
parser_bam2coords = subparsers.add_parser("bam2coords", help="BAM: Convert BAM/SAM file to alignment coords", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25812622
parser_ppileup = subparsers.add_parser("ppileup", help="BAM: Currently it is slower than just running mpileup on 1 CPU. Might be possible to optimize later. Run samtools mpileup in parallel when pileup is required for specific positions by dividing the input bed file.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25822623
# </editor-fold>
25832624

2625+
# <editor-fold desc="syri">
2626+
parser_runsyri = subparsers.add_parser("runsyri", help=hyellow("syri: Parser to align and run syri on two genomes"),
2627+
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2628+
parser_syriidx = subparsers.add_parser("syriidx", help=hyellow(
2629+
"syri: Generates index for syri.out. Filters non-SR annotations, then bgzip, then tabix index"),
2630+
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2631+
parser_syri2bed = subparsers.add_parser("syri2bed", help=hyellow("syri: Converts syri output to bedpe format"),
2632+
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2633+
# </editor-fold>
25842634

2585-
## syri
2586-
parser_runsyri = subparsers.add_parser("runsyri", help=hyellow("syri: Parser to align and run syri on two genomes"), formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2587-
parser_syriidx = subparsers.add_parser("syriidx", help=hyellow("syri: Generates index for syri.out. Filters non-SR annotations, then bgzip, then tabix index"), formatter_class=argparse.ArgumentDefaultsHelpFormatter)
2588-
parser_syri2bed = subparsers.add_parser("syri2bed", help=hyellow("syri: Converts syri output to bedpe format"), formatter_class=argparse.ArgumentDefaultsHelpFormatter)
25892635

25902636
## Plotting
25912637
parser_plthist = subparsers.add_parser("plthist", help="Plot: Takes frequency output (like from uniq -c) and generates a histogram plot", formatter_class=argparse.ArgumentDefaultsHelpFormatter)

0 commit comments

Comments
 (0)