-
Notifications
You must be signed in to change notification settings - Fork 119
/
Copy pathencode_task_post_call_peak_chip.py
executable file
·107 lines (87 loc) · 3.73 KB
/
encode_task_post_call_peak_chip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python
# Author: Jin Lee ([email protected])
import sys
import os
import argparse
from encode_lib_common import (
assert_file_not_empty,
log,
ls_l,
mkdir_p,
)
from encode_lib_genomic import (
peak_to_bigbed,
peak_to_hammock,
get_region_size_metrics,
get_num_peaks,
peak_to_starch,
)
from encode_lib_blacklist_filter import blacklist_filter
from encode_lib_frip import frip_shifted
def parse_arguments():
parser = argparse.ArgumentParser(prog='ENCODE post_call_peak (chip)',
description='')
parser.add_argument('peak', type=str,
help='Path for PEAK file. Peak filename should be "*.*Peak.gz". '
'e.g. rep1.narrowPeak.gz')
parser.add_argument('--ta', type=str,
help='TAG-ALIGN file.')
parser.add_argument('--peak-type', type=str, required=True,
choices=['narrowPeak', 'regionPeak',
'broadPeak', 'gappedPeak'],
help='Peak file type.')
parser.add_argument('--fraglen', type=int, required=True,
help='Fragment length.')
parser.add_argument('--chrsz', type=str,
help='2-col chromosome sizes file.')
parser.add_argument('--blacklist', type=str,
help='Blacklist BED file.')
parser.add_argument('--regex-bfilt-peak-chr-name',
help='Keep chromosomes matching this pattern only '
'in .bfilt. peak files.')
parser.add_argument('--mem-gb', type=float, default=4.0,
help='Max. memory for this job in GB. '
'This will be used to determine GNU sort -S (defaulting to 0.5 of this value). '
'It should be total memory for this task (not memory per thread).')
parser.add_argument('--out-dir', default='', type=str,
help='Output directory.')
parser.add_argument('--log-level', default='INFO',
choices=['NOTSET', 'DEBUG', 'INFO',
'WARNING', 'CRITICAL', 'ERROR',
'CRITICAL'],
help='Log level')
args = parser.parse_args()
if args.blacklist is None or args.blacklist.endswith('null'):
args.blacklist = ''
log.setLevel(args.log_level)
log.info(sys.argv)
return args
def main():
# read params
args = parse_arguments()
log.info('Initializing and making output directory...')
mkdir_p(args.out_dir)
log.info('Blacklist-filtering peaks...')
bfilt_peak = blacklist_filter(
args.peak, args.blacklist, args.regex_bfilt_peak_chr_name, args.out_dir)
log.info('Checking if output is empty...')
assert_file_not_empty(bfilt_peak)
log.info('Converting peak to bigbed...')
peak_to_bigbed(bfilt_peak, args.peak_type, args.chrsz,
args.mem_gb, args.out_dir)
log.info('Converting peak to starch...')
peak_to_starch(bfilt_peak, args.out_dir)
log.info('Converting peak to hammock...')
peak_to_hammock(bfilt_peak, args.mem_gb, args.out_dir)
log.info('Shifted FRiP with fragment length...')
frip_qc = frip_shifted(args.ta, bfilt_peak,
args.chrsz, args.fraglen, args.out_dir)
log.info('Calculating (blacklist-filtered) peak region size QC/plot...')
region_size_qc, region_size_plot = get_region_size_metrics(bfilt_peak)
log.info('Calculating number of peaks (blacklist-filtered)...')
num_peak_qc = get_num_peaks(bfilt_peak)
log.info('List all files in output directory...')
ls_l(args.out_dir)
log.info('All done.')
if __name__ == '__main__':
main()