forked from VUmcCGP/wisecondor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcutoff.py
63 lines (55 loc) · 2.96 KB
/
cutoff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
##############################################################################
# #
# Find optimal cutoff for 'good' bins for a certain reference set. #
# Copyright(C) 2013 TU Delft & VU University Medical Center Amsterdam #
# Author: Roy Straver, [email protected] #
# #
# This file is part of WISECONDOR. #
# #
# WISECONDOR is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# WISECONDOR is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with WISECONDOR. If not, see <http://www.gnu.org/licenses/>. #
# #
##############################################################################
import sys
import numpy
import argparse
def getReference(lookUp, cutOff):
    """Collect the leading score of every non-empty bin that lies below cutOff.

    For each key in lookUp, every non-empty bin found under that key is
    inspected: the third field of its first entry is converted to float and
    compared against cutOff.

    Returns a tuple (kept, dropped): kept is the list of scores strictly
    below cutOff, in iteration order, and dropped counts the non-empty bins
    whose score was not below cutOff. Empty bins are ignored and are not
    counted in either value.
    """
    kept = []
    dropped = 0
    for chrom in lookUp:
        for entries in lookUp[chrom]:
            if len(entries) == 0:
                # Nothing stored for this bin: neither kept nor counted.
                continue
            score = float(entries[0][2])
            if score < cutOff:
                kept.append(score)
            else:
                dropped += 1
    return kept, dropped
def getOptimalCutoff(lookUp, repeats, optimalCutoff):
    """Iteratively tighten the cutoff used to select reference bins.

    Starting from optimalCutoff, each of the `repeats` rounds gathers the
    scores that getReference keeps under the current cutoff and replaces the
    cutoff with their average plus three standard deviations.

    Returns the cutoff after the last round; with zero rounds the starting
    value is returned unchanged.
    """
    cutoff = optimalCutoff
    for _ in range(repeats):
        # Only the kept scores matter here; the removed count is not used.
        kept = getReference(lookUp, cutoff)[0]
        cutoff = numpy.average(kept) + 3 * numpy.std(kept)
    return cutoff
if __name__ == "__main__":
    # Command-line entry point: load a pickled reference table, compute the
    # optimal cutoff for its bins and print the result.
    import argparse
    import pickle

    parser = argparse.ArgumentParser(description='Determine optimal cutoff value for the reference table provided',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('reference', type=str,
        help='reference table to work on (pickle)')
    parser.add_argument('-refmaxval', default=1000000, type=int,
        help='start cutoff value for determining good quality reference bins')
    parser.add_argument('-refmaxrep', default=3, type=int,
        help='amount of improval rounds for determining good quality reference bins')
    args = parser.parse_args()

    # The positional argument is a file path, so the table has to be loaded
    # before it can be iterated by getOptimalCutoff.
    # NOTE(review): assumes the pickle holds the lookUp mapping directly --
    # confirm against the script that writes the reference file.
    # SECURITY: unpickling can execute arbitrary code; only load reference
    # files from a trusted source.
    with open(args.reference, 'rb') as handle:
        lookUp = pickle.load(handle)

    # getOptimalCutoff expects (lookUp, repeats, optimalCutoff): the number
    # of rounds comes first, then the starting cutoff value.
    print(getOptimalCutoff(lookUp, args.refmaxrep, args.refmaxval))