option paths_limit
trvinh committed Dec 13, 2023
1 parent 567e4e2 commit c13f69f
Showing 6 changed files with 74 additions and 48 deletions.
5 changes: 4 additions & 1 deletion greedyFAS/calcFAS.py
@@ -133,6 +133,9 @@ def get_options():
help="Change to define the threshold for the maximal cardinality (number) of feature paths "
"in a graph. If max. cardinality is exceeded the priority mode will be used to for "
"the path evaluation. default=500")
thresholds.add_argument("--paths_limit", default=0, type=int,
help="Specify number of maximum paths to be considered (10^n). If this threshold is exceeded, "
"the corresponding protein will be ignored. Default: 0 for no limit")
obscure.add_argument("--priority_mode", action='store_false',
help="deactivates the greedy strategy priority mode for larger architectures, NOT RECOMMENDED")
obscure.add_argument("--timelimit", default=3600, type=int,
@@ -191,7 +194,7 @@ def fas(opts):
"timelimit": args.timelimit, "phyloprofile": args.phyloprofile, "score_weights": [],
"tsv": args.tsv, "json": args.json, "max_overlap_percentage": 0.0, "domain": args.domain, "pairwise": None,
"eInstance": args.eInstance, "eFeature": args.eFeature, "progress": True,
"empty_as_1": args.empty_as_1, "silent": args.silent
"empty_as_1": args.empty_as_1, "silent": args.silent, "paths_limit": 10**args.paths_limit
}
seedname = '.'.join(args.seed.split('/')[-1].split('.')[:-1])
option_dict["p_path"] = [args.annotation_dir + '/' + seedname + '.json']
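To make the new option concrete, here is a minimal sketch (not part of the commit; the helper name is invented) of how the value is interpreted: the CLI takes an exponent n, calcFAS.py stores 10**n in the option dictionary (see "paths_limit": 10**args.paths_limit above), and a protein whose feature graph has more paths than that limit is skipped. The default n = 0 yields 10**0 = 1, which the downstream check treats as "no limit".

# Illustrative sketch only; mirrors the guard added to fc_prep_query later in this commit.
def exceeds_paths_limit(path_number: int, paths_limit_exponent: int) -> bool:
    limit = 10 ** paths_limit_exponent  # default exponent 0 gives 1 == "no limit"
    return limit > 1 and path_number > limit

# With --paths_limit 6, a protein with 2,000,000 feature paths is skipped ...
assert exceeds_paths_limit(2_000_000, 6)
# ... while the default of 0 never skips anything.
assert not exceeds_paths_limit(2_000_000, 0)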
10 changes: 5 additions & 5 deletions greedyFAS/calcFASmulti.py
@@ -224,11 +224,11 @@ def get_prot_for_taxpair(opts):
taxa_pair = ''
if len(tmp) == 4:
taxa_pair = f'{tmp[1]}#{tmp[3]}'
if paths_limit > 0:
path_p1 = calc_path_number(tmp[0], f'{annotation_dir}/{tmp[1]}.json')
path_p2 = calc_path_number(tmp[2], f'{annotation_dir}/{tmp[3]}.json')
if path_p1 > 10**paths_limit or path_p2 > 10**paths_limit:
return('')
# if paths_limit > 0:
# path_p1 = calc_path_number(tmp[0], f'{annotation_dir}/{tmp[1]}.json')
# path_p2 = calc_path_number(tmp[2], f'{annotation_dir}/{tmp[3]}.json')
# if path_p1 > 10**paths_limit or path_p2 > 10**paths_limit:
# return('')
fp = open(f'{out_dir}/{out_name}_split_inputs/{taxa_pair}.txt', 'a+')
fp.write(f'{tmp[0]}\t{tmp[2]}\n')
fp.close()
29 changes: 19 additions & 10 deletions greedyFAS/mainFAS/fasOutput.py
@@ -30,12 +30,16 @@ def write_tsv_out(outpath, bidirectional, results):
outdict = {}
out.write('Seed\tQuery\tScore(Forward/Reverse)\tMS(Forward/Reverse)\tPS(Forward/Reverse)\tCS(Forward/Reverse)'
'\tLS(Forward/Reverse)\tMethod\n')
for result in results[0]:
outdict[result[0], result[1]] = (result[2], ('NA', 'NA', 'NA', 'NA', 'NA'), result[3])
if bidirectional:
if results[0]:
for result in results[0]:
outdict[result[0], result[1]] = (result[2], ('NA', 'NA', 'NA', 'NA', 'NA'), result[3])
if bidirectional:
for result in results[1]:
outdict[result[1], result[0]] = (outdict[result[1], result[0]][0], result[2],
outdict[result[1],result[0]][2])
else:
for result in results[1]:
outdict[result[1], result[0]] = (outdict[result[1], result[0]][0], result[2], outdict[result[1],
result[0]][2])
outdict[result[0], result[1]] = (result[2], ('NA', 'NA', 'NA', 'NA', 'NA'), result[3])
for pair in outdict:
out.write(pair[0] + '\t' + pair[1] + '\t' + f'{outdict[pair][0][0]:.4}' + '/'
+ f'{outdict[pair][1][0]:.4}' + '\t' + f'{outdict[pair][0][1]:.4}' + '/'
@@ -48,12 +52,17 @@ def write_tsv_out(outpath, bidirectional, results):

def write_json_out(outpath, bidirectional, results):
outdict = {}
for result in results[0]:
outdict[result[0], result[1]] = (result[2], ('NA', 'NA', 'NA', 'NA', 'NA'), result[3])
if bidirectional:
if results[0]:
for result in results[0]:
outdict[result[0], result[1]] = (result[2], ('NA', 'NA', 'NA', 'NA', 'NA'), result[3])
if bidirectional:
for result in results[1]:
outdict[result[1], result[0]] = (outdict[result[1], result[0]][0], result[2],
outdict[result[1],result[0]][2])
else:
for result in results[1]:
outdict[result[1], result[0]] = (outdict[result[1], result[0]][0], result[2], outdict[result[1],
result[0]][2])
outdict[result[0], result[1]] = (result[2], ('NA', 'NA', 'NA', 'NA', 'NA'), result[3])

json_dict = {}
for pair in outdict:
json_dict['_'.join(pair)] = [f'{outdict[pair][0][0]:.4}', f'{outdict[pair][1][0]:.4}']
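The restructuring above guards against an empty forward result set: when results[0] is falsy, the new else branch records the pairs from results[1] with 'NA' placeholders rather than updating forward entries that were never created. Below is a hypothetical placeholder record and, roughly, the TSV row it would produce (identifiers invented; column layout taken from the header string above).

# Example record for a skipped pair as emitted by fc_main (see greedyFAS.py below);
# write_tsv_out would render it approximately as the tab-separated comment line.
result = ('seed_prot1', 'query_prot7', ('NA', 'NA', 'NA', 'NA', 'NA'), 'NA')
# seed_prot1  query_prot7  NA/NA  NA/NA  NA/NA  NA/NA  NA/NA  NA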
72 changes: 42 additions & 30 deletions greedyFAS/mainFAS/greedyFAS.py
@@ -130,27 +130,30 @@ def fc_start(option):
if not option["silent"]:
print("calculating forward scores...")
f_results = fc_main(domain_count, seed_proteome, query_proteome, clan_dict, option, interprokeys, phmm)
if option["MS_uni"] == 0 and option["ref_2"]:
domain_count_2 = {}
for path in option["ref_2"]:
domain_count_2.update(read_json(path)["count"])
if option["weight_correction"]:
domain_count_2 = w_weight_correction(option["weight_correction"], domain_count_2)
option['ref_proteome'] = option['ref_2']
else:
domain_count_2 = domain_count
id_tmp = option["seed_id"]
option["reverse"] = True
option["seed_id"] = option["query_id"]
option["query_id"] = id_tmp
if option["pairwise"]:
pairtmp = []
for pair in option["pairwise"]:
pairtmp.append([pair[1], pair[0]])
option["pairwise"] = pairtmp
if not option["silent"]:
print("calculating backward scores...")
r_results = fc_main(domain_count_2, query_proteome, seed_proteome, clan_dict, option, interprokeys, phmm)
r_results = ()
if not f_results[-1] == 'NA':
if option["MS_uni"] == 0 and option["ref_2"]:
domain_count_2 = {}
for path in option["ref_2"]:
domain_count_2.update(read_json(path)["count"])
if option["weight_correction"]:
domain_count_2 = w_weight_correction(option["weight_correction"], domain_count_2)
option['ref_proteome'] = option['ref_2']
else:
domain_count_2 = domain_count
id_tmp = option["seed_id"]
option["reverse"] = True
option["seed_id"] = option["query_id"]
option["query_id"] = id_tmp
if option["pairwise"]:
pairtmp = []
for pair in option["pairwise"]:
pairtmp.append([pair[1], pair[0]])
option["pairwise"] = pairtmp
if not option["silent"]:
print("calculating backward scores...")
r_results = fc_main(domain_count_2, query_proteome, seed_proteome, clan_dict, option, interprokeys, phmm)

if option["phyloprofile"]:
phyloprofile_out(option["outpath"], True, option["phyloprofile"], (f_results, r_results))
if not option['tsv']:
@@ -241,15 +244,22 @@ def fc_main(domain_count, seed_proteome, query_proteome, clan_dict, option, interprokeys, phmm):
if protein not in seed_proteome:
raise Exception(protein + ' is missing in annotation!')
tmp_query = fc_prep_query(query, domain_count, query_proteome, option, clan_dict)
query_graph, all_query_paths, lin_query_set, query_features, a_q_f, query_clans, clan_dict = tmp_query[0:7]
go_priority, domain_count = tmp_query[7:9]

#### WRITE RESULTS ####################
results.append(fc_main_sub(protein, domain_count, seed_proteome, option, all_query_paths, query_features,
go_priority, a_q_f, clan_dict, query_graph, query_proteome, query, query_clans,
domain_out, interprokeys, phmm))
if option["progress"]:
progress.update(1)
if not tmp_query == None:
tmp_protein = fc_prep_query(protein, 'NA', seed_proteome, option, clan_dict)
if not tmp_protein == None:
query_graph, all_query_paths, lin_query_set, query_features, a_q_f, query_clans, clan_dict = tmp_query[0:7]
go_priority, domain_count = tmp_query[7:9]

#### WRITE RESULTS ####################
results.append(fc_main_sub(protein, domain_count, seed_proteome, option, all_query_paths, query_features,
go_priority, a_q_f, clan_dict, query_graph, query_proteome, query, query_clans,
domain_out, interprokeys, phmm))
if option["progress"]:
progress.update(1)
else:
results.append((protein, query, ('NA', 'NA', 'NA', 'NA', 'NA'), 'NA'))
else:
results.append((protein, query, ('NA', 'NA', 'NA', 'NA', 'NA'), 'NA'))

if option["progress"]:
progress.refresh()
@@ -268,6 +278,8 @@ def fc_prep_query(query, domain_count, query_proteome, option, clan_dict):
query, query_proteome, clan_dict, option)
tmp_query_graph, path_number = pb_region_paths(pb_region_mapper(
lin_query_set, query_features, option["max_overlap"], option["max_overlap_percentage"]))
if option["paths_limit"] > 1 and option["paths_limit"] < path_number:
return None
# PRIORITY CHECK: checking for number of instances - assess complexity of the feature graph
if (len(query_features) > option["priority_threshold"] or path_number > option["max_cardinality"]) and \
option["priority_mode"]:
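The pieces in this file fit together roughly as follows: fc_prep_query returns None once path_number exceeds option["paths_limit"], fc_main turns that into an 'NA' result record, and fc_start wraps the backward run in a check on f_results so that it is skipped when the forward call signals 'NA'. A condensed, self-contained sketch of that flow (names simplified, the actual scoring replaced by a placeholder tuple; not the real greedyFAS code):

# Simplified sketch of the control flow added in this commit.
NA_SCORES = ('NA', 'NA', 'NA', 'NA', 'NA')

def prep_query(path_number, paths_limit):
    # mirrors the new guard in fc_prep_query: bail out on oversized feature graphs
    if 1 < paths_limit < path_number:
        return None
    return path_number  # stand-in for the prepared query data

def score_pair(seed, query, path_number, paths_limit):
    # mirrors fc_main: skipped pairs are reported with 'NA' placeholders
    if prep_query(path_number, paths_limit) is None:
        return (seed, query, NA_SCORES, 'NA')
    return (seed, query, (0.8, 0.8, 0.9, 0.7, 0.6), 'greedy')  # placeholder scores

print(score_pair('seedA', 'queryB', path_number=10**8, paths_limit=10**6))
# -> ('seedA', 'queryB', ('NA', 'NA', 'NA', 'NA', 'NA'), 'NA')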
4 changes: 3 additions & 1 deletion greedyFAS/tsv2json.py
@@ -3,7 +3,9 @@
#######################################################################
# Copyright (C) 2023 Vinh Tran
#
# This file is part of FAS.
# This file is part of FAS. This script is used to convert FAS output
# from TSV (tab-delimited) to JSON format. Only protein IDs and their
# (pairwise) FAS scores will be stored in the JSON output.
#
# FAS is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
2 changes: 1 addition & 1 deletion setup.py
@@ -28,7 +28,7 @@

setup(
name='greedyFAS',
version='1.18.6',
version='1.18.7',
python_requires='>=3.7.0',
description='A tool to compare protein feature architectures',
long_description=long_description,
