Skip to content

Commit

Permalink
Merge pull request #17 from lozuponelab/skip_async
Browse files (browse the repository at this point in the history)
Skip async download from kegg
  • Loading branch information
sterrettJD authored Jan 17, 2024
2 parents af97eb6 + f302c14 commit d5f2629
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 7 deletions.
2 changes: 1 addition & 1 deletion AMON/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.0.0'
__version__ = '1.0.1'
10 changes: 5 additions & 5 deletions AMON/predict_metabolites.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def make_enrichment_clustermap(pathway_enrichment_dfs: dict, key, output_loc, mi
def main(kos_loc, output_dir, other_kos_loc=None, compounds_loc=None, name1='gene_set_1', name2='gene_set_2',
keep_separated=False, samples_are_columns=False, detected_only=False, rxn_compounds_only=False,
unique_only=True, ko_file_loc=None, rn_file_loc=None, co_file_loc=None, pathway_file_loc=None,
write_json=False):
write_json=False, try_async=False):
# create output dir to throw error quick
makedirs(output_dir)
logger = Logger(path.join(output_dir, "AMON_log.txt"))
Expand All @@ -285,7 +285,7 @@ def main(kos_loc, output_dir, other_kos_loc=None, compounds_loc=None, name1='gen
logger['Number of samples'] = len(sample_kos)
logger['Total number of KOs'] = len(all_kos)

ko_dict = get_kegg_record_dict(set(all_kos), parse_ko, ko_file_loc)
ko_dict = get_kegg_record_dict(set(all_kos), parse_ko, ko_file_loc, try_async=try_async)
if write_json:
open(path.join(output_dir, 'ko_dict.json'), 'w').write(json.dumps(ko_dict))
logger['KO json location'] = path.abspath(path.join(output_dir, 'ko_dict.json'))
Expand All @@ -296,7 +296,7 @@ def main(kos_loc, output_dir, other_kos_loc=None, compounds_loc=None, name1='gen
logger['Total number of reactions'] = len(all_rns)

# get reactions from kegg
rn_dict = get_kegg_record_dict(set(all_rns), parse_rn, rn_file_loc)
rn_dict = get_kegg_record_dict(set(all_rns), parse_rn, rn_file_loc, try_async=try_async)
if write_json:
open(path.join(output_dir, 'rn_dict.json'), 'w').write(json.dumps(rn_dict))
logger['RN json location'] = path.abspath(path.join(output_dir, 'rn_dict.json'))
Expand Down Expand Up @@ -333,7 +333,7 @@ def main(kos_loc, output_dir, other_kos_loc=None, compounds_loc=None, name1='gen
logger['Number of cos produced and detected'] = len(all_cos_produced)

# Get compound data from kegg
co_dict = get_kegg_record_dict(all_cos_produced, parse_co, co_file_loc)
co_dict = get_kegg_record_dict(all_cos_produced, parse_co, co_file_loc, try_async=try_async)
if write_json:
open(path.join(output_dir, 'co_dict.json'), 'w').write(json.dumps(co_dict))

Expand All @@ -360,7 +360,7 @@ def main(kos_loc, output_dir, other_kos_loc=None, compounds_loc=None, name1='gen

# Get pathway info from pathways in compounds
all_pathways = [pathway.replace('map', 'ko') for pathway in get_pathways_from_cos(co_dict)]
pathway_dict = get_kegg_record_dict(all_pathways, parse_pathway, pathway_file_loc)
pathway_dict = get_kegg_record_dict(all_pathways, parse_pathway, pathway_file_loc, try_async=try_async)
pathway_to_compound_dict = get_pathway_to_co_dict(pathway_dict, no_glycan=False)

# calculate enrichment
Expand Down
6 changes: 5 additions & 1 deletion scripts/amon.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
parser.add_argument('--pathway_file_loc', help='Location of pathway file from KEGG FTP download')
parser.add_argument('--save_entries', help='Save json file of KEGG entries at all levels used in analysis for '
'deeper analysis', action='store_true', default=False)
# Opt-in switch (off by default) whose value is forwarded to main() as `try_async`;
# the help text warns that parallel download fails for large KO sets.
parser.add_argument('--download_kegg_async', help='KEGG data should be downloaded in parallel (note: this is '
                                                  'faster for small numbers of KOs but fails with larger numbers '
                                                  'due to KEGG API restrictions)', action='store_true', default=False)

args = parser.parse_args()
kos_loc = args.gene_set
Expand Down Expand Up @@ -67,10 +70,11 @@
co_file_loc = args.co_file_loc
pathway_file_loc = args.pathway_file_loc
write_json = args.save_entries
try_async=args.download_kegg_async

if detected_compounds_only and detected_compounds is None:
raise ValueError('Cannot have detected compounds only and not provide detected compounds')

main(kos_loc, output_dir, other_kos_loc, detected_compounds, name1, name2, keep_separated, samples_are_columns,
detected_compounds_only, rn_compounds_only, unique_only, ko_file_loc=ko_file_loc, rn_file_loc=rn_file_loc,
co_file_loc=co_file_loc, pathway_file_loc=pathway_file_loc, write_json=write_json)
co_file_loc=co_file_loc, pathway_file_loc=pathway_file_loc, write_json=write_json, try_async=try_async)

0 comments on commit d5f2629

Please sign in to comment.