From 51119d5f0c73007c2299467ded833aac9d26d579 Mon Sep 17 00:00:00 2001 From: John Sterrett Date: Tue, 16 Jan 2024 11:47:44 -0700 Subject: [PATCH 1/6] update get_from_kegg_api to skip async --- KEGG_parser/downloader.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/KEGG_parser/downloader.py b/KEGG_parser/downloader.py index ddfc023..3364da9 100644 --- a/KEGG_parser/downloader.py +++ b/KEGG_parser/downloader.py @@ -72,8 +72,11 @@ def kegg_download_manager_synchronous(list_of_ids, wait=1): return [raw_record for raw_records in results for raw_record in raw_records.split('///')[:-1]] +def get_from_kegg_api(loop, list_of_ids, parser, try_async=True): + if try_async == False: + print("KEGG parser will try to download data sequentially.") + return [parser(raw_record) for raw_record in kegg_download_manager_synchronous(list_of_ids)] -def get_from_kegg_api(loop, list_of_ids, parser): try: return [parser(raw_record) for raw_record in loop.run_until_complete(kegg_download_manager(loop, list_of_ids))] except ValueError: @@ -82,6 +85,7 @@ def get_from_kegg_api(loop, list_of_ids, parser): time.sleep(30) return [parser(raw_record) for raw_record in kegg_download_manager_synchronous(list_of_ids)] + def get_kegg_record_dict(list_of_ids, parser, records_file_loc=None, verbose=False): if records_file_loc is None: loop = asyncio.get_event_loop() From 838720019dea311fce007bfd35fe0e87eec43eff Mon Sep 17 00:00:00 2001 From: John Sterrett Date: Tue, 16 Jan 2024 11:48:43 -0700 Subject: [PATCH 2/6] update get_kegg_record_dict with option to skip async --- KEGG_parser/downloader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/KEGG_parser/downloader.py b/KEGG_parser/downloader.py index 3364da9..5134a4e 100644 --- a/KEGG_parser/downloader.py +++ b/KEGG_parser/downloader.py @@ -86,12 +86,12 @@ def get_from_kegg_api(loop, list_of_ids, parser, try_async=True): return [parser(raw_record) for raw_record in 
kegg_download_manager_synchronous(list_of_ids)] -def get_kegg_record_dict(list_of_ids, parser, records_file_loc=None, verbose=False): +def get_kegg_record_dict(list_of_ids, parser, records_file_loc=None, verbose=False, try_async=True): if records_file_loc is None: loop = asyncio.get_event_loop() records = get_from_kegg_api(loop, list_of_ids, parser) else: - records = get_from_kegg_flat_file(records_file_loc, list_of_ids, parser) + records = get_from_kegg_flat_file(records_file_loc, list_of_ids, parser, try_async) if verbose: print("%s records acquired" % len(records)) return {record['ENTRY']: record for record in records} From 983bfcf1f3c42a1a7d25c8ef3d12e19f06e739b3 Mon Sep 17 00:00:00 2001 From: John Sterrett Date: Tue, 16 Jan 2024 12:03:25 -0700 Subject: [PATCH 3/6] move try_async arg to from_api --- KEGG_parser/downloader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/KEGG_parser/downloader.py b/KEGG_parser/downloader.py index 5134a4e..b967f38 100644 --- a/KEGG_parser/downloader.py +++ b/KEGG_parser/downloader.py @@ -89,9 +89,9 @@ def get_from_kegg_api(loop, list_of_ids, parser, try_async=True): def get_kegg_record_dict(list_of_ids, parser, records_file_loc=None, verbose=False, try_async=True): if records_file_loc is None: loop = asyncio.get_event_loop() - records = get_from_kegg_api(loop, list_of_ids, parser) + records = get_from_kegg_api(loop, list_of_ids, parser, try_async=try_async) else: - records = get_from_kegg_flat_file(records_file_loc, list_of_ids, parser, try_async) + records = get_from_kegg_flat_file(records_file_loc, list_of_ids, parser) if verbose: print("%s records acquired" % len(records)) return {record['ENTRY']: record for record in records} From f230366ba086ee82dee8b26dc9cb08c6ad251c39 Mon Sep 17 00:00:00 2001 From: John Sterrett Date: Tue, 16 Jan 2024 14:17:06 -0700 Subject: [PATCH 4/6] bump version --- KEGG_parser/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KEGG_parser/__init__.py 
b/KEGG_parser/__init__.py index 2792152..9123cf0 100644 --- a/KEGG_parser/__init__.py +++ b/KEGG_parser/__init__.py @@ -1 +1 @@ -__version__ = '0.0.7' +__version__ = '0.0.8' From 85393eb823b2ba2b72186bc1d7300f54cebae281 Mon Sep 17 00:00:00 2001 From: John Sterrett Date: Tue, 16 Jan 2024 18:52:31 -0700 Subject: [PATCH 5/6] add reaction to not captured --- KEGG_parser/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KEGG_parser/parsers.py b/KEGG_parser/parsers.py index 94e792a..7e117dd 100644 --- a/KEGG_parser/parsers.py +++ b/KEGG_parser/parsers.py @@ -194,7 +194,7 @@ def split_module_reaction(current_dict, current_entry_name, current_entry_data): 'COMPOUND': add_module_orthology, 'COMMENT': return_self, 'DBLINKS': add_nested_dict } -NOT_CAPTURED_KO_FIELDS = ('REFERENCE', 'AUTHORS', 'TITLE', 'JOURNAL', 'SEQUENCE', 'BRITE', 'SYMBOL') +NOT_CAPTURED_KO_FIELDS = ('REFERENCE', 'AUTHORS', 'TITLE', 'JOURNAL', 'SEQUENCE', 'BRITE', 'SYMBOL', 'REACTION') NOT_CAPTURED_RN_FIELDS = ('REFERENCE', 'AUTHORS', 'TITLE', 'JOURNAL') From f1ed2549eb59c8045417dc65472900404d9f4555 Mon Sep 17 00:00:00 2001 From: John Sterrett Date: Tue, 16 Jan 2024 18:55:49 -0700 Subject: [PATCH 6/6] add brite to not captured rn fields --- KEGG_parser/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KEGG_parser/parsers.py b/KEGG_parser/parsers.py index 7e117dd..4809a4a 100644 --- a/KEGG_parser/parsers.py +++ b/KEGG_parser/parsers.py @@ -196,7 +196,7 @@ def split_module_reaction(current_dict, current_entry_name, current_entry_data): NOT_CAPTURED_KO_FIELDS = ('REFERENCE', 'AUTHORS', 'TITLE', 'JOURNAL', 'SEQUENCE', 'BRITE', 'SYMBOL', 'REACTION') -NOT_CAPTURED_RN_FIELDS = ('REFERENCE', 'AUTHORS', 'TITLE', 'JOURNAL') +NOT_CAPTURED_RN_FIELDS = ('REFERENCE', 'AUTHORS', 'TITLE', 'JOURNAL', 'BRITE') NOT_CAPTURED_CO_FIELDS = ('BRITE', 'ATOM', 'BOND', 'BRACKET', 'ORIGINAL', 'REPEAT', 'NODE', 'EDGE', 'SEQUENCE', 'GENE', 'ORGANISM', 'TYPE', 'EFFICACY', 'PRODUCT', 
'CLASS', 'DISEASE', 'TARGET',