From 0d8589c9501222a8ebb7b4860997844a88dc7b3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antony=20Le=20B=C3=A9chec?= Date: Thu, 12 Dec 2024 13:59:43 +0100 Subject: [PATCH] Fix when INFO/tags is a flag #307 --- .gitignore | 1 + howard/objects/variants.py | 12 ++++++------ tests/data/example.annotation_names.vcf | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 8d56aa9..c574fc0 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ tests/databases/hg19.fa tests/databases/hg19.fa.fai tests/databases/.DS_Store tests/data/TEM* +tests/*.DS_Store benchmark misc/ *.DS_Store diff --git a/howard/objects/variants.py b/howard/objects/variants.py index ff98949..c87d8d2 100644 --- a/howard/objects/variants.py +++ b/howard/objects/variants.py @@ -11686,7 +11686,7 @@ def rename_info_fields( regex_replace_dict = {} regex_replace_nb = 0 regex_replace_partition = 125 - regex_replace = "INFO" + regex_replace = "concat(INFO, ';')" # Add ';' to reduce regexp complexity if fields_to_rename is not None and access not in ["RO"]: @@ -11713,17 +11713,17 @@ def rename_info_fields( del header.infos[field_to_rename] # Rename INFO patterns - field_pattern = rf'(^|;)({field_to_rename})($|;|=[^;]*)' + field_pattern = rf'(^|;)({field_to_rename})(=[^;]*)?;' if field_renamed is not None: - field_renamed_pattern = rf'\1{field_renamed}\3' + field_renamed_pattern = rf'\1{field_renamed}\3;' else: - field_renamed_pattern = '' + field_renamed_pattern = r'\1' # regexp replace regex_replace_nb += 1 regex_replace_key = math.floor(regex_replace_nb / regex_replace_partition) if (regex_replace_nb % regex_replace_partition) == 0: - regex_replace = "INFO" + regex_replace = "concat(INFO, ';')" regex_replace = f"regexp_replace({regex_replace}, '{field_pattern}', '{field_renamed_pattern}')" regex_replace_dict[regex_replace_key] = regex_replace @@ -11747,7 +11747,7 @@ def rename_info_fields( query = f""" UPDATE {table} SET - INFO = {regex_replace} + INFO =
regexp_replace({regex_replace}, ';$', '') """ log.debug(f"query={query}") self.execute_query(query=query) diff --git a/tests/data/example.annotation_names.vcf b/tests/data/example.annotation_names.vcf index 681ebb0..911a25c 100644 --- a/tests/data/example.annotation_names.vcf +++ b/tests/data/example.annotation_names.vcf @@ -56,8 +56,8 @@ ##bcftools_viewVersion=1.15.1+htslib-1.15.1 ##bcftools_viewCommand=view tests/data/example.vcf.gz; Date=Fri Mar 10 21:25:44 2023 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2 sample3 sample4 -chr1 28736 . A C 100 PASS CLNSIG=pathogenic GT:AD:DP:GQ 0/1:525,204:729:99 0/1:12659,4994:17664:99 1/1:12658,4995:17663:99 1/1:401,175:576:99 -chr1 35144 . A C 100 PASS CLNSIG=non-pathogenic GT:AD:DP:GQ ./.:.:.:. 0/1:12659,4994:17664:99 0/1:12658,4995:17663:99 0/1:401,175:576:99 +chr1 28736 . A C 100 PASS SPiP_Alt=T;CLNSIG=pathogenic GT:AD:DP:GQ 0/1:525,204:729:99 0/1:12659,4994:17664:99 1/1:12658,4995:17663:99 1/1:401,175:576:99 +chr1 35144 . A C 100 PASS CLNSIG=non-pathogenic;SPiP_Alt=T GT:AD:DP:GQ ./.:.:.:. 0/1:12659,4994:17664:99 0/1:12658,4995:17663:99 0/1:401,175:576:99 chr1 69101 . A G 100 PASS DP=50;CLNSIG=non-pathogenic;SIFT=D;SPiP_Alt GT:AD:DP:GQ 0/1:525,204:729:99 ./.:.:.:. 0/1:12658,4995:17663:99 0/1:401,175:576:99 chr1 768251 . A G 100 PASS CLNSIG=NP;PREFIXCLNSIG=NP;SPiP_Alt;CLNSIGSUFFIX=P GT:AD:DP:GQ 0/1:525,204:729:99 ./.:.:.:. 0/1:12658,4995:17663:99 0/1:401,175:576:99 chr1 768252 . A G 100 PASS PREFIXCLNSIG=NP GT:AD:DP:GQ 0/1:525,204:729:99 ./.:.:.:. 0/1:12658,4995:17663:99 0/1:401,175:576:99