Skip to content

Commit

Permalink
Merge pull request #327 from bioinfo-chru-strasbourg/fix_output_empty
Browse files Browse the repository at this point in the history
Fix empty file output #324
  • Loading branch information
antonylebechec authored Dec 11, 2024
2 parents fc99b59 + 400d037 commit 1aa23f5
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 17 deletions.
22 changes: 17 additions & 5 deletions howard/objects/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -1809,11 +1809,21 @@ def get_columns(
"bed",
"json",
]:
# Query
sql_from = self.get_sql_from(
database=database, header_file=header_file
)
sql_query = f"SELECT * FROM {sql_from} LIMIT 0"
return list(self.conn.query(sql_query).columns)

# Get columns
result_description = self.conn.execute(sql_query).description

# Extract columns' names
columns = [desc[0] for desc in result_description]

# Return columns as list
return columns

except ValueError:
return []

Expand Down Expand Up @@ -2557,8 +2567,10 @@ def export(
query_empty = False
break
if query_empty:
log.error("Export failed: Empty")
raise ValueError("Export failed: Empty")
log.warning("Export warning: Empty")
remove_header_line = False
else:
remove_header_line = True

# Schema names
schema_names = None
Expand Down Expand Up @@ -2597,7 +2609,7 @@ def export(
query_output_header_tmp = os.path.join(tmp_dir, "header")
self.get_header_file(
header_file=query_output_header_tmp,
remove_header_line=True,
remove_header_line=remove_header_line,
sql_query=query,
)

Expand Down Expand Up @@ -2834,7 +2846,7 @@ def export(
query_output_header_tmp = os.path.join(tmp_dir, "header")
tmp_files.append(query_output_header_tmp)
self.get_header_file(
header_file=query_output_header_tmp, remove_header_line=True
header_file=query_output_header_tmp, remove_header_line=remove_header_line
)

# Add tmp header file for concat and compress
Expand Down
55 changes: 55 additions & 0 deletions tests/data/example.empty.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##fileDate=20140624
##source=./export.pl release 1.5
##reference=IRC
##phasing=unknown
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
##FILTER=<ID=FSFilter,Description="FS > 200.0">
##FILTER=<ID=LowQual,Description="Low quality">
##FILTER=<ID=QDFilter,Description="QD < 2.0">
##FILTER=<ID=ReadPosFilter,Description="ReadPosRankSum < -20.0">
##FILTER=<ID=TruthSensitivityTranche99.00to99.90,Description="Truth sensitivity tranche level at VSQ Lod: -3.9813 <= x < 3.448">
##FILTER=<ID=TruthSensitivityTranche99.90to100.00+,Description="Truth sensitivity tranche level at VQS Lod < -29586.8217">
##FILTER=<ID=TruthSensitivityTranche99.90to100.00,Description="Truth sensitivity tranche level at VSQ Lod: -29586.8217 <= x < -3.9813">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=AD,Number=2,Type=Integer,Description="Allelic Depth">
##FORMAT=<ID=FILTER,Number=.,Type=String,Description="Filter quality">
##FORMAT=<ID=QUAL,Number=1,Type=Integer,Description="Filter quality value">
##INFO=<ID=CLNSIG,Number=1,Type=String,Description="CLNSIG">
##contig=<ID=chr1,length=249250621,assembly=hg19>
##contig=<ID=chr7,length=159138663,assembly=hg19>
##contig=<ID=1,length=249250621,assembly=hg19>
##contig=<ID=10,length=135534747,assembly=hg19>
##contig=<ID=11,length=135006516,assembly=hg19>
##contig=<ID=12,length=133851895,assembly=hg19>
##contig=<ID=13,length=115169878,assembly=hg19>
##contig=<ID=14,length=107349540,assembly=hg19>
##contig=<ID=15,length=102531392,assembly=hg19>
##contig=<ID=16,length=90354753,assembly=hg19>
##contig=<ID=17,length=81195210,assembly=hg19>
##contig=<ID=18,length=78077248,assembly=hg19>
##contig=<ID=19,length=59128983,assembly=hg19>
##contig=<ID=2,length=243199373,assembly=hg19>
##contig=<ID=20,length=63025520,assembly=hg19>
##contig=<ID=21,length=48129895,assembly=hg19>
##contig=<ID=22,length=51304566,assembly=hg19>
##contig=<ID=3,length=198022430,assembly=hg19>
##contig=<ID=4,length=191154276,assembly=hg19>
##contig=<ID=5,length=180915260,assembly=hg19>
##contig=<ID=6,length=171115067,assembly=hg19>
##contig=<ID=7,length=159138663,assembly=hg19>
##contig=<ID=8,length=146364022,assembly=hg19>
##contig=<ID=9,length=141213431,assembly=hg19>
##contig=<ID=M,length=16571,assembly=hg19>
##contig=<ID=X,length=155270560,assembly=hg19>
##contig=<ID=Y,length=59373566,assembly=hg19>
##INFO=<ID=SIFT,Number=.,Type=String,Description="Annotation 'SIFT'">
##bcftools_viewVersion=1.15.1+htslib-1.15.1
##bcftools_viewCommand=view tests/data/example.vcf.gz; Date=Fri Mar 10 21:25:44 2023
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2 sample3 sample4
1 change: 1 addition & 0 deletions tests/test_needed.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
"example_vcf": tests_data_folder + "/example.vcf",
"example_vcf_gz": tests_data_folder + "/example.vcf.gz",
"example_vcf_gzip": tests_data_folder + "/example.vcf.gzip",
"example_empty_vcf": tests_data_folder + "/example.empty.vcf",
}


Expand Down
25 changes: 25 additions & 0 deletions tests/test_objects_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,31 @@
from test_needed import *


def test_export_empty():
    """
    Check that exporting a database with no variants still produces a file.

    The input is a VCF that contains only header lines. The export must
    succeed (no exception), create the output file, and that file must load
    back as a database whose "variants" table has no rows.
    """

    with TemporaryDirectory(dir=tests_folder) as tmp_dir:

        # Write into the temporary directory itself (not a shared location
        # such as "/tmp"), so a stale file left by a previous run cannot make
        # the existence check below pass, and so the output is cleaned up
        # when the context manager exits.
        input_database = database_files.get("example_empty_vcf")
        output_database = os.path.join(tmp_dir, "output.tsv")

        # Export database
        database = Database(database=input_database)
        database.export(output_database)
        assert os.path.exists(output_database)

        # Check if exported database is empty
        database = Database(database=output_database)
        results = database.query(query="SELECT * FROM variants")
        assert len(results) == 0


@pytest.mark.parametrize(
"order_by, first_pos, first_qual, first_alt",
[
Expand Down
20 changes: 8 additions & 12 deletions tests/test_tools_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,14 @@ def test_query_empty():
log.debug(result)
assert len(result) == 0

# Check if VCF is in correct format with pyVCF
with pytest.raises(ValueError) as e:
variants.export_output(query=query)
assert str(e.value) == "Export failed: Empty"

# Set output
variants.set_output = output_parquet

# Check if VCF is in correct format with pyVCF
with pytest.raises(ValueError) as e:
variants.export_output(query=query)
assert str(e.value) == "Export failed: Empty"
# Export file
variants.export_output(query=query)
assert os.path.exists(output_vcf)

# Check if exported file is empty
variants_output_vcf = Variants(conn=None, input=output_vcf, load=True)
results = variants_output_vcf.get_query_to_df(query=f"""SELECT * FROM variants""")
assert len(results) == 0


def test_query():
Expand Down

0 comments on commit 1aa23f5

Please sign in to comment.