Skip to content

Commit 19b07c1

Browse files
committed
Fixed genome counting
1 parent e0c4c9b commit 19b07c1

File tree

1 file changed

+7
-9
lines changed

1 file changed

+7
-9
lines changed

helpers/post_run_sanity_check.py

+7-9
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,6 @@ def main(input_directory, domain, outfile, extra_weight_table_user_provided):
3535
metadata_table_contents = load_metadata_table(os.path.join(input_directory, "genomes-all_metadata.tsv"))
3636
gunc_failed_list = load_gunc(os.path.join(intermediate_files_path, "gunc", "gunc_failed.txt"))
3737

38-
39-
4038
# load mgyg to original accession translation
4139
mgyg_to_insdc, insdc_to_mgyg = load_name_conversion(os.path.join(input_directory, "additional_data",
4240
"intermediate_files",
@@ -126,16 +124,15 @@ def check_genome_counts(metadata_table, cluster_splits, all_genomes, intermediat
126124
gunc_failed_list = load_gunc(os.path.join(intermediate_files_path, "gunc", "gunc_failed.txt"))
127125

128126
if gunc_failed_list is None:
129-
issues.append("FILE MISSING/CHECK NOT PERFORMED: gunc_failed.txt not found. Cannot verify genome counts.")
130-
return report, issues
127+
issues.append("FILE MISSING: gunc_failed.txt not found.")
131128

132129
expected_count = len(all_genomes)
133130
if expected_count == len(metadata_table):
134131
report.append("Genome count is correct")
135132
else:
136133
issues.append(f"GENOME COUNT ERROR: the number of genomes in the metadata table is "
137-
f"{len(metadata_table)}, expected {expected_count} (number of genomes in "
138-
f"mgyg_genomes minus number of genomes filtered out by GUNC")
134+
f"{len(metadata_table)}, expected {expected_count} based on the number of genomes in the "
135+
f"'all_genomes' folder.")
139136

140137
return report, issues
141138

@@ -163,10 +160,11 @@ def check_geography(metadata_table_contents, report, issues):
163160
unknown_count = unknown_count + 1
164161
else:
165162
# if a country is known, continent should be known
166-
issues.append(f"METADATA GEOGRAPHY: (check that known country and unknown continent is expected): "
167-
f"{genome} {country} {continent}")
163+
if "ocean" not in country.lower(): # if sample is oceanic, it might not have a continent
164+
issues.append(f"METADATA GEOGRAPHY: (check that known country and unknown continent is "
165+
f"expected): {genome} {country} {continent}")
168166
else:
169-
issues.append(f"METADATA GEOGRAPHY: (uknown continent): {genome} {country} {continent}")
167+
issues.append(f"METADATA GEOGRAPHY: (unknown continent): {genome} {country} {continent}")
170168
unknown_percentage = round(100 * unknown_count / len(metadata_table_contents), 2)
171169
if unknown_percentage > 90:
172170
message = "This is high. Verify that this number is expected."

0 commit comments

Comments
 (0)