@@ -35,8 +35,6 @@ def main(input_directory, domain, outfile, extra_weight_table_user_provided):
35
35
metadata_table_contents = load_metadata_table (os .path .join (input_directory , "genomes-all_metadata.tsv" ))
36
36
gunc_failed_list = load_gunc (os .path .join (intermediate_files_path , "gunc" , "gunc_failed.txt" ))
37
37
38
-
39
-
40
38
# load mgyg to original accession translation
41
39
mgyg_to_insdc , insdc_to_mgyg = load_name_conversion (os .path .join (input_directory , "additional_data" ,
42
40
"intermediate_files" ,
@@ -126,16 +124,15 @@ def check_genome_counts(metadata_table, cluster_splits, all_genomes, intermediat
126
124
gunc_failed_list = load_gunc (os .path .join (intermediate_files_path , "gunc" , "gunc_failed.txt" ))
127
125
128
126
if gunc_failed_list is None :
129
- issues .append ("FILE MISSING/CHECK NOT PERFORMED: gunc_failed.txt not found. Cannot verify genome counts." )
130
- return report , issues
127
+ issues .append ("FILE MISSING: gunc_failed.txt not found." )
131
128
132
129
expected_count = len (all_genomes )
133
130
if expected_count == len (metadata_table ):
134
131
report .append ("Genome count is correct" )
135
132
else :
136
133
issues .append (f"GENOME COUNT ERROR: the number of genomes in the metadata table is "
137
- f"{ len (metadata_table )} , expected { expected_count } ( number of genomes in "
138
- f"mgyg_genomes minus number of genomes filtered out by GUNC " )
134
+ f"{ len (metadata_table )} , expected { expected_count } based on the number of genomes in the "
135
+ f"'all_genomes' folder. " )
139
136
140
137
return report , issues
141
138
@@ -163,10 +160,11 @@ def check_geography(metadata_table_contents, report, issues):
163
160
unknown_count = unknown_count + 1
164
161
else :
165
162
# if a country is known, continent should be known
166
- issues .append (f"METADATA GEOGRAPHY: (check that known country and unknown continent is expected): "
167
- f"{ genome } { country } { continent } " )
163
+ if "ocean" not in country .lower (): # if sample is oceanic, it might not have a continent
164
+ issues .append (f"METADATA GEOGRAPHY: (check that known country and unknown continent is "
165
+ f"expected): { genome } { country } { continent } " )
168
166
else :
169
- issues .append (f"METADATA GEOGRAPHY: (uknown continent): { genome } { country } { continent } " )
167
+ issues .append (f"METADATA GEOGRAPHY: (unknown continent): { genome } { country } { continent } " )
170
168
unknown_percentage = round (100 * unknown_count / len (metadata_table_contents ), 2 )
171
169
if unknown_percentage > 90 :
172
170
message = "This is high. Verify that this number is expected."
0 commit comments