
Commit 14812e9

Merge pull request #99 from nf-core/52-use-datavzrd-to-create-reports-that-are-usable-with-ncbench
52 use datavzrd to create reports that are usable with ncbench
2 parents 5f9541c + fad3640 · commit 14812e9

19 files changed (+659 / -31 lines)

.github/workflows/template_version_comment.yml

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ jobs:
         uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4

       - name: Read template version from .nf-core.yml
-        uses: pietrobolcato/action-read-yaml@1.0.0
+        uses: pietrobolcato/action-read-yaml@1.1.0
         id: read_yml
         with:
           config: ${{ github.workspace }}/.nf-core.yml

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+datasets:
+  report:
+    path: CSVPATH #happy.sv.summary.csv
+    separator: ","
+views:
+  test:
+    dataset: report
+    render-table:
+      columns:
+        Tool:
+          display-mode: normal
+        Type:
+          display-mode: normal
+        Filter:
+          display-mode: normal
+        TP_base:
+          display-mode: normal
+        FN:
+          display-mode: normal
+        TP_call:
+          display-mode: normal
+        FP:
+          display-mode: normal
+        UNK:
+          display-mode: normal
+        Precision:
+          display-mode: normal
+        Recall:
+          display-mode: normal
+        F1:
+          display-mode: normal
+        FP_gt:
+          display-mode: normal
+        FP_al:
+          display-mode: normal
+        Frac_NA:
+          display-mode: normal
+        TRUTH_TiTv_ratio:
+          display-mode: normal
+        QUERY_TiTv_ratio:
+          display-mode: normal
+        TRUTH_het_hom_ratio:
+          display-mode: normal
+        QUERY_het_hom_ratio:
+          display-mode: normal
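
These templates follow datavzrd's config layout: a datasets entry that points at a delimited file, and a views entry that declares how each column of that file is rendered. A small sketch (not part of the commit; it assumes PyYAML and pandas are available and uses a hypothetical config.yaml in which CSVPATH has already been replaced with a real file) for checking that every column a rendered config declares actually exists in its CSV:

# Sketch only: sanity-check a rendered datavzrd config against its CSV.
# "config.yaml" is a hypothetical, already-rendered config file.
import yaml        # assumes PyYAML is installed
import pandas as pd

cfg = yaml.safe_load(open("config.yaml"))
ds = cfg["datasets"]["report"]
table = pd.read_csv(ds["path"], sep=ds["separator"])
declared = cfg["views"]["test"]["render-table"]["columns"]
missing = [c for c in declared if c not in table.columns]
print("missing columns:", missing or "none")
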
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+datasets:
+  report:
+    path: CSVPATH #rtgtools.sv.summary.csv
+    separator: ","
+views:
+  test:
+    dataset: report
+    render-table:
+      columns:
+        Tool:
+          display-mode: normal
+        Threshold:
+          display-mode: normal
+        TP_base:
+          display-mode: normal
+        FN:
+          display-mode: normal
+        TP_call:
+          display-mode: normal
+        FP:
+          display-mode: normal
+        Precision:
+          display-mode: normal
+        Recall:
+          display-mode: normal
+        F1:
+          display-mode: normal

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+datasets:
+  report:
+    path: CSVPATH #sompy.sv.summary.csv
+    separator: ","
+views:
+  test:
+    dataset: report
+    render-table:
+      columns:
+        Tool:
+          display-mode: normal
+        Threshold:
+          display-mode: normal
+        TP_base:
+          display-mode: normal
+        FN:
+          display-mode: normal
+        TP_call:
+          display-mode: normal
+        FP:
+          display-mode: normal
+        Precision:
+          display-mode: normal
+        Recall:
+          display-mode: normal
+        F1:
+          display-mode: normal

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+datasets:
+  report:
+    path: CSVPATH #svbenchmark.sv.summary.csv
+    separator: ","
+views:
+  test:
+    dataset: report
+    render-table:
+      columns:
+        Tool:
+          display-mode: normal
+        TP_base:
+          display-mode: normal
+        FN:
+          display-mode: normal
+        TP_comp:
+          display-mode: normal
+        FP:
+          display-mode: normal
+        Precision:
+          display-mode: normal
+        Recall:
+          display-mode: normal
+        F1:
+          display-mode: normal

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+datasets:
+  report:
+    path: CSVPATH #truvari.sv.summary.csv
+    separator: ","
+views:
+  test:
+    dataset: report
+    render-table:
+      columns:
+        Tool:
+          display-mode: normal
+        TP_base:
+          display-mode: normal
+        FN:
+          display-mode: normal
+        TP_comp:
+          display-mode: normal
+        FP:
+          display-mode: normal
+        Precision:
+          display-mode: normal
+        Recall:
+          display-mode: normal
+        F1:
+          display-mode: normal

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+datasets:
+  report:
+    path: CSVPATH #wittyer.sv.summary.csv
+    separator: ","
+views:
+  test:
+    dataset: report
+    render-table:
+      columns:
+        Tool:
+          display-mode: normal
+        TP_base:
+          display-mode: normal
+        FN:
+          display-mode: normal
+        TP_comp:
+          display-mode: normal
+        FP:
+          display-mode: normal

bin/merge_reports.py

Lines changed: 43 additions & 26 deletions
@@ -56,17 +56,17 @@ def get_svbenchmark_resuls(file_paths):
         # Initialize a dictionary to store the data
         data = {
             'Tool': [filename.split(".")[0]],
-            'TP_base': [DTP_match.group(1) if DTP_match else 'NA'],
-            'FP': [FP_match.group(1) if FP_match else 'NA'],
-            'TP_comp': [DTP_match.group(1) if DTP_match else 'NA'],
-            'FN': [FN_match.group(1) if FN_match else 'NA'],
+            'TP_base': [int(DTP_match.group(1)) if DTP_match else 'NA'],
+            'FP': [int(FP_match.group(1)) if FP_match else 'NA'],
+            'TP_comp': [int(DTP_match.group(1)) if DTP_match else 'NA'],
+            'FN': [int(FN_match.group(1)) if FN_match else 'NA'],
             'Recall': [float(recall_match.group(1))/100 if recall_match else 'NA'],
             'Precision': [float(precision_match.group(1))/100 if precision_match else 'NA'],
             'F1': [float(f1_match.group(1)) if f1_match else 'NA']}

         df = pd.DataFrame(data)

-        merged_df = pd.concat([merged_df, df])
+        merged_df = pd.concat([merged_df, df], ignore_index=True)

     return merged_df
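
The switch to ignore_index=True matters here because every per-tool DataFrame is built with index 0, so concatenating without it leaves duplicate index labels in the merged table. A standalone illustration (not pipeline code):

# Sketch: why ignore_index=True is used when stacking the per-tool rows.
import pandas as pd

a = pd.DataFrame({"Tool": ["truvari"], "FP": [3]})
b = pd.DataFrame({"Tool": ["svbenchmark"], "FP": [5]})

print(pd.concat([a, b]).index.tolist())                     # [0, 0] - duplicated labels
print(pd.concat([a, b], ignore_index=True).index.tolist())  # [0, 1] - clean running index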

@@ -85,24 +85,23 @@ def get_truvari_resuls(file_paths):

         relevant_data = {
             "Tool": filename.split(".")[0],
-            "TP_base": data["TP-base"].iloc[0],
-            "TP_comp": data["TP-comp"].iloc[0],
-            "FP": data["FP"].iloc[0],
-            "FN": data["FN"].iloc[0],
-            "Precision": data["precision"].iloc[0],
-            "Recall": data["recall"].iloc[0],
-            "F1": data["f1"].iloc[0]}
+            "TP_base": int(data["TP-base"].iloc[0]),
+            "TP_comp": int(data["TP-comp"].iloc[0]),
+            "FP": int(data["FP"].iloc[0]),
+            "FN": int(data["FN"].iloc[0]),
+            "Precision": float(data["precision"].iloc[0]),
+            "Recall": float(data["recall"].iloc[0]),
+            "F1": float(data["f1"].iloc[0])}

         df = pd.DataFrame([relevant_data])
-        merged_df = pd.concat([merged_df, df])
+        merged_df = pd.concat([merged_df, df], ignore_index=True)

     return merged_df

 def get_wittyer_resuls(file_paths):
     # Initialize an empty DataFrame to store the merged data
     merged_df = pd.DataFrame()

-    # Iterate over each table file
     for file in file_paths:
         # Read the json into a DataFrame
         filename = os.path.basename(file)
@@ -115,17 +114,17 @@ def get_wittyer_resuls(file_paths):
             relevant_data.append({
                 "Tool": filename.split(".")[0],
                 "StatsType": stats["StatsType"],
-                "TP_base": stats["TruthTpCount"],
-                "TP_comp": stats["QueryTpCount"],
-                "FP": stats["QueryFpCount"],
-                "FN": stats["TruthFnCount"],
-                "Precision": stats["Precision"],
-                "Recall": stats["Recall"],
-                "F1": stats["Fscore"]}
-            )
+                "TP_base": int(stats["TruthTpCount"]) if pd.notna(stats["TruthTpCount"]) else 0,
+                "TP_comp": int(stats["QueryTpCount"]) if pd.notna(stats["QueryTpCount"]) else 0,
+                "FP": int(stats["QueryFpCount"]) if pd.notna(stats["QueryFpCount"]) else 0,
+                "FN": int(stats["TruthFnCount"]) if pd.notna(stats["TruthFnCount"]) else 0,
+                "Precision": float(stats["Precision"]) if pd.notna(stats["Precision"]) else float('nan'),
+                "Recall": float(stats["Recall"]) if pd.notna(stats["Recall"]) else float('nan'),
+                "F1": float(stats["Fscore"]) if pd.notna(stats["Fscore"]) else float('nan')
+            })

         df = pd.DataFrame(relevant_data)
-        merged_df = pd.concat([merged_df, df])
+        merged_df = pd.concat([merged_df, df], ignore_index=True)

     return merged_df
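
The pd.notna guards are needed because int() on a missing count raises instead of returning 0. A minimal illustration (not pipeline code):

# Sketch: converting a possibly-missing count the way the wittyer parser now does.
import pandas as pd

value = float("nan")          # stands in for a missing count in the wittyer JSON
tp = int(value) if pd.notna(value) else 0
print(tp)                     # 0, instead of "ValueError: cannot convert float NaN to integer"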

@@ -153,8 +152,14 @@ def get_rtgtools_resuls(file_paths):
         df['Tool'] = filename.split(".")[0]
         df_redesigned = df[['Tool', 'Threshold','True-pos-baseline','True-pos-call','False-pos','False-neg','Precision','Sensitivity','F-measure']]
         df_redesigned.columns = ['Tool', 'Threshold','TP_base','TP_call','FP','FN','Precision','Recall','F1']
+        # Convert relevant columns to integers, handling potential NaN values
+        int_columns = ['TP_base', 'FN', 'TP_call', 'FP']
+        float_columns = ['Recall','Precision','F1']
+        df_redesigned[int_columns] = df_redesigned[int_columns].fillna(0).astype(int)
+        df_redesigned[float_columns] = df_redesigned[float_columns].fillna(0).astype(float)
+
+        merged_df = pd.concat([merged_df, df_redesigned], ignore_index=True)

-        merged_df = pd.concat([merged_df, df_redesigned])
     return merged_df

 def get_happy_resuls(file_paths):
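
The new int_columns/float_columns handling is the column-wise version of the same defence: fillna(0).astype(int) turns a float column containing NaN into a clean integer column before it reaches the report. A standalone sketch:

# Sketch: the fillna(0).astype(int) pattern applied to a toy column with a gap.
import pandas as pd

df = pd.DataFrame({"TP_base": [10.0, None, 7.0]})
df["TP_base"] = df["TP_base"].fillna(0).astype(int)
print(df["TP_base"].tolist())   # [10, 0, 7]
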
@@ -172,7 +177,14 @@
         df_redesigned = df[['Tool', 'Type','Filter','TRUTH.TOTAL','TRUTH.TP','TRUTH.FN','QUERY.TOTAL','QUERY.FP','QUERY.UNK','FP.gt','FP.al','METRIC.Recall','METRIC.Precision','METRIC.Frac_NA','METRIC.F1_Score','TRUTH.TOTAL.TiTv_ratio','QUERY.TOTAL.TiTv_ratio','TRUTH.TOTAL.het_hom_ratio','QUERY.TOTAL.het_hom_ratio']]
         df_redesigned.columns = ['Tool', 'Type','Filter','TP_base','TP','FN','TP_call','FP','UNK','FP_gt','FP_al','Recall','Precision','Frac_NA','F1','TRUTH_TiTv_ratio','QUERY_TiTv_ratio','TRUTH_het_hom_ratio','QUERY_het_hom_ratio']

-        merged_df = pd.concat([merged_df, df_redesigned])
+        # Convert relevant columns to integers, handling potential NaN values
+        int_columns = ['TP_base', 'TP', 'FN', 'TP_call', 'FP', 'UNK', 'FP_gt', 'FP_al']
+        float_columns = ['Recall','Precision','Frac_NA','F1','TRUTH_TiTv_ratio','QUERY_TiTv_ratio','TRUTH_het_hom_ratio','QUERY_het_hom_ratio']
+        df_redesigned[int_columns] = df_redesigned[int_columns].fillna(0).astype(int)
+        df_redesigned[float_columns] = df_redesigned[float_columns].fillna(0).astype(float)
+
+        # Concatenate with the merged DataFrame
+        merged_df = pd.concat([merged_df, df_redesigned], ignore_index=True)

     return merged_df

@@ -189,8 +201,13 @@ def get_sompy_resuls(file_paths, vartype):
         df['Tool'] = filename.split(".")[0]
         df_redesigned = df[['Tool','type','total.truth','tp','fn','total.query','fp','unk','recall','precision','recall_lower','recall_upper','recall2','precision_lower','precision_upper','na','ambiguous','fp.region.size','fp.rate']]
         df_redesigned.columns = ['Tool','Type','TP_base','TP','FN','TP_call','FP','UNK','Recall','Precision','recall_lower','recall_upper','recall2','precision_lower','precision_upper','na','ambiguous','fp.region.size','fp.rate']
+        # Convert relevant columns to integers, handling potential NaN values
+        int_columns = ['TP_base', 'TP', 'FN', 'TP_call', 'FP', 'UNK']
+        float_columns = ['Recall','Precision','recall_lower','recall_upper','recall2','precision_lower','precision_upper','na','ambiguous','fp.region.size','fp.rate']
+        df_redesigned[int_columns] = df_redesigned[int_columns].fillna(0).astype(int)
+        df_redesigned[float_columns] = df_redesigned[float_columns].fillna(0).astype(float)

-        merged_df = pd.concat([merged_df, df_redesigned])
+        merged_df = pd.concat([merged_df, df_redesigned], ignore_index=True)

     if vartype == "snv":
         merged_df1 = merged_df[merged_df["Type"] == 'SNVs']
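
The write-out step is not shown in this diff, but the datavzrd templates above expect per-benchmarker summary CSVs such as truvari.sv.summary.csv, so presumably each merged table ends up on disk along these lines (illustrative names and values only):

# Sketch only: writing a merged benchmark table in the shape the templates point at via CSVPATH.
import pandas as pd

merged_df = pd.DataFrame({
    "Tool": ["caller_a", "caller_b"],                        # hypothetical caller names
    "TP_base": [100, 95], "TP_comp": [98, 93], "FP": [3, 6], "FN": [5, 10],
    "Precision": [0.970, 0.939], "Recall": [0.952, 0.905], "F1": [0.961, 0.921],
})
merged_df.to_csv("truvari.sv.summary.csv", index=False)      # illustrative file name
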

conf/modules.config

Lines changed: 9 additions & 1 deletion
@@ -277,6 +277,14 @@ process {
             mode: params.publish_dir_mode
         ]
     }
+    withName: DATAVZRD {
+        ext.prefix = {"${meta.id}"}
+        publishDir = [
+            path: {"${params.outdir}/summary/datavzrd/${meta.vartype}"},
+            pattern: "*",
+            mode: params.publish_dir_mode
+        ]
+    }
     // compare vcf results
     withName: "TABIX_BGZIP*"{
         ext.prefix = {input.toString() - ".vcf.gz"}
@@ -347,7 +355,7 @@
 // Don't publish results for these processes
 //
 process {
-    withName: 'TABIX_TABIX|TABIX_BGZIP|TABIX_BGZIPTABIX|BGZIP_TABIX|SURVIVOR_MERGE|BCFTOOLS_MERGE|REFORMAT_HEADER|BCFTOOLS_NORM|BCFTOOLS_DEDUP|BCFTOOLS_REHEADER|SORT_BED|UCSC_LIFTOVER|PICARD_LIFTOVERVCF|BCFTOOLS_VIEW_SUBSAMPLE' {
+    withName: 'TABIX_TABIX|TABIX_BGZIP|TABIX_BGZIPTABIX|BGZIP_TABIX|SURVIVOR_MERGE|BCFTOOLS_MERGE|REFORMAT_HEADER|BCFTOOLS_NORM|BCFTOOLS_DEDUP|BCFTOOLS_REHEADER|SORT_BED|UCSC_LIFTOVER|PICARD_LIFTOVERVCF|BCFTOOLS_VIEW_SUBSAMPLE|CREATE_DATAVZRD_INPUT' {
         publishDir = [
             path: { "${params.outdir}/test" },
             enabled: false

modules.json

Lines changed: 5 additions & 0 deletions
@@ -55,6 +55,11 @@
             "git_sha": "a5377837fe9013bde89de8689829e83e84086536",
             "installed_by": ["modules"]
         },
+        "datavzrd": {
+            "branch": "master",
+            "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
+            "installed_by": ["modules"]
+        },
         "happy/happy": {
             "branch": "master",
             "git_sha": "41fc46dfd94dddf4fdee633629090c1c3bc9f668",

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+process CREATE_DATAVZRD_INPUT {
+    tag "$meta.id"
+    label 'process_single'
+
+    input:
+    tuple val(meta), path(csv), path(template)
+
+    output:
+    tuple val(meta), path("*.yaml"), path(csv), emit: config
+
+    script:
+    """
+    #!/bin/bash
+
+    cat "$template" | sed "s|CSVPATH|$csv|g" > config.yaml
+    """
+}
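
The sed call above is a plain placeholder substitution; an equivalent sketch in Python (hypothetical file names, not how the pipeline invokes it):

# Sketch: what `sed "s|CSVPATH|$csv|g" > config.yaml` does, expressed in Python.
# "template.yaml" and "truvari.sv.summary.csv" are hypothetical example names.
from pathlib import Path

template = Path("template.yaml").read_text()
Path("config.yaml").write_text(template.replace("CSVPATH", "truvari.sv.summary.csv"))

Emitting the CSV alongside the generated YAML (path(csv) in the output tuple) presumably keeps the data file staged next to config.yaml when the downstream DATAVZRD module consumes this channel.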

modules/nf-core/datavzrd/environment.yml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default.
