Skip to content

Commit 0ca66e3

Browse files
authored
Merge pull request #103 from nf-core/100-refactoring-input-truth-files
100 refactoring input truth files
2 parents 14812e9 + bf091c6 commit 0ca66e3

37 files changed

+374
-763
lines changed

Diff for: assets/datavzrd/sompy.datavzrd.template.yaml

+16 -2
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,12 @@ views:
99
columns:
1010
Tool:
1111
display-mode: normal
12-
Threshold:
12+
Type:
1313
display-mode: normal
1414
TP_base:
1515
display-mode: normal
16+
TP:
17+
display-mode: normal
1618
FN:
1719
display-mode: normal
1820
TP_call:
@@ -23,5 +25,17 @@ views:
2325
display-mode: normal
2426
Recall:
2527
display-mode: normal
26-
F1:
28+
recall_lower:
29+
display-mode: normal
30+
recall_upper:
31+
display-mode: normal
32+
recall2:
33+
display-mode: normal
34+
precision_lower:
35+
display-mode: normal
36+
precision_upper:
37+
display-mode: normal
38+
fp.region.size:
39+
display-mode: normal
40+
fp.rate:
2741
display-mode: normal

Diff for: assets/samplesheet.csv

-6
This file was deleted.

Diff for: assets/samplesheet_full.csv

-5
This file was deleted.

Diff for: assets/samplesheet_full_small.csv

+3
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
id,test_vcf,caller,refdist,chunksize,normshift,normdist,normsizediff,maxdist,typeignore,dup_to_ins,pctsize,pctseq,pctovl,evaluationmode,subsample
2+
test1,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/SevenBridges_GraphGATKRefine_05052017/HG002-NA24385-50x.union_170414.split.vcf.gz",graph,,,,,,,,,,,,,
3+
test2,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/OsloUniversityHospital_Exome_GATK_jointVC_11242015/HG002-HG003-HG004.jointVC.filter.vcf",gatk,,,,,,,,,,,,,"Sample_Diag-excap51-HG002-EEogPU"

Diff for: assets/samplesheet_full_sv.csv

+3
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
id,test_vcf,caller,refdist,chunksize,normshift,normdist,normsizediff,maxdist,typeignore,dup_to_ins,pctsize,pctseq,pctovl,evaluationmode,subsample
2+
test1,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/GIAB_Evaluations/BoutrosLab-SV_curation_GIABv0.6/manta_GIABv0.6/diploidSV_PASS_DUPtoINS.vcf.gz",manta,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts,
3+
test2,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/GIAB_Evaluations/BoutrosLab-SV_curation_GIABv0.6/delly_GIABv0.6/norm_recall_HG002.merged__filt_DUPtoINS.vcf.gz",delly,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts,

Diff for: assets/samplesheet_small.csv

+3
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
id,test_vcf,caller
2+
test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek/hg38/HG002.strelka.variants.chr21.vcf.gz,strelka
3+
test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek/hg38/HG002.bcftools.chr21.vcf.gz,bcftools

Diff for: assets/samplesheet_somatic.csv

-7
This file was deleted.

Diff for: assets/samplesheet_somatic_indel.csv

+3
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
id,test_vcf,caller,subsample
2+
test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.freebayes.chr21.vcf.gz,freebayes,
3+
test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.strelka.somatic_indels.vcf.gz,strelka,"TUMOR"

Diff for: assets/samplesheet_somatic_snv.csv

+4
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
id,test_vcf,caller,subsample
2+
test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.freebayes.chr21.vcf.gz,freebayes,
3+
test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.manta.somatic_sv.vcf.gz,manta,"HCC1395_HCC1395T"
4+
test3,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.strelka.somatic_snvs.vcf.gz,strelka,"TUMOR"

Diff for: assets/samplesheet_somatic_sv.csv

+3
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
id,test_vcf,caller,subsample
2+
test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.tiddit_sv_merge.vcf.gz,tiddit,"HCC1395_HCC1395T"
3+
test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.manta.somatic_sv.vcf.gz,manta,"HCC1395_HCC1395T"

Diff for: assets/samplesheet_sv.csv

+4
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
id,test_vcf,caller,pctsize,pctseq,pctovl,refdist,chunksize,normshift,normdist,normsizediff,maxdist,typeignore,evaluationmode
2+
test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/test/manta.HG002.chr21.vcf.gz,manta,0.3,0,0,100000,100000,0.3,0.3,0.3,100000,true,cts
3+
test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/test/Ashkenazim_HG002.filtered.sv.chr21.vcf.gz,merged,0.3,0,0,100000,100000,0.3,0.3,0.3,100000,true,cts
4+
test3,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/test/HG002_DRAGEN_SV_hg19.chr21.vcf.gz,dragen,0.3,0,0,100000,100000,0.3,0.3,0.3,100000,true,cts

Diff for: assets/samplesheet_sv_hg37.csv

+3
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
id,test_vcf,caller
2+
test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002_delly_SV_hg19.chr21.vcf.gz,delly
3+
test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002_manta_SV_hg19_genotype.chr21.vcf.gz,manta

Diff for: assets/schema_input.json

+1 -10
Original file line number | Diff line number | Diff line change
@@ -31,15 +31,6 @@
3131
"minLength": 1,
3232
"errorMessage": "Variant caller has to be defined. Can also be unknown, undefined or merged"
3333
},
34-
"vartype": {
35-
"type": "string",
36-
"pattern": "^\\S+$",
37-
"description": "Variant type to apply benchmarking",
38-
"meta": ["vartype"],
39-
"minLength": 1,
40-
"errorMessage": "Variant type can be only one of these: small, sv, snv, indel and cnv",
41-
"enum": ["small", "sv", "snv", "indel", "cnv"]
42-
},
4334
"subsample": {
4435
"type": "string",
4536
"pattern": "^\\S+$",
@@ -165,6 +156,6 @@
165156
"default": null
166157
}
167158
},
168-
"required": ["test_vcf", "caller", "vartype", "id"]
159+
"required": ["test_vcf", "caller", "id"]
169160
}
170161
}

Diff for: conf/igenomes.config

+1
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ params {
3737
mito_name = "chrM"
3838
macs_gsize = "2.7e9"
3939
blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed"
40+
4041
}
4142
'CHM13' {
4243
fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa"

0 commit comments

Comments
 (0)