Skip to content

Commit 968ffbb

Browse files
authored
Add files via upload
1 parent b8f8fa1 commit 968ffbb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+2829
-0
lines changed
+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
##fileformat=VCFv4.1
2+
##fileDate=20200429
3+
##source=strelka
4+
##source_version=2.9.2
5+
##startTime=Wed Apr 29 20:57:44 2020
6+
##cmdline=./strelka-2.9.2.centos6_x86_64/bin/configureStrelkaGermlineWorkflow.py --bam SRR10971381.bam --bam SRR10903401.bam --referenceFasta covid19-refseq.fasta --runDir SRR10903401.bam.founder
7+
##reference=file:///home/cfarkas/PeerJ_review/March25_2020/SARS-CoV-2_illumina_analysis/covid19-refseq.fasta
8+
##contig=<ID=NC_045512.2,length=29903>
9+
##content=strelka germline small-variant calls
10+
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
11+
##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant multi-site block. Non-variant blocks are defined independently for each sample. All sites in such a block are constrained to be non-variant, have the same filter value, and have sample values {GQX,DP,DPF} in range [x,y], y <= max(x+3,(x*1.3)).">
12+
##INFO=<ID=SNVHPOL,Number=1,Type=Integer,Description="SNV contextual homopolymer length">
13+
##INFO=<ID=CIGAR,Number=A,Type=String,Description="CIGAR alignment for each alternate indel allele">
14+
##INFO=<ID=RU,Number=A,Type=String,Description="Smallest repeating sequence unit extended or contracted in the indel allele relative to the reference. RUs are not reported if longer than 20 bases">
15+
##INFO=<ID=REFREP,Number=A,Type=Integer,Description="Number of times RU is repeated in reference">
16+
##INFO=<ID=IDREP,Number=A,Type=Integer,Description="Number of times RU is repeated in indel allele">
17+
##INFO=<ID=MQ,Number=1,Type=Integer,Description="RMS of mapping quality">
18+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
19+
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
20+
##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Empirically calibrated genotype quality score for variant sites, otherwise minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">
21+
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">
22+
##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">
23+
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum filtered basecall depth used for site genotyping within a non-variant multi-site block">
24+
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed. For indels this value only includes reads which confidently support each allele (posterior prob 0.51 or higher that read contains indicated allele vs all other intersecting indel alleles)">
25+
##FORMAT=<ID=ADF,Number=.,Type=Integer,Description="Allelic depths on the forward strand">
26+
##FORMAT=<ID=ADR,Number=.,Type=Integer,Description="Allelic depths on the reverse strand">
27+
##FORMAT=<ID=FT,Number=1,Type=String,Description="Sample filter, 'PASS' indicates that all filters have passed for this sample">
28+
##FORMAT=<ID=DPI,Number=1,Type=Integer,Description="Read depth associated with indel, taken from the site preceding the indel">
29+
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
30+
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase set identifier">
31+
##FORMAT=<ID=SB,Number=1,Type=Float,Description="Sample site strand bias">
32+
##FILTER=<ID=IndelConflict,Description="Indel genotypes from two or more loci conflict in at least one sample">
33+
##FILTER=<ID=SiteConflict,Description="Site is filtered due to an overlapping indel call filter">
34+
##FILTER=<ID=LowGQX,Description="Locus GQX is below threshold or not present">
35+
##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.4">
36+
##FILTER=<ID=HighSNVSB,Description="Sample SNV strand bias value (SB) exceeds 10">
37+
##FILTER=<ID=HighDepth,Description="Locus depth is greater than 3x the mean chromosome depth">
38+
##Depth_NC_045512.2=548.00
39+
##FILTER=<ID=LowDepth,Description="Locus depth is below 3">
40+
##FILTER=<ID=NotGenotyped,Description="Locus contains forcedGT input alleles which could not be genotyped">
41+
##FILTER=<ID=PloidyConflict,Description="Genotype call from variant caller not consistent with chromosome ploidy">
42+
##FILTER=<ID=NoPassedVariantGTs,Description="No samples at this locus pass all sample filters and have a variant genotype">
43+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2
44+
##FORMAT=<ID=FT,Number=1,Type=String,Description="Sample filter, 'PASS' indicates that all filters have passed for this sample">
45+
NC_045512.2 51 . T A 10 PASS SNVHPOL=2;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:42:13:8:1:6,2:5,2:1,0:0.7:PASS:44,0,117 .:.:.:0:0:0,0:0,0:0,0:0.0:LowGQX;LowDepth:.
46+
NC_045512.2 24323 . A C 160 PASS SNVHPOL=5;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/0:409:409:137:5:137,0:39,0:98,0:0.0:PASS:0,370,370 0/1:193:19:135:1:94,41:51,19:43,22:-16.0:PASS:195,0,370
+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
##fileformat=VCFv4.1
2+
##fileDate=20200429
3+
##source=strelka
4+
##source_version=2.9.2
5+
##startTime=Wed Apr 29 20:59:02 2020
6+
##cmdline=./strelka-2.9.2.centos6_x86_64/bin/configureStrelkaGermlineWorkflow.py --bam SRR10971381.bam --bam SRR10903402.bam --referenceFasta covid19-refseq.fasta --runDir SRR10903402.bam.founder
7+
##reference=file:///home/cfarkas/PeerJ_review/March25_2020/SARS-CoV-2_illumina_analysis/covid19-refseq.fasta
8+
##contig=<ID=NC_045512.2,length=29903>
9+
##content=strelka germline small-variant calls
10+
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
11+
##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant multi-site block. Non-variant blocks are defined independently for each sample. All sites in such a block are constrained to be non-variant, have the same filter value, and have sample values {GQX,DP,DPF} in range [x,y], y <= max(x+3,(x*1.3)).">
12+
##INFO=<ID=SNVHPOL,Number=1,Type=Integer,Description="SNV contextual homopolymer length">
13+
##INFO=<ID=CIGAR,Number=A,Type=String,Description="CIGAR alignment for each alternate indel allele">
14+
##INFO=<ID=RU,Number=A,Type=String,Description="Smallest repeating sequence unit extended or contracted in the indel allele relative to the reference. RUs are not reported if longer than 20 bases">
15+
##INFO=<ID=REFREP,Number=A,Type=Integer,Description="Number of times RU is repeated in reference">
16+
##INFO=<ID=IDREP,Number=A,Type=Integer,Description="Number of times RU is repeated in indel allele">
17+
##INFO=<ID=MQ,Number=1,Type=Integer,Description="RMS of mapping quality">
18+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
19+
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
20+
##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Empirically calibrated genotype quality score for variant sites, otherwise minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">
21+
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">
22+
##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">
23+
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum filtered basecall depth used for site genotyping within a non-variant multi-site block">
24+
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed. For indels this value only includes reads which confidently support each allele (posterior prob 0.51 or higher that read contains indicated allele vs all other intersecting indel alleles)">
25+
##FORMAT=<ID=ADF,Number=.,Type=Integer,Description="Allelic depths on the forward strand">
26+
##FORMAT=<ID=ADR,Number=.,Type=Integer,Description="Allelic depths on the reverse strand">
27+
##FORMAT=<ID=FT,Number=1,Type=String,Description="Sample filter, 'PASS' indicates that all filters have passed for this sample">
28+
##FORMAT=<ID=DPI,Number=1,Type=Integer,Description="Read depth associated with indel, taken from the site preceding the indel">
29+
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
30+
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase set identifier">
31+
##FORMAT=<ID=SB,Number=1,Type=Float,Description="Sample site strand bias">
32+
##FILTER=<ID=IndelConflict,Description="Indel genotypes from two or more loci conflict in at least one sample">
33+
##FILTER=<ID=SiteConflict,Description="Site is filtered due to an overlapping indel call filter">
34+
##FILTER=<ID=LowGQX,Description="Locus GQX is below threshold or not present">
35+
##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.4">
36+
##FILTER=<ID=HighSNVSB,Description="Sample SNV strand bias value (SB) exceeds 10">
37+
##FILTER=<ID=HighDepth,Description="Locus depth is greater than 3x the mean chromosome depth">
38+
##Depth_NC_045512.2=894.00
39+
##FILTER=<ID=LowDepth,Description="Locus depth is below 3">
40+
##FILTER=<ID=NotGenotyped,Description="Locus contains forcedGT input alleles which could not be genotyped">
41+
##FILTER=<ID=PloidyConflict,Description="Genotype call from variant caller not consistent with chromosome ploidy">
42+
##FILTER=<ID=NoPassedVariantGTs,Description="No samples at this locus pass all sample filters and have a variant genotype">
43+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2
44+
##FORMAT=<ID=FT,Number=1,Type=String,Description="Sample filter, 'PASS' indicates that all filters have passed for this sample">
45+
NC_045512.2 51 . T A 10 PASS SNVHPOL=2;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:42:13:8:1:6,2:5,2:1,0:0.7:PASS:44,0,117 0/0:5:5:2:0:2,0:2,0:0,0:0.0:LowGQX;LowDepth:0,6,71
+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
##fileformat=VCFv4.1
2+
##fileDate=20200429
3+
##source=strelka
4+
##source_version=2.9.2
5+
##startTime=Wed Apr 29 21:00:36 2020
6+
##cmdline=./strelka-2.9.2.centos6_x86_64/bin/configureStrelkaGermlineWorkflow.py --bam SRR10971381.bam --bam SRR11059940.bam --referenceFasta covid19-refseq.fasta --runDir SRR11059940.bam.founder
7+
##reference=file:///home/cfarkas/PeerJ_review/March25_2020/SARS-CoV-2_illumina_analysis/covid19-refseq.fasta
8+
##contig=<ID=NC_045512.2,length=29903>
9+
##content=strelka germline small-variant calls
10+
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
11+
##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant multi-site block. Non-variant blocks are defined independently for each sample. All sites in such a block are constrained to be non-variant, have the same filter value, and have sample values {GQX,DP,DPF} in range [x,y], y <= max(x+3,(x*1.3)).">
12+
##INFO=<ID=SNVHPOL,Number=1,Type=Integer,Description="SNV contextual homopolymer length">
13+
##INFO=<ID=CIGAR,Number=A,Type=String,Description="CIGAR alignment for each alternate indel allele">
14+
##INFO=<ID=RU,Number=A,Type=String,Description="Smallest repeating sequence unit extended or contracted in the indel allele relative to the reference. RUs are not reported if longer than 20 bases">
15+
##INFO=<ID=REFREP,Number=A,Type=Integer,Description="Number of times RU is repeated in reference">
16+
##INFO=<ID=IDREP,Number=A,Type=Integer,Description="Number of times RU is repeated in indel allele">
17+
##INFO=<ID=MQ,Number=1,Type=Integer,Description="RMS of mapping quality">
18+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
19+
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
20+
##FORMAT=<ID=GQX,Number=1,Type=Integer,Description="Empirically calibrated genotype quality score for variant sites, otherwise minimum of {Genotype quality assuming variant position,Genotype quality assuming non-variant position}">
21+
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Filtered basecall depth used for site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">
22+
##FORMAT=<ID=DPF,Number=1,Type=Integer,Description="Basecalls filtered from input prior to site genotyping. In a non-variant multi-site block this value represents the average of all sites in the block.">
23+
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum filtered basecall depth used for site genotyping within a non-variant multi-site block">
24+
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed. For indels this value only includes reads which confidently support each allele (posterior prob 0.51 or higher that read contains indicated allele vs all other intersecting indel alleles)">
25+
##FORMAT=<ID=ADF,Number=.,Type=Integer,Description="Allelic depths on the forward strand">
26+
##FORMAT=<ID=ADR,Number=.,Type=Integer,Description="Allelic depths on the reverse strand">
27+
##FORMAT=<ID=FT,Number=1,Type=String,Description="Sample filter, 'PASS' indicates that all filters have passed for this sample">
28+
##FORMAT=<ID=DPI,Number=1,Type=Integer,Description="Read depth associated with indel, taken from the site preceding the indel">
29+
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
30+
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase set identifier">
31+
##FORMAT=<ID=SB,Number=1,Type=Float,Description="Sample site strand bias">
32+
##FILTER=<ID=IndelConflict,Description="Indel genotypes from two or more loci conflict in at least one sample">
33+
##FILTER=<ID=SiteConflict,Description="Site is filtered due to an overlapping indel call filter">
34+
##FILTER=<ID=LowGQX,Description="Locus GQX is below threshold or not present">
35+
##FILTER=<ID=HighDPFRatio,Description="The fraction of basecalls filtered out at a site is greater than 0.4">
36+
##FILTER=<ID=HighSNVSB,Description="Sample SNV strand bias value (SB) exceeds 10">
37+
##FILTER=<ID=HighDepth,Description="Locus depth is greater than 3x the mean chromosome depth">
38+
##Depth_NC_045512.2=608.00
39+
##FILTER=<ID=LowDepth,Description="Locus depth is below 3">
40+
##FILTER=<ID=NotGenotyped,Description="Locus contains forcedGT input alleles which could not be genotyped">
41+
##FILTER=<ID=PloidyConflict,Description="Genotype call from variant caller not consistent with chromosome ploidy">
42+
##FILTER=<ID=NoPassedVariantGTs,Description="No samples at this locus pass all sample filters and have a variant genotype">
43+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2
44+
##FORMAT=<ID=FT,Number=1,Type=String,Description="Sample filter, 'PASS' indicates that all filters have passed for this sample">
45+
NC_045512.2 51 . T A 10 PASS SNVHPOL=2;MQ=56 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:42:13:8:1:6,2:5,2:1,0:0.7:PASS:44,0,117 0/0:48:48:17:0:17,0:10,0:7,0:0.0:PASS:0,51,323
46+
NC_045512.2 7866 . G T 3070 PASS SNVHPOL=3;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/0:421:421:141:19:141,0:43,0:98,0:0.0:PASS:0,370,370 1/1:205:19:401:4:72,329:38,192:34,137:-99.0:PASS:370,208,0

0 commit comments

Comments
 (0)